gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "diagnostic-core.h"
  29 #include "toplev.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT, rtx);
  51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT, rtx);
  53 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  54                                     unsigned HOST_WIDE_INT,
  55                                     unsigned HOST_WIDE_INT,
  56                                     unsigned HOST_WIDE_INT, rtx, int);
  57 static rtx mask_rtx (enum machine_mode, int, int, int);
  58 static rtx lshift_value (enum machine_mode, rtx, int, int);
  59 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  60                                     unsigned HOST_WIDE_INT, int);
  61 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  62 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  63 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  64
  65 /* Test whether a value is zero of a power of two.  */
  66 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  67
  68 #ifndef SLOW_UNALIGNED_ACCESS
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72 /* For compilers that support multiple targets with different word sizes,
  73    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  74    is the H8/300(H) compiler.  */
  75
  76 #ifndef MAX_BITS_PER_WORD
  77 #define MAX_BITS_PER_WORD BITS_PER_WORD
  78 #endif
  79
  80 /* Reduce conditional compilation elsewhere.  */
  81 #ifndef HAVE_insv
  82 #define HAVE_insv       0
  83 #define CODE_FOR_insv   CODE_FOR_nothing
  84 #define gen_insv(a,b,c,d) NULL_RTX
  85 #endif
  86 #ifndef HAVE_extv
  87 #define HAVE_extv       0
  88 #define CODE_FOR_extv   CODE_FOR_nothing
  89 #define gen_extv(a,b,c,d) NULL_RTX
  90 #endif
  91 #ifndef HAVE_extzv
  92 #define HAVE_extzv      0
  93 #define CODE_FOR_extzv  CODE_FOR_nothing
  94 #define gen_extzv(a,b,c,d) NULL_RTX
  95 #endif
  96
  97 void
  98 init_expmed (void)
  99 {
 100   struct
 101   {
 102     struct rtx_def reg;         rtunion reg_fld[2];
 103     struct rtx_def plus;        rtunion plus_fld1;
 104     struct rtx_def neg;
 105     struct rtx_def mult;        rtunion mult_fld1;
 106     struct rtx_def sdiv;        rtunion sdiv_fld1;
 107     struct rtx_def udiv;        rtunion udiv_fld1;
 108     struct rtx_def zext;
 109     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 110     struct rtx_def smod_32;     rtunion smod_32_fld1;
 111     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 112     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 113     struct rtx_def wide_trunc;
 114     struct rtx_def shift;       rtunion shift_fld1;
 115     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 116     struct rtx_def shift_add;   rtunion shift_add_fld1;
 117     struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
 118     struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
 119   } all;
 120
 121   rtx pow2[MAX_BITS_PER_WORD];
 122   rtx cint[MAX_BITS_PER_WORD];
 123   int m, n;
 124   enum machine_mode mode, wider_mode;
 125   int speed;
 126
 127
 128   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 129     {
 130       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 131       cint[m] = GEN_INT (m);
 132     }
 133   memset (&all, 0, sizeof all);
 134
 135   PUT_CODE (&all.reg, REG);
 136   /* Avoid using hard regs in ways which may be unsupported.  */
 137   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 138
 139   PUT_CODE (&all.plus, PLUS);
 140   XEXP (&all.plus, 0) = &all.reg;
 141   XEXP (&all.plus, 1) = &all.reg;
 142
 143   PUT_CODE (&all.neg, NEG);
 144   XEXP (&all.neg, 0) = &all.reg;
 145
 146   PUT_CODE (&all.mult, MULT);
 147   XEXP (&all.mult, 0) = &all.reg;
 148   XEXP (&all.mult, 1) = &all.reg;
 149
 150   PUT_CODE (&all.sdiv, DIV);
 151   XEXP (&all.sdiv, 0) = &all.reg;
 152   XEXP (&all.sdiv, 1) = &all.reg;
 153
 154   PUT_CODE (&all.udiv, UDIV);
 155   XEXP (&all.udiv, 0) = &all.reg;
 156   XEXP (&all.udiv, 1) = &all.reg;
 157
 158   PUT_CODE (&all.sdiv_32, DIV);
 159   XEXP (&all.sdiv_32, 0) = &all.reg;
 160   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 161
 162   PUT_CODE (&all.smod_32, MOD);
 163   XEXP (&all.smod_32, 0) = &all.reg;
 164   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 165
 166   PUT_CODE (&all.zext, ZERO_EXTEND);
 167   XEXP (&all.zext, 0) = &all.reg;
 168
 169   PUT_CODE (&all.wide_mult, MULT);
 170   XEXP (&all.wide_mult, 0) = &all.zext;
 171   XEXP (&all.wide_mult, 1) = &all.zext;
 172
 173   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 174   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 175
 176   PUT_CODE (&all.wide_trunc, TRUNCATE);
 177   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 178
 179   PUT_CODE (&all.shift, ASHIFT);
 180   XEXP (&all.shift, 0) = &all.reg;
 181
 182   PUT_CODE (&all.shift_mult, MULT);
 183   XEXP (&all.shift_mult, 0) = &all.reg;
 184
 185   PUT_CODE (&all.shift_add, PLUS);
 186   XEXP (&all.shift_add, 0) = &all.shift_mult;
 187   XEXP (&all.shift_add, 1) = &all.reg;
 188
 189   PUT_CODE (&all.shift_sub0, MINUS);
 190   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 191   XEXP (&all.shift_sub0, 1) = &all.reg;
 192
 193   PUT_CODE (&all.shift_sub1, MINUS);
 194   XEXP (&all.shift_sub1, 0) = &all.reg;
 195   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 196
 197   for (speed = 0; speed < 2; speed++)
 198     {
 199       crtl->maybe_hot_insn_p = speed;
 200       zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
 201
 202       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 203            mode != VOIDmode;
 204            mode = GET_MODE_WIDER_MODE (mode))
 205         {
 206           PUT_MODE (&all.reg, mode);
 207           PUT_MODE (&all.plus, mode);
 208           PUT_MODE (&all.neg, mode);
 209           PUT_MODE (&all.mult, mode);
 210           PUT_MODE (&all.sdiv, mode);
 211           PUT_MODE (&all.udiv, mode);
 212           PUT_MODE (&all.sdiv_32, mode);
 213           PUT_MODE (&all.smod_32, mode);
 214           PUT_MODE (&all.wide_trunc, mode);
 215           PUT_MODE (&all.shift, mode);
 216           PUT_MODE (&all.shift_mult, mode);
 217           PUT_MODE (&all.shift_add, mode);
 218           PUT_MODE (&all.shift_sub0, mode);
 219           PUT_MODE (&all.shift_sub1, mode);
 220
 221           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 222           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 223           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 224           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 225           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 226
 227           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 228                                           <= 2 * add_cost[speed][mode]);
 229           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 230                                           <= 4 * add_cost[speed][mode]);
 231
 232           wider_mode = GET_MODE_WIDER_MODE (mode);
 233           if (wider_mode != VOIDmode)
 234             {
 235               PUT_MODE (&all.zext, wider_mode);
 236               PUT_MODE (&all.wide_mult, wider_mode);
 237               PUT_MODE (&all.wide_lshr, wider_mode);
 238               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 239
 240               mul_widen_cost[speed][wider_mode]
 241                 = rtx_cost (&all.wide_mult, SET, speed);
 242               mul_highpart_cost[speed][mode]
 243                 = rtx_cost (&all.wide_trunc, SET, speed);
 244             }
 245
 246           shift_cost[speed][mode][0] = 0;
 247           shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
 248             = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
 249
 250           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 251           for (m = 1; m < n; m++)
 252             {
 253               XEXP (&all.shift, 1) = cint[m];
 254               XEXP (&all.shift_mult, 1) = pow2[m];
 255
 256               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 257               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 258               shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
 259               shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
 260             }
 261         }
 262     }
 263   default_rtl_profile ();
 264 }
 265
 266 /* Return an rtx representing minus the value of X.
 267    MODE is the intended mode of the result,
 268    useful if X is a CONST_INT.  */
 269
 270 rtx
 271 negate_rtx (enum machine_mode mode, rtx x)
 272 {
 273   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 274
 275   if (result == 0)
 276     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 277
 278   return result;
 279 }
 280
 281 /* Report on the availability of insv/extv/extzv and the desired mode
 282    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 283    is false; else the mode of the specified operand.  If OPNO is -1,
 284    all the caller cares about is whether the insn is available.  */
 285 enum machine_mode
 286 mode_for_extraction (enum extraction_pattern pattern, int opno)
 287 {
 288   const struct insn_data_d *data;
 289
 290   switch (pattern)
 291     {
 292     case EP_insv:
 293       if (HAVE_insv)
 294         {
 295           data = &insn_data[CODE_FOR_insv];
 296           break;
 297         }
 298       return MAX_MACHINE_MODE;
 299
 300     case EP_extv:
 301       if (HAVE_extv)
 302         {
 303           data = &insn_data[CODE_FOR_extv];
 304           break;
 305         }
 306       return MAX_MACHINE_MODE;
 307
 308     case EP_extzv:
 309       if (HAVE_extzv)
 310         {
 311           data = &insn_data[CODE_FOR_extzv];
 312           break;
 313         }
 314       return MAX_MACHINE_MODE;
 315
 316     default:
 317       gcc_unreachable ();
 318     }
 319
 320   if (opno == -1)
 321     return VOIDmode;
 322
 323   /* Everyone who uses this function used to follow it with
 324      if (result == VOIDmode) result = word_mode; */
 325   if (data->operand[opno].mode == VOIDmode)
 326     return word_mode;
 327   return data->operand[opno].mode;
 328 }
 329
 330 /* Return true if X, of mode MODE, matches the predicate for operand
 331    OPNO of instruction ICODE.  Allow volatile memories, regardless of
 332    the ambient volatile_ok setting.  */
 333
 334 static bool
 335 check_predicate_volatile_ok (enum insn_code icode, int opno,
 336                              rtx x, enum machine_mode mode)
 337 {
 338   bool save_volatile_ok, result;
 339
 340   save_volatile_ok = volatile_ok;
 341   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 342   volatile_ok = save_volatile_ok;
 343   return result;
 344 }
 345 \f
 346 /* A subroutine of store_bit_field, with the same arguments.  Return true
 347    if the operation could be implemented.
 348
 349    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 350    no other way of implementing the operation.  If FALLBACK_P is false,
 351    return false instead.  */
 352
 353 static bool
 354 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 355                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 356                    rtx value, bool fallback_p)
 357 {
 358   unsigned int unit
 359     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 360   unsigned HOST_WIDE_INT offset, bitpos;
 361   rtx op0 = str_rtx;
 362   int byte_offset;
 363   rtx orig_value;
 364
 365   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 366
 367   while (GET_CODE (op0) == SUBREG)
 368     {
 369       /* The following line once was done only if WORDS_BIG_ENDIAN,
 370          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 371          meaningful at a much higher level; when structures are copied
 372          between memory and regs, the higher-numbered regs
 373          always get higher addresses.  */
 374       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 375       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 376
 377       byte_offset = 0;
 378
 379       /* Paradoxical subregs need special handling on big endian machines.  */
 380       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 381         {
 382           int difference = inner_mode_size - outer_mode_size;
 383
 384           if (WORDS_BIG_ENDIAN)
 385             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 386           if (BYTES_BIG_ENDIAN)
 387             byte_offset += difference % UNITS_PER_WORD;
 388         }
 389       else
 390         byte_offset = SUBREG_BYTE (op0);
 391
 392       bitnum += byte_offset * BITS_PER_UNIT;
 393       op0 = SUBREG_REG (op0);
 394     }
 395
 396   /* No action is needed if the target is a register and if the field
 397      lies completely outside that register.  This can occur if the source
 398      code contains an out-of-bounds access to a small array.  */
 399   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 400     return true;
 401
 402   /* Use vec_set patterns for inserting parts of vectors whenever
 403      available.  */
 404   if (VECTOR_MODE_P (GET_MODE (op0))
 405       && !MEM_P (op0)
 406       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 407       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 408       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 409       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 410     {
 411       enum machine_mode outermode = GET_MODE (op0);
 412       enum machine_mode innermode = GET_MODE_INNER (outermode);
 413       int icode = (int) optab_handler (vec_set_optab, outermode);
 414       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 415       rtx rtxpos = GEN_INT (pos);
 416       rtx src = value;
 417       rtx dest = op0;
 418       rtx pat, seq;
 419       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 420       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 421       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 422
 423       start_sequence ();
 424
 425       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 426         src = copy_to_mode_reg (mode1, src);
 427
 428       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 429         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 430
 431       /* We could handle this, but we should always be called with a pseudo
 432          for our targets and all insns should take them as outputs.  */
 433       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 434                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 435                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 436       pat = GEN_FCN (icode) (dest, src, rtxpos);
 437       seq = get_insns ();
 438       end_sequence ();
 439       if (pat)
 440         {
 441           emit_insn (seq);
 442           emit_insn (pat);
 443           return true;
 444         }
 445     }
 446
 447   /* If the target is a register, overwriting the entire object, or storing
 448      a full-word or multi-word field can be done with just a SUBREG.
 449
 450      If the target is memory, storing any naturally aligned field can be
 451      done with a simple store.  For targets that support fast unaligned
 452      memory, any naturally sized, unit aligned field can be done directly.  */
 453
 454   offset = bitnum / unit;
 455   bitpos = bitnum % unit;
 456   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 457                 + (offset * UNITS_PER_WORD);
 458
 459   if (bitpos == 0
 460       && bitsize == GET_MODE_BITSIZE (fieldmode)
 461       && (!MEM_P (op0)
 462           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 463              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 464              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 465           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 466              || (offset * BITS_PER_UNIT % bitsize == 0
 467                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 468     {
 469       if (MEM_P (op0))
 470         op0 = adjust_address (op0, fieldmode, offset);
 471       else if (GET_MODE (op0) != fieldmode)
 472         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 473                                    byte_offset);
 474       emit_move_insn (op0, value);
 475       return true;
 476     }
 477
 478   /* Make sure we are playing with integral modes.  Pun with subregs
 479      if we aren't.  This must come after the entire register case above,
 480      since that case is valid for any mode.  The following cases are only
 481      valid for integral modes.  */
 482   {
 483     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 484     if (imode != GET_MODE (op0))
 485       {
 486         if (MEM_P (op0))
 487           op0 = adjust_address (op0, imode, 0);
 488         else
 489           {
 490             gcc_assert (imode != BLKmode);
 491             op0 = gen_lowpart (imode, op0);
 492           }
 493       }
 494   }
 495
 496   /* We may be accessing data outside the field, which means
 497      we can alias adjacent data.  */
 498   if (MEM_P (op0))
 499     {
 500       op0 = shallow_copy_rtx (op0);
 501       set_mem_alias_set (op0, 0);
 502       set_mem_expr (op0, 0);
 503     }
 504
 505   /* If OP0 is a register, BITPOS must count within a word.
 506      But as we have it, it counts within whatever size OP0 now has.
 507      On a bigendian machine, these are not the same, so convert.  */
 508   if (BYTES_BIG_ENDIAN
 509       && !MEM_P (op0)
 510       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 511     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 512
 513   /* Storing an lsb-aligned field in a register
 514      can be done with a movestrict instruction.  */
 515
 516   if (!MEM_P (op0)
 517       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 518       && bitsize == GET_MODE_BITSIZE (fieldmode)
 519       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 520     {
 521       int icode = optab_handler (movstrict_optab, fieldmode);
 522       rtx insn;
 523       rtx start = get_last_insn ();
 524       rtx arg0 = op0;
 525
 526       /* Get appropriate low part of the value being stored.  */
 527       if (CONST_INT_P (value) || REG_P (value))
 528         value = gen_lowpart (fieldmode, value);
 529       else if (!(GET_CODE (value) == SYMBOL_REF
 530                  || GET_CODE (value) == LABEL_REF
 531                  || GET_CODE (value) == CONST))
 532         value = convert_to_mode (fieldmode, value, 0);
 533
 534       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 535         value = copy_to_mode_reg (fieldmode, value);
 536
 537       if (GET_CODE (op0) == SUBREG)
 538         {
 539           /* Else we've got some float mode source being extracted into
 540              a different float mode destination -- this combination of
 541              subregs results in Severe Tire Damage.  */
 542           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 543                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 544                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 545           arg0 = SUBREG_REG (op0);
 546         }
 547
 548       insn = (GEN_FCN (icode)
 549                  (gen_rtx_SUBREG (fieldmode, arg0,
 550                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 551                                   + (offset * UNITS_PER_WORD)),
 552                                   value));
 553       if (insn)
 554         {
 555           emit_insn (insn);
 556           return true;
 557         }
 558       delete_insns_since (start);
 559     }
 560
 561   /* Handle fields bigger than a word.  */
 562
 563   if (bitsize > BITS_PER_WORD)
 564     {
 565       /* Here we transfer the words of the field
 566          in the order least significant first.
 567          This is because the most significant word is the one which may
 568          be less than full.
 569          However, only do that if the value is not BLKmode.  */
 570
 571       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 572       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 573       unsigned int i;
 574       rtx last;
 575
 576       /* This is the mode we must force value to, so that there will be enough
 577          subwords to extract.  Note that fieldmode will often (always?) be
 578          VOIDmode, because that is what store_field uses to indicate that this
 579          is a bit field, but passing VOIDmode to operand_subword_force
 580          is not allowed.  */
 581       fieldmode = GET_MODE (value);
 582       if (fieldmode == VOIDmode)
 583         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 584
 585       last = get_last_insn ();
 586       for (i = 0; i < nwords; i++)
 587         {
 588           /* If I is 0, use the low-order word in both field and target;
 589              if I is 1, use the next to lowest word; and so on.  */
 590           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 591           unsigned int bit_offset = (backwards
 592                                      ? MAX ((int) bitsize - ((int) i + 1)
 593                                             * BITS_PER_WORD,
 594                                             0)
 595                                      : (int) i * BITS_PER_WORD);
 596           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 597
 598           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 599                                             bitsize - i * BITS_PER_WORD),
 600                                   bitnum + bit_offset, word_mode,
 601                                   value_word, fallback_p))
 602             {
 603               delete_insns_since (last);
 604               return false;
 605             }
 606         }
 607       return true;
 608     }
 609
 610   /* From here on we can assume that the field to be stored in is
 611      a full-word (whatever type that is), since it is shorter than a word.  */
 612
 613   /* OFFSET is the number of words or bytes (UNIT says which)
 614      from STR_RTX to the first word or byte containing part of the field.  */
 615
 616   if (!MEM_P (op0))
 617     {
 618       if (offset != 0
 619           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 620         {
 621           if (!REG_P (op0))
 622             {
 623               /* Since this is a destination (lvalue), we can't copy
 624                  it to a pseudo.  We can remove a SUBREG that does not
 625                  change the size of the operand.  Such a SUBREG may
 626                  have been added above.  */
 627               gcc_assert (GET_CODE (op0) == SUBREG
 628                           && (GET_MODE_SIZE (GET_MODE (op0))
 629                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 630               op0 = SUBREG_REG (op0);
 631             }
 632           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 633                                 op0, (offset * UNITS_PER_WORD));
 634         }
 635       offset = 0;
 636     }
 637
 638   /* If VALUE has a floating-point or complex mode, access it as an
 639      integer of the corresponding size.  This can occur on a machine
 640      with 64 bit registers that uses SFmode for float.  It can also
 641      occur for unaligned float or complex fields.  */
 642   orig_value = value;
 643   if (GET_MODE (value) != VOIDmode
 644       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 645       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 646     {
 647       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 648       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 649     }
 650
 651   /* Now OFFSET is nonzero only if OP0 is memory
 652      and is therefore always measured in bytes.  */
 653
 654   if (HAVE_insv
 655       && GET_MODE (value) != BLKmode
 656       && bitsize > 0
 657       && GET_MODE_BITSIZE (op_mode) >= bitsize
 658       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 659             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 660       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 661                                                         VOIDmode)
 662       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 663     {
 664       int xbitpos = bitpos;
 665       rtx value1;
 666       rtx xop0 = op0;
 667       rtx last = get_last_insn ();
 668       rtx pat;
 669       bool copy_back = false;
 670
 671       /* Add OFFSET into OP0's address.  */
 672       if (MEM_P (xop0))
 673         xop0 = adjust_address (xop0, byte_mode, offset);
 674
 675       /* If xop0 is a register, we need it in OP_MODE
 676          to make it acceptable to the format of insv.  */
 677       if (GET_CODE (xop0) == SUBREG)
 678         /* We can't just change the mode, because this might clobber op0,
 679            and we will need the original value of op0 if insv fails.  */
 680         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 681       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 682         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 683
 684       /* If the destination is a paradoxical subreg such that we need a
 685          truncate to the inner mode, perform the insertion on a temporary and
 686          truncate the result to the original destination.  Note that we can't
 687          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 688          X) 0)) is (reg:N X).  */
 689       if (GET_CODE (xop0) == SUBREG
 690           && REG_P (SUBREG_REG (xop0))
 691           && (!TRULY_NOOP_TRUNCATION
 692               (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
 693                GET_MODE_BITSIZE (op_mode))))
 694         {
 695           rtx tem = gen_reg_rtx (op_mode);
 696           emit_move_insn (tem, xop0);
 697           xop0 = tem;
 698           copy_back = true;
 699         }
 700
 701       /* On big-endian machines, we count bits from the most significant.
 702          If the bit field insn does not, we must invert.  */
 703
 704       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 705         xbitpos = unit - bitsize - xbitpos;
 706
 707       /* We have been counting XBITPOS within UNIT.
 708          Count instead within the size of the register.  */
 709       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 710         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 711
 712       unit = GET_MODE_BITSIZE (op_mode);
 713
 714       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 715       value1 = value;
 716       if (GET_MODE (value) != op_mode)
 717         {
 718           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 719             {
 720               /* Optimization: Don't bother really extending VALUE
 721                  if it has all the bits we will actually use.  However,
 722                  if we must narrow it, be sure we do it correctly.  */
 723
 724               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 725                 {
 726                   rtx tmp;
 727
 728                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 729                   if (! tmp)
 730                     tmp = simplify_gen_subreg (op_mode,
 731                                                force_reg (GET_MODE (value),
 732                                                           value1),
 733                                                GET_MODE (value), 0);
 734                   value1 = tmp;
 735                 }
 736               else
 737                 value1 = gen_lowpart (op_mode, value1);
 738             }
 739           else if (CONST_INT_P (value))
 740             value1 = gen_int_mode (INTVAL (value), op_mode);
 741           else
 742             /* Parse phase is supposed to make VALUE's data type
 743                match that of the component reference, which is a type
 744                at least as wide as the field; so VALUE should have
 745                a mode that corresponds to that type.  */
 746             gcc_assert (CONSTANT_P (value));
 747         }
 748
 749       /* If this machine's insv insists on a register,
 750          get VALUE1 into a register.  */
 751       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 752              (value1, op_mode)))
 753         value1 = force_reg (op_mode, value1);
 754
 755       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 756       if (pat)
 757         {
 758           emit_insn (pat);
 759
 760           if (copy_back)
 761             convert_move (op0, xop0, true);
 762           return true;
 763         }
 764       delete_insns_since (last);
 765     }
 766
 767   /* If OP0 is a memory, try copying it to a register and seeing if a
 768      cheap register alternative is available.  */
 769   if (HAVE_insv && MEM_P (op0))
 770     {
 771       enum machine_mode bestmode;
 772
 773       /* Get the mode to use for inserting into this field.  If OP0 is
 774          BLKmode, get the smallest mode consistent with the alignment. If
 775          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 776          mode. Otherwise, use the smallest mode containing the field.  */
 777
 778       if (GET_MODE (op0) == BLKmode
 779           || (op_mode != MAX_MACHINE_MODE
 780               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 781         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 782                                   (op_mode == MAX_MACHINE_MODE
 783                                    ? VOIDmode : op_mode),
 784                                   MEM_VOLATILE_P (op0));
 785       else
 786         bestmode = GET_MODE (op0);
 787
 788       if (bestmode != VOIDmode
 789           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 790           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 791                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 792         {
 793           rtx last, tempreg, xop0;
 794           unsigned HOST_WIDE_INT xoffset, xbitpos;
 795
 796           last = get_last_insn ();
 797
 798           /* Adjust address to point to the containing unit of
 799              that mode.  Compute the offset as a multiple of this unit,
 800              counting in bytes.  */
 801           unit = GET_MODE_BITSIZE (bestmode);
 802           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 803           xbitpos = bitnum % unit;
 804           xop0 = adjust_address (op0, bestmode, xoffset);
 805
 806           /* Fetch that unit, store the bitfield in it, then store
 807              the unit.  */
 808           tempreg = copy_to_reg (xop0);
 809           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 810                                  fieldmode, orig_value, false))
 811             {
 812               emit_move_insn (xop0, tempreg);
 813               return true;
 814             }
 815           delete_insns_since (last);
 816         }
 817     }
 818
 819   if (!fallback_p)
 820     return false;
 821
 822   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 823   return true;
 824 }
 825
 826 /* Generate code to store value from rtx VALUE
 827    into a bit-field within structure STR_RTX
 828    containing BITSIZE bits starting at bit BITNUM.
 829    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 830
 831 void
 832 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 833                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 834                  rtx value)
 835 {
 836   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 837     gcc_unreachable ();
 838 }
 839 \f
 840 /* Use shifts and boolean operations to store VALUE
 841    into a bit field of width BITSIZE
 842    in a memory location specified by OP0 except offset by OFFSET bytes.
 843      (OFFSET must be 0 if OP0 is a register.)
 844    The field starts at position BITPOS within the byte.
 845     (If OP0 is a register, it may be a full word or a narrower mode,
 846      but BITPOS still counts within a full word,
 847      which is significant on bigendian machines.)  */
 848
 849 static void
 850 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 851                        unsigned HOST_WIDE_INT bitsize,
 852                        unsigned HOST_WIDE_INT bitpos, rtx value)
 853 {
 854   enum machine_mode mode;
 855   unsigned int total_bits = BITS_PER_WORD;
 856   rtx temp;
 857   int all_zero = 0;
 858   int all_one = 0;
 859
 860   /* There is a case not handled here:
 861      a structure with a known alignment of just a halfword
 862      and a field split across two aligned halfwords within the structure.
 863      Or likewise a structure with a known alignment of just a byte
 864      and a field split across two bytes.
 865      Such cases are not supposed to be able to occur.  */
 866
 867   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 868     {
 869       gcc_assert (!offset);
 870       /* Special treatment for a bit field split across two registers.  */
 871       if (bitsize + bitpos > BITS_PER_WORD)
 872         {
 873           store_split_bit_field (op0, bitsize, bitpos, value);
 874           return;
 875         }
 876     }
 877   else
 878     {
 879       /* Get the proper mode to use for this field.  We want a mode that
 880          includes the entire field.  If such a mode would be larger than
 881          a word, we won't be doing the extraction the normal way.
 882          We don't want a mode bigger than the destination.  */
 883
 884       mode = GET_MODE (op0);
 885       if (GET_MODE_BITSIZE (mode) == 0
 886           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 887         mode = word_mode;
 888
 889       if (MEM_VOLATILE_P (op0)
 890           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 891           && flag_strict_volatile_bitfields > 0)
 892         mode = GET_MODE (op0);
 893       else
 894         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 895                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 896
 897       if (mode == VOIDmode)
 898         {
 899           /* The only way this should occur is if the field spans word
 900              boundaries.  */
 901           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 902                                  value);
 903           return;
 904         }
 905
 906       total_bits = GET_MODE_BITSIZE (mode);
 907
 908       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 909          be in the range 0 to total_bits-1, and put any excess bytes in
 910          OFFSET.  */
 911       if (bitpos >= total_bits)
 912         {
 913           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 914           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 915                      * BITS_PER_UNIT);
 916         }
 917
 918       /* Get ref to an aligned byte, halfword, or word containing the field.
 919          Adjust BITPOS to be position within a word,
 920          and OFFSET to be the offset of that word.
 921          Then alter OP0 to refer to that word.  */
 922       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 923       offset -= (offset % (total_bits / BITS_PER_UNIT));
 924       op0 = adjust_address (op0, mode, offset);
 925     }
 926
 927   mode = GET_MODE (op0);
 928
 929   /* Now MODE is either some integral mode for a MEM as OP0,
 930      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 931      The bit field is contained entirely within OP0.
 932      BITPOS is the starting bit number within OP0.
 933      (OP0's mode may actually be narrower than MODE.)  */
 934
 935   if (BYTES_BIG_ENDIAN)
 936       /* BITPOS is the distance between our msb
 937          and that of the containing datum.
 938          Convert it to the distance from the lsb.  */
 939       bitpos = total_bits - bitsize - bitpos;
 940
 941   /* Now BITPOS is always the distance between our lsb
 942      and that of OP0.  */
 943
 944   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 945      we must first convert its mode to MODE.  */
 946
 947   if (CONST_INT_P (value))
 948     {
 949       HOST_WIDE_INT v = INTVAL (value);
 950
 951       if (bitsize < HOST_BITS_PER_WIDE_INT)
 952         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 953
 954       if (v == 0)
 955         all_zero = 1;
 956       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 957                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 958                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 959         all_one = 1;
 960
 961       value = lshift_value (mode, value, bitpos, bitsize);
 962     }
 963   else
 964     {
 965       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 966                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 967
 968       if (GET_MODE (value) != mode)
 969         value = convert_to_mode (mode, value, 1);
 970
 971       if (must_and)
 972         value = expand_binop (mode, and_optab, value,
 973                               mask_rtx (mode, 0, bitsize, 0),
 974                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 975       if (bitpos > 0)
 976         value = expand_shift (LSHIFT_EXPR, mode, value,
 977                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 978     }
 979
 980   /* Now clear the chosen bits in OP0,
 981      except that if VALUE is -1 we need not bother.  */
 982   /* We keep the intermediates in registers to allow CSE to combine
 983      consecutive bitfield assignments.  */
 984
 985   temp = force_reg (mode, op0);
 986
 987   if (! all_one)
 988     {
 989       temp = expand_binop (mode, and_optab, temp,
 990                            mask_rtx (mode, bitpos, bitsize, 1),
 991                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 992       temp = force_reg (mode, temp);
 993     }
 994
 995   /* Now logical-or VALUE into OP0, unless it is zero.  */
 996
 997   if (! all_zero)
 998     {
 999       temp = expand_binop (mode, ior_optab, temp, value,
1000                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1001       temp = force_reg (mode, temp);
1002     }
1003
1004   if (op0 != temp)
1005     {
1006       op0 = copy_rtx (op0);
1007       emit_move_insn (op0, temp);
1008     }
1009 }
1010 \f
1011 /* Store a bit field that is split across multiple accessible memory objects.
1012
1013    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1014    BITSIZE is the field width; BITPOS the position of its first bit
1015    (within the word).
1016    VALUE is the value to store.
1017
1018    This does not yet handle fields wider than BITS_PER_WORD.  */
1019
1020 static void
1021 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1022                        unsigned HOST_WIDE_INT bitpos, rtx value)
1023 {
1024   unsigned int unit;
1025   unsigned int bitsdone = 0;
1026
1027   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1028      much at a time.  */
1029   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1030     unit = BITS_PER_WORD;
1031   else
1032     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1033
1034   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1035      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1036      that VALUE might be a floating-point constant.  */
1037   if (CONSTANT_P (value) && !CONST_INT_P (value))
1038     {
1039       rtx word = gen_lowpart_common (word_mode, value);
1040
1041       if (word && (value != word))
1042         value = word;
1043       else
1044         value = gen_lowpart_common (word_mode,
1045                                     force_reg (GET_MODE (value) != VOIDmode
1046                                                ? GET_MODE (value)
1047                                                : word_mode, value));
1048     }
1049
1050   while (bitsdone < bitsize)
1051     {
1052       unsigned HOST_WIDE_INT thissize;
1053       rtx part, word;
1054       unsigned HOST_WIDE_INT thispos;
1055       unsigned HOST_WIDE_INT offset;
1056
1057       offset = (bitpos + bitsdone) / unit;
1058       thispos = (bitpos + bitsdone) % unit;
1059
1060       /* THISSIZE must not overrun a word boundary.  Otherwise,
1061          store_fixed_bit_field will call us again, and we will mutually
1062          recurse forever.  */
1063       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1064       thissize = MIN (thissize, unit - thispos);
1065
1066       if (BYTES_BIG_ENDIAN)
1067         {
1068           int total_bits;
1069
1070           /* We must do an endian conversion exactly the same way as it is
1071              done in extract_bit_field, so that the two calls to
1072              extract_fixed_bit_field will have comparable arguments.  */
1073           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1074             total_bits = BITS_PER_WORD;
1075           else
1076             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1077
1078           /* Fetch successively less significant portions.  */
1079           if (CONST_INT_P (value))
1080             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1081                              >> (bitsize - bitsdone - thissize))
1082                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1083           else
1084             /* The args are chosen so that the last part includes the
1085                lsb.  Give extract_bit_field the value it needs (with
1086                endianness compensation) to fetch the piece we want.  */
1087             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1088                                             total_bits - bitsize + bitsdone,
1089                                             NULL_RTX, 1);
1090         }
1091       else
1092         {
1093           /* Fetch successively more significant portions.  */
1094           if (CONST_INT_P (value))
1095             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1096                              >> bitsdone)
1097                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1098           else
1099             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1100                                             bitsdone, NULL_RTX, 1);
1101         }
1102
1103       /* If OP0 is a register, then handle OFFSET here.
1104
1105          When handling multiword bitfields, extract_bit_field may pass
1106          down a word_mode SUBREG of a larger REG for a bitfield that actually
1107          crosses a word boundary.  Thus, for a SUBREG, we must find
1108          the current word starting from the base register.  */
1109       if (GET_CODE (op0) == SUBREG)
1110         {
1111           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1112           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1113                                         GET_MODE (SUBREG_REG (op0)));
1114           offset = 0;
1115         }
1116       else if (REG_P (op0))
1117         {
1118           word = operand_subword_force (op0, offset, GET_MODE (op0));
1119           offset = 0;
1120         }
1121       else
1122         word = op0;
1123
1124       /* OFFSET is in UNITs, and UNIT is in bits.
1125          store_fixed_bit_field wants offset in bytes.  */
1126       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1127                              thispos, part);
1128       bitsdone += thissize;
1129     }
1130 }
1131 \f
1132 /* A subroutine of extract_bit_field_1 that converts return value X
1133    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1134    to extract_bit_field.  */
1135
1136 static rtx
1137 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1138                              enum machine_mode tmode, bool unsignedp)
1139 {
1140   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1141     return x;
1142
1143   /* If the x mode is not a scalar integral, first convert to the
1144      integer mode of that size and then access it as a floating-point
1145      value via a SUBREG.  */
1146   if (!SCALAR_INT_MODE_P (tmode))
1147     {
1148       enum machine_mode smode;
1149
1150       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1151       x = convert_to_mode (smode, x, unsignedp);
1152       x = force_reg (smode, x);
1153       return gen_lowpart (tmode, x);
1154     }
1155
1156   return convert_to_mode (tmode, x, unsignedp);
1157 }
1158
1159 /* A subroutine of extract_bit_field, with the same arguments.
1160    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1161    if we can find no other means of implementing the operation.
1162    if FALLBACK_P is false, return NULL instead.  */
1163
1164 static rtx
1165 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1166                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1167                      enum machine_mode mode, enum machine_mode tmode,
1168                      bool fallback_p)
1169 {
1170   unsigned int unit
1171     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1172   unsigned HOST_WIDE_INT offset, bitpos;
1173   rtx op0 = str_rtx;
1174   enum machine_mode int_mode;
1175   enum machine_mode ext_mode;
1176   enum machine_mode mode1;
1177   enum insn_code icode;
1178   int byte_offset;
1179
1180   if (tmode == VOIDmode)
1181     tmode = mode;
1182
1183   while (GET_CODE (op0) == SUBREG)
1184     {
1185       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1186       op0 = SUBREG_REG (op0);
1187     }
1188
1189   /* If we have an out-of-bounds access to a register, just return an
1190      uninitialized register of the required mode.  This can occur if the
1191      source code contains an out-of-bounds access to a small array.  */
1192   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1193     return gen_reg_rtx (tmode);
1194
1195   if (REG_P (op0)
1196       && mode == GET_MODE (op0)
1197       && bitnum == 0
1198       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1199     {
1200       /* We're trying to extract a full register from itself.  */
1201       return op0;
1202     }
1203
1204   /* See if we can get a better vector mode before extracting.  */
1205   if (VECTOR_MODE_P (GET_MODE (op0))
1206       && !MEM_P (op0)
1207       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1208     {
1209       enum machine_mode new_mode;
1210       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1211
1212       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1213         new_mode = MIN_MODE_VECTOR_FLOAT;
1214       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1215         new_mode = MIN_MODE_VECTOR_FRACT;
1216       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1217         new_mode = MIN_MODE_VECTOR_UFRACT;
1218       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1219         new_mode = MIN_MODE_VECTOR_ACCUM;
1220       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1221         new_mode = MIN_MODE_VECTOR_UACCUM;
1222       else
1223         new_mode = MIN_MODE_VECTOR_INT;
1224
1225       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1226         if (GET_MODE_NUNITS (new_mode) == nunits
1227             && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1228             && targetm.vector_mode_supported_p (new_mode))
1229           break;
1230       if (new_mode != VOIDmode)
1231         op0 = gen_lowpart (new_mode, op0);
1232     }
1233
1234   /* Use vec_extract patterns for extracting parts of vectors whenever
1235      available.  */
1236   if (VECTOR_MODE_P (GET_MODE (op0))
1237       && !MEM_P (op0)
1238       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1239       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1240           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1241     {
1242       enum machine_mode outermode = GET_MODE (op0);
1243       enum machine_mode innermode = GET_MODE_INNER (outermode);
1244       int icode = (int) optab_handler (vec_extract_optab, outermode);
1245       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1246       rtx rtxpos = GEN_INT (pos);
1247       rtx src = op0;
1248       rtx dest = NULL, pat, seq;
1249       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1250       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1251       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1252
1253       if (innermode == tmode || innermode == mode)
1254         dest = target;
1255
1256       if (!dest)
1257         dest = gen_reg_rtx (innermode);
1258
1259       start_sequence ();
1260
1261       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1262         dest = copy_to_mode_reg (mode0, dest);
1263
1264       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1265         src = copy_to_mode_reg (mode1, src);
1266
1267       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1268         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1269
1270       /* We could handle this, but we should always be called with a pseudo
1271          for our targets and all insns should take them as outputs.  */
1272       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1273                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1274                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1275
1276       pat = GEN_FCN (icode) (dest, src, rtxpos);
1277       seq = get_insns ();
1278       end_sequence ();
1279       if (pat)
1280         {
1281           emit_insn (seq);
1282           emit_insn (pat);
1283           if (mode0 != mode)
1284             return gen_lowpart (tmode, dest);
1285           return dest;
1286         }
1287     }
1288
1289   /* Make sure we are playing with integral modes.  Pun with subregs
1290      if we aren't.  */
1291   {
1292     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1293     if (imode != GET_MODE (op0))
1294       {
1295         if (MEM_P (op0))
1296           op0 = adjust_address (op0, imode, 0);
1297         else if (imode != BLKmode)
1298           {
1299             op0 = gen_lowpart (imode, op0);
1300
1301             /* If we got a SUBREG, force it into a register since we
1302                aren't going to be able to do another SUBREG on it.  */
1303             if (GET_CODE (op0) == SUBREG)
1304               op0 = force_reg (imode, op0);
1305           }
1306         else if (REG_P (op0))
1307           {
1308             rtx reg, subreg;
1309             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1310                                             MODE_INT);
1311             reg = gen_reg_rtx (imode);
1312             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1313             emit_move_insn (subreg, op0);
1314             op0 = reg;
1315             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1316           }
1317         else
1318           {
1319             rtx mem = assign_stack_temp (GET_MODE (op0),
1320                                          GET_MODE_SIZE (GET_MODE (op0)), 0);
1321             emit_move_insn (mem, op0);
1322             op0 = adjust_address (mem, BLKmode, 0);
1323           }
1324       }
1325   }
1326
1327   /* We may be accessing data outside the field, which means
1328      we can alias adjacent data.  */
1329   if (MEM_P (op0))
1330     {
1331       op0 = shallow_copy_rtx (op0);
1332       set_mem_alias_set (op0, 0);
1333       set_mem_expr (op0, 0);
1334     }
1335
1336   /* Extraction of a full-word or multi-word value from a structure
1337      in a register or aligned memory can be done with just a SUBREG.
1338      A subword value in the least significant part of a register
1339      can also be extracted with a SUBREG.  For this, we need the
1340      byte offset of the value in op0.  */
1341
1342   bitpos = bitnum % unit;
1343   offset = bitnum / unit;
1344   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1345
1346   /* If OP0 is a register, BITPOS must count within a word.
1347      But as we have it, it counts within whatever size OP0 now has.
1348      On a bigendian machine, these are not the same, so convert.  */
1349   if (BYTES_BIG_ENDIAN
1350       && !MEM_P (op0)
1351       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1352     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1353
1354   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1355      If that's wrong, the solution is to test for it and set TARGET to 0
1356      if needed.  */
1357
1358   /* Only scalar integer modes can be converted via subregs.  There is an
1359      additional problem for FP modes here in that they can have a precision
1360      which is different from the size.  mode_for_size uses precision, but
1361      we want a mode based on the size, so we must avoid calling it for FP
1362      modes.  */
1363   mode1  = (SCALAR_INT_MODE_P (tmode)
1364             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1365             : mode);
1366
1367   /* If the bitfield is volatile, we need to make sure the access
1368      remains on a type-aligned boundary.  */
1369   if (GET_CODE (op0) == MEM
1370       && MEM_VOLATILE_P (op0)
1371       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1372       && flag_strict_volatile_bitfields > 0)
1373     goto no_subreg_mode_swap;
1374
1375   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1376         && bitpos % BITS_PER_WORD == 0)
1377        || (mode1 != BLKmode
1378            /* ??? The big endian test here is wrong.  This is correct
1379               if the value is in a register, and if mode_for_size is not
1380               the same mode as op0.  This causes us to get unnecessarily
1381               inefficient code from the Thumb port when -mbig-endian.  */
1382            && (BYTES_BIG_ENDIAN
1383                ? bitpos + bitsize == BITS_PER_WORD
1384                : bitpos == 0)))
1385       && ((!MEM_P (op0)
1386            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1387                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1388            && GET_MODE_SIZE (mode1) != 0
1389            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1390           || (MEM_P (op0)
1391               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1392                   || (offset * BITS_PER_UNIT % bitsize == 0
1393                       && MEM_ALIGN (op0) % bitsize == 0)))))
1394     {
1395       if (MEM_P (op0))
1396         op0 = adjust_address (op0, mode1, offset);
1397       else if (mode1 != GET_MODE (op0))
1398         {
1399           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1400                                          byte_offset);
1401           if (sub == NULL)
1402             goto no_subreg_mode_swap;
1403           op0 = sub;
1404         }
1405       if (mode1 != mode)
1406         return convert_to_mode (tmode, op0, unsignedp);
1407       return op0;
1408     }
1409  no_subreg_mode_swap:
1410
1411   /* Handle fields bigger than a word.  */
1412
1413   if (bitsize > BITS_PER_WORD)
1414     {
1415       /* Here we transfer the words of the field
1416          in the order least significant first.
1417          This is because the most significant word is the one which may
1418          be less than full.  */
1419
1420       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1421       unsigned int i;
1422
1423       if (target == 0 || !REG_P (target))
1424         target = gen_reg_rtx (mode);
1425
1426       /* Indicate for flow that the entire target reg is being set.  */
1427       emit_clobber (target);
1428
1429       for (i = 0; i < nwords; i++)
1430         {
1431           /* If I is 0, use the low-order word in both field and target;
1432              if I is 1, use the next to lowest word; and so on.  */
1433           /* Word number in TARGET to use.  */
1434           unsigned int wordnum
1435             = (WORDS_BIG_ENDIAN
1436                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1437                : i);
1438           /* Offset from start of field in OP0.  */
1439           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1440                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1441                                                 * (int) BITS_PER_WORD))
1442                                      : (int) i * BITS_PER_WORD);
1443           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1444           rtx result_part
1445             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1446                                            bitsize - i * BITS_PER_WORD),
1447                                  bitnum + bit_offset, 1, target_part, mode,
1448                                  word_mode);
1449
1450           gcc_assert (target_part);
1451
1452           if (result_part != target_part)
1453             emit_move_insn (target_part, result_part);
1454         }
1455
1456       if (unsignedp)
1457         {
1458           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1459              need to be zero'd out.  */
1460           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1461             {
1462               unsigned int i, total_words;
1463
1464               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1465               for (i = nwords; i < total_words; i++)
1466                 emit_move_insn
1467                   (operand_subword (target,
1468                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1469                                     1, VOIDmode),
1470                    const0_rtx);
1471             }
1472           return target;
1473         }
1474
1475       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1476       target = expand_shift (LSHIFT_EXPR, mode, target,
1477                              build_int_cst (NULL_TREE,
1478                                             GET_MODE_BITSIZE (mode) - bitsize),
1479                              NULL_RTX, 0);
1480       return expand_shift (RSHIFT_EXPR, mode, target,
1481                            build_int_cst (NULL_TREE,
1482                                           GET_MODE_BITSIZE (mode) - bitsize),
1483                            NULL_RTX, 0);
1484     }
1485
1486   /* From here on we know the desired field is smaller than a word.  */
1487
1488   /* Check if there is a correspondingly-sized integer field, so we can
1489      safely extract it as one size of integer, if necessary; then
1490      truncate or extend to the size that is wanted; then use SUBREGs or
1491      convert_to_mode to get one of the modes we really wanted.  */
1492
1493   int_mode = int_mode_for_mode (tmode);
1494   if (int_mode == BLKmode)
1495     int_mode = int_mode_for_mode (mode);
1496   /* Should probably push op0 out to memory and then do a load.  */
1497   gcc_assert (int_mode != BLKmode);
1498
1499   /* OFFSET is the number of words or bytes (UNIT says which)
1500      from STR_RTX to the first word or byte containing part of the field.  */
1501   if (!MEM_P (op0))
1502     {
1503       if (offset != 0
1504           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1505         {
1506           if (!REG_P (op0))
1507             op0 = copy_to_reg (op0);
1508           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1509                                 op0, (offset * UNITS_PER_WORD));
1510         }
1511       offset = 0;
1512     }
1513
1514   /* Now OFFSET is nonzero only for memory operands.  */
1515   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1516   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1517   if (ext_mode != MAX_MACHINE_MODE
1518       && bitsize > 0
1519       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1520       /* If op0 is a register, we need it in EXT_MODE to make it
1521          acceptable to the format of ext(z)v.  */
1522       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1523       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1524            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1525       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1526     {
1527       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1528       rtx bitsize_rtx, bitpos_rtx;
1529       rtx last = get_last_insn ();
1530       rtx xop0 = op0;
1531       rtx xtarget = target;
1532       rtx xspec_target = target;
1533       rtx xspec_target_subreg = 0;
1534       rtx pat;
1535
1536       /* If op0 is a register, we need it in EXT_MODE to make it
1537          acceptable to the format of ext(z)v.  */
1538       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1539         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1540       if (MEM_P (xop0))
1541         /* Get ref to first byte containing part of the field.  */
1542         xop0 = adjust_address (xop0, byte_mode, xoffset);
1543
1544       /* On big-endian machines, we count bits from the most significant.
1545          If the bit field insn does not, we must invert.  */
1546       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1547         xbitpos = unit - bitsize - xbitpos;
1548
1549       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1550       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1551         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1552
1553       unit = GET_MODE_BITSIZE (ext_mode);
1554
1555       if (xtarget == 0)
1556         xtarget = xspec_target = gen_reg_rtx (tmode);
1557
1558       if (GET_MODE (xtarget) != ext_mode)
1559         {
1560           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1561              between the mode of the extraction (word_mode) and the target
1562              mode.  Instead, create a temporary and use convert_move to set
1563              the target.  */
1564           if (REG_P (xtarget)
1565               && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1566                                         GET_MODE_BITSIZE (ext_mode)))
1567             {
1568               xtarget = gen_lowpart (ext_mode, xtarget);
1569               if (GET_MODE_SIZE (ext_mode)
1570                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1571                 xspec_target_subreg = xtarget;
1572             }
1573           else
1574             xtarget = gen_reg_rtx (ext_mode);
1575         }
1576
1577       /* If this machine's ext(z)v insists on a register target,
1578          make sure we have one.  */
1579       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1580         xtarget = gen_reg_rtx (ext_mode);
1581
1582       bitsize_rtx = GEN_INT (bitsize);
1583       bitpos_rtx = GEN_INT (xbitpos);
1584
1585       pat = (unsignedp
1586              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1587              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1588       if (pat)
1589         {
1590           emit_insn (pat);
1591           if (xtarget == xspec_target)
1592             return xtarget;
1593           if (xtarget == xspec_target_subreg)
1594             return xspec_target;
1595           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1596         }
1597       delete_insns_since (last);
1598     }
1599
1600   /* If OP0 is a memory, try copying it to a register and seeing if a
1601      cheap register alternative is available.  */
1602   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1603     {
1604       enum machine_mode bestmode;
1605
1606       /* Get the mode to use for inserting into this field.  If
1607          OP0 is BLKmode, get the smallest mode consistent with the
1608          alignment. If OP0 is a non-BLKmode object that is no
1609          wider than EXT_MODE, use its mode. Otherwise, use the
1610          smallest mode containing the field.  */
1611
1612       if (GET_MODE (op0) == BLKmode
1613           || (ext_mode != MAX_MACHINE_MODE
1614               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1615         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1616                                   (ext_mode == MAX_MACHINE_MODE
1617                                    ? VOIDmode : ext_mode),
1618                                   MEM_VOLATILE_P (op0));
1619       else
1620         bestmode = GET_MODE (op0);
1621
1622       if (bestmode != VOIDmode
1623           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1624                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1625         {
1626           unsigned HOST_WIDE_INT xoffset, xbitpos;
1627
1628           /* Compute the offset as a multiple of this unit,
1629              counting in bytes.  */
1630           unit = GET_MODE_BITSIZE (bestmode);
1631           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1632           xbitpos = bitnum % unit;
1633
1634           /* Make sure the register is big enough for the whole field.  */
1635           if (xoffset * BITS_PER_UNIT + unit
1636               >= offset * BITS_PER_UNIT + bitsize)
1637             {
1638               rtx last, result, xop0;
1639
1640               last = get_last_insn ();
1641
1642               /* Fetch it to a register in that size.  */
1643               xop0 = adjust_address (op0, bestmode, xoffset);
1644               xop0 = force_reg (bestmode, xop0);
1645               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1646                                             unsignedp, target,
1647                                             mode, tmode, false);
1648               if (result)
1649                 return result;
1650
1651               delete_insns_since (last);
1652             }
1653         }
1654     }
1655
1656   if (!fallback_p)
1657     return NULL;
1658
1659   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1660                                     bitpos, target, unsignedp);
1661   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1662 }
1663
1664 /* Generate code to extract a byte-field from STR_RTX
1665    containing BITSIZE bits, starting at BITNUM,
1666    and put it in TARGET if possible (if TARGET is nonzero).
1667    Regardless of TARGET, we return the rtx for where the value is placed.
1668
1669    STR_RTX is the structure containing the byte (a REG or MEM).
1670    UNSIGNEDP is nonzero if this is an unsigned bit field.
1671    MODE is the natural mode of the field value once extracted.
1672    TMODE is the mode the caller would like the value to have;
1673    but the value may be returned with type MODE instead.
1674
1675    If a TARGET is specified and we can store in it at no extra cost,
1676    we do so, and return TARGET.
1677    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1678    if they are equally easy.  */
1679
1680 rtx
1681 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1682                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1683                    enum machine_mode mode, enum machine_mode tmode)
1684 {
1685   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1686                               target, mode, tmode, true);
1687 }
1688 \f
1689 /* Extract a bit field using shifts and boolean operations
1690    Returns an rtx to represent the value.
1691    OP0 addresses a register (word) or memory (byte).
1692    BITPOS says which bit within the word or byte the bit field starts in.
1693    OFFSET says how many bytes farther the bit field starts;
1694     it is 0 if OP0 is a register.
1695    BITSIZE says how many bits long the bit field is.
1696     (If OP0 is a register, it may be narrower than a full word,
1697      but BITPOS still counts within a full word,
1698      which is significant on bigendian machines.)
1699
1700    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1701    If TARGET is nonzero, attempts to store the value there
1702    and return TARGET, but this is not guaranteed.
1703    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1704
1705 static rtx
1706 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1707                          unsigned HOST_WIDE_INT offset,
1708                          unsigned HOST_WIDE_INT bitsize,
1709                          unsigned HOST_WIDE_INT bitpos, rtx target,
1710                          int unsignedp)
1711 {
1712   unsigned int total_bits = BITS_PER_WORD;
1713   enum machine_mode mode;
1714
1715   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1716     {
1717       /* Special treatment for a bit field split across two registers.  */
1718       if (bitsize + bitpos > BITS_PER_WORD)
1719         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1720     }
1721   else
1722     {
1723       /* Get the proper mode to use for this field.  We want a mode that
1724          includes the entire field.  If such a mode would be larger than
1725          a word, we won't be doing the extraction the normal way.  */
1726
1727       if (MEM_VOLATILE_P (op0)
1728           && flag_strict_volatile_bitfields > 0)
1729         {
1730           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1731             mode = GET_MODE (op0);
1732           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1733             mode = GET_MODE (target);
1734           else
1735             mode = tmode;
1736         }
1737       else
1738         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1739                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1740
1741       if (mode == VOIDmode)
1742         /* The only way this should occur is if the field spans word
1743            boundaries.  */
1744         return extract_split_bit_field (op0, bitsize,
1745                                         bitpos + offset * BITS_PER_UNIT,
1746                                         unsignedp);
1747
1748       total_bits = GET_MODE_BITSIZE (mode);
1749
1750       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1751          be in the range 0 to total_bits-1, and put any excess bytes in
1752          OFFSET.  */
1753       if (bitpos >= total_bits)
1754         {
1755           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1756           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1757                      * BITS_PER_UNIT);
1758         }
1759
1760       /* If we're accessing a volatile MEM, we can't do the next
1761          alignment step if it results in a multi-word access where we
1762          otherwise wouldn't have one.  So, check for that case
1763          here.  */
1764       if (MEM_P (op0)
1765           && MEM_VOLATILE_P (op0)
1766           && flag_strict_volatile_bitfields > 0
1767           && bitpos + bitsize <= total_bits
1768           && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1769         {
1770           if (STRICT_ALIGNMENT)
1771             {
1772               static bool informed_about_misalignment = false;
1773               bool warned;
1774
1775               if (bitsize == total_bits)
1776                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1777                                      "mis-aligned access used for structure member");
1778               else
1779                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1780                                      "mis-aligned access used for structure bitfield");
1781
1782               if (! informed_about_misalignment && warned)
1783                 {
1784                   informed_about_misalignment = true;
1785                   inform (input_location,
1786                           "When a volatile object spans multiple type-sized locations,"
1787                           " the compiler must choose between using a single mis-aligned access to"
1788                           " preserve the volatility, or using multiple aligned accesses to avoid"
1789                           " runtime faults.  This code may fail at runtime if the hardware does"
1790                           " not allow this access.");
1791                 }
1792             }
1793         }
1794       else
1795         {
1796
1797           /* Get ref to an aligned byte, halfword, or word containing the field.
1798              Adjust BITPOS to be position within a word,
1799              and OFFSET to be the offset of that word.
1800              Then alter OP0 to refer to that word.  */
1801           bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1802           offset -= (offset % (total_bits / BITS_PER_UNIT));
1803         }
1804
1805       op0 = adjust_address (op0, mode, offset);
1806     }
1807
1808   mode = GET_MODE (op0);
1809
1810   if (BYTES_BIG_ENDIAN)
1811     /* BITPOS is the distance between our msb and that of OP0.
1812        Convert it to the distance from the lsb.  */
1813     bitpos = total_bits - bitsize - bitpos;
1814
1815   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1816      We have reduced the big-endian case to the little-endian case.  */
1817
1818   if (unsignedp)
1819     {
1820       if (bitpos)
1821         {
1822           /* If the field does not already start at the lsb,
1823              shift it so it does.  */
1824           tree amount = build_int_cst (NULL_TREE, bitpos);
1825           /* Maybe propagate the target for the shift.  */
1826           /* But not if we will return it--could confuse integrate.c.  */
1827           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1828           if (tmode != mode) subtarget = 0;
1829           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1830         }
1831       /* Convert the value to the desired mode.  */
1832       if (mode != tmode)
1833         op0 = convert_to_mode (tmode, op0, 1);
1834
1835       /* Unless the msb of the field used to be the msb when we shifted,
1836          mask out the upper bits.  */
1837
1838       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1839         return expand_binop (GET_MODE (op0), and_optab, op0,
1840                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1841                              target, 1, OPTAB_LIB_WIDEN);
1842       return op0;
1843     }
1844
1845   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1846      then arithmetic-shift its lsb to the lsb of the word.  */
1847   op0 = force_reg (mode, op0);
1848   if (mode != tmode)
1849     target = 0;
1850
1851   /* Find the narrowest integer mode that contains the field.  */
1852
1853   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1854        mode = GET_MODE_WIDER_MODE (mode))
1855     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1856       {
1857         op0 = convert_to_mode (mode, op0, 0);
1858         break;
1859       }
1860
1861   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1862     {
1863       tree amount
1864         = build_int_cst (NULL_TREE,
1865                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1866       /* Maybe propagate the target for the shift.  */
1867       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1868       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1869     }
1870
1871   return expand_shift (RSHIFT_EXPR, mode, op0,
1872                        build_int_cst (NULL_TREE,
1873                                       GET_MODE_BITSIZE (mode) - bitsize),
1874                        target, 0);
1875 }
1876 \f
1877 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1878    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1879    complement of that if COMPLEMENT.  The mask is truncated if
1880    necessary to the width of mode MODE.  The mask is zero-extended if
1881    BITSIZE+BITPOS is too small for MODE.  */
1882
1883 static rtx
1884 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1885 {
1886   double_int mask;
1887
1888   mask = double_int_mask (bitsize);
1889   mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1890
1891   if (complement)
1892     mask = double_int_not (mask);
1893
1894   return immed_double_int_const (mask, mode);
1895 }
1896
1897 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1898    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1899
1900 static rtx
1901 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1902 {
1903   double_int val;
1904
1905   val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1906   val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1907
1908   return immed_double_int_const (val, mode);
1909 }
1910 \f
1911 /* Extract a bit field that is split across two words
1912    and return an RTX for the result.
1913
1914    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1915    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1916    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1917
1918 static rtx
1919 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1920                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1921 {
1922   unsigned int unit;
1923   unsigned int bitsdone = 0;
1924   rtx result = NULL_RTX;
1925   int first = 1;
1926
1927   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1928      much at a time.  */
1929   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1930     unit = BITS_PER_WORD;
1931   else
1932     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1933
1934   while (bitsdone < bitsize)
1935     {
1936       unsigned HOST_WIDE_INT thissize;
1937       rtx part, word;
1938       unsigned HOST_WIDE_INT thispos;
1939       unsigned HOST_WIDE_INT offset;
1940
1941       offset = (bitpos + bitsdone) / unit;
1942       thispos = (bitpos + bitsdone) % unit;
1943
1944       /* THISSIZE must not overrun a word boundary.  Otherwise,
1945          extract_fixed_bit_field will call us again, and we will mutually
1946          recurse forever.  */
1947       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1948       thissize = MIN (thissize, unit - thispos);
1949
1950       /* If OP0 is a register, then handle OFFSET here.
1951
1952          When handling multiword bitfields, extract_bit_field may pass
1953          down a word_mode SUBREG of a larger REG for a bitfield that actually
1954          crosses a word boundary.  Thus, for a SUBREG, we must find
1955          the current word starting from the base register.  */
1956       if (GET_CODE (op0) == SUBREG)
1957         {
1958           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1959           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1960                                         GET_MODE (SUBREG_REG (op0)));
1961           offset = 0;
1962         }
1963       else if (REG_P (op0))
1964         {
1965           word = operand_subword_force (op0, offset, GET_MODE (op0));
1966           offset = 0;
1967         }
1968       else
1969         word = op0;
1970
1971       /* Extract the parts in bit-counting order,
1972          whose meaning is determined by BYTES_PER_UNIT.
1973          OFFSET is in UNITs, and UNIT is in bits.
1974          extract_fixed_bit_field wants offset in bytes.  */
1975       part = extract_fixed_bit_field (word_mode, word,
1976                                       offset * unit / BITS_PER_UNIT,
1977                                       thissize, thispos, 0, 1);
1978       bitsdone += thissize;
1979
1980       /* Shift this part into place for the result.  */
1981       if (BYTES_BIG_ENDIAN)
1982         {
1983           if (bitsize != bitsdone)
1984             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1985                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1986                                  0, 1);
1987         }
1988       else
1989         {
1990           if (bitsdone != thissize)
1991             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1992                                  build_int_cst (NULL_TREE,
1993                                                 bitsdone - thissize), 0, 1);
1994         }
1995
1996       if (first)
1997         result = part;
1998       else
1999         /* Combine the parts with bitwise or.  This works
2000            because we extracted each part as an unsigned bit field.  */
2001         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2002                                OPTAB_LIB_WIDEN);
2003
2004       first = 0;
2005     }
2006
2007   /* Unsigned bit field: we are done.  */
2008   if (unsignedp)
2009     return result;
2010   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2011   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2012                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2013                          NULL_RTX, 0);
2014   return expand_shift (RSHIFT_EXPR, word_mode, result,
2015                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2016                        NULL_RTX, 0);
2017 }
2018 \f
2019 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2020    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2021    MODE, fill the upper bits with zeros.  Fail if the layout of either
2022    mode is unknown (as for CC modes) or if the extraction would involve
2023    unprofitable mode punning.  Return the value on success, otherwise
2024    return null.
2025
2026    This is different from gen_lowpart* in these respects:
2027
2028      - the returned value must always be considered an rvalue
2029
2030      - when MODE is wider than SRC_MODE, the extraction involves
2031        a zero extension
2032
2033      - when MODE is smaller than SRC_MODE, the extraction involves
2034        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2035
2036    In other words, this routine performs a computation, whereas the
2037    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2038    operations.  */
2039
2040 rtx
2041 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2042 {
2043   enum machine_mode int_mode, src_int_mode;
2044
2045   if (mode == src_mode)
2046     return src;
2047
2048   if (CONSTANT_P (src))
2049     {
2050       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2051          fails, it will happily create (subreg (symbol_ref)) or similar
2052          invalid SUBREGs.  */
2053       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2054       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2055       if (ret)
2056         return ret;
2057
2058       if (GET_MODE (src) == VOIDmode
2059           || !validate_subreg (mode, src_mode, src, byte))
2060         return NULL_RTX;
2061
2062       src = force_reg (GET_MODE (src), src);
2063       return gen_rtx_SUBREG (mode, src, byte);
2064     }
2065
2066   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2067     return NULL_RTX;
2068
2069   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2070       && MODES_TIEABLE_P (mode, src_mode))
2071     {
2072       rtx x = gen_lowpart_common (mode, src);
2073       if (x)
2074         return x;
2075     }
2076
2077   src_int_mode = int_mode_for_mode (src_mode);
2078   int_mode = int_mode_for_mode (mode);
2079   if (src_int_mode == BLKmode || int_mode == BLKmode)
2080     return NULL_RTX;
2081
2082   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2083     return NULL_RTX;
2084   if (!MODES_TIEABLE_P (int_mode, mode))
2085     return NULL_RTX;
2086
2087   src = gen_lowpart (src_int_mode, src);
2088   src = convert_modes (int_mode, src_int_mode, src, true);
2089   src = gen_lowpart (mode, src);
2090   return src;
2091 }
2092 \f
2093 /* Add INC into TARGET.  */
2094
2095 void
2096 expand_inc (rtx target, rtx inc)
2097 {
2098   rtx value = expand_binop (GET_MODE (target), add_optab,
2099                             target, inc,
2100                             target, 0, OPTAB_LIB_WIDEN);
2101   if (value != target)
2102     emit_move_insn (target, value);
2103 }
2104
2105 /* Subtract DEC from TARGET.  */
2106
2107 void
2108 expand_dec (rtx target, rtx dec)
2109 {
2110   rtx value = expand_binop (GET_MODE (target), sub_optab,
2111                             target, dec,
2112                             target, 0, OPTAB_LIB_WIDEN);
2113   if (value != target)
2114     emit_move_insn (target, value);
2115 }
2116 \f
2117 /* Output a shift instruction for expression code CODE,
2118    with SHIFTED being the rtx for the value to shift,
2119    and AMOUNT the tree for the amount to shift by.
2120    Store the result in the rtx TARGET, if that is convenient.
2121    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2122    Return the rtx for where the value is.  */
2123
2124 rtx
2125 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2126               tree amount, rtx target, int unsignedp)
2127 {
2128   rtx op1, temp = 0;
2129   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2130   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2131   optab lshift_optab = ashl_optab;
2132   optab rshift_arith_optab = ashr_optab;
2133   optab rshift_uns_optab = lshr_optab;
2134   optab lrotate_optab = rotl_optab;
2135   optab rrotate_optab = rotr_optab;
2136   enum machine_mode op1_mode;
2137   int attempt;
2138   bool speed = optimize_insn_for_speed_p ();
2139
2140   op1 = expand_normal (amount);
2141   op1_mode = GET_MODE (op1);
2142
2143   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2144      shift amount is a vector, use the vector/vector shift patterns.  */
2145   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2146     {
2147       lshift_optab = vashl_optab;
2148       rshift_arith_optab = vashr_optab;
2149       rshift_uns_optab = vlshr_optab;
2150       lrotate_optab = vrotl_optab;
2151       rrotate_optab = vrotr_optab;
2152     }
2153
2154   /* Previously detected shift-counts computed by NEGATE_EXPR
2155      and shifted in the other direction; but that does not work
2156      on all machines.  */
2157
2158   if (SHIFT_COUNT_TRUNCATED)
2159     {
2160       if (CONST_INT_P (op1)
2161           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2162               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2163         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2164                        % GET_MODE_BITSIZE (mode));
2165       else if (GET_CODE (op1) == SUBREG
2166                && subreg_lowpart_p (op1)
2167                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2168         op1 = SUBREG_REG (op1);
2169     }
2170
2171   if (op1 == const0_rtx)
2172     return shifted;
2173
2174   /* Check whether its cheaper to implement a left shift by a constant
2175      bit count by a sequence of additions.  */
2176   if (code == LSHIFT_EXPR
2177       && CONST_INT_P (op1)
2178       && INTVAL (op1) > 0
2179       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2180       && INTVAL (op1) < MAX_BITS_PER_WORD
2181       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2182       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2183     {
2184       int i;
2185       for (i = 0; i < INTVAL (op1); i++)
2186         {
2187           temp = force_reg (mode, shifted);
2188           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2189                                   unsignedp, OPTAB_LIB_WIDEN);
2190         }
2191       return shifted;
2192     }
2193
2194   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2195     {
2196       enum optab_methods methods;
2197
2198       if (attempt == 0)
2199         methods = OPTAB_DIRECT;
2200       else if (attempt == 1)
2201         methods = OPTAB_WIDEN;
2202       else
2203         methods = OPTAB_LIB_WIDEN;
2204
2205       if (rotate)
2206         {
2207           /* Widening does not work for rotation.  */
2208           if (methods == OPTAB_WIDEN)
2209             continue;
2210           else if (methods == OPTAB_LIB_WIDEN)
2211             {
2212               /* If we have been unable to open-code this by a rotation,
2213                  do it as the IOR of two shifts.  I.e., to rotate A
2214                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2215                  where C is the bitsize of A.
2216
2217                  It is theoretically possible that the target machine might
2218                  not be able to perform either shift and hence we would
2219                  be making two libcalls rather than just the one for the
2220                  shift (similarly if IOR could not be done).  We will allow
2221                  this extremely unlikely lossage to avoid complicating the
2222                  code below.  */
2223
2224               rtx subtarget = target == shifted ? 0 : target;
2225               tree new_amount, other_amount;
2226               rtx temp1;
2227               tree type = TREE_TYPE (amount);
2228               if (GET_MODE (op1) != TYPE_MODE (type)
2229                   && GET_MODE (op1) != VOIDmode)
2230                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2231               new_amount = make_tree (type, op1);
2232               other_amount
2233                 = fold_build2 (MINUS_EXPR, type,
2234                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2235                                new_amount);
2236
2237               shifted = force_reg (mode, shifted);
2238
2239               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2240                                    mode, shifted, new_amount, 0, 1);
2241               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2242                                     mode, shifted, other_amount, subtarget, 1);
2243               return expand_binop (mode, ior_optab, temp, temp1, target,
2244                                    unsignedp, methods);
2245             }
2246
2247           temp = expand_binop (mode,
2248                                left ? lrotate_optab : rrotate_optab,
2249                                shifted, op1, target, unsignedp, methods);
2250         }
2251       else if (unsignedp)
2252         temp = expand_binop (mode,
2253                              left ? lshift_optab : rshift_uns_optab,
2254                              shifted, op1, target, unsignedp, methods);
2255
2256       /* Do arithmetic shifts.
2257          Also, if we are going to widen the operand, we can just as well
2258          use an arithmetic right-shift instead of a logical one.  */
2259       if (temp == 0 && ! rotate
2260           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2261         {
2262           enum optab_methods methods1 = methods;
2263
2264           /* If trying to widen a log shift to an arithmetic shift,
2265              don't accept an arithmetic shift of the same size.  */
2266           if (unsignedp)
2267             methods1 = OPTAB_MUST_WIDEN;
2268
2269           /* Arithmetic shift */
2270
2271           temp = expand_binop (mode,
2272                                left ? lshift_optab : rshift_arith_optab,
2273                                shifted, op1, target, unsignedp, methods1);
2274         }
2275
2276       /* We used to try extzv here for logical right shifts, but that was
2277          only useful for one machine, the VAX, and caused poor code
2278          generation there for lshrdi3, so the code was deleted and a
2279          define_expand for lshrsi3 was added to vax.md.  */
2280     }
2281
2282   gcc_assert (temp);
2283   return temp;
2284 }
2285 \f
2286 enum alg_code {
2287   alg_unknown,
2288   alg_zero,
2289   alg_m, alg_shift,
2290   alg_add_t_m2,
2291   alg_sub_t_m2,
2292   alg_add_factor,
2293   alg_sub_factor,
2294   alg_add_t2_m,
2295   alg_sub_t2_m,
2296   alg_impossible
2297 };
2298
2299 /* This structure holds the "cost" of a multiply sequence.  The
2300    "cost" field holds the total rtx_cost of every operator in the
2301    synthetic multiplication sequence, hence cost(a op b) is defined
2302    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2303    The "latency" field holds the minimum possible latency of the
2304    synthetic multiply, on a hypothetical infinitely parallel CPU.
2305    This is the critical path, or the maximum height, of the expression
2306    tree which is the sum of rtx_costs on the most expensive path from
2307    any leaf to the root.  Hence latency(a op b) is defined as zero for
2308    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2309
2310 struct mult_cost {
2311   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2312   short latency;  /* The latency of the multiplication sequence.  */
2313 };
2314
2315 /* This macro is used to compare a pointer to a mult_cost against an
2316    single integer "rtx_cost" value.  This is equivalent to the macro
2317    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2318 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2319                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2320
2321 /* This macro is used to compare two pointers to mult_costs against
2322    each other.  The macro returns true if X is cheaper than Y.
2323    Currently, the cheaper of two mult_costs is the one with the
2324    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2325 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2326                                  || ((X)->cost == (Y)->cost     \
2327                                      && (X)->latency < (Y)->latency))
2328
2329 /* This structure records a sequence of operations.
2330    `ops' is the number of operations recorded.
2331    `cost' is their total cost.
2332    The operations are stored in `op' and the corresponding
2333    logarithms of the integer coefficients in `log'.
2334
2335    These are the operations:
2336    alg_zero             total := 0;
2337    alg_m                total := multiplicand;
2338    alg_shift            total := total * coeff
2339    alg_add_t_m2         total := total + multiplicand * coeff;
2340    alg_sub_t_m2         total := total - multiplicand * coeff;
2341    alg_add_factor       total := total * coeff + total;
2342    alg_sub_factor       total := total * coeff - total;
2343    alg_add_t2_m         total := total * coeff + multiplicand;
2344    alg_sub_t2_m         total := total * coeff - multiplicand;
2345
2346    The first operand must be either alg_zero or alg_m.  */
2347
2348 struct algorithm
2349 {
2350   struct mult_cost cost;
2351   short ops;
2352   /* The size of the OP and LOG fields are not directly related to the
2353      word size, but the worst-case algorithms will be if we have few
2354      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2355      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2356      in total wordsize operations.  */
2357   enum alg_code op[MAX_BITS_PER_WORD];
2358   char log[MAX_BITS_PER_WORD];
2359 };
2360
2361 /* The entry for our multiplication cache/hash table.  */
2362 struct alg_hash_entry {
2363   /* The number we are multiplying by.  */
2364   unsigned HOST_WIDE_INT t;
2365
2366   /* The mode in which we are multiplying something by T.  */
2367   enum machine_mode mode;
2368
2369   /* The best multiplication algorithm for t.  */
2370   enum alg_code alg;
2371
2372   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2373      Otherwise, the cost within which multiplication by T is
2374      impossible.  */
2375   struct mult_cost cost;
2376
2377   /* OPtimized for speed? */
2378   bool speed;
2379 };
2380
2381 /* The number of cache/hash entries.  */
2382 #if HOST_BITS_PER_WIDE_INT == 64
2383 #define NUM_ALG_HASH_ENTRIES 1031
2384 #else
2385 #define NUM_ALG_HASH_ENTRIES 307
2386 #endif
2387
2388 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2389    actually a hash table.  If we have a collision, that the older
2390    entry is kicked out.  */
2391 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2392
2393 /* Indicates the type of fixup needed after a constant multiplication.
2394    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2395    the result should be negated, and ADD_VARIANT means that the
2396    multiplicand should be added to the result.  */
2397 enum mult_variant {basic_variant, negate_variant, add_variant};
2398
2399 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2400                         const struct mult_cost *, enum machine_mode mode);
2401 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2402                                  struct algorithm *, enum mult_variant *, int);
2403 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2404                               const struct algorithm *, enum mult_variant);
2405 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2406                                                  int, rtx *, int *, int *);
2407 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2408 static rtx extract_high_half (enum machine_mode, rtx);
2409 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2410 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2411                                        int, int);
2412 /* Compute and return the best algorithm for multiplying by T.
2413    The algorithm must cost less than cost_limit
2414    If retval.cost >= COST_LIMIT, no algorithm was found and all
2415    other field of the returned struct are undefined.
2416    MODE is the machine mode of the multiplication.  */
2417
2418 static void
2419 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2420             const struct mult_cost *cost_limit, enum machine_mode mode)
2421 {
2422   int m;
2423   struct algorithm *alg_in, *best_alg;
2424   struct mult_cost best_cost;
2425   struct mult_cost new_limit;
2426   int op_cost, op_latency;
2427   unsigned HOST_WIDE_INT orig_t = t;
2428   unsigned HOST_WIDE_INT q;
2429   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2430   int hash_index;
2431   bool cache_hit = false;
2432   enum alg_code cache_alg = alg_zero;
2433   bool speed = optimize_insn_for_speed_p ();
2434
2435   /* Indicate that no algorithm is yet found.  If no algorithm
2436      is found, this value will be returned and indicate failure.  */
2437   alg_out->cost.cost = cost_limit->cost + 1;
2438   alg_out->cost.latency = cost_limit->latency + 1;
2439
2440   if (cost_limit->cost < 0
2441       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2442     return;
2443
2444   /* Restrict the bits of "t" to the multiplication's mode.  */
2445   t &= GET_MODE_MASK (mode);
2446
2447   /* t == 1 can be done in zero cost.  */
2448   if (t == 1)
2449     {
2450       alg_out->ops = 1;
2451       alg_out->cost.cost = 0;
2452       alg_out->cost.latency = 0;
2453       alg_out->op[0] = alg_m;
2454       return;
2455     }
2456
2457   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2458      fail now.  */
2459   if (t == 0)
2460     {
2461       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2462         return;
2463       else
2464         {
2465           alg_out->ops = 1;
2466           alg_out->cost.cost = zero_cost[speed];
2467           alg_out->cost.latency = zero_cost[speed];
2468           alg_out->op[0] = alg_zero;
2469           return;
2470         }
2471     }
2472
2473   /* We'll be needing a couple extra algorithm structures now.  */
2474
2475   alg_in = XALLOCA (struct algorithm);
2476   best_alg = XALLOCA (struct algorithm);
2477   best_cost = *cost_limit;
2478
2479   /* Compute the hash index.  */
2480   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2481
2482   /* See if we already know what to do for T.  */
2483   if (alg_hash[hash_index].t == t
2484       && alg_hash[hash_index].mode == mode
2485       && alg_hash[hash_index].mode == mode
2486       && alg_hash[hash_index].speed == speed
2487       && alg_hash[hash_index].alg != alg_unknown)
2488     {
2489       cache_alg = alg_hash[hash_index].alg;
2490
2491       if (cache_alg == alg_impossible)
2492         {
2493           /* The cache tells us that it's impossible to synthesize
2494              multiplication by T within alg_hash[hash_index].cost.  */
2495           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2496             /* COST_LIMIT is at least as restrictive as the one
2497                recorded in the hash table, in which case we have no
2498                hope of synthesizing a multiplication.  Just
2499                return.  */
2500             return;
2501
2502           /* If we get here, COST_LIMIT is less restrictive than the
2503              one recorded in the hash table, so we may be able to
2504              synthesize a multiplication.  Proceed as if we didn't
2505              have the cache entry.  */
2506         }
2507       else
2508         {
2509           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2510             /* The cached algorithm shows that this multiplication
2511                requires more cost than COST_LIMIT.  Just return.  This
2512                way, we don't clobber this cache entry with
2513                alg_impossible but retain useful information.  */
2514             return;
2515
2516           cache_hit = true;
2517
2518           switch (cache_alg)
2519             {
2520             case alg_shift:
2521               goto do_alg_shift;
2522
2523             case alg_add_t_m2:
2524             case alg_sub_t_m2:
2525               goto do_alg_addsub_t_m2;
2526
2527             case alg_add_factor:
2528             case alg_sub_factor:
2529               goto do_alg_addsub_factor;
2530
2531             case alg_add_t2_m:
2532               goto do_alg_add_t2_m;
2533
2534             case alg_sub_t2_m:
2535               goto do_alg_sub_t2_m;
2536
2537             default:
2538               gcc_unreachable ();
2539             }
2540         }
2541     }
2542
2543   /* If we have a group of zero bits at the low-order part of T, try
2544      multiplying by the remaining bits and then doing a shift.  */
2545
2546   if ((t & 1) == 0)
2547     {
2548     do_alg_shift:
2549       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2550       if (m < maxm)
2551         {
2552           q = t >> m;
2553           /* The function expand_shift will choose between a shift and
2554              a sequence of additions, so the observed cost is given as
2555              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2556           op_cost = m * add_cost[speed][mode];
2557           if (shift_cost[speed][mode][m] < op_cost)
2558             op_cost = shift_cost[speed][mode][m];
2559           new_limit.cost = best_cost.cost - op_cost;
2560           new_limit.latency = best_cost.latency - op_cost;
2561           synth_mult (alg_in, q, &new_limit, mode);
2562
2563           alg_in->cost.cost += op_cost;
2564           alg_in->cost.latency += op_cost;
2565           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2566             {
2567               struct algorithm *x;
2568               best_cost = alg_in->cost;
2569               x = alg_in, alg_in = best_alg, best_alg = x;
2570               best_alg->log[best_alg->ops] = m;
2571               best_alg->op[best_alg->ops] = alg_shift;
2572             }
2573
2574           /* See if treating ORIG_T as a signed number yields a better
2575              sequence.  Try this sequence only for a negative ORIG_T
2576              as it would be useless for a non-negative ORIG_T.  */
2577           if ((HOST_WIDE_INT) orig_t < 0)
2578             {
2579               /* Shift ORIG_T as follows because a right shift of a
2580                  negative-valued signed type is implementation
2581                  defined.  */
2582               q = ~(~orig_t >> m);
2583               /* The function expand_shift will choose between a shift
2584                  and a sequence of additions, so the observed cost is
2585                  given as MIN (m * add_cost[speed][mode],
2586                  shift_cost[speed][mode][m]).  */
2587               op_cost = m * add_cost[speed][mode];
2588               if (shift_cost[speed][mode][m] < op_cost)
2589                 op_cost = shift_cost[speed][mode][m];
2590               new_limit.cost = best_cost.cost - op_cost;
2591               new_limit.latency = best_cost.latency - op_cost;
2592               synth_mult (alg_in, q, &new_limit, mode);
2593
2594               alg_in->cost.cost += op_cost;
2595               alg_in->cost.latency += op_cost;
2596               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2597                 {
2598                   struct algorithm *x;
2599                   best_cost = alg_in->cost;
2600                   x = alg_in, alg_in = best_alg, best_alg = x;
2601                   best_alg->log[best_alg->ops] = m;
2602                   best_alg->op[best_alg->ops] = alg_shift;
2603                 }
2604             }
2605         }
2606       if (cache_hit)
2607         goto done;
2608     }
2609
2610   /* If we have an odd number, add or subtract one.  */
2611   if ((t & 1) != 0)
2612     {
2613       unsigned HOST_WIDE_INT w;
2614
2615     do_alg_addsub_t_m2:
2616       for (w = 1; (w & t) != 0; w <<= 1)
2617         ;
2618       /* If T was -1, then W will be zero after the loop.  This is another
2619          case where T ends with ...111.  Handling this with (T + 1) and
2620          subtract 1 produces slightly better code and results in algorithm
2621          selection much faster than treating it like the ...0111 case
2622          below.  */
2623       if (w == 0
2624           || (w > 2
2625               /* Reject the case where t is 3.
2626                  Thus we prefer addition in that case.  */
2627               && t != 3))
2628         {
2629           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2630
2631           op_cost = add_cost[speed][mode];
2632           new_limit.cost = best_cost.cost - op_cost;
2633           new_limit.latency = best_cost.latency - op_cost;
2634           synth_mult (alg_in, t + 1, &new_limit, mode);
2635
2636           alg_in->cost.cost += op_cost;
2637           alg_in->cost.latency += op_cost;
2638           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2639             {
2640               struct algorithm *x;
2641               best_cost = alg_in->cost;
2642               x = alg_in, alg_in = best_alg, best_alg = x;
2643               best_alg->log[best_alg->ops] = 0;
2644               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2645             }
2646         }
2647       else
2648         {
2649           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2650
2651           op_cost = add_cost[speed][mode];
2652           new_limit.cost = best_cost.cost - op_cost;
2653           new_limit.latency = best_cost.latency - op_cost;
2654           synth_mult (alg_in, t - 1, &new_limit, mode);
2655
2656           alg_in->cost.cost += op_cost;
2657           alg_in->cost.latency += op_cost;
2658           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2659             {
2660               struct algorithm *x;
2661               best_cost = alg_in->cost;
2662               x = alg_in, alg_in = best_alg, best_alg = x;
2663               best_alg->log[best_alg->ops] = 0;
2664               best_alg->op[best_alg->ops] = alg_add_t_m2;
2665             }
2666         }
2667
2668       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2669          quickly with a - a * n for some appropriate constant n.  */
2670       m = exact_log2 (-orig_t + 1);
2671       if (m >= 0 && m < maxm)
2672         {
2673           op_cost = shiftsub1_cost[speed][mode][m];
2674           new_limit.cost = best_cost.cost - op_cost;
2675           new_limit.latency = best_cost.latency - op_cost;
2676           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2677
2678           alg_in->cost.cost += op_cost;
2679           alg_in->cost.latency += op_cost;
2680           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2681             {
2682               struct algorithm *x;
2683               best_cost = alg_in->cost;
2684               x = alg_in, alg_in = best_alg, best_alg = x;
2685               best_alg->log[best_alg->ops] = m;
2686               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2687             }
2688         }
2689
2690       if (cache_hit)
2691         goto done;
2692     }
2693
2694   /* Look for factors of t of the form
2695      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2696      If we find such a factor, we can multiply by t using an algorithm that
2697      multiplies by q, shift the result by m and add/subtract it to itself.
2698
2699      We search for large factors first and loop down, even if large factors
2700      are less probable than small; if we find a large factor we will find a
2701      good sequence quickly, and therefore be able to prune (by decreasing
2702      COST_LIMIT) the search.  */
2703
2704  do_alg_addsub_factor:
2705   for (m = floor_log2 (t - 1); m >= 2; m--)
2706     {
2707       unsigned HOST_WIDE_INT d;
2708
2709       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2710       if (t % d == 0 && t > d && m < maxm
2711           && (!cache_hit || cache_alg == alg_add_factor))
2712         {
2713           /* If the target has a cheap shift-and-add instruction use
2714              that in preference to a shift insn followed by an add insn.
2715              Assume that the shift-and-add is "atomic" with a latency
2716              equal to its cost, otherwise assume that on superscalar
2717              hardware the shift may be executed concurrently with the
2718              earlier steps in the algorithm.  */
2719           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2720           if (shiftadd_cost[speed][mode][m] < op_cost)
2721             {
2722               op_cost = shiftadd_cost[speed][mode][m];
2723               op_latency = op_cost;
2724             }
2725           else
2726             op_latency = add_cost[speed][mode];
2727
2728           new_limit.cost = best_cost.cost - op_cost;
2729           new_limit.latency = best_cost.latency - op_latency;
2730           synth_mult (alg_in, t / d, &new_limit, mode);
2731
2732           alg_in->cost.cost += op_cost;
2733           alg_in->cost.latency += op_latency;
2734           if (alg_in->cost.latency < op_cost)
2735             alg_in->cost.latency = op_cost;
2736           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2737             {
2738               struct algorithm *x;
2739               best_cost = alg_in->cost;
2740               x = alg_in, alg_in = best_alg, best_alg = x;
2741               best_alg->log[best_alg->ops] = m;
2742               best_alg->op[best_alg->ops] = alg_add_factor;
2743             }
2744           /* Other factors will have been taken care of in the recursion.  */
2745           break;
2746         }
2747
2748       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2749       if (t % d == 0 && t > d && m < maxm
2750           && (!cache_hit || cache_alg == alg_sub_factor))
2751         {
2752           /* If the target has a cheap shift-and-subtract insn use
2753              that in preference to a shift insn followed by a sub insn.
2754              Assume that the shift-and-sub is "atomic" with a latency
2755              equal to it's cost, otherwise assume that on superscalar
2756              hardware the shift may be executed concurrently with the
2757              earlier steps in the algorithm.  */
2758           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2759           if (shiftsub0_cost[speed][mode][m] < op_cost)
2760             {
2761               op_cost = shiftsub0_cost[speed][mode][m];
2762               op_latency = op_cost;
2763             }
2764           else
2765             op_latency = add_cost[speed][mode];
2766
2767           new_limit.cost = best_cost.cost - op_cost;
2768           new_limit.latency = best_cost.latency - op_latency;
2769           synth_mult (alg_in, t / d, &new_limit, mode);
2770
2771           alg_in->cost.cost += op_cost;
2772           alg_in->cost.latency += op_latency;
2773           if (alg_in->cost.latency < op_cost)
2774             alg_in->cost.latency = op_cost;
2775           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2776             {
2777               struct algorithm *x;
2778               best_cost = alg_in->cost;
2779               x = alg_in, alg_in = best_alg, best_alg = x;
2780               best_alg->log[best_alg->ops] = m;
2781               best_alg->op[best_alg->ops] = alg_sub_factor;
2782             }
2783           break;
2784         }
2785     }
2786   if (cache_hit)
2787     goto done;
2788
2789   /* Try shift-and-add (load effective address) instructions,
2790      i.e. do a*3, a*5, a*9.  */
2791   if ((t & 1) != 0)
2792     {
2793     do_alg_add_t2_m:
2794       q = t - 1;
2795       q = q & -q;
2796       m = exact_log2 (q);
2797       if (m >= 0 && m < maxm)
2798         {
2799           op_cost = shiftadd_cost[speed][mode][m];
2800           new_limit.cost = best_cost.cost - op_cost;
2801           new_limit.latency = best_cost.latency - op_cost;
2802           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2803
2804           alg_in->cost.cost += op_cost;
2805           alg_in->cost.latency += op_cost;
2806           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2807             {
2808               struct algorithm *x;
2809               best_cost = alg_in->cost;
2810               x = alg_in, alg_in = best_alg, best_alg = x;
2811               best_alg->log[best_alg->ops] = m;
2812               best_alg->op[best_alg->ops] = alg_add_t2_m;
2813             }
2814         }
2815       if (cache_hit)
2816         goto done;
2817
2818     do_alg_sub_t2_m:
2819       q = t + 1;
2820       q = q & -q;
2821       m = exact_log2 (q);
2822       if (m >= 0 && m < maxm)
2823         {
2824           op_cost = shiftsub0_cost[speed][mode][m];
2825           new_limit.cost = best_cost.cost - op_cost;
2826           new_limit.latency = best_cost.latency - op_cost;
2827           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2828
2829           alg_in->cost.cost += op_cost;
2830           alg_in->cost.latency += op_cost;
2831           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2832             {
2833               struct algorithm *x;
2834               best_cost = alg_in->cost;
2835               x = alg_in, alg_in = best_alg, best_alg = x;
2836               best_alg->log[best_alg->ops] = m;
2837               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2838             }
2839         }
2840       if (cache_hit)
2841         goto done;
2842     }
2843
2844  done:
2845   /* If best_cost has not decreased, we have not found any algorithm.  */
2846   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2847     {
2848       /* We failed to find an algorithm.  Record alg_impossible for
2849          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2850          we are asked to find an algorithm for T within the same or
2851          lower COST_LIMIT, we can immediately return to the
2852          caller.  */
2853       alg_hash[hash_index].t = t;
2854       alg_hash[hash_index].mode = mode;
2855       alg_hash[hash_index].speed = speed;
2856       alg_hash[hash_index].alg = alg_impossible;
2857       alg_hash[hash_index].cost = *cost_limit;
2858       return;
2859     }
2860
2861   /* Cache the result.  */
2862   if (!cache_hit)
2863     {
2864       alg_hash[hash_index].t = t;
2865       alg_hash[hash_index].mode = mode;
2866       alg_hash[hash_index].speed = speed;
2867       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2868       alg_hash[hash_index].cost.cost = best_cost.cost;
2869       alg_hash[hash_index].cost.latency = best_cost.latency;
2870     }
2871
2872   /* If we are getting a too long sequence for `struct algorithm'
2873      to record, make this search fail.  */
2874   if (best_alg->ops == MAX_BITS_PER_WORD)
2875     return;
2876
2877   /* Copy the algorithm from temporary space to the space at alg_out.
2878      We avoid using structure assignment because the majority of
2879      best_alg is normally undefined, and this is a critical function.  */
2880   alg_out->ops = best_alg->ops + 1;
2881   alg_out->cost = best_cost;
2882   memcpy (alg_out->op, best_alg->op,
2883           alg_out->ops * sizeof *alg_out->op);
2884   memcpy (alg_out->log, best_alg->log,
2885           alg_out->ops * sizeof *alg_out->log);
2886 }
2887 \f
2888 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2889    Try three variations:
2890
2891        - a shift/add sequence based on VAL itself
2892        - a shift/add sequence based on -VAL, followed by a negation
2893        - a shift/add sequence based on VAL - 1, followed by an addition.
2894
2895    Return true if the cheapest of these cost less than MULT_COST,
2896    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2897
2898 static bool
2899 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2900                      struct algorithm *alg, enum mult_variant *variant,
2901                      int mult_cost)
2902 {
2903   struct algorithm alg2;
2904   struct mult_cost limit;
2905   int op_cost;
2906   bool speed = optimize_insn_for_speed_p ();
2907
2908   /* Fail quickly for impossible bounds.  */
2909   if (mult_cost < 0)
2910     return false;
2911
2912   /* Ensure that mult_cost provides a reasonable upper bound.
2913      Any constant multiplication can be performed with less
2914      than 2 * bits additions.  */
2915   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2916   if (mult_cost > op_cost)
2917     mult_cost = op_cost;
2918
2919   *variant = basic_variant;
2920   limit.cost = mult_cost;
2921   limit.latency = mult_cost;
2922   synth_mult (alg, val, &limit, mode);
2923
2924   /* This works only if the inverted value actually fits in an
2925      `unsigned int' */
2926   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2927     {
2928       op_cost = neg_cost[speed][mode];
2929       if (MULT_COST_LESS (&alg->cost, mult_cost))
2930         {
2931           limit.cost = alg->cost.cost - op_cost;
2932           limit.latency = alg->cost.latency - op_cost;
2933         }
2934       else
2935         {
2936           limit.cost = mult_cost - op_cost;
2937           limit.latency = mult_cost - op_cost;
2938         }
2939
2940       synth_mult (&alg2, -val, &limit, mode);
2941       alg2.cost.cost += op_cost;
2942       alg2.cost.latency += op_cost;
2943       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2944         *alg = alg2, *variant = negate_variant;
2945     }
2946
2947   /* This proves very useful for division-by-constant.  */
2948   op_cost = add_cost[speed][mode];
2949   if (MULT_COST_LESS (&alg->cost, mult_cost))
2950     {
2951       limit.cost = alg->cost.cost - op_cost;
2952       limit.latency = alg->cost.latency - op_cost;
2953     }
2954   else
2955     {
2956       limit.cost = mult_cost - op_cost;
2957       limit.latency = mult_cost - op_cost;
2958     }
2959
2960   synth_mult (&alg2, val - 1, &limit, mode);
2961   alg2.cost.cost += op_cost;
2962   alg2.cost.latency += op_cost;
2963   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2964     *alg = alg2, *variant = add_variant;
2965
2966   return MULT_COST_LESS (&alg->cost, mult_cost);
2967 }
2968
2969 /* A subroutine of expand_mult, used for constant multiplications.
2970    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2971    convenient.  Use the shift/add sequence described by ALG and apply
2972    the final fixup specified by VARIANT.  */
2973
2974 static rtx
2975 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2976                    rtx target, const struct algorithm *alg,
2977                    enum mult_variant variant)
2978 {
2979   HOST_WIDE_INT val_so_far;
2980   rtx insn, accum, tem;
2981   int opno;
2982   enum machine_mode nmode;
2983
2984   /* Avoid referencing memory over and over and invalid sharing
2985      on SUBREGs.  */
2986   op0 = force_reg (mode, op0);
2987
2988   /* ACCUM starts out either as OP0 or as a zero, depending on
2989      the first operation.  */
2990
2991   if (alg->op[0] == alg_zero)
2992     {
2993       accum = copy_to_mode_reg (mode, const0_rtx);
2994       val_so_far = 0;
2995     }
2996   else if (alg->op[0] == alg_m)
2997     {
2998       accum = copy_to_mode_reg (mode, op0);
2999       val_so_far = 1;
3000     }
3001   else
3002     gcc_unreachable ();
3003
3004   for (opno = 1; opno < alg->ops; opno++)
3005     {
3006       int log = alg->log[opno];
3007       rtx shift_subtarget = optimize ? 0 : accum;
3008       rtx add_target
3009         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3010            && !optimize)
3011           ? target : 0;
3012       rtx accum_target = optimize ? 0 : accum;
3013
3014       switch (alg->op[opno])
3015         {
3016         case alg_shift:
3017           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3018                                 build_int_cst (NULL_TREE, log),
3019                                 NULL_RTX, 0);
3020           val_so_far <<= log;
3021           break;
3022
3023         case alg_add_t_m2:
3024           tem = expand_shift (LSHIFT_EXPR, mode, op0,
3025                               build_int_cst (NULL_TREE, log),
3026                               NULL_RTX, 0);
3027           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3028                                  add_target ? add_target : accum_target);
3029           val_so_far += (HOST_WIDE_INT) 1 << log;
3030           break;
3031
3032         case alg_sub_t_m2:
3033           tem = expand_shift (LSHIFT_EXPR, mode, op0,
3034                               build_int_cst (NULL_TREE, log),
3035                               NULL_RTX, 0);
3036           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3037                                  add_target ? add_target : accum_target);
3038           val_so_far -= (HOST_WIDE_INT) 1 << log;
3039           break;
3040
3041         case alg_add_t2_m:
3042           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3043                                 build_int_cst (NULL_TREE, log),
3044                                 shift_subtarget,
3045                                 0);
3046           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3047                                  add_target ? add_target : accum_target);
3048           val_so_far = (val_so_far << log) + 1;
3049           break;
3050
3051         case alg_sub_t2_m:
3052           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3053                                 build_int_cst (NULL_TREE, log),
3054                                 shift_subtarget, 0);
3055           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3056                                  add_target ? add_target : accum_target);
3057           val_so_far = (val_so_far << log) - 1;
3058           break;
3059
3060         case alg_add_factor:
3061           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3062                               build_int_cst (NULL_TREE, log),
3063                               NULL_RTX, 0);
3064           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3065                                  add_target ? add_target : accum_target);
3066           val_so_far += val_so_far << log;
3067           break;
3068
3069         case alg_sub_factor:
3070           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3071                               build_int_cst (NULL_TREE, log),
3072                               NULL_RTX, 0);
3073           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3074                                  (add_target
3075                                   ? add_target : (optimize ? 0 : tem)));
3076           val_so_far = (val_so_far << log) - val_so_far;
3077           break;
3078
3079         default:
3080           gcc_unreachable ();
3081         }
3082
3083       /* Write a REG_EQUAL note on the last insn so that we can cse
3084          multiplication sequences.  Note that if ACCUM is a SUBREG,
3085          we've set the inner register and must properly indicate
3086          that.  */
3087
3088       tem = op0, nmode = mode;
3089       if (GET_CODE (accum) == SUBREG)
3090         {
3091           nmode = GET_MODE (SUBREG_REG (accum));
3092           tem = gen_lowpart (nmode, op0);
3093         }
3094
3095       insn = get_last_insn ();
3096       set_unique_reg_note (insn, REG_EQUAL,
3097                            gen_rtx_MULT (nmode, tem,
3098                                          GEN_INT (val_so_far)));
3099     }
3100
3101   if (variant == negate_variant)
3102     {
3103       val_so_far = -val_so_far;
3104       accum = expand_unop (mode, neg_optab, accum, target, 0);
3105     }
3106   else if (variant == add_variant)
3107     {
3108       val_so_far = val_so_far + 1;
3109       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3110     }
3111
3112   /* Compare only the bits of val and val_so_far that are significant
3113      in the result mode, to avoid sign-/zero-extension confusion.  */
3114   val &= GET_MODE_MASK (mode);
3115   val_so_far &= GET_MODE_MASK (mode);
3116   gcc_assert (val == val_so_far);
3117
3118   return accum;
3119 }
3120
3121 /* Perform a multiplication and return an rtx for the result.
3122    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3123    TARGET is a suggestion for where to store the result (an rtx).
3124
3125    We check specially for a constant integer as OP1.
3126    If you want this check for OP0 as well, then before calling
3127    you should swap the two operands if OP0 would be constant.  */
3128
3129 rtx
3130 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3131              int unsignedp)
3132 {
3133   enum mult_variant variant;
3134   struct algorithm algorithm;
3135   int max_cost;
3136   bool speed = optimize_insn_for_speed_p ();
3137
3138   /* Handling const0_rtx here allows us to use zero as a rogue value for
3139      coeff below.  */
3140   if (op1 == const0_rtx)
3141     return const0_rtx;
3142   if (op1 == const1_rtx)
3143     return op0;
3144   if (op1 == constm1_rtx)
3145     return expand_unop (mode,
3146                         GET_MODE_CLASS (mode) == MODE_INT
3147                         && !unsignedp && flag_trapv
3148                         ? negv_optab : neg_optab,
3149                         op0, target, 0);
3150
3151   /* These are the operations that are potentially turned into a sequence
3152      of shifts and additions.  */
3153   if (SCALAR_INT_MODE_P (mode)
3154       && (unsignedp || !flag_trapv))
3155     {
3156       HOST_WIDE_INT coeff = 0;
3157       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3158
3159       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3160          less than or equal in size to `unsigned int' this doesn't matter.
3161          If the mode is larger than `unsigned int', then synth_mult works
3162          only if the constant value exactly fits in an `unsigned int' without
3163          any truncation.  This means that multiplying by negative values does
3164          not work; results are off by 2^32 on a 32 bit machine.  */
3165
3166       if (CONST_INT_P (op1))
3167         {
3168           /* Attempt to handle multiplication of DImode values by negative
3169              coefficients, by performing the multiplication by a positive
3170              multiplier and then inverting the result.  */
3171           if (INTVAL (op1) < 0
3172               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3173             {
3174               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3175                  result is interpreted as an unsigned coefficient.
3176                  Exclude cost of op0 from max_cost to match the cost
3177                  calculation of the synth_mult.  */
3178               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3179                          - neg_cost[speed][mode];
3180               if (max_cost > 0
3181                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3182                                           &variant, max_cost))
3183                 {
3184                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3185                                                 NULL_RTX, &algorithm,
3186                                                 variant);
3187                   return expand_unop (mode, neg_optab, temp, target, 0);
3188                 }
3189             }
3190           else coeff = INTVAL (op1);
3191         }
3192       else if (GET_CODE (op1) == CONST_DOUBLE)
3193         {
3194           /* If we are multiplying in DImode, it may still be a win
3195              to try to work with shifts and adds.  */
3196           if (CONST_DOUBLE_HIGH (op1) == 0
3197               && CONST_DOUBLE_LOW (op1) > 0)
3198             coeff = CONST_DOUBLE_LOW (op1);
3199           else if (CONST_DOUBLE_LOW (op1) == 0
3200                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3201             {
3202               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3203                           + HOST_BITS_PER_WIDE_INT;
3204               return expand_shift (LSHIFT_EXPR, mode, op0,
3205                                    build_int_cst (NULL_TREE, shift),
3206                                    target, unsignedp);
3207             }
3208         }
3209
3210       /* We used to test optimize here, on the grounds that it's better to
3211          produce a smaller program when -O is not used.  But this causes
3212          such a terrible slowdown sometimes that it seems better to always
3213          use synth_mult.  */
3214       if (coeff != 0)
3215         {
3216           /* Special case powers of two.  */
3217           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3218             return expand_shift (LSHIFT_EXPR, mode, op0,
3219                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3220                                  target, unsignedp);
3221
3222           /* Exclude cost of op0 from max_cost to match the cost
3223              calculation of the synth_mult.  */
3224           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3225           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3226                                    max_cost))
3227             return expand_mult_const (mode, op0, coeff, target,
3228                                       &algorithm, variant);
3229         }
3230     }
3231
3232   if (GET_CODE (op0) == CONST_DOUBLE)
3233     {
3234       rtx temp = op0;
3235       op0 = op1;
3236       op1 = temp;
3237     }
3238
3239   /* Expand x*2.0 as x+x.  */
3240   if (GET_CODE (op1) == CONST_DOUBLE
3241       && SCALAR_FLOAT_MODE_P (mode))
3242     {
3243       REAL_VALUE_TYPE d;
3244       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3245
3246       if (REAL_VALUES_EQUAL (d, dconst2))
3247         {
3248           op0 = force_reg (GET_MODE (op0), op0);
3249           return expand_binop (mode, add_optab, op0, op0,
3250                                target, unsignedp, OPTAB_LIB_WIDEN);
3251         }
3252     }
3253
3254   /* This used to use umul_optab if unsigned, but for non-widening multiply
3255      there is no difference between signed and unsigned.  */
3256   op0 = expand_binop (mode,
3257                       ! unsignedp
3258                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3259                       ? smulv_optab : smul_optab,
3260                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3261   gcc_assert (op0);
3262   return op0;
3263 }
3264
3265 /* Perform a widening multiplication and return an rtx for the result.
3266    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3267    TARGET is a suggestion for where to store the result (an rtx).
3268    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3269    or smul_widen_optab.
3270
3271    We check specially for a constant integer as OP1, comparing the
3272    cost of a widening multiply against the cost of a sequence of shifts
3273    and adds.  */
3274
3275 rtx
3276 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3277                       int unsignedp, optab this_optab)
3278 {
3279   bool speed = optimize_insn_for_speed_p ();
3280
3281   if (CONST_INT_P (op1)
3282       && (INTVAL (op1) >= 0
3283           || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
3284     {
3285       HOST_WIDE_INT coeff = INTVAL (op1);
3286       int max_cost;
3287       enum mult_variant variant;
3288       struct algorithm algorithm;
3289
3290       /* Special case powers of two.  */
3291       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3292         {
3293           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3294           return expand_shift (LSHIFT_EXPR, mode, op0,
3295                                build_int_cst (NULL_TREE, floor_log2 (coeff)),
3296                                target, unsignedp);
3297         }
3298
3299       /* Exclude cost of op0 from max_cost to match the cost
3300          calculation of the synth_mult.  */
3301       max_cost = mul_widen_cost[speed][mode];
3302       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3303                                max_cost))
3304         {
3305           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3306           return expand_mult_const (mode, op0, coeff, target,
3307                                     &algorithm, variant);
3308         }
3309     }
3310   return expand_binop (mode, this_optab, op0, op1, target,
3311                        unsignedp, OPTAB_LIB_WIDEN);
3312 }
3313 \f
3314 /* Return the smallest n such that 2**n >= X.  */
3315
3316 int
3317 ceil_log2 (unsigned HOST_WIDE_INT x)
3318 {
3319   return floor_log2 (x - 1) + 1;
3320 }
3321
3322 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3323    replace division by D, and put the least significant N bits of the result
3324    in *MULTIPLIER_PTR and return the most significant bit.
3325
3326    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3327    needed precision is in PRECISION (should be <= N).
3328
3329    PRECISION should be as small as possible so this function can choose
3330    multiplier more freely.
3331
3332    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3333    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3334
3335    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3336    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3337
3338 static
3339 unsigned HOST_WIDE_INT
3340 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3341                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3342 {
3343   HOST_WIDE_INT mhigh_hi, mlow_hi;
3344   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3345   int lgup, post_shift;
3346   int pow, pow2;
3347   unsigned HOST_WIDE_INT nl, dummy1;
3348   HOST_WIDE_INT nh, dummy2;
3349
3350   /* lgup = ceil(log2(divisor)); */
3351   lgup = ceil_log2 (d);
3352
3353   gcc_assert (lgup <= n);
3354
3355   pow = n + lgup;
3356   pow2 = n + lgup - precision;
3357
3358   /* We could handle this with some effort, but this case is much
3359      better handled directly with a scc insn, so rely on caller using
3360      that.  */
3361   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3362
3363   /* mlow = 2^(N + lgup)/d */
3364  if (pow >= HOST_BITS_PER_WIDE_INT)
3365     {
3366       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3367       nl = 0;
3368     }
3369   else
3370     {
3371       nh = 0;
3372       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3373     }
3374   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3375                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3376
3377   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3378   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3379     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3380   else
3381     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3382   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3383                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3384
3385   gcc_assert (!mhigh_hi || nh - d < d);
3386   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3387   /* Assert that mlow < mhigh.  */
3388   gcc_assert (mlow_hi < mhigh_hi
3389               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3390
3391   /* If precision == N, then mlow, mhigh exceed 2^N
3392      (but they do not exceed 2^(N+1)).  */
3393
3394   /* Reduce to lowest terms.  */
3395   for (post_shift = lgup; post_shift > 0; post_shift--)
3396     {
3397       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3398       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3399       if (ml_lo >= mh_lo)
3400         break;
3401
3402       mlow_hi = 0;
3403       mlow_lo = ml_lo;
3404       mhigh_hi = 0;
3405       mhigh_lo = mh_lo;
3406     }
3407
3408   *post_shift_ptr = post_shift;
3409   *lgup_ptr = lgup;
3410   if (n < HOST_BITS_PER_WIDE_INT)
3411     {
3412       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3413       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3414       return mhigh_lo >= mask;
3415     }
3416   else
3417     {
3418       *multiplier_ptr = GEN_INT (mhigh_lo);
3419       return mhigh_hi;
3420     }
3421 }
3422
3423 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3424    congruent to 1 (mod 2**N).  */
3425
3426 static unsigned HOST_WIDE_INT
3427 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3428 {
3429   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3430
3431   /* The algorithm notes that the choice y = x satisfies
3432      x*y == 1 mod 2^3, since x is assumed odd.
3433      Each iteration doubles the number of bits of significance in y.  */
3434
3435   unsigned HOST_WIDE_INT mask;
3436   unsigned HOST_WIDE_INT y = x;
3437   int nbit = 3;
3438
3439   mask = (n == HOST_BITS_PER_WIDE_INT
3440           ? ~(unsigned HOST_WIDE_INT) 0
3441           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3442
3443   while (nbit < n)
3444     {
3445       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3446       nbit *= 2;
3447     }
3448   return y;
3449 }
3450
3451 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3452    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3453    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3454    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3455    become signed.
3456
3457    The result is put in TARGET if that is convenient.
3458
3459    MODE is the mode of operation.  */
3460
3461 rtx
3462 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3463                              rtx op1, rtx target, int unsignedp)
3464 {
3465   rtx tem;
3466   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3467
3468   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3469                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3470                       NULL_RTX, 0);
3471   tem = expand_and (mode, tem, op1, NULL_RTX);
3472   adj_operand
3473     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3474                      adj_operand);
3475
3476   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3477                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3478                       NULL_RTX, 0);
3479   tem = expand_and (mode, tem, op0, NULL_RTX);
3480   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3481                           target);
3482
3483   return target;
3484 }
3485
3486 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3487
3488 static rtx
3489 extract_high_half (enum machine_mode mode, rtx op)
3490 {
3491   enum machine_mode wider_mode;
3492
3493   if (mode == word_mode)
3494     return gen_highpart (mode, op);
3495
3496   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3497
3498   wider_mode = GET_MODE_WIDER_MODE (mode);
3499   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3500                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3501   return convert_modes (mode, wider_mode, op, 0);
3502 }
3503
3504 /* Like expand_mult_highpart, but only consider using a multiplication
3505    optab.  OP1 is an rtx for the constant operand.  */
3506
3507 static rtx
3508 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3509                             rtx target, int unsignedp, int max_cost)
3510 {
3511   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3512   enum machine_mode wider_mode;
3513   optab moptab;
3514   rtx tem;
3515   int size;
3516   bool speed = optimize_insn_for_speed_p ();
3517
3518   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3519
3520   wider_mode = GET_MODE_WIDER_MODE (mode);
3521   size = GET_MODE_BITSIZE (mode);
3522
3523   /* Firstly, try using a multiplication insn that only generates the needed
3524      high part of the product, and in the sign flavor of unsignedp.  */
3525   if (mul_highpart_cost[speed][mode] < max_cost)
3526     {
3527       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3528       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3529                           unsignedp, OPTAB_DIRECT);
3530       if (tem)
3531         return tem;
3532     }
3533
3534   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3535      Need to adjust the result after the multiplication.  */
3536   if (size - 1 < BITS_PER_WORD
3537       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3538           + 4 * add_cost[speed][mode] < max_cost))
3539     {
3540       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3541       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3542                           unsignedp, OPTAB_DIRECT);
3543       if (tem)
3544         /* We used the wrong signedness.  Adjust the result.  */
3545         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3546                                             tem, unsignedp);
3547     }
3548
3549   /* Try widening multiplication.  */
3550   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3551   if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3552       && mul_widen_cost[speed][wider_mode] < max_cost)
3553     {
3554       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3555                           unsignedp, OPTAB_WIDEN);
3556       if (tem)
3557         return extract_high_half (mode, tem);
3558     }
3559
3560   /* Try widening the mode and perform a non-widening multiplication.  */
3561   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3562       && size - 1 < BITS_PER_WORD
3563       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3564     {
3565       rtx insns, wop0, wop1;
3566
3567       /* We need to widen the operands, for example to ensure the
3568          constant multiplier is correctly sign or zero extended.
3569          Use a sequence to clean-up any instructions emitted by
3570          the conversions if things don't work out.  */
3571       start_sequence ();
3572       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3573       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3574       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3575                           unsignedp, OPTAB_WIDEN);
3576       insns = get_insns ();
3577       end_sequence ();
3578
3579       if (tem)
3580         {
3581           emit_insn (insns);
3582           return extract_high_half (mode, tem);
3583         }
3584     }
3585
3586   /* Try widening multiplication of opposite signedness, and adjust.  */
3587   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3588   if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3589       && size - 1 < BITS_PER_WORD
3590       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3591           + 4 * add_cost[speed][mode] < max_cost))
3592     {
3593       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3594                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3595       if (tem != 0)
3596         {
3597           tem = extract_high_half (mode, tem);
3598           /* We used the wrong signedness.  Adjust the result.  */
3599           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3600                                               target, unsignedp);
3601         }
3602     }
3603
3604   return 0;
3605 }
3606
3607 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3608    putting the high half of the result in TARGET if that is convenient,
3609    and return where the result is.  If the operation can not be performed,
3610    0 is returned.
3611
3612    MODE is the mode of operation and result.
3613
3614    UNSIGNEDP nonzero means unsigned multiply.
3615
3616    MAX_COST is the total allowed cost for the expanded RTL.  */
3617
3618 static rtx
3619 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3620                       rtx target, int unsignedp, int max_cost)
3621 {
3622   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3623   unsigned HOST_WIDE_INT cnst1;
3624   int extra_cost;
3625   bool sign_adjust = false;
3626   enum mult_variant variant;
3627   struct algorithm alg;
3628   rtx tem;
3629   bool speed = optimize_insn_for_speed_p ();
3630
3631   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3632   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3633   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3634
3635   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3636
3637   /* We can't optimize modes wider than BITS_PER_WORD.
3638      ??? We might be able to perform double-word arithmetic if
3639      mode == word_mode, however all the cost calculations in
3640      synth_mult etc. assume single-word operations.  */
3641   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3642     return expand_mult_highpart_optab (mode, op0, op1, target,
3643                                        unsignedp, max_cost);
3644
3645   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3646
3647   /* Check whether we try to multiply by a negative constant.  */
3648   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3649     {
3650       sign_adjust = true;
3651       extra_cost += add_cost[speed][mode];
3652     }
3653
3654   /* See whether shift/add multiplication is cheap enough.  */
3655   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3656                            max_cost - extra_cost))
3657     {
3658       /* See whether the specialized multiplication optabs are
3659          cheaper than the shift/add version.  */
3660       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3661                                         alg.cost.cost + extra_cost);
3662       if (tem)
3663         return tem;
3664
3665       tem = convert_to_mode (wider_mode, op0, unsignedp);
3666       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3667       tem = extract_high_half (mode, tem);
3668
3669       /* Adjust result for signedness.  */
3670       if (sign_adjust)
3671         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3672
3673       return tem;
3674     }
3675   return expand_mult_highpart_optab (mode, op0, op1, target,
3676                                      unsignedp, max_cost);
3677 }
3678
3679
3680 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3681
3682 static rtx
3683 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3684 {
3685   unsigned HOST_WIDE_INT masklow, maskhigh;
3686   rtx result, temp, shift, label;
3687   int logd;
3688
3689   logd = floor_log2 (d);
3690   result = gen_reg_rtx (mode);
3691
3692   /* Avoid conditional branches when they're expensive.  */
3693   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3694       && optimize_insn_for_speed_p ())
3695     {
3696       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3697                                       mode, 0, -1);
3698       if (signmask)
3699         {
3700           signmask = force_reg (mode, signmask);
3701           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3702           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3703
3704           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3705              which instruction sequence to use.  If logical right shifts
3706              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3707              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3708
3709           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3710           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3711               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3712             {
3713               temp = expand_binop (mode, xor_optab, op0, signmask,
3714                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3715               temp = expand_binop (mode, sub_optab, temp, signmask,
3716                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3717               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3718                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3719               temp = expand_binop (mode, xor_optab, temp, signmask,
3720                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3721               temp = expand_binop (mode, sub_optab, temp, signmask,
3722                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3723             }
3724           else
3725             {
3726               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3727                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3728               signmask = force_reg (mode, signmask);
3729
3730               temp = expand_binop (mode, add_optab, op0, signmask,
3731                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3732               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3733                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3734               temp = expand_binop (mode, sub_optab, temp, signmask,
3735                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3736             }
3737           return temp;
3738         }
3739     }
3740
3741   /* Mask contains the mode's signbit and the significant bits of the
3742      modulus.  By including the signbit in the operation, many targets
3743      can avoid an explicit compare operation in the following comparison
3744      against zero.  */
3745
3746   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3747   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3748     {
3749       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3750       maskhigh = -1;
3751     }
3752   else
3753     maskhigh = (HOST_WIDE_INT) -1
3754                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3755
3756   temp = expand_binop (mode, and_optab, op0,
3757                        immed_double_const (masklow, maskhigh, mode),
3758                        result, 1, OPTAB_LIB_WIDEN);
3759   if (temp != result)
3760     emit_move_insn (result, temp);
3761
3762   label = gen_label_rtx ();
3763   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3764
3765   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3766                        0, OPTAB_LIB_WIDEN);
3767   masklow = (HOST_WIDE_INT) -1 << logd;
3768   maskhigh = -1;
3769   temp = expand_binop (mode, ior_optab, temp,
3770                        immed_double_const (masklow, maskhigh, mode),
3771                        result, 1, OPTAB_LIB_WIDEN);
3772   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3773                        0, OPTAB_LIB_WIDEN);
3774   if (temp != result)
3775     emit_move_insn (result, temp);
3776   emit_label (label);
3777   return result;
3778 }
3779
3780 /* Expand signed division of OP0 by a power of two D in mode MODE.
3781    This routine is only called for positive values of D.  */
3782
3783 static rtx
3784 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3785 {
3786   rtx temp, label;
3787   tree shift;
3788   int logd;
3789
3790   logd = floor_log2 (d);
3791   shift = build_int_cst (NULL_TREE, logd);
3792
3793   if (d == 2
3794       && BRANCH_COST (optimize_insn_for_speed_p (),
3795                       false) >= 1)
3796     {
3797       temp = gen_reg_rtx (mode);
3798       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3799       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3800                            0, OPTAB_LIB_WIDEN);
3801       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3802     }
3803
3804 #ifdef HAVE_conditional_move
3805   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3806       >= 2)
3807     {
3808       rtx temp2;
3809
3810       /* ??? emit_conditional_move forces a stack adjustment via
3811          compare_from_rtx so, if the sequence is discarded, it will
3812          be lost.  Do it now instead.  */
3813       do_pending_stack_adjust ();
3814
3815       start_sequence ();
3816       temp2 = copy_to_mode_reg (mode, op0);
3817       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3818                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3819       temp = force_reg (mode, temp);
3820
3821       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3822       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3823                                      mode, temp, temp2, mode, 0);
3824       if (temp2)
3825         {
3826           rtx seq = get_insns ();
3827           end_sequence ();
3828           emit_insn (seq);
3829           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3830         }
3831       end_sequence ();
3832     }
3833 #endif
3834
3835   if (BRANCH_COST (optimize_insn_for_speed_p (),
3836                    false) >= 2)
3837     {
3838       int ushift = GET_MODE_BITSIZE (mode) - logd;
3839
3840       temp = gen_reg_rtx (mode);
3841       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3842       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3843         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3844                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3845       else
3846         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3847                              build_int_cst (NULL_TREE, ushift),
3848                              NULL_RTX, 1);
3849       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3850                            0, OPTAB_LIB_WIDEN);
3851       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3852     }
3853
3854   label = gen_label_rtx ();
3855   temp = copy_to_mode_reg (mode, op0);
3856   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3857   expand_inc (temp, GEN_INT (d - 1));
3858   emit_label (label);
3859   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3860 }
3861 \f
3862 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3863    if that is convenient, and returning where the result is.
3864    You may request either the quotient or the remainder as the result;
3865    specify REM_FLAG nonzero to get the remainder.
3866
3867    CODE is the expression code for which kind of division this is;
3868    it controls how rounding is done.  MODE is the machine mode to use.
3869    UNSIGNEDP nonzero means do unsigned division.  */
3870
3871 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3872    and then correct it by or'ing in missing high bits
3873    if result of ANDI is nonzero.
3874    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3875    This could optimize to a bfexts instruction.
3876    But C doesn't use these operations, so their optimizations are
3877    left for later.  */
3878 /* ??? For modulo, we don't actually need the highpart of the first product,
3879    the low part will do nicely.  And for small divisors, the second multiply
3880    can also be a low-part only multiply or even be completely left out.
3881    E.g. to calculate the remainder of a division by 3 with a 32 bit
3882    multiply, multiply with 0x55555556 and extract the upper two bits;
3883    the result is exact for inputs up to 0x1fffffff.
3884    The input range can be reduced by using cross-sum rules.
3885    For odd divisors >= 3, the following table gives right shift counts
3886    so that if a number is shifted by an integer multiple of the given
3887    amount, the remainder stays the same:
3888    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3889    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3890    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3891    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3892    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3893
3894    Cross-sum rules for even numbers can be derived by leaving as many bits
3895    to the right alone as the divisor has zeros to the right.
3896    E.g. if x is an unsigned 32 bit number:
3897    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3898    */
3899
3900 rtx
3901 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3902                rtx op0, rtx op1, rtx target, int unsignedp)
3903 {
3904   enum machine_mode compute_mode;
3905   rtx tquotient;
3906   rtx quotient = 0, remainder = 0;
3907   rtx last;
3908   int size;
3909   rtx insn, set;
3910   optab optab1, optab2;
3911   int op1_is_constant, op1_is_pow2 = 0;
3912   int max_cost, extra_cost;
3913   static HOST_WIDE_INT last_div_const = 0;
3914   static HOST_WIDE_INT ext_op1;
3915   bool speed = optimize_insn_for_speed_p ();
3916
3917   op1_is_constant = CONST_INT_P (op1);
3918   if (op1_is_constant)
3919     {
3920       ext_op1 = INTVAL (op1);
3921       if (unsignedp)
3922         ext_op1 &= GET_MODE_MASK (mode);
3923       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3924                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3925     }
3926
3927   /*
3928      This is the structure of expand_divmod:
3929
3930      First comes code to fix up the operands so we can perform the operations
3931      correctly and efficiently.
3932
3933      Second comes a switch statement with code specific for each rounding mode.
3934      For some special operands this code emits all RTL for the desired
3935      operation, for other cases, it generates only a quotient and stores it in
3936      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3937      to indicate that it has not done anything.
3938
3939      Last comes code that finishes the operation.  If QUOTIENT is set and
3940      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3941      QUOTIENT is not set, it is computed using trunc rounding.
3942
3943      We try to generate special code for division and remainder when OP1 is a
3944      constant.  If |OP1| = 2**n we can use shifts and some other fast
3945      operations.  For other values of OP1, we compute a carefully selected
3946      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3947      by m.
3948
3949      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3950      half of the product.  Different strategies for generating the product are
3951      implemented in expand_mult_highpart.
3952
3953      If what we actually want is the remainder, we generate that by another
3954      by-constant multiplication and a subtraction.  */
3955
3956   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3957      code below will malfunction if we are, so check here and handle
3958      the special case if so.  */
3959   if (op1 == const1_rtx)
3960     return rem_flag ? const0_rtx : op0;
3961
3962     /* When dividing by -1, we could get an overflow.
3963      negv_optab can handle overflows.  */
3964   if (! unsignedp && op1 == constm1_rtx)
3965     {
3966       if (rem_flag)
3967         return const0_rtx;
3968       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3969                           ? negv_optab : neg_optab, op0, target, 0);
3970     }
3971
3972   if (target
3973       /* Don't use the function value register as a target
3974          since we have to read it as well as write it,
3975          and function-inlining gets confused by this.  */
3976       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3977           /* Don't clobber an operand while doing a multi-step calculation.  */
3978           || ((rem_flag || op1_is_constant)
3979               && (reg_mentioned_p (target, op0)
3980                   || (MEM_P (op0) && MEM_P (target))))
3981           || reg_mentioned_p (target, op1)
3982           || (MEM_P (op1) && MEM_P (target))))
3983     target = 0;
3984
3985   /* Get the mode in which to perform this computation.  Normally it will
3986      be MODE, but sometimes we can't do the desired operation in MODE.
3987      If so, pick a wider mode in which we can do the operation.  Convert
3988      to that mode at the start to avoid repeated conversions.
3989
3990      First see what operations we need.  These depend on the expression
3991      we are evaluating.  (We assume that divxx3 insns exist under the
3992      same conditions that modxx3 insns and that these insns don't normally
3993      fail.  If these assumptions are not correct, we may generate less
3994      efficient code in some cases.)
3995
3996      Then see if we find a mode in which we can open-code that operation
3997      (either a division, modulus, or shift).  Finally, check for the smallest
3998      mode for which we can do the operation with a library call.  */
3999
4000   /* We might want to refine this now that we have division-by-constant
4001      optimization.  Since expand_mult_highpart tries so many variants, it is
4002      not straightforward to generalize this.  Maybe we should make an array
4003      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4004
4005   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4006             ? (unsignedp ? lshr_optab : ashr_optab)
4007             : (unsignedp ? udiv_optab : sdiv_optab));
4008   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4009             ? optab1
4010             : (unsignedp ? udivmod_optab : sdivmod_optab));
4011
4012   for (compute_mode = mode; compute_mode != VOIDmode;
4013        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4014     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4015         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4016       break;
4017
4018   if (compute_mode == VOIDmode)
4019     for (compute_mode = mode; compute_mode != VOIDmode;
4020          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4021       if (optab_libfunc (optab1, compute_mode)
4022           || optab_libfunc (optab2, compute_mode))
4023         break;
4024
4025   /* If we still couldn't find a mode, use MODE, but expand_binop will
4026      probably die.  */
4027   if (compute_mode == VOIDmode)
4028     compute_mode = mode;
4029
4030   if (target && GET_MODE (target) == compute_mode)
4031     tquotient = target;
4032   else
4033     tquotient = gen_reg_rtx (compute_mode);
4034
4035   size = GET_MODE_BITSIZE (compute_mode);
4036 #if 0
4037   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4038      (mode), and thereby get better code when OP1 is a constant.  Do that
4039      later.  It will require going over all usages of SIZE below.  */
4040   size = GET_MODE_BITSIZE (mode);
4041 #endif
4042
4043   /* Only deduct something for a REM if the last divide done was
4044      for a different constant.   Then set the constant of the last
4045      divide.  */
4046   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
4047   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4048                      && INTVAL (op1) == last_div_const))
4049     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4050
4051   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4052
4053   /* Now convert to the best mode to use.  */
4054   if (compute_mode != mode)
4055     {
4056       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4057       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4058
4059       /* convert_modes may have placed op1 into a register, so we
4060          must recompute the following.  */
4061       op1_is_constant = CONST_INT_P (op1);
4062       op1_is_pow2 = (op1_is_constant
4063                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4064                           || (! unsignedp
4065                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4066     }
4067
4068   /* If one of the operands is a volatile MEM, copy it into a register.  */
4069
4070   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4071     op0 = force_reg (compute_mode, op0);
4072   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4073     op1 = force_reg (compute_mode, op1);
4074
4075   /* If we need the remainder or if OP1 is constant, we need to
4076      put OP0 in a register in case it has any queued subexpressions.  */
4077   if (rem_flag || op1_is_constant)
4078     op0 = force_reg (compute_mode, op0);
4079
4080   last = get_last_insn ();
4081
4082   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4083   if (unsignedp)
4084     {
4085       if (code == FLOOR_DIV_EXPR)
4086         code = TRUNC_DIV_EXPR;
4087       if (code == FLOOR_MOD_EXPR)
4088         code = TRUNC_MOD_EXPR;
4089       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4090         code = TRUNC_DIV_EXPR;
4091     }
4092
4093   if (op1 != const0_rtx)
4094     switch (code)
4095       {
4096       case TRUNC_MOD_EXPR:
4097       case TRUNC_DIV_EXPR:
4098         if (op1_is_constant)
4099           {
4100             if (unsignedp)
4101               {
4102                 unsigned HOST_WIDE_INT mh;
4103                 int pre_shift, post_shift;
4104                 int dummy;
4105                 rtx ml;
4106                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4107                                             & GET_MODE_MASK (compute_mode));
4108
4109                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4110                   {
4111                     pre_shift = floor_log2 (d);
4112                     if (rem_flag)
4113                       {
4114                         remainder
4115                           = expand_binop (compute_mode, and_optab, op0,
4116                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4117                                           remainder, 1,
4118                                           OPTAB_LIB_WIDEN);
4119                         if (remainder)
4120                           return gen_lowpart (mode, remainder);
4121                       }
4122                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4123                                              build_int_cst (NULL_TREE,
4124                                                             pre_shift),
4125                                              tquotient, 1);
4126                   }
4127                 else if (size <= HOST_BITS_PER_WIDE_INT)
4128                   {
4129                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4130                       {
4131                         /* Most significant bit of divisor is set; emit an scc
4132                            insn.  */
4133                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4134                                                           compute_mode, 1, 1);
4135                       }
4136                     else
4137                       {
4138                         /* Find a suitable multiplier and right shift count
4139                            instead of multiplying with D.  */
4140
4141                         mh = choose_multiplier (d, size, size,
4142                                                 &ml, &post_shift, &dummy);
4143
4144                         /* If the suggested multiplier is more than SIZE bits,
4145                            we can do better for even divisors, using an
4146                            initial right shift.  */
4147                         if (mh != 0 && (d & 1) == 0)
4148                           {
4149                             pre_shift = floor_log2 (d & -d);
4150                             mh = choose_multiplier (d >> pre_shift, size,
4151                                                     size - pre_shift,
4152                                                     &ml, &post_shift, &dummy);
4153                             gcc_assert (!mh);
4154                           }
4155                         else
4156                           pre_shift = 0;
4157
4158                         if (mh != 0)
4159                           {
4160                             rtx t1, t2, t3, t4;
4161
4162                             if (post_shift - 1 >= BITS_PER_WORD)
4163                               goto fail1;
4164
4165                             extra_cost
4166                               = (shift_cost[speed][compute_mode][post_shift - 1]
4167                                  + shift_cost[speed][compute_mode][1]
4168                                  + 2 * add_cost[speed][compute_mode]);
4169                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4170                                                        NULL_RTX, 1,
4171                                                        max_cost - extra_cost);
4172                             if (t1 == 0)
4173                               goto fail1;
4174                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4175                                                                op0, t1),
4176                                                 NULL_RTX);
4177                             t3 = expand_shift
4178                               (RSHIFT_EXPR, compute_mode, t2,
4179                                build_int_cst (NULL_TREE, 1),
4180                                NULL_RTX,1);
4181                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4182                                                               t1, t3),
4183                                                 NULL_RTX);
4184                             quotient = expand_shift
4185                               (RSHIFT_EXPR, compute_mode, t4,
4186                                build_int_cst (NULL_TREE, post_shift - 1),
4187                                tquotient, 1);
4188                           }
4189                         else
4190                           {
4191                             rtx t1, t2;
4192
4193                             if (pre_shift >= BITS_PER_WORD
4194                                 || post_shift >= BITS_PER_WORD)
4195                               goto fail1;
4196
4197                             t1 = expand_shift
4198                               (RSHIFT_EXPR, compute_mode, op0,
4199                                build_int_cst (NULL_TREE, pre_shift),
4200                                NULL_RTX, 1);
4201                             extra_cost
4202                               = (shift_cost[speed][compute_mode][pre_shift]
4203                                  + shift_cost[speed][compute_mode][post_shift]);
4204                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4205                                                        NULL_RTX, 1,
4206                                                        max_cost - extra_cost);
4207                             if (t2 == 0)
4208                               goto fail1;
4209                             quotient = expand_shift
4210                               (RSHIFT_EXPR, compute_mode, t2,
4211                                build_int_cst (NULL_TREE, post_shift),
4212                                tquotient, 1);
4213                           }
4214                       }
4215                   }
4216                 else            /* Too wide mode to use tricky code */
4217                   break;
4218
4219                 insn = get_last_insn ();
4220                 if (insn != last
4221                     && (set = single_set (insn)) != 0
4222                     && SET_DEST (set) == quotient)
4223                   set_unique_reg_note (insn,
4224                                        REG_EQUAL,
4225                                        gen_rtx_UDIV (compute_mode, op0, op1));
4226               }
4227             else                /* TRUNC_DIV, signed */
4228               {
4229                 unsigned HOST_WIDE_INT ml;
4230                 int lgup, post_shift;
4231                 rtx mlr;
4232                 HOST_WIDE_INT d = INTVAL (op1);
4233                 unsigned HOST_WIDE_INT abs_d;
4234
4235                 /* Since d might be INT_MIN, we have to cast to
4236                    unsigned HOST_WIDE_INT before negating to avoid
4237                    undefined signed overflow.  */
4238                 abs_d = (d >= 0
4239                          ? (unsigned HOST_WIDE_INT) d
4240                          : - (unsigned HOST_WIDE_INT) d);
4241
4242                 /* n rem d = n rem -d */
4243                 if (rem_flag && d < 0)
4244                   {
4245                     d = abs_d;
4246                     op1 = gen_int_mode (abs_d, compute_mode);
4247                   }
4248
4249                 if (d == 1)
4250                   quotient = op0;
4251                 else if (d == -1)
4252                   quotient = expand_unop (compute_mode, neg_optab, op0,
4253                                           tquotient, 0);
4254                 else if (HOST_BITS_PER_WIDE_INT >= size
4255                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4256                   {
4257                     /* This case is not handled correctly below.  */
4258                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4259                                                 compute_mode, 1, 1);
4260                     if (quotient == 0)
4261                       goto fail1;
4262                   }
4263                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4264                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4265                                       : sdiv_pow2_cheap[speed][compute_mode])
4266                          /* We assume that cheap metric is true if the
4267                             optab has an expander for this mode.  */
4268                          && ((optab_handler ((rem_flag ? smod_optab
4269                                               : sdiv_optab),
4270                                              compute_mode)
4271                               != CODE_FOR_nothing)
4272                              || (optab_handler (sdivmod_optab,
4273                                                 compute_mode)
4274                                  != CODE_FOR_nothing)))
4275                   ;
4276                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4277                   {
4278                     if (rem_flag)
4279                       {
4280                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4281                         if (remainder)
4282                           return gen_lowpart (mode, remainder);
4283                       }
4284
4285                     if (sdiv_pow2_cheap[speed][compute_mode]
4286                         && ((optab_handler (sdiv_optab, compute_mode)
4287                              != CODE_FOR_nothing)
4288                             || (optab_handler (sdivmod_optab, compute_mode)
4289                                 != CODE_FOR_nothing)))
4290                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4291                                                 compute_mode, op0,
4292                                                 gen_int_mode (abs_d,
4293                                                               compute_mode),
4294                                                 NULL_RTX, 0);
4295                     else
4296                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4297
4298                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4299                        negate the quotient.  */
4300                     if (d < 0)
4301                       {
4302                         insn = get_last_insn ();
4303                         if (insn != last
4304                             && (set = single_set (insn)) != 0
4305                             && SET_DEST (set) == quotient
4306                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4307                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4308                           set_unique_reg_note (insn,
4309                                                REG_EQUAL,
4310                                                gen_rtx_DIV (compute_mode,
4311                                                             op0,
4312                                                             GEN_INT
4313                                                             (trunc_int_for_mode
4314                                                              (abs_d,
4315                                                               compute_mode))));
4316
4317                         quotient = expand_unop (compute_mode, neg_optab,
4318                                                 quotient, quotient, 0);
4319                       }
4320                   }
4321                 else if (size <= HOST_BITS_PER_WIDE_INT)
4322                   {
4323                     choose_multiplier (abs_d, size, size - 1,
4324                                        &mlr, &post_shift, &lgup);
4325                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4326                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4327                       {
4328                         rtx t1, t2, t3;
4329
4330                         if (post_shift >= BITS_PER_WORD
4331                             || size - 1 >= BITS_PER_WORD)
4332                           goto fail1;
4333
4334                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4335                                       + shift_cost[speed][compute_mode][size - 1]
4336                                       + add_cost[speed][compute_mode]);
4337                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4338                                                    NULL_RTX, 0,
4339                                                    max_cost - extra_cost);
4340                         if (t1 == 0)
4341                           goto fail1;
4342                         t2 = expand_shift
4343                           (RSHIFT_EXPR, compute_mode, t1,
4344                            build_int_cst (NULL_TREE, post_shift),
4345                            NULL_RTX, 0);
4346                         t3 = expand_shift
4347                           (RSHIFT_EXPR, compute_mode, op0,
4348                            build_int_cst (NULL_TREE, size - 1),
4349                            NULL_RTX, 0);
4350                         if (d < 0)
4351                           quotient
4352                             = force_operand (gen_rtx_MINUS (compute_mode,
4353                                                             t3, t2),
4354                                              tquotient);
4355                         else
4356                           quotient
4357                             = force_operand (gen_rtx_MINUS (compute_mode,
4358                                                             t2, t3),
4359                                              tquotient);
4360                       }
4361                     else
4362                       {
4363                         rtx t1, t2, t3, t4;
4364
4365                         if (post_shift >= BITS_PER_WORD
4366                             || size - 1 >= BITS_PER_WORD)
4367                           goto fail1;
4368
4369                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4370                         mlr = gen_int_mode (ml, compute_mode);
4371                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4372                                       + shift_cost[speed][compute_mode][size - 1]
4373                                       + 2 * add_cost[speed][compute_mode]);
4374                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4375                                                    NULL_RTX, 0,
4376                                                    max_cost - extra_cost);
4377                         if (t1 == 0)
4378                           goto fail1;
4379                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4380                                                           t1, op0),
4381                                             NULL_RTX);
4382                         t3 = expand_shift
4383                           (RSHIFT_EXPR, compute_mode, t2,
4384                            build_int_cst (NULL_TREE, post_shift),
4385                            NULL_RTX, 0);
4386                         t4 = expand_shift
4387                           (RSHIFT_EXPR, compute_mode, op0,
4388                            build_int_cst (NULL_TREE, size - 1),
4389                            NULL_RTX, 0);
4390                         if (d < 0)
4391                           quotient
4392                             = force_operand (gen_rtx_MINUS (compute_mode,
4393                                                             t4, t3),
4394                                              tquotient);
4395                         else
4396                           quotient
4397                             = force_operand (gen_rtx_MINUS (compute_mode,
4398                                                             t3, t4),
4399                                              tquotient);
4400                       }
4401                   }
4402                 else            /* Too wide mode to use tricky code */
4403                   break;
4404
4405                 insn = get_last_insn ();
4406                 if (insn != last
4407                     && (set = single_set (insn)) != 0
4408                     && SET_DEST (set) == quotient)
4409                   set_unique_reg_note (insn,
4410                                        REG_EQUAL,
4411                                        gen_rtx_DIV (compute_mode, op0, op1));
4412               }
4413             break;
4414           }
4415       fail1:
4416         delete_insns_since (last);
4417         break;
4418
4419       case FLOOR_DIV_EXPR:
4420       case FLOOR_MOD_EXPR:
4421       /* We will come here only for signed operations.  */
4422         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4423           {
4424             unsigned HOST_WIDE_INT mh;
4425             int pre_shift, lgup, post_shift;
4426             HOST_WIDE_INT d = INTVAL (op1);
4427             rtx ml;
4428
4429             if (d > 0)
4430               {
4431                 /* We could just as easily deal with negative constants here,
4432                    but it does not seem worth the trouble for GCC 2.6.  */
4433                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4434                   {
4435                     pre_shift = floor_log2 (d);
4436                     if (rem_flag)
4437                       {
4438                         remainder = expand_binop (compute_mode, and_optab, op0,
4439                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4440                                                   remainder, 0, OPTAB_LIB_WIDEN);
4441                         if (remainder)
4442                           return gen_lowpart (mode, remainder);
4443                       }
4444                     quotient = expand_shift
4445                       (RSHIFT_EXPR, compute_mode, op0,
4446                        build_int_cst (NULL_TREE, pre_shift),
4447                        tquotient, 0);
4448                   }
4449                 else
4450                   {
4451                     rtx t1, t2, t3, t4;
4452
4453                     mh = choose_multiplier (d, size, size - 1,
4454                                             &ml, &post_shift, &lgup);
4455                     gcc_assert (!mh);
4456
4457                     if (post_shift < BITS_PER_WORD
4458                         && size - 1 < BITS_PER_WORD)
4459                       {
4460                         t1 = expand_shift
4461                           (RSHIFT_EXPR, compute_mode, op0,
4462                            build_int_cst (NULL_TREE, size - 1),
4463                            NULL_RTX, 0);
4464                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4465                                            NULL_RTX, 0, OPTAB_WIDEN);
4466                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4467                                       + shift_cost[speed][compute_mode][size - 1]
4468                                       + 2 * add_cost[speed][compute_mode]);
4469                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4470                                                    NULL_RTX, 1,
4471                                                    max_cost - extra_cost);
4472                         if (t3 != 0)
4473                           {
4474                             t4 = expand_shift
4475                               (RSHIFT_EXPR, compute_mode, t3,
4476                                build_int_cst (NULL_TREE, post_shift),
4477                                NULL_RTX, 1);
4478                             quotient = expand_binop (compute_mode, xor_optab,
4479                                                      t4, t1, tquotient, 0,
4480                                                      OPTAB_WIDEN);
4481                           }
4482                       }
4483                   }
4484               }
4485             else
4486               {
4487                 rtx nsign, t1, t2, t3, t4;
4488                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4489                                                   op0, constm1_rtx), NULL_RTX);
4490                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4491                                    0, OPTAB_WIDEN);
4492                 nsign = expand_shift
4493                   (RSHIFT_EXPR, compute_mode, t2,
4494                    build_int_cst (NULL_TREE, size - 1),
4495                    NULL_RTX, 0);
4496                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4497                                     NULL_RTX);
4498                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4499                                     NULL_RTX, 0);
4500                 if (t4)
4501                   {
4502                     rtx t5;
4503                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4504                                       NULL_RTX, 0);
4505                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4506                                                             t4, t5),
4507                                               tquotient);
4508                   }
4509               }
4510           }
4511
4512         if (quotient != 0)
4513           break;
4514         delete_insns_since (last);
4515
4516         /* Try using an instruction that produces both the quotient and
4517            remainder, using truncation.  We can easily compensate the quotient
4518            or remainder to get floor rounding, once we have the remainder.
4519            Notice that we compute also the final remainder value here,
4520            and return the result right away.  */
4521         if (target == 0 || GET_MODE (target) != compute_mode)
4522           target = gen_reg_rtx (compute_mode);
4523
4524         if (rem_flag)
4525           {
4526             remainder
4527               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4528             quotient = gen_reg_rtx (compute_mode);
4529           }
4530         else
4531           {
4532             quotient
4533               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4534             remainder = gen_reg_rtx (compute_mode);
4535           }
4536
4537         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4538                                  quotient, remainder, 0))
4539           {
4540             /* This could be computed with a branch-less sequence.
4541                Save that for later.  */
4542             rtx tem;
4543             rtx label = gen_label_rtx ();
4544             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4545             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4546                                 NULL_RTX, 0, OPTAB_WIDEN);
4547             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4548             expand_dec (quotient, const1_rtx);
4549             expand_inc (remainder, op1);
4550             emit_label (label);
4551             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4552           }
4553
4554         /* No luck with division elimination or divmod.  Have to do it
4555            by conditionally adjusting op0 *and* the result.  */
4556         {
4557           rtx label1, label2, label3, label4, label5;
4558           rtx adjusted_op0;
4559           rtx tem;
4560
4561           quotient = gen_reg_rtx (compute_mode);
4562           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4563           label1 = gen_label_rtx ();
4564           label2 = gen_label_rtx ();
4565           label3 = gen_label_rtx ();
4566           label4 = gen_label_rtx ();
4567           label5 = gen_label_rtx ();
4568           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4569           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4570           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4571                               quotient, 0, OPTAB_LIB_WIDEN);
4572           if (tem != quotient)
4573             emit_move_insn (quotient, tem);
4574           emit_jump_insn (gen_jump (label5));
4575           emit_barrier ();
4576           emit_label (label1);
4577           expand_inc (adjusted_op0, const1_rtx);
4578           emit_jump_insn (gen_jump (label4));
4579           emit_barrier ();
4580           emit_label (label2);
4581           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4582           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4583                               quotient, 0, OPTAB_LIB_WIDEN);
4584           if (tem != quotient)
4585             emit_move_insn (quotient, tem);
4586           emit_jump_insn (gen_jump (label5));
4587           emit_barrier ();
4588           emit_label (label3);
4589           expand_dec (adjusted_op0, const1_rtx);
4590           emit_label (label4);
4591           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4592                               quotient, 0, OPTAB_LIB_WIDEN);
4593           if (tem != quotient)
4594             emit_move_insn (quotient, tem);
4595           expand_dec (quotient, const1_rtx);
4596           emit_label (label5);
4597         }
4598         break;
4599
4600       case CEIL_DIV_EXPR:
4601       case CEIL_MOD_EXPR:
4602         if (unsignedp)
4603           {
4604             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4605               {
4606                 rtx t1, t2, t3;
4607                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4608                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4609                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4610                                    tquotient, 1);
4611                 t2 = expand_binop (compute_mode, and_optab, op0,
4612                                    GEN_INT (d - 1),
4613                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4614                 t3 = gen_reg_rtx (compute_mode);
4615                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4616                                       compute_mode, 1, 1);
4617                 if (t3 == 0)
4618                   {
4619                     rtx lab;
4620                     lab = gen_label_rtx ();
4621                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4622                     expand_inc (t1, const1_rtx);
4623                     emit_label (lab);
4624                     quotient = t1;
4625                   }
4626                 else
4627                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4628                                                           t1, t3),
4629                                             tquotient);
4630                 break;
4631               }
4632
4633             /* Try using an instruction that produces both the quotient and
4634                remainder, using truncation.  We can easily compensate the
4635                quotient or remainder to get ceiling rounding, once we have the
4636                remainder.  Notice that we compute also the final remainder
4637                value here, and return the result right away.  */
4638             if (target == 0 || GET_MODE (target) != compute_mode)
4639               target = gen_reg_rtx (compute_mode);
4640
4641             if (rem_flag)
4642               {
4643                 remainder = (REG_P (target)
4644                              ? target : gen_reg_rtx (compute_mode));
4645                 quotient = gen_reg_rtx (compute_mode);
4646               }
4647             else
4648               {
4649                 quotient = (REG_P (target)
4650                             ? target : gen_reg_rtx (compute_mode));
4651                 remainder = gen_reg_rtx (compute_mode);
4652               }
4653
4654             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4655                                      remainder, 1))
4656               {
4657                 /* This could be computed with a branch-less sequence.
4658                    Save that for later.  */
4659                 rtx label = gen_label_rtx ();
4660                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4661                                  compute_mode, label);
4662                 expand_inc (quotient, const1_rtx);
4663                 expand_dec (remainder, op1);
4664                 emit_label (label);
4665                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4666               }
4667
4668             /* No luck with division elimination or divmod.  Have to do it
4669                by conditionally adjusting op0 *and* the result.  */
4670             {
4671               rtx label1, label2;
4672               rtx adjusted_op0, tem;
4673
4674               quotient = gen_reg_rtx (compute_mode);
4675               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4676               label1 = gen_label_rtx ();
4677               label2 = gen_label_rtx ();
4678               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4679                                compute_mode, label1);
4680               emit_move_insn  (quotient, const0_rtx);
4681               emit_jump_insn (gen_jump (label2));
4682               emit_barrier ();
4683               emit_label (label1);
4684               expand_dec (adjusted_op0, const1_rtx);
4685               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4686                                   quotient, 1, OPTAB_LIB_WIDEN);
4687               if (tem != quotient)
4688                 emit_move_insn (quotient, tem);
4689               expand_inc (quotient, const1_rtx);
4690               emit_label (label2);
4691             }
4692           }
4693         else /* signed */
4694           {
4695             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4696                 && INTVAL (op1) >= 0)
4697               {
4698                 /* This is extremely similar to the code for the unsigned case
4699                    above.  For 2.7 we should merge these variants, but for
4700                    2.6.1 I don't want to touch the code for unsigned since that
4701                    get used in C.  The signed case will only be used by other
4702                    languages (Ada).  */
4703
4704                 rtx t1, t2, t3;
4705                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4706                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4707                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4708                                    tquotient, 0);
4709                 t2 = expand_binop (compute_mode, and_optab, op0,
4710                                    GEN_INT (d - 1),
4711                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4712                 t3 = gen_reg_rtx (compute_mode);
4713                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4714                                       compute_mode, 1, 1);
4715                 if (t3 == 0)
4716                   {
4717                     rtx lab;
4718                     lab = gen_label_rtx ();
4719                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4720                     expand_inc (t1, const1_rtx);
4721                     emit_label (lab);
4722                     quotient = t1;
4723                   }
4724                 else
4725                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4726                                                           t1, t3),
4727                                             tquotient);
4728                 break;
4729               }
4730
4731             /* Try using an instruction that produces both the quotient and
4732                remainder, using truncation.  We can easily compensate the
4733                quotient or remainder to get ceiling rounding, once we have the
4734                remainder.  Notice that we compute also the final remainder
4735                value here, and return the result right away.  */
4736             if (target == 0 || GET_MODE (target) != compute_mode)
4737               target = gen_reg_rtx (compute_mode);
4738             if (rem_flag)
4739               {
4740                 remainder= (REG_P (target)
4741                             ? target : gen_reg_rtx (compute_mode));
4742                 quotient = gen_reg_rtx (compute_mode);
4743               }
4744             else
4745               {
4746                 quotient = (REG_P (target)
4747                             ? target : gen_reg_rtx (compute_mode));
4748                 remainder = gen_reg_rtx (compute_mode);
4749               }
4750
4751             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4752                                      remainder, 0))
4753               {
4754                 /* This could be computed with a branch-less sequence.
4755                    Save that for later.  */
4756                 rtx tem;
4757                 rtx label = gen_label_rtx ();
4758                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4759                                  compute_mode, label);
4760                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4761                                     NULL_RTX, 0, OPTAB_WIDEN);
4762                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4763                 expand_inc (quotient, const1_rtx);
4764                 expand_dec (remainder, op1);
4765                 emit_label (label);
4766                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4767               }
4768
4769             /* No luck with division elimination or divmod.  Have to do it
4770                by conditionally adjusting op0 *and* the result.  */
4771             {
4772               rtx label1, label2, label3, label4, label5;
4773               rtx adjusted_op0;
4774               rtx tem;
4775
4776               quotient = gen_reg_rtx (compute_mode);
4777               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4778               label1 = gen_label_rtx ();
4779               label2 = gen_label_rtx ();
4780               label3 = gen_label_rtx ();
4781               label4 = gen_label_rtx ();
4782               label5 = gen_label_rtx ();
4783               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4784               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4785                                compute_mode, label1);
4786               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4787                                   quotient, 0, OPTAB_LIB_WIDEN);
4788               if (tem != quotient)
4789                 emit_move_insn (quotient, tem);
4790               emit_jump_insn (gen_jump (label5));
4791               emit_barrier ();
4792               emit_label (label1);
4793               expand_dec (adjusted_op0, const1_rtx);
4794               emit_jump_insn (gen_jump (label4));
4795               emit_barrier ();
4796               emit_label (label2);
4797               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4798                                compute_mode, label3);
4799               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4800                                   quotient, 0, OPTAB_LIB_WIDEN);
4801               if (tem != quotient)
4802                 emit_move_insn (quotient, tem);
4803               emit_jump_insn (gen_jump (label5));
4804               emit_barrier ();
4805               emit_label (label3);
4806               expand_inc (adjusted_op0, const1_rtx);
4807               emit_label (label4);
4808               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4809                                   quotient, 0, OPTAB_LIB_WIDEN);
4810               if (tem != quotient)
4811                 emit_move_insn (quotient, tem);
4812               expand_inc (quotient, const1_rtx);
4813               emit_label (label5);
4814             }
4815           }
4816         break;
4817
4818       case EXACT_DIV_EXPR:
4819         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4820           {
4821             HOST_WIDE_INT d = INTVAL (op1);
4822             unsigned HOST_WIDE_INT ml;
4823             int pre_shift;
4824             rtx t1;
4825
4826             pre_shift = floor_log2 (d & -d);
4827             ml = invert_mod2n (d >> pre_shift, size);
4828             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4829                                build_int_cst (NULL_TREE, pre_shift),
4830                                NULL_RTX, unsignedp);
4831             quotient = expand_mult (compute_mode, t1,
4832                                     gen_int_mode (ml, compute_mode),
4833                                     NULL_RTX, 1);
4834
4835             insn = get_last_insn ();
4836             set_unique_reg_note (insn,
4837                                  REG_EQUAL,
4838                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4839                                                  compute_mode,
4840                                                  op0, op1));
4841           }
4842         break;
4843
4844       case ROUND_DIV_EXPR:
4845       case ROUND_MOD_EXPR:
4846         if (unsignedp)
4847           {
4848             rtx tem;
4849             rtx label;
4850             label = gen_label_rtx ();
4851             quotient = gen_reg_rtx (compute_mode);
4852             remainder = gen_reg_rtx (compute_mode);
4853             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4854               {
4855                 rtx tem;
4856                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4857                                          quotient, 1, OPTAB_LIB_WIDEN);
4858                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4859                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4860                                           remainder, 1, OPTAB_LIB_WIDEN);
4861               }
4862             tem = plus_constant (op1, -1);
4863             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4864                                 build_int_cst (NULL_TREE, 1),
4865                                 NULL_RTX, 1);
4866             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4867             expand_inc (quotient, const1_rtx);
4868             expand_dec (remainder, op1);
4869             emit_label (label);
4870           }
4871         else
4872           {
4873             rtx abs_rem, abs_op1, tem, mask;
4874             rtx label;
4875             label = gen_label_rtx ();
4876             quotient = gen_reg_rtx (compute_mode);
4877             remainder = gen_reg_rtx (compute_mode);
4878             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4879               {
4880                 rtx tem;
4881                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4882                                          quotient, 0, OPTAB_LIB_WIDEN);
4883                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4884                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4885                                           remainder, 0, OPTAB_LIB_WIDEN);
4886               }
4887             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4888             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4889             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4890                                 build_int_cst (NULL_TREE, 1),
4891                                 NULL_RTX, 1);
4892             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4893             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4894                                 NULL_RTX, 0, OPTAB_WIDEN);
4895             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4896                                  build_int_cst (NULL_TREE, size - 1),
4897                                  NULL_RTX, 0);
4898             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4899                                 NULL_RTX, 0, OPTAB_WIDEN);
4900             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4901                                 NULL_RTX, 0, OPTAB_WIDEN);
4902             expand_inc (quotient, tem);
4903             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4904                                 NULL_RTX, 0, OPTAB_WIDEN);
4905             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4906                                 NULL_RTX, 0, OPTAB_WIDEN);
4907             expand_dec (remainder, tem);
4908             emit_label (label);
4909           }
4910         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4911
4912       default:
4913         gcc_unreachable ();
4914       }
4915
4916   if (quotient == 0)
4917     {
4918       if (target && GET_MODE (target) != compute_mode)
4919         target = 0;
4920
4921       if (rem_flag)
4922         {
4923           /* Try to produce the remainder without producing the quotient.
4924              If we seem to have a divmod pattern that does not require widening,
4925              don't try widening here.  We should really have a WIDEN argument
4926              to expand_twoval_binop, since what we'd really like to do here is
4927              1) try a mod insn in compute_mode
4928              2) try a divmod insn in compute_mode
4929              3) try a div insn in compute_mode and multiply-subtract to get
4930                 remainder
4931              4) try the same things with widening allowed.  */
4932           remainder
4933             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4934                                  op0, op1, target,
4935                                  unsignedp,
4936                                  ((optab_handler (optab2, compute_mode)
4937                                    != CODE_FOR_nothing)
4938                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4939           if (remainder == 0)
4940             {
4941               /* No luck there.  Can we do remainder and divide at once
4942                  without a library call?  */
4943               remainder = gen_reg_rtx (compute_mode);
4944               if (! expand_twoval_binop ((unsignedp
4945                                           ? udivmod_optab
4946                                           : sdivmod_optab),
4947                                          op0, op1,
4948                                          NULL_RTX, remainder, unsignedp))
4949                 remainder = 0;
4950             }
4951
4952           if (remainder)
4953             return gen_lowpart (mode, remainder);
4954         }
4955
4956       /* Produce the quotient.  Try a quotient insn, but not a library call.
4957          If we have a divmod in this mode, use it in preference to widening
4958          the div (for this test we assume it will not fail). Note that optab2
4959          is set to the one of the two optabs that the call below will use.  */
4960       quotient
4961         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4962                              op0, op1, rem_flag ? NULL_RTX : target,
4963                              unsignedp,
4964                              ((optab_handler (optab2, compute_mode)
4965                                != CODE_FOR_nothing)
4966                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4967
4968       if (quotient == 0)
4969         {
4970           /* No luck there.  Try a quotient-and-remainder insn,
4971              keeping the quotient alone.  */
4972           quotient = gen_reg_rtx (compute_mode);
4973           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4974                                      op0, op1,
4975                                      quotient, NULL_RTX, unsignedp))
4976             {
4977               quotient = 0;
4978               if (! rem_flag)
4979                 /* Still no luck.  If we are not computing the remainder,
4980                    use a library call for the quotient.  */
4981                 quotient = sign_expand_binop (compute_mode,
4982                                               udiv_optab, sdiv_optab,
4983                                               op0, op1, target,
4984                                               unsignedp, OPTAB_LIB_WIDEN);
4985             }
4986         }
4987     }
4988
4989   if (rem_flag)
4990     {
4991       if (target && GET_MODE (target) != compute_mode)
4992         target = 0;
4993
4994       if (quotient == 0)
4995         {
4996           /* No divide instruction either.  Use library for remainder.  */
4997           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4998                                          op0, op1, target,
4999                                          unsignedp, OPTAB_LIB_WIDEN);
5000           /* No remainder function.  Try a quotient-and-remainder
5001              function, keeping the remainder.  */
5002           if (!remainder)
5003             {
5004               remainder = gen_reg_rtx (compute_mode);
5005               if (!expand_twoval_binop_libfunc
5006                   (unsignedp ? udivmod_optab : sdivmod_optab,
5007                    op0, op1,
5008                    NULL_RTX, remainder,
5009                    unsignedp ? UMOD : MOD))
5010                 remainder = NULL_RTX;
5011             }
5012         }
5013       else
5014         {
5015           /* We divided.  Now finish doing X - Y * (X / Y).  */
5016           remainder = expand_mult (compute_mode, quotient, op1,
5017                                    NULL_RTX, unsignedp);
5018           remainder = expand_binop (compute_mode, sub_optab, op0,
5019                                     remainder, target, unsignedp,
5020                                     OPTAB_LIB_WIDEN);
5021         }
5022     }
5023
5024   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5025 }
5026 \f
5027 /* Return a tree node with data type TYPE, describing the value of X.
5028    Usually this is an VAR_DECL, if there is no obvious better choice.
5029    X may be an expression, however we only support those expressions
5030    generated by loop.c.  */
5031
5032 tree
5033 make_tree (tree type, rtx x)
5034 {
5035   tree t;
5036
5037   switch (GET_CODE (x))
5038     {
5039     case CONST_INT:
5040       {
5041         HOST_WIDE_INT hi = 0;
5042
5043         if (INTVAL (x) < 0
5044             && !(TYPE_UNSIGNED (type)
5045                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
5046                      < HOST_BITS_PER_WIDE_INT)))
5047           hi = -1;
5048
5049         t = build_int_cst_wide (type, INTVAL (x), hi);
5050
5051         return t;
5052       }
5053
5054     case CONST_DOUBLE:
5055       if (GET_MODE (x) == VOIDmode)
5056         t = build_int_cst_wide (type,
5057                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5058       else
5059         {
5060           REAL_VALUE_TYPE d;
5061
5062           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5063           t = build_real (type, d);
5064         }
5065
5066       return t;
5067
5068     case CONST_VECTOR:
5069       {
5070         int units = CONST_VECTOR_NUNITS (x);
5071         tree itype = TREE_TYPE (type);
5072         tree t = NULL_TREE;
5073         int i;
5074
5075
5076         /* Build a tree with vector elements.  */
5077         for (i = units - 1; i >= 0; --i)
5078           {
5079             rtx elt = CONST_VECTOR_ELT (x, i);
5080             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5081           }
5082
5083         return build_vector (type, t);
5084       }
5085
5086     case PLUS:
5087       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5088                           make_tree (type, XEXP (x, 1)));
5089
5090     case MINUS:
5091       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5092                           make_tree (type, XEXP (x, 1)));
5093
5094     case NEG:
5095       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5096
5097     case MULT:
5098       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5099                           make_tree (type, XEXP (x, 1)));
5100
5101     case ASHIFT:
5102       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5103                           make_tree (type, XEXP (x, 1)));
5104
5105     case LSHIFTRT:
5106       t = unsigned_type_for (type);
5107       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5108                                          make_tree (t, XEXP (x, 0)),
5109                                          make_tree (type, XEXP (x, 1))));
5110
5111     case ASHIFTRT:
5112       t = signed_type_for (type);
5113       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5114                                          make_tree (t, XEXP (x, 0)),
5115                                          make_tree (type, XEXP (x, 1))));
5116
5117     case DIV:
5118       if (TREE_CODE (type) != REAL_TYPE)
5119         t = signed_type_for (type);
5120       else
5121         t = type;
5122
5123       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5124                                          make_tree (t, XEXP (x, 0)),
5125                                          make_tree (t, XEXP (x, 1))));
5126     case UDIV:
5127       t = unsigned_type_for (type);
5128       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5129                                          make_tree (t, XEXP (x, 0)),
5130                                          make_tree (t, XEXP (x, 1))));
5131
5132     case SIGN_EXTEND:
5133     case ZERO_EXTEND:
5134       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5135                                           GET_CODE (x) == ZERO_EXTEND);
5136       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5137
5138     case CONST:
5139       return make_tree (type, XEXP (x, 0));
5140
5141     case SYMBOL_REF:
5142       t = SYMBOL_REF_DECL (x);
5143       if (t)
5144         return fold_convert (type, build_fold_addr_expr (t));
5145       /* else fall through.  */
5146
5147     default:
5148       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5149
5150       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5151          address mode to pointer mode.  */
5152       if (POINTER_TYPE_P (type))
5153         x = convert_memory_address_addr_space
5154               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5155
5156       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5157          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5158       t->decl_with_rtl.rtl = x;
5159
5160       return t;
5161     }
5162 }
5163 \f
5164 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5165    and returning TARGET.
5166
5167    If TARGET is 0, a pseudo-register or constant is returned.  */
5168
5169 rtx
5170 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5171 {
5172   rtx tem = 0;
5173
5174   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5175     tem = simplify_binary_operation (AND, mode, op0, op1);
5176   if (tem == 0)
5177     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5178
5179   if (target == 0)
5180     target = tem;
5181   else if (tem != target)
5182     emit_move_insn (target, tem);
5183   return target;
5184 }
5185
5186 /* Helper function for emit_store_flag.  */
5187 static rtx
5188 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5189              enum machine_mode mode, enum machine_mode compare_mode,
5190              int unsignedp, rtx x, rtx y, int normalizep,
5191              enum machine_mode target_mode)
5192 {
5193   rtx op0, last, comparison, subtarget, pattern;
5194   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5195
5196   last = get_last_insn ();
5197   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5198   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5199   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5200   if (!x || !y
5201       || !insn_data[icode].operand[2].predicate
5202           (x, insn_data[icode].operand[2].mode)
5203       || !insn_data[icode].operand[3].predicate
5204           (y, insn_data[icode].operand[3].mode)
5205       || !insn_data[icode].operand[1].predicate (comparison, VOIDmode))
5206     {
5207       delete_insns_since (last);
5208       return NULL_RTX;
5209     }
5210
5211   if (target_mode == VOIDmode)
5212     target_mode = result_mode;
5213   if (!target)
5214     target = gen_reg_rtx (target_mode);
5215
5216   if (optimize
5217       || !(insn_data[(int) icode].operand[0].predicate (target, result_mode)))
5218     subtarget = gen_reg_rtx (result_mode);
5219   else
5220     subtarget = target;
5221
5222   pattern = GEN_FCN (icode) (subtarget, comparison, x, y);
5223   if (!pattern)
5224     return NULL_RTX;
5225   emit_insn (pattern);
5226
5227   /* If we are converting to a wider mode, first convert to
5228      TARGET_MODE, then normalize.  This produces better combining
5229      opportunities on machines that have a SIGN_EXTRACT when we are
5230      testing a single bit.  This mostly benefits the 68k.
5231
5232      If STORE_FLAG_VALUE does not have the sign bit set when
5233      interpreted in MODE, we can do this conversion as unsigned, which
5234      is usually more efficient.  */
5235   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5236     {
5237       convert_move (target, subtarget,
5238                     (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5239                     && 0 == (STORE_FLAG_VALUE
5240                              & ((HOST_WIDE_INT) 1
5241                                 << (GET_MODE_BITSIZE (result_mode) -1))));
5242       op0 = target;
5243       result_mode = target_mode;
5244     }
5245   else
5246     op0 = subtarget;
5247
5248   /* If we want to keep subexpressions around, don't reuse our last
5249      target.  */
5250   if (optimize)
5251     subtarget = 0;
5252
5253   /* Now normalize to the proper value in MODE.  Sometimes we don't
5254      have to do anything.  */
5255   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5256     ;
5257   /* STORE_FLAG_VALUE might be the most negative number, so write
5258      the comparison this way to avoid a compiler-time warning.  */
5259   else if (- normalizep == STORE_FLAG_VALUE)
5260     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5261
5262   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5263      it hard to use a value of just the sign bit due to ANSI integer
5264      constant typing rules.  */
5265   else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5266            && (STORE_FLAG_VALUE
5267                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5268     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5269                         size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5270                         normalizep == 1);
5271   else
5272     {
5273       gcc_assert (STORE_FLAG_VALUE & 1);
5274
5275       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5276       if (normalizep == -1)
5277         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5278     }
5279
5280   /* If we were converting to a smaller mode, do the conversion now.  */
5281   if (target_mode != result_mode)
5282     {
5283       convert_move (target, op0, 0);
5284       return target;
5285     }
5286   else
5287     return op0;
5288 }
5289
5290
5291 /* A subroutine of emit_store_flag only including "tricks" that do not
5292    need a recursive call.  These are kept separate to avoid infinite
5293    loops.  */
5294
5295 static rtx
5296 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5297                    enum machine_mode mode, int unsignedp, int normalizep,
5298                    enum machine_mode target_mode)
5299 {
5300   rtx subtarget;
5301   enum insn_code icode;
5302   enum machine_mode compare_mode;
5303   enum mode_class mclass;
5304   enum rtx_code scode;
5305   rtx tem;
5306
5307   if (unsignedp)
5308     code = unsigned_condition (code);
5309   scode = swap_condition (code);
5310
5311   /* If one operand is constant, make it the second one.  Only do this
5312      if the other operand is not constant as well.  */
5313
5314   if (swap_commutative_operands_p (op0, op1))
5315     {
5316       tem = op0;
5317       op0 = op1;
5318       op1 = tem;
5319       code = swap_condition (code);
5320     }
5321
5322   if (mode == VOIDmode)
5323     mode = GET_MODE (op0);
5324
5325   /* For some comparisons with 1 and -1, we can convert this to
5326      comparisons with zero.  This will often produce more opportunities for
5327      store-flag insns.  */
5328
5329   switch (code)
5330     {
5331     case LT:
5332       if (op1 == const1_rtx)
5333         op1 = const0_rtx, code = LE;
5334       break;
5335     case LE:
5336       if (op1 == constm1_rtx)
5337         op1 = const0_rtx, code = LT;
5338       break;
5339     case GE:
5340       if (op1 == const1_rtx)
5341         op1 = const0_rtx, code = GT;
5342       break;
5343     case GT:
5344       if (op1 == constm1_rtx)
5345         op1 = const0_rtx, code = GE;
5346       break;
5347     case GEU:
5348       if (op1 == const1_rtx)
5349         op1 = const0_rtx, code = NE;
5350       break;
5351     case LTU:
5352       if (op1 == const1_rtx)
5353         op1 = const0_rtx, code = EQ;
5354       break;
5355     default:
5356       break;
5357     }
5358
5359   /* If we are comparing a double-word integer with zero or -1, we can
5360      convert the comparison into one involving a single word.  */
5361   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5362       && GET_MODE_CLASS (mode) == MODE_INT
5363       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5364     {
5365       if ((code == EQ || code == NE)
5366           && (op1 == const0_rtx || op1 == constm1_rtx))
5367         {
5368           rtx op00, op01;
5369
5370           /* Do a logical OR or AND of the two words and compare the
5371              result.  */
5372           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5373           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5374           tem = expand_binop (word_mode,
5375                               op1 == const0_rtx ? ior_optab : and_optab,
5376                               op00, op01, NULL_RTX, unsignedp,
5377                               OPTAB_DIRECT);
5378
5379           if (tem != 0)
5380             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5381                                    unsignedp, normalizep);
5382         }
5383       else if ((code == LT || code == GE) && op1 == const0_rtx)
5384         {
5385           rtx op0h;
5386
5387           /* If testing the sign bit, can just test on high word.  */
5388           op0h = simplify_gen_subreg (word_mode, op0, mode,
5389                                       subreg_highpart_offset (word_mode,
5390                                                               mode));
5391           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5392                                  unsignedp, normalizep);
5393         }
5394       else
5395         tem = NULL_RTX;
5396
5397       if (tem)
5398         {
5399           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5400             return tem;
5401           if (!target)
5402             target = gen_reg_rtx (target_mode);
5403
5404           convert_move (target, tem,
5405                         0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5406                               & ((HOST_WIDE_INT) 1
5407                                  << (GET_MODE_BITSIZE (word_mode) -1))));
5408           return target;
5409         }
5410     }
5411
5412   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5413      complement of A (for GE) and shifting the sign bit to the low bit.  */
5414   if (op1 == const0_rtx && (code == LT || code == GE)
5415       && GET_MODE_CLASS (mode) == MODE_INT
5416       && (normalizep || STORE_FLAG_VALUE == 1
5417           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5418               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5419                   == ((unsigned HOST_WIDE_INT) 1
5420                       << (GET_MODE_BITSIZE (mode) - 1))))))
5421     {
5422       subtarget = target;
5423
5424       if (!target)
5425         target_mode = mode;
5426
5427       /* If the result is to be wider than OP0, it is best to convert it
5428          first.  If it is to be narrower, it is *incorrect* to convert it
5429          first.  */
5430       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5431         {
5432           op0 = convert_modes (target_mode, mode, op0, 0);
5433           mode = target_mode;
5434         }
5435
5436       if (target_mode != mode)
5437         subtarget = 0;
5438
5439       if (code == GE)
5440         op0 = expand_unop (mode, one_cmpl_optab, op0,
5441                            ((STORE_FLAG_VALUE == 1 || normalizep)
5442                             ? 0 : subtarget), 0);
5443
5444       if (STORE_FLAG_VALUE == 1 || normalizep)
5445         /* If we are supposed to produce a 0/1 value, we want to do
5446            a logical shift from the sign bit to the low-order bit; for
5447            a -1/0 value, we do an arithmetic shift.  */
5448         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5449                             size_int (GET_MODE_BITSIZE (mode) - 1),
5450                             subtarget, normalizep != -1);
5451
5452       if (mode != target_mode)
5453         op0 = convert_modes (target_mode, mode, op0, 0);
5454
5455       return op0;
5456     }
5457
5458   mclass = GET_MODE_CLASS (mode);
5459   for (compare_mode = mode; compare_mode != VOIDmode;
5460        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5461     {
5462      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5463      icode = optab_handler (cstore_optab, optab_mode);
5464      if (icode != CODE_FOR_nothing)
5465         {
5466           do_pending_stack_adjust ();
5467           tem = emit_cstore (target, icode, code, mode, compare_mode,
5468                              unsignedp, op0, op1, normalizep, target_mode);
5469           if (tem)
5470             return tem;
5471
5472           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5473             {
5474               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5475                                  unsignedp, op1, op0, normalizep, target_mode);
5476               if (tem)
5477                 return tem;
5478             }
5479           break;
5480         }
5481     }
5482
5483   return 0;
5484 }
5485
5486 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5487    and storing in TARGET.  Normally return TARGET.
5488    Return 0 if that cannot be done.
5489
5490    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5491    it is VOIDmode, they cannot both be CONST_INT.
5492
5493    UNSIGNEDP is for the case where we have to widen the operands
5494    to perform the operation.  It says to use zero-extension.
5495
5496    NORMALIZEP is 1 if we should convert the result to be either zero
5497    or one.  Normalize is -1 if we should convert the result to be
5498    either zero or -1.  If NORMALIZEP is zero, the result will be left
5499    "raw" out of the scc insn.  */
5500
5501 rtx
5502 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5503                  enum machine_mode mode, int unsignedp, int normalizep)
5504 {
5505   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5506   enum rtx_code rcode;
5507   rtx subtarget;
5508   rtx tem, last, trueval;
5509
5510   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5511                            target_mode);
5512   if (tem)
5513     return tem;
5514
5515   /* If we reached here, we can't do this with a scc insn, however there
5516      are some comparisons that can be done in other ways.  Don't do any
5517      of these cases if branches are very cheap.  */
5518   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5519     return 0;
5520
5521   /* See what we need to return.  We can only return a 1, -1, or the
5522      sign bit.  */
5523
5524   if (normalizep == 0)
5525     {
5526       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5527         normalizep = STORE_FLAG_VALUE;
5528
5529       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5530                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5531                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5532         ;
5533       else
5534         return 0;
5535     }
5536
5537   last = get_last_insn ();
5538
5539   /* If optimizing, use different pseudo registers for each insn, instead
5540      of reusing the same pseudo.  This leads to better CSE, but slows
5541      down the compiler, since there are more pseudos */
5542   subtarget = (!optimize
5543                && (target_mode == mode)) ? target : NULL_RTX;
5544   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5545
5546   /* For floating-point comparisons, try the reverse comparison or try
5547      changing the "orderedness" of the comparison.  */
5548   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5549     {
5550       enum rtx_code first_code;
5551       bool and_them;
5552
5553       rcode = reverse_condition_maybe_unordered (code);
5554       if (can_compare_p (rcode, mode, ccp_store_flag)
5555           && (code == ORDERED || code == UNORDERED
5556               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5557               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5558         {
5559           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5560                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5561
5562           /* For the reverse comparison, use either an addition or a XOR.  */
5563           if (want_add
5564               && rtx_cost (GEN_INT (normalizep), PLUS,
5565                            optimize_insn_for_speed_p ()) == 0)
5566             {
5567               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5568                                        STORE_FLAG_VALUE, target_mode);
5569               if (tem)
5570                 return expand_binop (target_mode, add_optab, tem,
5571                                      GEN_INT (normalizep),
5572                                      target, 0, OPTAB_WIDEN);
5573             }
5574           else if (!want_add
5575                    && rtx_cost (trueval, XOR,
5576                                 optimize_insn_for_speed_p ()) == 0)
5577             {
5578               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5579                                        normalizep, target_mode);
5580               if (tem)
5581                 return expand_binop (target_mode, xor_optab, tem, trueval,
5582                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5583             }
5584         }
5585
5586       delete_insns_since (last);
5587
5588       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5589       if (code == ORDERED || code == UNORDERED)
5590         return 0;
5591
5592       and_them = split_comparison (code, mode, &first_code, &code);
5593
5594       /* If there are no NaNs, the first comparison should always fall through.
5595          Effectively change the comparison to the other one.  */
5596       if (!HONOR_NANS (mode))
5597         {
5598           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5599           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5600                                     target_mode);
5601         }
5602
5603 #ifdef HAVE_conditional_move
5604       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5605          conditional move.  */
5606       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5607                                normalizep, target_mode);
5608       if (tem == 0)
5609         return 0;
5610
5611       if (and_them)
5612         tem = emit_conditional_move (target, code, op0, op1, mode,
5613                                      tem, const0_rtx, GET_MODE (tem), 0);
5614       else
5615         tem = emit_conditional_move (target, code, op0, op1, mode,
5616                                      trueval, tem, GET_MODE (tem), 0);
5617
5618       if (tem == 0)
5619         delete_insns_since (last);
5620       return tem;
5621 #else
5622       return 0;
5623 #endif
5624     }
5625
5626   /* The remaining tricks only apply to integer comparisons.  */
5627
5628   if (GET_MODE_CLASS (mode) != MODE_INT)
5629     return 0;
5630
5631   /* If this is an equality comparison of integers, we can try to exclusive-or
5632      (or subtract) the two operands and use a recursive call to try the
5633      comparison with zero.  Don't do any of these cases if branches are
5634      very cheap.  */
5635
5636   if ((code == EQ || code == NE) && op1 != const0_rtx)
5637     {
5638       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5639                           OPTAB_WIDEN);
5640
5641       if (tem == 0)
5642         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5643                             OPTAB_WIDEN);
5644       if (tem != 0)
5645         tem = emit_store_flag (target, code, tem, const0_rtx,
5646                                mode, unsignedp, normalizep);
5647       if (tem != 0)
5648         return tem;
5649
5650       delete_insns_since (last);
5651     }
5652
5653   /* For integer comparisons, try the reverse comparison.  However, for
5654      small X and if we'd have anyway to extend, implementing "X != 0"
5655      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5656   rcode = reverse_condition (code);
5657   if (can_compare_p (rcode, mode, ccp_store_flag)
5658       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5659             && code == NE
5660             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5661             && op1 == const0_rtx))
5662     {
5663       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5664                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5665
5666       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5667       if (want_add
5668           && rtx_cost (GEN_INT (normalizep), PLUS,
5669                        optimize_insn_for_speed_p ()) == 0)
5670         {
5671           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5672                                    STORE_FLAG_VALUE, target_mode);
5673           if (tem != 0)
5674             tem = expand_binop (target_mode, add_optab, tem,
5675                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5676         }
5677       else if (!want_add
5678                && rtx_cost (trueval, XOR,
5679                             optimize_insn_for_speed_p ()) == 0)
5680         {
5681           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5682                                    normalizep, target_mode);
5683           if (tem != 0)
5684             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5685                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5686         }
5687
5688       if (tem != 0)
5689         return tem;
5690       delete_insns_since (last);
5691     }
5692
5693   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5694      the constant zero.  Reject all other comparisons at this point.  Only
5695      do LE and GT if branches are expensive since they are expensive on
5696      2-operand machines.  */
5697
5698   if (op1 != const0_rtx
5699       || (code != EQ && code != NE
5700           && (BRANCH_COST (optimize_insn_for_speed_p (),
5701                            false) <= 1 || (code != LE && code != GT))))
5702     return 0;
5703
5704   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5705      do the necessary operation below.  */
5706
5707   tem = 0;
5708
5709   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5710      the sign bit set.  */
5711
5712   if (code == LE)
5713     {
5714       /* This is destructive, so SUBTARGET can't be OP0.  */
5715       if (rtx_equal_p (subtarget, op0))
5716         subtarget = 0;
5717
5718       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5719                           OPTAB_WIDEN);
5720       if (tem)
5721         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5722                             OPTAB_WIDEN);
5723     }
5724
5725   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5726      number of bits in the mode of OP0, minus one.  */
5727
5728   if (code == GT)
5729     {
5730       if (rtx_equal_p (subtarget, op0))
5731         subtarget = 0;
5732
5733       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5734                           size_int (GET_MODE_BITSIZE (mode) - 1),
5735                           subtarget, 0);
5736       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5737                           OPTAB_WIDEN);
5738     }
5739
5740   if (code == EQ || code == NE)
5741     {
5742       /* For EQ or NE, one way to do the comparison is to apply an operation
5743          that converts the operand into a positive number if it is nonzero
5744          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5745          for NE we negate.  This puts the result in the sign bit.  Then we
5746          normalize with a shift, if needed.
5747
5748          Two operations that can do the above actions are ABS and FFS, so try
5749          them.  If that doesn't work, and MODE is smaller than a full word,
5750          we can use zero-extension to the wider mode (an unsigned conversion)
5751          as the operation.  */
5752
5753       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5754          that is compensated by the subsequent overflow when subtracting
5755          one / negating.  */
5756
5757       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5758         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5759       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5760         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5761       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5762         {
5763           tem = convert_modes (word_mode, mode, op0, 1);
5764           mode = word_mode;
5765         }
5766
5767       if (tem != 0)
5768         {
5769           if (code == EQ)
5770             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5771                                 0, OPTAB_WIDEN);
5772           else
5773             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5774         }
5775
5776       /* If we couldn't do it that way, for NE we can "or" the two's complement
5777          of the value with itself.  For EQ, we take the one's complement of
5778          that "or", which is an extra insn, so we only handle EQ if branches
5779          are expensive.  */
5780
5781       if (tem == 0
5782           && (code == NE
5783               || BRANCH_COST (optimize_insn_for_speed_p (),
5784                               false) > 1))
5785         {
5786           if (rtx_equal_p (subtarget, op0))
5787             subtarget = 0;
5788
5789           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5790           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5791                               OPTAB_WIDEN);
5792
5793           if (tem && code == EQ)
5794             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5795         }
5796     }
5797
5798   if (tem && normalizep)
5799     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5800                         size_int (GET_MODE_BITSIZE (mode) - 1),
5801                         subtarget, normalizep == 1);
5802
5803   if (tem)
5804     {
5805       if (!target)
5806         ;
5807       else if (GET_MODE (tem) != target_mode)
5808         {
5809           convert_move (target, tem, 0);
5810           tem = target;
5811         }
5812       else if (!subtarget)
5813         {
5814           emit_move_insn (target, tem);
5815           tem = target;
5816         }
5817     }
5818   else
5819     delete_insns_since (last);
5820
5821   return tem;
5822 }
5823
5824 /* Like emit_store_flag, but always succeeds.  */
5825
5826 rtx
5827 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5828                        enum machine_mode mode, int unsignedp, int normalizep)
5829 {
5830   rtx tem, label;
5831   rtx trueval, falseval;
5832
5833   /* First see if emit_store_flag can do the job.  */
5834   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5835   if (tem != 0)
5836     return tem;
5837
5838   if (!target)
5839     target = gen_reg_rtx (word_mode);
5840
5841   /* If this failed, we have to do this with set/compare/jump/set code.
5842      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5843   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5844   if (code == NE
5845       && GET_MODE_CLASS (mode) == MODE_INT
5846       && REG_P (target)
5847       && op0 == target
5848       && op1 == const0_rtx)
5849     {
5850       label = gen_label_rtx ();
5851       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5852                                mode, NULL_RTX, NULL_RTX, label, -1);
5853       emit_move_insn (target, trueval);
5854       emit_label (label);
5855       return target;
5856     }
5857
5858   if (!REG_P (target)
5859       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5860     target = gen_reg_rtx (GET_MODE (target));
5861
5862   /* Jump in the right direction if the target cannot implement CODE
5863      but can jump on its reverse condition.  */
5864   falseval = const0_rtx;
5865   if (! can_compare_p (code, mode, ccp_jump)
5866       && (! FLOAT_MODE_P (mode)
5867           || code == ORDERED || code == UNORDERED
5868           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5869           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5870     {
5871       enum rtx_code rcode;
5872       if (FLOAT_MODE_P (mode))
5873         rcode = reverse_condition_maybe_unordered (code);
5874       else
5875         rcode = reverse_condition (code);
5876
5877       /* Canonicalize to UNORDERED for the libcall.  */
5878       if (can_compare_p (rcode, mode, ccp_jump)
5879           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5880         {
5881           falseval = trueval;
5882           trueval = const0_rtx;
5883           code = rcode;
5884         }
5885     }
5886
5887   emit_move_insn (target, trueval);
5888   label = gen_label_rtx ();
5889   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5890                            NULL_RTX, label, -1);
5891
5892   emit_move_insn (target, falseval);
5893   emit_label (label);
5894
5895   return target;
5896 }
5897 \f
5898 /* Perform possibly multi-word comparison and conditional jump to LABEL
5899    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5900    now a thin wrapper around do_compare_rtx_and_jump.  */
5901
5902 static void
5903 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5904                  rtx label)
5905 {
5906   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5907   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5908                            NULL_RTX, NULL_RTX, label, -1);
5909 }