gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   5    2011
   6    Free Software Foundation, Inc.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify it under
  11 the terms of the GNU General Public License as published by the Free
  12 Software Foundation; either version 3, or (at your option) any later
  13 version.
  14
  15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  18 for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "diagnostic-core.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT, rtx);
  51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT, rtx);
  53 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  54                                     unsigned HOST_WIDE_INT,
  55                                     unsigned HOST_WIDE_INT,
  56                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  57 static rtx mask_rtx (enum machine_mode, int, int, int);
  58 static rtx lshift_value (enum machine_mode, rtx, int, int);
  59 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  60                                     unsigned HOST_WIDE_INT, int);
  61 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  62 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  63 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  64
  65 /* Test whether a value is zero of a power of two.  */
  66 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  67
  68 #ifndef SLOW_UNALIGNED_ACCESS
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72
  73 /* Reduce conditional compilation elsewhere.  */
  74 #ifndef HAVE_insv
  75 #define HAVE_insv       0
  76 #define CODE_FOR_insv   CODE_FOR_nothing
  77 #define gen_insv(a,b,c,d) NULL_RTX
  78 #endif
  79 #ifndef HAVE_extv
  80 #define HAVE_extv       0
  81 #define CODE_FOR_extv   CODE_FOR_nothing
  82 #define gen_extv(a,b,c,d) NULL_RTX
  83 #endif
  84 #ifndef HAVE_extzv
  85 #define HAVE_extzv      0
  86 #define CODE_FOR_extzv  CODE_FOR_nothing
  87 #define gen_extzv(a,b,c,d) NULL_RTX
  88 #endif
  89
  90 void
  91 init_expmed (void)
  92 {
  93   struct
  94   {
  95     struct rtx_def reg;         rtunion reg_fld[2];
  96     struct rtx_def plus;        rtunion plus_fld1;
  97     struct rtx_def neg;
  98     struct rtx_def mult;        rtunion mult_fld1;
  99     struct rtx_def sdiv;        rtunion sdiv_fld1;
 100     struct rtx_def udiv;        rtunion udiv_fld1;
 101     struct rtx_def zext;
 102     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 103     struct rtx_def smod_32;     rtunion smod_32_fld1;
 104     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 105     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 106     struct rtx_def wide_trunc;
 107     struct rtx_def shift;       rtunion shift_fld1;
 108     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 109     struct rtx_def shift_add;   rtunion shift_add_fld1;
 110     struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
 111     struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
 112   } all;
 113
 114   rtx pow2[MAX_BITS_PER_WORD];
 115   rtx cint[MAX_BITS_PER_WORD];
 116   int m, n;
 117   enum machine_mode mode, wider_mode;
 118   int speed;
 119
 120
 121   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 122     {
 123       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 124       cint[m] = GEN_INT (m);
 125     }
 126   memset (&all, 0, sizeof all);
 127
 128   PUT_CODE (&all.reg, REG);
 129   /* Avoid using hard regs in ways which may be unsupported.  */
 130   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 131
 132   PUT_CODE (&all.plus, PLUS);
 133   XEXP (&all.plus, 0) = &all.reg;
 134   XEXP (&all.plus, 1) = &all.reg;
 135
 136   PUT_CODE (&all.neg, NEG);
 137   XEXP (&all.neg, 0) = &all.reg;
 138
 139   PUT_CODE (&all.mult, MULT);
 140   XEXP (&all.mult, 0) = &all.reg;
 141   XEXP (&all.mult, 1) = &all.reg;
 142
 143   PUT_CODE (&all.sdiv, DIV);
 144   XEXP (&all.sdiv, 0) = &all.reg;
 145   XEXP (&all.sdiv, 1) = &all.reg;
 146
 147   PUT_CODE (&all.udiv, UDIV);
 148   XEXP (&all.udiv, 0) = &all.reg;
 149   XEXP (&all.udiv, 1) = &all.reg;
 150
 151   PUT_CODE (&all.sdiv_32, DIV);
 152   XEXP (&all.sdiv_32, 0) = &all.reg;
 153   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 154
 155   PUT_CODE (&all.smod_32, MOD);
 156   XEXP (&all.smod_32, 0) = &all.reg;
 157   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 158
 159   PUT_CODE (&all.zext, ZERO_EXTEND);
 160   XEXP (&all.zext, 0) = &all.reg;
 161
 162   PUT_CODE (&all.wide_mult, MULT);
 163   XEXP (&all.wide_mult, 0) = &all.zext;
 164   XEXP (&all.wide_mult, 1) = &all.zext;
 165
 166   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 167   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 168
 169   PUT_CODE (&all.wide_trunc, TRUNCATE);
 170   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 171
 172   PUT_CODE (&all.shift, ASHIFT);
 173   XEXP (&all.shift, 0) = &all.reg;
 174
 175   PUT_CODE (&all.shift_mult, MULT);
 176   XEXP (&all.shift_mult, 0) = &all.reg;
 177
 178   PUT_CODE (&all.shift_add, PLUS);
 179   XEXP (&all.shift_add, 0) = &all.shift_mult;
 180   XEXP (&all.shift_add, 1) = &all.reg;
 181
 182   PUT_CODE (&all.shift_sub0, MINUS);
 183   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 184   XEXP (&all.shift_sub0, 1) = &all.reg;
 185
 186   PUT_CODE (&all.shift_sub1, MINUS);
 187   XEXP (&all.shift_sub1, 0) = &all.reg;
 188   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 189
 190   for (speed = 0; speed < 2; speed++)
 191     {
 192       crtl->maybe_hot_insn_p = speed;
 193       zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
 194
 195       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 196            mode != VOIDmode;
 197            mode = GET_MODE_WIDER_MODE (mode))
 198         {
 199           PUT_MODE (&all.reg, mode);
 200           PUT_MODE (&all.plus, mode);
 201           PUT_MODE (&all.neg, mode);
 202           PUT_MODE (&all.mult, mode);
 203           PUT_MODE (&all.sdiv, mode);
 204           PUT_MODE (&all.udiv, mode);
 205           PUT_MODE (&all.sdiv_32, mode);
 206           PUT_MODE (&all.smod_32, mode);
 207           PUT_MODE (&all.wide_trunc, mode);
 208           PUT_MODE (&all.shift, mode);
 209           PUT_MODE (&all.shift_mult, mode);
 210           PUT_MODE (&all.shift_add, mode);
 211           PUT_MODE (&all.shift_sub0, mode);
 212           PUT_MODE (&all.shift_sub1, mode);
 213
 214           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 215           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 216           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 217           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 218           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 219
 220           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 221                                           <= 2 * add_cost[speed][mode]);
 222           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 223                                           <= 4 * add_cost[speed][mode]);
 224
 225           wider_mode = GET_MODE_WIDER_MODE (mode);
 226           if (wider_mode != VOIDmode)
 227             {
 228               PUT_MODE (&all.zext, wider_mode);
 229               PUT_MODE (&all.wide_mult, wider_mode);
 230               PUT_MODE (&all.wide_lshr, wider_mode);
 231               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 232
 233               mul_widen_cost[speed][wider_mode]
 234                 = rtx_cost (&all.wide_mult, SET, speed);
 235               mul_highpart_cost[speed][mode]
 236                 = rtx_cost (&all.wide_trunc, SET, speed);
 237             }
 238
 239           shift_cost[speed][mode][0] = 0;
 240           shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
 241             = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
 242
 243           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 244           for (m = 1; m < n; m++)
 245             {
 246               XEXP (&all.shift, 1) = cint[m];
 247               XEXP (&all.shift_mult, 1) = pow2[m];
 248
 249               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 250               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 251               shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
 252               shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
 253             }
 254         }
 255     }
 256   if (alg_hash_used_p)
 257     memset (alg_hash, 0, sizeof (alg_hash));
 258   else
 259     alg_hash_used_p = true;
 260   default_rtl_profile ();
 261 }
 262
 263 /* Return an rtx representing minus the value of X.
 264    MODE is the intended mode of the result,
 265    useful if X is a CONST_INT.  */
 266
 267 rtx
 268 negate_rtx (enum machine_mode mode, rtx x)
 269 {
 270   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 271
 272   if (result == 0)
 273     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 274
 275   return result;
 276 }
 277
 278 /* Report on the availability of insv/extv/extzv and the desired mode
 279    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 280    is false; else the mode of the specified operand.  If OPNO is -1,
 281    all the caller cares about is whether the insn is available.  */
 282 enum machine_mode
 283 mode_for_extraction (enum extraction_pattern pattern, int opno)
 284 {
 285   const struct insn_data_d *data;
 286
 287   switch (pattern)
 288     {
 289     case EP_insv:
 290       if (HAVE_insv)
 291         {
 292           data = &insn_data[CODE_FOR_insv];
 293           break;
 294         }
 295       return MAX_MACHINE_MODE;
 296
 297     case EP_extv:
 298       if (HAVE_extv)
 299         {
 300           data = &insn_data[CODE_FOR_extv];
 301           break;
 302         }
 303       return MAX_MACHINE_MODE;
 304
 305     case EP_extzv:
 306       if (HAVE_extzv)
 307         {
 308           data = &insn_data[CODE_FOR_extzv];
 309           break;
 310         }
 311       return MAX_MACHINE_MODE;
 312
 313     default:
 314       gcc_unreachable ();
 315     }
 316
 317   if (opno == -1)
 318     return VOIDmode;
 319
 320   /* Everyone who uses this function used to follow it with
 321      if (result == VOIDmode) result = word_mode; */
 322   if (data->operand[opno].mode == VOIDmode)
 323     return word_mode;
 324   return data->operand[opno].mode;
 325 }
 326
 327 /* Return true if X, of mode MODE, matches the predicate for operand
 328    OPNO of instruction ICODE.  Allow volatile memories, regardless of
 329    the ambient volatile_ok setting.  */
 330
 331 static bool
 332 check_predicate_volatile_ok (enum insn_code icode, int opno,
 333                              rtx x, enum machine_mode mode)
 334 {
 335   bool save_volatile_ok, result;
 336
 337   save_volatile_ok = volatile_ok;
 338   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 339   volatile_ok = save_volatile_ok;
 340   return result;
 341 }
 342 \f
 343 /* A subroutine of store_bit_field, with the same arguments.  Return true
 344    if the operation could be implemented.
 345
 346    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 347    no other way of implementing the operation.  If FALLBACK_P is false,
 348    return false instead.  */
 349
 350 static bool
 351 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 352                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 353                    rtx value, bool fallback_p)
 354 {
 355   unsigned int unit
 356     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 357   unsigned HOST_WIDE_INT offset, bitpos;
 358   rtx op0 = str_rtx;
 359   int byte_offset;
 360   rtx orig_value;
 361
 362   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 363
 364   while (GET_CODE (op0) == SUBREG)
 365     {
 366       /* The following line once was done only if WORDS_BIG_ENDIAN,
 367          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 368          meaningful at a much higher level; when structures are copied
 369          between memory and regs, the higher-numbered regs
 370          always get higher addresses.  */
 371       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 372       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 373
 374       byte_offset = 0;
 375
 376       /* Paradoxical subregs need special handling on big endian machines.  */
 377       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 378         {
 379           int difference = inner_mode_size - outer_mode_size;
 380
 381           if (WORDS_BIG_ENDIAN)
 382             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 383           if (BYTES_BIG_ENDIAN)
 384             byte_offset += difference % UNITS_PER_WORD;
 385         }
 386       else
 387         byte_offset = SUBREG_BYTE (op0);
 388
 389       bitnum += byte_offset * BITS_PER_UNIT;
 390       op0 = SUBREG_REG (op0);
 391     }
 392
 393   /* No action is needed if the target is a register and if the field
 394      lies completely outside that register.  This can occur if the source
 395      code contains an out-of-bounds access to a small array.  */
 396   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 397     return true;
 398
 399   /* Use vec_set patterns for inserting parts of vectors whenever
 400      available.  */
 401   if (VECTOR_MODE_P (GET_MODE (op0))
 402       && !MEM_P (op0)
 403       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 404       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 405       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 406       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 407     {
 408       enum machine_mode outermode = GET_MODE (op0);
 409       enum machine_mode innermode = GET_MODE_INNER (outermode);
 410       int icode = (int) optab_handler (vec_set_optab, outermode);
 411       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 412       rtx rtxpos = GEN_INT (pos);
 413       rtx src = value;
 414       rtx dest = op0;
 415       rtx pat, seq;
 416       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 417       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 418       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 419
 420       start_sequence ();
 421
 422       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 423         src = copy_to_mode_reg (mode1, src);
 424
 425       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 426         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 427
 428       /* We could handle this, but we should always be called with a pseudo
 429          for our targets and all insns should take them as outputs.  */
 430       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 431                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 432                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 433       pat = GEN_FCN (icode) (dest, src, rtxpos);
 434       seq = get_insns ();
 435       end_sequence ();
 436       if (pat)
 437         {
 438           emit_insn (seq);
 439           emit_insn (pat);
 440           return true;
 441         }
 442     }
 443
 444   /* If the target is a register, overwriting the entire object, or storing
 445      a full-word or multi-word field can be done with just a SUBREG.
 446
 447      If the target is memory, storing any naturally aligned field can be
 448      done with a simple store.  For targets that support fast unaligned
 449      memory, any naturally sized, unit aligned field can be done directly.  */
 450
 451   offset = bitnum / unit;
 452   bitpos = bitnum % unit;
 453   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 454                 + (offset * UNITS_PER_WORD);
 455
 456   if (bitpos == 0
 457       && bitsize == GET_MODE_BITSIZE (fieldmode)
 458       && (!MEM_P (op0)
 459           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 460               || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 461              && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
 462                  || validate_subreg (fieldmode, GET_MODE (op0), op0,
 463                                      byte_offset)))
 464           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 465              || (offset * BITS_PER_UNIT % bitsize == 0
 466                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 467     {
 468       if (MEM_P (op0))
 469         op0 = adjust_address (op0, fieldmode, offset);
 470       else if (GET_MODE (op0) != fieldmode)
 471         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 472                                    byte_offset);
 473       emit_move_insn (op0, value);
 474       return true;
 475     }
 476
 477   /* Make sure we are playing with integral modes.  Pun with subregs
 478      if we aren't.  This must come after the entire register case above,
 479      since that case is valid for any mode.  The following cases are only
 480      valid for integral modes.  */
 481   {
 482     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 483     if (imode != GET_MODE (op0))
 484       {
 485         if (MEM_P (op0))
 486           op0 = adjust_address (op0, imode, 0);
 487         else
 488           {
 489             gcc_assert (imode != BLKmode);
 490             op0 = gen_lowpart (imode, op0);
 491           }
 492       }
 493   }
 494
 495   /* We may be accessing data outside the field, which means
 496      we can alias adjacent data.  */
 497   if (MEM_P (op0))
 498     {
 499       op0 = shallow_copy_rtx (op0);
 500       set_mem_alias_set (op0, 0);
 501       set_mem_expr (op0, 0);
 502     }
 503
 504   /* If OP0 is a register, BITPOS must count within a word.
 505      But as we have it, it counts within whatever size OP0 now has.
 506      On a bigendian machine, these are not the same, so convert.  */
 507   if (BYTES_BIG_ENDIAN
 508       && !MEM_P (op0)
 509       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 510     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 511
 512   /* Storing an lsb-aligned field in a register
 513      can be done with a movestrict instruction.  */
 514
 515   if (!MEM_P (op0)
 516       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 517       && bitsize == GET_MODE_BITSIZE (fieldmode)
 518       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 519     {
 520       int icode = optab_handler (movstrict_optab, fieldmode);
 521       rtx insn;
 522       rtx start = get_last_insn ();
 523       rtx arg0 = op0;
 524       unsigned HOST_WIDE_INT subreg_off;
 525
 526       /* Get appropriate low part of the value being stored.  */
 527       if (CONST_INT_P (value) || REG_P (value))
 528         value = gen_lowpart (fieldmode, value);
 529       else if (!(GET_CODE (value) == SYMBOL_REF
 530                  || GET_CODE (value) == LABEL_REF
 531                  || GET_CODE (value) == CONST))
 532         value = convert_to_mode (fieldmode, value, 0);
 533
 534       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 535         value = copy_to_mode_reg (fieldmode, value);
 536
 537       if (GET_CODE (op0) == SUBREG)
 538         {
 539           /* Else we've got some float mode source being extracted into
 540              a different float mode destination -- this combination of
 541              subregs results in Severe Tire Damage.  */
 542           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 543                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 544                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 545           arg0 = SUBREG_REG (op0);
 546         }
 547
 548       subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 549                    + (offset * UNITS_PER_WORD);
 550       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 551         {
 552           insn = (GEN_FCN (icode) (gen_rtx_SUBREG (fieldmode, arg0,
 553                                                    subreg_off), value));
 554           if (insn)
 555             {
 556               emit_insn (insn);
 557               return true;
 558             }
 559         }
 560       delete_insns_since (start);
 561     }
 562
 563   /* Handle fields bigger than a word.  */
 564
 565   if (bitsize > BITS_PER_WORD)
 566     {
 567       /* Here we transfer the words of the field
 568          in the order least significant first.
 569          This is because the most significant word is the one which may
 570          be less than full.
 571          However, only do that if the value is not BLKmode.  */
 572
 573       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 574       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 575       unsigned int i;
 576       rtx last;
 577
 578       /* This is the mode we must force value to, so that there will be enough
 579          subwords to extract.  Note that fieldmode will often (always?) be
 580          VOIDmode, because that is what store_field uses to indicate that this
 581          is a bit field, but passing VOIDmode to operand_subword_force
 582          is not allowed.  */
 583       fieldmode = GET_MODE (value);
 584       if (fieldmode == VOIDmode)
 585         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 586
 587       last = get_last_insn ();
 588       for (i = 0; i < nwords; i++)
 589         {
 590           /* If I is 0, use the low-order word in both field and target;
 591              if I is 1, use the next to lowest word; and so on.  */
 592           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 593           unsigned int bit_offset = (backwards
 594                                      ? MAX ((int) bitsize - ((int) i + 1)
 595                                             * BITS_PER_WORD,
 596                                             0)
 597                                      : (int) i * BITS_PER_WORD);
 598           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 599
 600           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 601                                             bitsize - i * BITS_PER_WORD),
 602                                   bitnum + bit_offset, word_mode,
 603                                   value_word, fallback_p))
 604             {
 605               delete_insns_since (last);
 606               return false;
 607             }
 608         }
 609       return true;
 610     }
 611
 612   /* From here on we can assume that the field to be stored in is
 613      a full-word (whatever type that is), since it is shorter than a word.  */
 614
 615   /* OFFSET is the number of words or bytes (UNIT says which)
 616      from STR_RTX to the first word or byte containing part of the field.  */
 617
 618   if (!MEM_P (op0))
 619     {
 620       if (offset != 0
 621           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 622         {
 623           if (!REG_P (op0))
 624             {
 625               /* Since this is a destination (lvalue), we can't copy
 626                  it to a pseudo.  We can remove a SUBREG that does not
 627                  change the size of the operand.  Such a SUBREG may
 628                  have been added above.  */
 629               gcc_assert (GET_CODE (op0) == SUBREG
 630                           && (GET_MODE_SIZE (GET_MODE (op0))
 631                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 632               op0 = SUBREG_REG (op0);
 633             }
 634           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 635                                 op0, (offset * UNITS_PER_WORD));
 636         }
 637       offset = 0;
 638     }
 639
 640   /* If VALUE has a floating-point or complex mode, access it as an
 641      integer of the corresponding size.  This can occur on a machine
 642      with 64 bit registers that uses SFmode for float.  It can also
 643      occur for unaligned float or complex fields.  */
 644   orig_value = value;
 645   if (GET_MODE (value) != VOIDmode
 646       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 647       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 648     {
 649       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 650       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 651     }
 652
 653   /* Now OFFSET is nonzero only if OP0 is memory
 654      and is therefore always measured in bytes.  */
 655
 656   if (HAVE_insv
 657       && GET_MODE (value) != BLKmode
 658       && bitsize > 0
 659       && GET_MODE_BITSIZE (op_mode) >= bitsize
 660       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 661             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 662       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 663                                                         VOIDmode)
 664       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 665     {
 666       int xbitpos = bitpos;
 667       rtx value1;
 668       rtx xop0 = op0;
 669       rtx last = get_last_insn ();
 670       rtx pat;
 671       bool copy_back = false;
 672
 673       /* Add OFFSET into OP0's address.  */
 674       if (MEM_P (xop0))
 675         xop0 = adjust_address (xop0, byte_mode, offset);
 676
 677       /* If xop0 is a register, we need it in OP_MODE
 678          to make it acceptable to the format of insv.  */
 679       if (GET_CODE (xop0) == SUBREG)
 680         /* We can't just change the mode, because this might clobber op0,
 681            and we will need the original value of op0 if insv fails.  */
 682         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 683       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 684         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 685
 686       /* If the destination is a paradoxical subreg such that we need a
 687          truncate to the inner mode, perform the insertion on a temporary and
 688          truncate the result to the original destination.  Note that we can't
 689          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 690          X) 0)) is (reg:N X).  */
 691       if (GET_CODE (xop0) == SUBREG
 692           && REG_P (SUBREG_REG (xop0))
 693           && (!TRULY_NOOP_TRUNCATION
 694               (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
 695                GET_MODE_BITSIZE (op_mode))))
 696         {
 697           rtx tem = gen_reg_rtx (op_mode);
 698           emit_move_insn (tem, xop0);
 699           xop0 = tem;
 700           copy_back = true;
 701         }
 702
 703       /* On big-endian machines, we count bits from the most significant.
 704          If the bit field insn does not, we must invert.  */
 705
 706       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 707         xbitpos = unit - bitsize - xbitpos;
 708
 709       /* We have been counting XBITPOS within UNIT.
 710          Count instead within the size of the register.  */
 711       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 712         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 713
 714       unit = GET_MODE_BITSIZE (op_mode);
 715
 716       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 717       value1 = value;
 718       if (GET_MODE (value) != op_mode)
 719         {
 720           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 721             {
 722               /* Optimization: Don't bother really extending VALUE
 723                  if it has all the bits we will actually use.  However,
 724                  if we must narrow it, be sure we do it correctly.  */
 725
 726               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 727                 {
 728                   rtx tmp;
 729
 730                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 731                   if (! tmp)
 732                     tmp = simplify_gen_subreg (op_mode,
 733                                                force_reg (GET_MODE (value),
 734                                                           value1),
 735                                                GET_MODE (value), 0);
 736                   value1 = tmp;
 737                 }
 738               else
 739                 value1 = gen_lowpart (op_mode, value1);
 740             }
 741           else if (CONST_INT_P (value))
 742             value1 = gen_int_mode (INTVAL (value), op_mode);
 743           else
 744             /* Parse phase is supposed to make VALUE's data type
 745                match that of the component reference, which is a type
 746                at least as wide as the field; so VALUE should have
 747                a mode that corresponds to that type.  */
 748             gcc_assert (CONSTANT_P (value));
 749         }
 750
 751       /* If this machine's insv insists on a register,
 752          get VALUE1 into a register.  */
 753       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 754              (value1, op_mode)))
 755         value1 = force_reg (op_mode, value1);
 756
 757       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 758       if (pat)
 759         {
 760           emit_insn (pat);
 761
 762           if (copy_back)
 763             convert_move (op0, xop0, true);
 764           return true;
 765         }
 766       delete_insns_since (last);
 767     }
 768
 769   /* If OP0 is a memory, try copying it to a register and seeing if a
 770      cheap register alternative is available.  */
 771   if (HAVE_insv && MEM_P (op0))
 772     {
 773       enum machine_mode bestmode;
 774
 775       /* Get the mode to use for inserting into this field.  If OP0 is
 776          BLKmode, get the smallest mode consistent with the alignment. If
 777          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 778          mode. Otherwise, use the smallest mode containing the field.  */
 779
 780       if (GET_MODE (op0) == BLKmode
 781           || (op_mode != MAX_MACHINE_MODE
 782               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 783         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 784                                   (op_mode == MAX_MACHINE_MODE
 785                                    ? VOIDmode : op_mode),
 786                                   MEM_VOLATILE_P (op0));
 787       else
 788         bestmode = GET_MODE (op0);
 789
 790       if (bestmode != VOIDmode
 791           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 792           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 793                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 794         {
 795           rtx last, tempreg, xop0;
 796           unsigned HOST_WIDE_INT xoffset, xbitpos;
 797
 798           last = get_last_insn ();
 799
 800           /* Adjust address to point to the containing unit of
 801              that mode.  Compute the offset as a multiple of this unit,
 802              counting in bytes.  */
 803           unit = GET_MODE_BITSIZE (bestmode);
 804           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 805           xbitpos = bitnum % unit;
 806           xop0 = adjust_address (op0, bestmode, xoffset);
 807
 808           /* Fetch that unit, store the bitfield in it, then store
 809              the unit.  */
 810           tempreg = copy_to_reg (xop0);
 811           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 812                                  fieldmode, orig_value, false))
 813             {
 814               emit_move_insn (xop0, tempreg);
 815               return true;
 816             }
 817           delete_insns_since (last);
 818         }
 819     }
 820
 821   if (!fallback_p)
 822     return false;
 823
 824   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 825   return true;
 826 }
 827
 828 /* Generate code to store value from rtx VALUE
 829    into a bit-field within structure STR_RTX
 830    containing BITSIZE bits starting at bit BITNUM.
 831    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 832
 833 void
 834 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 835                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 836                  rtx value)
 837 {
 838   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 839     gcc_unreachable ();
 840 }
 841 \f
 842 /* Use shifts and boolean operations to store VALUE
 843    into a bit field of width BITSIZE
 844    in a memory location specified by OP0 except offset by OFFSET bytes.
 845      (OFFSET must be 0 if OP0 is a register.)
 846    The field starts at position BITPOS within the byte.
 847     (If OP0 is a register, it may be a full word or a narrower mode,
 848      but BITPOS still counts within a full word,
 849      which is significant on bigendian machines.)  */
 850
 851 static void
 852 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 853                        unsigned HOST_WIDE_INT bitsize,
 854                        unsigned HOST_WIDE_INT bitpos, rtx value)
 855 {
 856   enum machine_mode mode;
 857   unsigned int total_bits = BITS_PER_WORD;
 858   rtx temp;
 859   int all_zero = 0;
 860   int all_one = 0;
 861
 862   /* There is a case not handled here:
 863      a structure with a known alignment of just a halfword
 864      and a field split across two aligned halfwords within the structure.
 865      Or likewise a structure with a known alignment of just a byte
 866      and a field split across two bytes.
 867      Such cases are not supposed to be able to occur.  */
 868
 869   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 870     {
 871       gcc_assert (!offset);
 872       /* Special treatment for a bit field split across two registers.  */
 873       if (bitsize + bitpos > BITS_PER_WORD)
 874         {
 875           store_split_bit_field (op0, bitsize, bitpos, value);
 876           return;
 877         }
 878     }
 879   else
 880     {
 881       /* Get the proper mode to use for this field.  We want a mode that
 882          includes the entire field.  If such a mode would be larger than
 883          a word, we won't be doing the extraction the normal way.
 884          We don't want a mode bigger than the destination.  */
 885
 886       mode = GET_MODE (op0);
 887       if (GET_MODE_BITSIZE (mode) == 0
 888           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 889         mode = word_mode;
 890
 891       if (MEM_VOLATILE_P (op0)
 892           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 893           && flag_strict_volatile_bitfields > 0)
 894         mode = GET_MODE (op0);
 895       else
 896         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 897                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 898
 899       if (mode == VOIDmode)
 900         {
 901           /* The only way this should occur is if the field spans word
 902              boundaries.  */
 903           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 904                                  value);
 905           return;
 906         }
 907
 908       total_bits = GET_MODE_BITSIZE (mode);
 909
 910       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 911          be in the range 0 to total_bits-1, and put any excess bytes in
 912          OFFSET.  */
 913       if (bitpos >= total_bits)
 914         {
 915           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 916           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 917                      * BITS_PER_UNIT);
 918         }
 919
 920       /* Get ref to an aligned byte, halfword, or word containing the field.
 921          Adjust BITPOS to be position within a word,
 922          and OFFSET to be the offset of that word.
 923          Then alter OP0 to refer to that word.  */
 924       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 925       offset -= (offset % (total_bits / BITS_PER_UNIT));
 926       op0 = adjust_address (op0, mode, offset);
 927     }
 928
 929   mode = GET_MODE (op0);
 930
 931   /* Now MODE is either some integral mode for a MEM as OP0,
 932      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 933      The bit field is contained entirely within OP0.
 934      BITPOS is the starting bit number within OP0.
 935      (OP0's mode may actually be narrower than MODE.)  */
 936
 937   if (BYTES_BIG_ENDIAN)
 938       /* BITPOS is the distance between our msb
 939          and that of the containing datum.
 940          Convert it to the distance from the lsb.  */
 941       bitpos = total_bits - bitsize - bitpos;
 942
 943   /* Now BITPOS is always the distance between our lsb
 944      and that of OP0.  */
 945
 946   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 947      we must first convert its mode to MODE.  */
 948
 949   if (CONST_INT_P (value))
 950     {
 951       HOST_WIDE_INT v = INTVAL (value);
 952
 953       if (bitsize < HOST_BITS_PER_WIDE_INT)
 954         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 955
 956       if (v == 0)
 957         all_zero = 1;
 958       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 959                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 960                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 961         all_one = 1;
 962
 963       value = lshift_value (mode, value, bitpos, bitsize);
 964     }
 965   else
 966     {
 967       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 968                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 969
 970       if (GET_MODE (value) != mode)
 971         value = convert_to_mode (mode, value, 1);
 972
 973       if (must_and)
 974         value = expand_binop (mode, and_optab, value,
 975                               mask_rtx (mode, 0, bitsize, 0),
 976                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 977       if (bitpos > 0)
 978         value = expand_shift (LSHIFT_EXPR, mode, value,
 979                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 980     }
 981
 982   /* Now clear the chosen bits in OP0,
 983      except that if VALUE is -1 we need not bother.  */
 984   /* We keep the intermediates in registers to allow CSE to combine
 985      consecutive bitfield assignments.  */
 986
 987   temp = force_reg (mode, op0);
 988
 989   if (! all_one)
 990     {
 991       temp = expand_binop (mode, and_optab, temp,
 992                            mask_rtx (mode, bitpos, bitsize, 1),
 993                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 994       temp = force_reg (mode, temp);
 995     }
 996
 997   /* Now logical-or VALUE into OP0, unless it is zero.  */
 998
 999   if (! all_zero)
1000     {
1001       temp = expand_binop (mode, ior_optab, temp, value,
1002                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1003       temp = force_reg (mode, temp);
1004     }
1005
1006   if (op0 != temp)
1007     {
1008       op0 = copy_rtx (op0);
1009       emit_move_insn (op0, temp);
1010     }
1011 }
1012 \f
1013 /* Store a bit field that is split across multiple accessible memory objects.
1014
1015    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1016    BITSIZE is the field width; BITPOS the position of its first bit
1017    (within the word).
1018    VALUE is the value to store.
1019
1020    This does not yet handle fields wider than BITS_PER_WORD.  */
1021
1022 static void
1023 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1024                        unsigned HOST_WIDE_INT bitpos, rtx value)
1025 {
1026   unsigned int unit;
1027   unsigned int bitsdone = 0;
1028
1029   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1030      much at a time.  */
1031   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1032     unit = BITS_PER_WORD;
1033   else
1034     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1035
1036   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1037      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1038      that VALUE might be a floating-point constant.  */
1039   if (CONSTANT_P (value) && !CONST_INT_P (value))
1040     {
1041       rtx word = gen_lowpart_common (word_mode, value);
1042
1043       if (word && (value != word))
1044         value = word;
1045       else
1046         value = gen_lowpart_common (word_mode,
1047                                     force_reg (GET_MODE (value) != VOIDmode
1048                                                ? GET_MODE (value)
1049                                                : word_mode, value));
1050     }
1051
1052   while (bitsdone < bitsize)
1053     {
1054       unsigned HOST_WIDE_INT thissize;
1055       rtx part, word;
1056       unsigned HOST_WIDE_INT thispos;
1057       unsigned HOST_WIDE_INT offset;
1058
1059       offset = (bitpos + bitsdone) / unit;
1060       thispos = (bitpos + bitsdone) % unit;
1061
1062       /* THISSIZE must not overrun a word boundary.  Otherwise,
1063          store_fixed_bit_field will call us again, and we will mutually
1064          recurse forever.  */
1065       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1066       thissize = MIN (thissize, unit - thispos);
1067
1068       if (BYTES_BIG_ENDIAN)
1069         {
1070           int total_bits;
1071
1072           /* We must do an endian conversion exactly the same way as it is
1073              done in extract_bit_field, so that the two calls to
1074              extract_fixed_bit_field will have comparable arguments.  */
1075           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1076             total_bits = BITS_PER_WORD;
1077           else
1078             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1079
1080           /* Fetch successively less significant portions.  */
1081           if (CONST_INT_P (value))
1082             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1083                              >> (bitsize - bitsdone - thissize))
1084                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1085           else
1086             /* The args are chosen so that the last part includes the
1087                lsb.  Give extract_bit_field the value it needs (with
1088                endianness compensation) to fetch the piece we want.  */
1089             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1090                                             total_bits - bitsize + bitsdone,
1091                                             NULL_RTX, 1, false);
1092         }
1093       else
1094         {
1095           /* Fetch successively more significant portions.  */
1096           if (CONST_INT_P (value))
1097             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1098                              >> bitsdone)
1099                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1100           else
1101             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1102                                             bitsdone, NULL_RTX, 1, false);
1103         }
1104
1105       /* If OP0 is a register, then handle OFFSET here.
1106
1107          When handling multiword bitfields, extract_bit_field may pass
1108          down a word_mode SUBREG of a larger REG for a bitfield that actually
1109          crosses a word boundary.  Thus, for a SUBREG, we must find
1110          the current word starting from the base register.  */
1111       if (GET_CODE (op0) == SUBREG)
1112         {
1113           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1114           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1115           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1116             word = word_offset ? const0_rtx : op0;
1117           else
1118             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1119                                           GET_MODE (SUBREG_REG (op0)));
1120           offset = 0;
1121         }
1122       else if (REG_P (op0))
1123         {
1124           enum machine_mode op0_mode = GET_MODE (op0);
1125           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1126             word = offset ? const0_rtx : op0;
1127           else
1128             word = operand_subword_force (op0, offset, GET_MODE (op0));
1129           offset = 0;
1130         }
1131       else
1132         word = op0;
1133
1134       /* OFFSET is in UNITs, and UNIT is in bits.
1135          store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
1136          it is just an out-of-bounds access.  Ignore it.  */
1137       if (word != const0_rtx)
1138         store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1139                                thispos, part);
1140       bitsdone += thissize;
1141     }
1142 }
1143 \f
1144 /* A subroutine of extract_bit_field_1 that converts return value X
1145    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1146    to extract_bit_field.  */
1147
1148 static rtx
1149 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1150                              enum machine_mode tmode, bool unsignedp)
1151 {
1152   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1153     return x;
1154
1155   /* If the x mode is not a scalar integral, first convert to the
1156      integer mode of that size and then access it as a floating-point
1157      value via a SUBREG.  */
1158   if (!SCALAR_INT_MODE_P (tmode))
1159     {
1160       enum machine_mode smode;
1161
1162       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1163       x = convert_to_mode (smode, x, unsignedp);
1164       x = force_reg (smode, x);
1165       return gen_lowpart (tmode, x);
1166     }
1167
1168   return convert_to_mode (tmode, x, unsignedp);
1169 }
1170
1171 /* A subroutine of extract_bit_field, with the same arguments.
1172    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1173    if we can find no other means of implementing the operation.
1174    if FALLBACK_P is false, return NULL instead.  */
1175
1176 static rtx
1177 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1178                      unsigned HOST_WIDE_INT bitnum,
1179                      int unsignedp, bool packedp, rtx target,
1180                      enum machine_mode mode, enum machine_mode tmode,
1181                      bool fallback_p)
1182 {
1183   unsigned int unit
1184     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1185   unsigned HOST_WIDE_INT offset, bitpos;
1186   rtx op0 = str_rtx;
1187   enum machine_mode int_mode;
1188   enum machine_mode ext_mode;
1189   enum machine_mode mode1;
1190   enum insn_code icode;
1191   int byte_offset;
1192
1193   if (tmode == VOIDmode)
1194     tmode = mode;
1195
1196   while (GET_CODE (op0) == SUBREG)
1197     {
1198       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1199       op0 = SUBREG_REG (op0);
1200     }
1201
1202   /* If we have an out-of-bounds access to a register, just return an
1203      uninitialized register of the required mode.  This can occur if the
1204      source code contains an out-of-bounds access to a small array.  */
1205   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1206     return gen_reg_rtx (tmode);
1207
1208   if (REG_P (op0)
1209       && mode == GET_MODE (op0)
1210       && bitnum == 0
1211       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1212     {
1213       /* We're trying to extract a full register from itself.  */
1214       return op0;
1215     }
1216
1217   /* See if we can get a better vector mode before extracting.  */
1218   if (VECTOR_MODE_P (GET_MODE (op0))
1219       && !MEM_P (op0)
1220       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1221     {
1222       enum machine_mode new_mode;
1223
1224       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1225         new_mode = MIN_MODE_VECTOR_FLOAT;
1226       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1227         new_mode = MIN_MODE_VECTOR_FRACT;
1228       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1229         new_mode = MIN_MODE_VECTOR_UFRACT;
1230       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1231         new_mode = MIN_MODE_VECTOR_ACCUM;
1232       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1233         new_mode = MIN_MODE_VECTOR_UACCUM;
1234       else
1235         new_mode = MIN_MODE_VECTOR_INT;
1236
1237       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1238         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1239             && targetm.vector_mode_supported_p (new_mode))
1240           break;
1241       if (new_mode != VOIDmode)
1242         op0 = gen_lowpart (new_mode, op0);
1243     }
1244
1245   /* Use vec_extract patterns for extracting parts of vectors whenever
1246      available.  */
1247   if (VECTOR_MODE_P (GET_MODE (op0))
1248       && !MEM_P (op0)
1249       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1250       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1251           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1252     {
1253       enum machine_mode outermode = GET_MODE (op0);
1254       enum machine_mode innermode = GET_MODE_INNER (outermode);
1255       int icode = (int) optab_handler (vec_extract_optab, outermode);
1256       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1257       rtx rtxpos = GEN_INT (pos);
1258       rtx src = op0;
1259       rtx dest = NULL, pat, seq;
1260       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1261       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1262       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1263
1264       if (innermode == tmode || innermode == mode)
1265         dest = target;
1266
1267       if (!dest)
1268         dest = gen_reg_rtx (innermode);
1269
1270       start_sequence ();
1271
1272       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1273         dest = copy_to_mode_reg (mode0, dest);
1274
1275       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1276         src = copy_to_mode_reg (mode1, src);
1277
1278       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1279         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1280
1281       /* We could handle this, but we should always be called with a pseudo
1282          for our targets and all insns should take them as outputs.  */
1283       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1284                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1285                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1286
1287       pat = GEN_FCN (icode) (dest, src, rtxpos);
1288       seq = get_insns ();
1289       end_sequence ();
1290       if (pat)
1291         {
1292           emit_insn (seq);
1293           emit_insn (pat);
1294           if (mode0 != mode)
1295             return gen_lowpart (tmode, dest);
1296           return dest;
1297         }
1298     }
1299
1300   /* Make sure we are playing with integral modes.  Pun with subregs
1301      if we aren't.  */
1302   {
1303     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1304     if (imode != GET_MODE (op0))
1305       {
1306         if (MEM_P (op0))
1307           op0 = adjust_address (op0, imode, 0);
1308         else if (imode != BLKmode)
1309           {
1310             op0 = gen_lowpart (imode, op0);
1311
1312             /* If we got a SUBREG, force it into a register since we
1313                aren't going to be able to do another SUBREG on it.  */
1314             if (GET_CODE (op0) == SUBREG)
1315               op0 = force_reg (imode, op0);
1316           }
1317         else if (REG_P (op0))
1318           {
1319             rtx reg, subreg;
1320             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1321                                             MODE_INT);
1322             reg = gen_reg_rtx (imode);
1323             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1324             emit_move_insn (subreg, op0);
1325             op0 = reg;
1326             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1327           }
1328         else
1329           {
1330             rtx mem = assign_stack_temp (GET_MODE (op0),
1331                                          GET_MODE_SIZE (GET_MODE (op0)), 0);
1332             emit_move_insn (mem, op0);
1333             op0 = adjust_address (mem, BLKmode, 0);
1334           }
1335       }
1336   }
1337
1338   /* We may be accessing data outside the field, which means
1339      we can alias adjacent data.  */
1340   if (MEM_P (op0))
1341     {
1342       op0 = shallow_copy_rtx (op0);
1343       set_mem_alias_set (op0, 0);
1344       set_mem_expr (op0, 0);
1345     }
1346
1347   /* Extraction of a full-word or multi-word value from a structure
1348      in a register or aligned memory can be done with just a SUBREG.
1349      A subword value in the least significant part of a register
1350      can also be extracted with a SUBREG.  For this, we need the
1351      byte offset of the value in op0.  */
1352
1353   bitpos = bitnum % unit;
1354   offset = bitnum / unit;
1355   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1356
1357   /* If OP0 is a register, BITPOS must count within a word.
1358      But as we have it, it counts within whatever size OP0 now has.
1359      On a bigendian machine, these are not the same, so convert.  */
1360   if (BYTES_BIG_ENDIAN
1361       && !MEM_P (op0)
1362       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1363     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1364
1365   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1366      If that's wrong, the solution is to test for it and set TARGET to 0
1367      if needed.  */
1368
1369   /* Only scalar integer modes can be converted via subregs.  There is an
1370      additional problem for FP modes here in that they can have a precision
1371      which is different from the size.  mode_for_size uses precision, but
1372      we want a mode based on the size, so we must avoid calling it for FP
1373      modes.  */
1374   mode1  = (SCALAR_INT_MODE_P (tmode)
1375             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1376             : mode);
1377
1378   /* If the bitfield is volatile, we need to make sure the access
1379      remains on a type-aligned boundary.  */
1380   if (GET_CODE (op0) == MEM
1381       && MEM_VOLATILE_P (op0)
1382       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1383       && flag_strict_volatile_bitfields > 0)
1384     goto no_subreg_mode_swap;
1385
1386   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1387         && bitpos % BITS_PER_WORD == 0)
1388        || (mode1 != BLKmode
1389            /* ??? The big endian test here is wrong.  This is correct
1390               if the value is in a register, and if mode_for_size is not
1391               the same mode as op0.  This causes us to get unnecessarily
1392               inefficient code from the Thumb port when -mbig-endian.  */
1393            && (BYTES_BIG_ENDIAN
1394                ? bitpos + bitsize == BITS_PER_WORD
1395                : bitpos == 0)))
1396       && ((!MEM_P (op0)
1397            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1398                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1399            && GET_MODE_SIZE (mode1) != 0
1400            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1401           || (MEM_P (op0)
1402               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1403                   || (offset * BITS_PER_UNIT % bitsize == 0
1404                       && MEM_ALIGN (op0) % bitsize == 0)))))
1405     {
1406       if (MEM_P (op0))
1407         op0 = adjust_address (op0, mode1, offset);
1408       else if (mode1 != GET_MODE (op0))
1409         {
1410           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1411                                          byte_offset);
1412           if (sub == NULL)
1413             goto no_subreg_mode_swap;
1414           op0 = sub;
1415         }
1416       if (mode1 != mode)
1417         return convert_to_mode (tmode, op0, unsignedp);
1418       return op0;
1419     }
1420  no_subreg_mode_swap:
1421
1422   /* Handle fields bigger than a word.  */
1423
1424   if (bitsize > BITS_PER_WORD)
1425     {
1426       /* Here we transfer the words of the field
1427          in the order least significant first.
1428          This is because the most significant word is the one which may
1429          be less than full.  */
1430
1431       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1432       unsigned int i;
1433
1434       if (target == 0 || !REG_P (target))
1435         target = gen_reg_rtx (mode);
1436
1437       /* Indicate for flow that the entire target reg is being set.  */
1438       emit_clobber (target);
1439
1440       for (i = 0; i < nwords; i++)
1441         {
1442           /* If I is 0, use the low-order word in both field and target;
1443              if I is 1, use the next to lowest word; and so on.  */
1444           /* Word number in TARGET to use.  */
1445           unsigned int wordnum
1446             = (WORDS_BIG_ENDIAN
1447                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1448                : i);
1449           /* Offset from start of field in OP0.  */
1450           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1451                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1452                                                 * (int) BITS_PER_WORD))
1453                                      : (int) i * BITS_PER_WORD);
1454           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1455           rtx result_part
1456             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1457                                            bitsize - i * BITS_PER_WORD),
1458                                  bitnum + bit_offset, 1, false, target_part, mode,
1459                                  word_mode);
1460
1461           gcc_assert (target_part);
1462
1463           if (result_part != target_part)
1464             emit_move_insn (target_part, result_part);
1465         }
1466
1467       if (unsignedp)
1468         {
1469           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1470              need to be zero'd out.  */
1471           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1472             {
1473               unsigned int i, total_words;
1474
1475               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1476               for (i = nwords; i < total_words; i++)
1477                 emit_move_insn
1478                   (operand_subword (target,
1479                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1480                                     1, VOIDmode),
1481                    const0_rtx);
1482             }
1483           return target;
1484         }
1485
1486       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1487       target = expand_shift (LSHIFT_EXPR, mode, target,
1488                              build_int_cst (NULL_TREE,
1489                                             GET_MODE_BITSIZE (mode) - bitsize),
1490                              NULL_RTX, 0);
1491       return expand_shift (RSHIFT_EXPR, mode, target,
1492                            build_int_cst (NULL_TREE,
1493                                           GET_MODE_BITSIZE (mode) - bitsize),
1494                            NULL_RTX, 0);
1495     }
1496
1497   /* From here on we know the desired field is smaller than a word.  */
1498
1499   /* Check if there is a correspondingly-sized integer field, so we can
1500      safely extract it as one size of integer, if necessary; then
1501      truncate or extend to the size that is wanted; then use SUBREGs or
1502      convert_to_mode to get one of the modes we really wanted.  */
1503
1504   int_mode = int_mode_for_mode (tmode);
1505   if (int_mode == BLKmode)
1506     int_mode = int_mode_for_mode (mode);
1507   /* Should probably push op0 out to memory and then do a load.  */
1508   gcc_assert (int_mode != BLKmode);
1509
1510   /* OFFSET is the number of words or bytes (UNIT says which)
1511      from STR_RTX to the first word or byte containing part of the field.  */
1512   if (!MEM_P (op0))
1513     {
1514       if (offset != 0
1515           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1516         {
1517           if (!REG_P (op0))
1518             op0 = copy_to_reg (op0);
1519           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1520                                 op0, (offset * UNITS_PER_WORD));
1521         }
1522       offset = 0;
1523     }
1524
1525   /* Now OFFSET is nonzero only for memory operands.  */
1526   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1527   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1528   if (ext_mode != MAX_MACHINE_MODE
1529       && bitsize > 0
1530       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1531       /* If op0 is a register, we need it in EXT_MODE to make it
1532          acceptable to the format of ext(z)v.  */
1533       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1534       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1535            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1536       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1537     {
1538       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1539       rtx bitsize_rtx, bitpos_rtx;
1540       rtx last = get_last_insn ();
1541       rtx xop0 = op0;
1542       rtx xtarget = target;
1543       rtx xspec_target = target;
1544       rtx xspec_target_subreg = 0;
1545       rtx pat;
1546
1547       /* If op0 is a register, we need it in EXT_MODE to make it
1548          acceptable to the format of ext(z)v.  */
1549       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1550         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1551       if (MEM_P (xop0))
1552         /* Get ref to first byte containing part of the field.  */
1553         xop0 = adjust_address (xop0, byte_mode, xoffset);
1554
1555       /* On big-endian machines, we count bits from the most significant.
1556          If the bit field insn does not, we must invert.  */
1557       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1558         xbitpos = unit - bitsize - xbitpos;
1559
1560       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1561       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1562         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1563
1564       unit = GET_MODE_BITSIZE (ext_mode);
1565
1566       if (xtarget == 0)
1567         xtarget = xspec_target = gen_reg_rtx (tmode);
1568
1569       if (GET_MODE (xtarget) != ext_mode)
1570         {
1571           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1572              between the mode of the extraction (word_mode) and the target
1573              mode.  Instead, create a temporary and use convert_move to set
1574              the target.  */
1575           if (REG_P (xtarget)
1576               && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1577                                         GET_MODE_BITSIZE (ext_mode)))
1578             {
1579               xtarget = gen_lowpart (ext_mode, xtarget);
1580               if (GET_MODE_SIZE (ext_mode)
1581                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1582                 xspec_target_subreg = xtarget;
1583             }
1584           else
1585             xtarget = gen_reg_rtx (ext_mode);
1586         }
1587
1588       /* If this machine's ext(z)v insists on a register target,
1589          make sure we have one.  */
1590       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1591         xtarget = gen_reg_rtx (ext_mode);
1592
1593       bitsize_rtx = GEN_INT (bitsize);
1594       bitpos_rtx = GEN_INT (xbitpos);
1595
1596       pat = (unsignedp
1597              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1598              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1599       if (pat)
1600         {
1601           emit_insn (pat);
1602           if (xtarget == xspec_target)
1603             return xtarget;
1604           if (xtarget == xspec_target_subreg)
1605             return xspec_target;
1606           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1607         }
1608       delete_insns_since (last);
1609     }
1610
1611   /* If OP0 is a memory, try copying it to a register and seeing if a
1612      cheap register alternative is available.  */
1613   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1614     {
1615       enum machine_mode bestmode;
1616
1617       /* Get the mode to use for inserting into this field.  If
1618          OP0 is BLKmode, get the smallest mode consistent with the
1619          alignment. If OP0 is a non-BLKmode object that is no
1620          wider than EXT_MODE, use its mode. Otherwise, use the
1621          smallest mode containing the field.  */
1622
1623       if (GET_MODE (op0) == BLKmode
1624           || (ext_mode != MAX_MACHINE_MODE
1625               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1626         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1627                                   (ext_mode == MAX_MACHINE_MODE
1628                                    ? VOIDmode : ext_mode),
1629                                   MEM_VOLATILE_P (op0));
1630       else
1631         bestmode = GET_MODE (op0);
1632
1633       if (bestmode != VOIDmode
1634           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1635                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1636         {
1637           unsigned HOST_WIDE_INT xoffset, xbitpos;
1638
1639           /* Compute the offset as a multiple of this unit,
1640              counting in bytes.  */
1641           unit = GET_MODE_BITSIZE (bestmode);
1642           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1643           xbitpos = bitnum % unit;
1644
1645           /* Make sure the register is big enough for the whole field.  */
1646           if (xoffset * BITS_PER_UNIT + unit
1647               >= offset * BITS_PER_UNIT + bitsize)
1648             {
1649               rtx last, result, xop0;
1650
1651               last = get_last_insn ();
1652
1653               /* Fetch it to a register in that size.  */
1654               xop0 = adjust_address (op0, bestmode, xoffset);
1655               xop0 = force_reg (bestmode, xop0);
1656               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1657                                             unsignedp, packedp, target,
1658                                             mode, tmode, false);
1659               if (result)
1660                 return result;
1661
1662               delete_insns_since (last);
1663             }
1664         }
1665     }
1666
1667   if (!fallback_p)
1668     return NULL;
1669
1670   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1671                                     bitpos, target, unsignedp, packedp);
1672   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1673 }
1674
1675 /* Generate code to extract a byte-field from STR_RTX
1676    containing BITSIZE bits, starting at BITNUM,
1677    and put it in TARGET if possible (if TARGET is nonzero).
1678    Regardless of TARGET, we return the rtx for where the value is placed.
1679
1680    STR_RTX is the structure containing the byte (a REG or MEM).
1681    UNSIGNEDP is nonzero if this is an unsigned bit field.
1682    PACKEDP is nonzero if the field has the packed attribute.
1683    MODE is the natural mode of the field value once extracted.
1684    TMODE is the mode the caller would like the value to have;
1685    but the value may be returned with type MODE instead.
1686
1687    If a TARGET is specified and we can store in it at no extra cost,
1688    we do so, and return TARGET.
1689    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1690    if they are equally easy.  */
1691
1692 rtx
1693 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1694                    unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1695                    rtx target, enum machine_mode mode, enum machine_mode tmode)
1696 {
1697   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1698                               target, mode, tmode, true);
1699 }
1700 \f
1701 /* Extract a bit field using shifts and boolean operations
1702    Returns an rtx to represent the value.
1703    OP0 addresses a register (word) or memory (byte).
1704    BITPOS says which bit within the word or byte the bit field starts in.
1705    OFFSET says how many bytes farther the bit field starts;
1706     it is 0 if OP0 is a register.
1707    BITSIZE says how many bits long the bit field is.
1708     (If OP0 is a register, it may be narrower than a full word,
1709      but BITPOS still counts within a full word,
1710      which is significant on bigendian machines.)
1711
1712    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1713    PACKEDP is true if the field has the packed attribute.
1714
1715    If TARGET is nonzero, attempts to store the value there
1716    and return TARGET, but this is not guaranteed.
1717    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1718
1719 static rtx
1720 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1721                          unsigned HOST_WIDE_INT offset,
1722                          unsigned HOST_WIDE_INT bitsize,
1723                          unsigned HOST_WIDE_INT bitpos, rtx target,
1724                          int unsignedp, bool packedp)
1725 {
1726   unsigned int total_bits = BITS_PER_WORD;
1727   enum machine_mode mode;
1728
1729   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1730     {
1731       /* Special treatment for a bit field split across two registers.  */
1732       if (bitsize + bitpos > BITS_PER_WORD)
1733         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1734     }
1735   else
1736     {
1737       /* Get the proper mode to use for this field.  We want a mode that
1738          includes the entire field.  If such a mode would be larger than
1739          a word, we won't be doing the extraction the normal way.  */
1740
1741       if (MEM_VOLATILE_P (op0)
1742           && flag_strict_volatile_bitfields > 0)
1743         {
1744           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1745             mode = GET_MODE (op0);
1746           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1747             mode = GET_MODE (target);
1748           else
1749             mode = tmode;
1750         }
1751       else
1752         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1753                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1754
1755       if (mode == VOIDmode)
1756         /* The only way this should occur is if the field spans word
1757            boundaries.  */
1758         return extract_split_bit_field (op0, bitsize,
1759                                         bitpos + offset * BITS_PER_UNIT,
1760                                         unsignedp);
1761
1762       total_bits = GET_MODE_BITSIZE (mode);
1763
1764       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1765          be in the range 0 to total_bits-1, and put any excess bytes in
1766          OFFSET.  */
1767       if (bitpos >= total_bits)
1768         {
1769           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1770           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1771                      * BITS_PER_UNIT);
1772         }
1773
1774       /* If we're accessing a volatile MEM, we can't do the next
1775          alignment step if it results in a multi-word access where we
1776          otherwise wouldn't have one.  So, check for that case
1777          here.  */
1778       if (MEM_P (op0)
1779           && MEM_VOLATILE_P (op0)
1780           && flag_strict_volatile_bitfields > 0
1781           && bitpos + bitsize <= total_bits
1782           && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1783         {
1784           if (STRICT_ALIGNMENT)
1785             {
1786               static bool informed_about_misalignment = false;
1787               bool warned;
1788
1789               if (packedp)
1790                 {
1791                   if (bitsize == total_bits)
1792                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1793                                          "multiple accesses to volatile structure member"
1794                                          " because of packed attribute");
1795                   else
1796                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1797                                          "multiple accesses to volatile structure bitfield"
1798                                          " because of packed attribute");
1799
1800                   return extract_split_bit_field (op0, bitsize,
1801                                                   bitpos + offset * BITS_PER_UNIT,
1802                                                   unsignedp);
1803                 }
1804
1805               if (bitsize == total_bits)
1806                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1807                                      "mis-aligned access used for structure member");
1808               else
1809                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1810                                      "mis-aligned access used for structure bitfield");
1811
1812               if (! informed_about_misalignment && warned)
1813                 {
1814                   informed_about_misalignment = true;
1815                   inform (input_location,
1816                           "when a volatile object spans multiple type-sized locations,"
1817                           " the compiler must choose between using a single mis-aligned access to"
1818                           " preserve the volatility, or using multiple aligned accesses to avoid"
1819                           " runtime faults; this code may fail at runtime if the hardware does"
1820                           " not allow this access");
1821                 }
1822             }
1823         }
1824       else
1825         {
1826
1827           /* Get ref to an aligned byte, halfword, or word containing the field.
1828              Adjust BITPOS to be position within a word,
1829              and OFFSET to be the offset of that word.
1830              Then alter OP0 to refer to that word.  */
1831           bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1832           offset -= (offset % (total_bits / BITS_PER_UNIT));
1833         }
1834
1835       op0 = adjust_address (op0, mode, offset);
1836     }
1837
1838   mode = GET_MODE (op0);
1839
1840   if (BYTES_BIG_ENDIAN)
1841     /* BITPOS is the distance between our msb and that of OP0.
1842        Convert it to the distance from the lsb.  */
1843     bitpos = total_bits - bitsize - bitpos;
1844
1845   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1846      We have reduced the big-endian case to the little-endian case.  */
1847
1848   if (unsignedp)
1849     {
1850       if (bitpos)
1851         {
1852           /* If the field does not already start at the lsb,
1853              shift it so it does.  */
1854           tree amount = build_int_cst (NULL_TREE, bitpos);
1855           /* Maybe propagate the target for the shift.  */
1856           /* But not if we will return it--could confuse integrate.c.  */
1857           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1858           if (tmode != mode) subtarget = 0;
1859           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1860         }
1861       /* Convert the value to the desired mode.  */
1862       if (mode != tmode)
1863         op0 = convert_to_mode (tmode, op0, 1);
1864
1865       /* Unless the msb of the field used to be the msb when we shifted,
1866          mask out the upper bits.  */
1867
1868       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1869         return expand_binop (GET_MODE (op0), and_optab, op0,
1870                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1871                              target, 1, OPTAB_LIB_WIDEN);
1872       return op0;
1873     }
1874
1875   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1876      then arithmetic-shift its lsb to the lsb of the word.  */
1877   op0 = force_reg (mode, op0);
1878
1879   /* Find the narrowest integer mode that contains the field.  */
1880
1881   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1882        mode = GET_MODE_WIDER_MODE (mode))
1883     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1884       {
1885         op0 = convert_to_mode (mode, op0, 0);
1886         break;
1887       }
1888
1889   if (mode != tmode)
1890     target = 0;
1891
1892   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1893     {
1894       tree amount
1895         = build_int_cst (NULL_TREE,
1896                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1897       /* Maybe propagate the target for the shift.  */
1898       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1899       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1900     }
1901
1902   return expand_shift (RSHIFT_EXPR, mode, op0,
1903                        build_int_cst (NULL_TREE,
1904                                       GET_MODE_BITSIZE (mode) - bitsize),
1905                        target, 0);
1906 }
1907 \f
1908 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1909    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1910    complement of that if COMPLEMENT.  The mask is truncated if
1911    necessary to the width of mode MODE.  The mask is zero-extended if
1912    BITSIZE+BITPOS is too small for MODE.  */
1913
1914 static rtx
1915 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1916 {
1917   double_int mask;
1918
1919   mask = double_int_mask (bitsize);
1920   mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1921
1922   if (complement)
1923     mask = double_int_not (mask);
1924
1925   return immed_double_int_const (mask, mode);
1926 }
1927
1928 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1929    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1930
1931 static rtx
1932 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1933 {
1934   double_int val;
1935
1936   val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1937   val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1938
1939   return immed_double_int_const (val, mode);
1940 }
1941 \f
1942 /* Extract a bit field that is split across two words
1943    and return an RTX for the result.
1944
1945    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1946    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1947    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1948
1949 static rtx
1950 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1951                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1952 {
1953   unsigned int unit;
1954   unsigned int bitsdone = 0;
1955   rtx result = NULL_RTX;
1956   int first = 1;
1957
1958   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1959      much at a time.  */
1960   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1961     unit = BITS_PER_WORD;
1962   else
1963     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1964
1965   while (bitsdone < bitsize)
1966     {
1967       unsigned HOST_WIDE_INT thissize;
1968       rtx part, word;
1969       unsigned HOST_WIDE_INT thispos;
1970       unsigned HOST_WIDE_INT offset;
1971
1972       offset = (bitpos + bitsdone) / unit;
1973       thispos = (bitpos + bitsdone) % unit;
1974
1975       /* THISSIZE must not overrun a word boundary.  Otherwise,
1976          extract_fixed_bit_field will call us again, and we will mutually
1977          recurse forever.  */
1978       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1979       thissize = MIN (thissize, unit - thispos);
1980
1981       /* If OP0 is a register, then handle OFFSET here.
1982
1983          When handling multiword bitfields, extract_bit_field may pass
1984          down a word_mode SUBREG of a larger REG for a bitfield that actually
1985          crosses a word boundary.  Thus, for a SUBREG, we must find
1986          the current word starting from the base register.  */
1987       if (GET_CODE (op0) == SUBREG)
1988         {
1989           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1990           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1991                                         GET_MODE (SUBREG_REG (op0)));
1992           offset = 0;
1993         }
1994       else if (REG_P (op0))
1995         {
1996           word = operand_subword_force (op0, offset, GET_MODE (op0));
1997           offset = 0;
1998         }
1999       else
2000         word = op0;
2001
2002       /* Extract the parts in bit-counting order,
2003          whose meaning is determined by BYTES_PER_UNIT.
2004          OFFSET is in UNITs, and UNIT is in bits.
2005          extract_fixed_bit_field wants offset in bytes.  */
2006       part = extract_fixed_bit_field (word_mode, word,
2007                                       offset * unit / BITS_PER_UNIT,
2008                                       thissize, thispos, 0, 1, false);
2009       bitsdone += thissize;
2010
2011       /* Shift this part into place for the result.  */
2012       if (BYTES_BIG_ENDIAN)
2013         {
2014           if (bitsize != bitsdone)
2015             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2016                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2017                                  0, 1);
2018         }
2019       else
2020         {
2021           if (bitsdone != thissize)
2022             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2023                                  build_int_cst (NULL_TREE,
2024                                                 bitsdone - thissize), 0, 1);
2025         }
2026
2027       if (first)
2028         result = part;
2029       else
2030         /* Combine the parts with bitwise or.  This works
2031            because we extracted each part as an unsigned bit field.  */
2032         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2033                                OPTAB_LIB_WIDEN);
2034
2035       first = 0;
2036     }
2037
2038   /* Unsigned bit field: we are done.  */
2039   if (unsignedp)
2040     return result;
2041   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2042   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2043                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2044                          NULL_RTX, 0);
2045   return expand_shift (RSHIFT_EXPR, word_mode, result,
2046                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2047                        NULL_RTX, 0);
2048 }
2049 \f
2050 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2051    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2052    MODE, fill the upper bits with zeros.  Fail if the layout of either
2053    mode is unknown (as for CC modes) or if the extraction would involve
2054    unprofitable mode punning.  Return the value on success, otherwise
2055    return null.
2056
2057    This is different from gen_lowpart* in these respects:
2058
2059      - the returned value must always be considered an rvalue
2060
2061      - when MODE is wider than SRC_MODE, the extraction involves
2062        a zero extension
2063
2064      - when MODE is smaller than SRC_MODE, the extraction involves
2065        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2066
2067    In other words, this routine performs a computation, whereas the
2068    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2069    operations.  */
2070
2071 rtx
2072 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2073 {
2074   enum machine_mode int_mode, src_int_mode;
2075
2076   if (mode == src_mode)
2077     return src;
2078
2079   if (CONSTANT_P (src))
2080     {
2081       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2082          fails, it will happily create (subreg (symbol_ref)) or similar
2083          invalid SUBREGs.  */
2084       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2085       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2086       if (ret)
2087         return ret;
2088
2089       if (GET_MODE (src) == VOIDmode
2090           || !validate_subreg (mode, src_mode, src, byte))
2091         return NULL_RTX;
2092
2093       src = force_reg (GET_MODE (src), src);
2094       return gen_rtx_SUBREG (mode, src, byte);
2095     }
2096
2097   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2098     return NULL_RTX;
2099
2100   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2101       && MODES_TIEABLE_P (mode, src_mode))
2102     {
2103       rtx x = gen_lowpart_common (mode, src);
2104       if (x)
2105         return x;
2106     }
2107
2108   src_int_mode = int_mode_for_mode (src_mode);
2109   int_mode = int_mode_for_mode (mode);
2110   if (src_int_mode == BLKmode || int_mode == BLKmode)
2111     return NULL_RTX;
2112
2113   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2114     return NULL_RTX;
2115   if (!MODES_TIEABLE_P (int_mode, mode))
2116     return NULL_RTX;
2117
2118   src = gen_lowpart (src_int_mode, src);
2119   src = convert_modes (int_mode, src_int_mode, src, true);
2120   src = gen_lowpart (mode, src);
2121   return src;
2122 }
2123 \f
2124 /* Add INC into TARGET.  */
2125
2126 void
2127 expand_inc (rtx target, rtx inc)
2128 {
2129   rtx value = expand_binop (GET_MODE (target), add_optab,
2130                             target, inc,
2131                             target, 0, OPTAB_LIB_WIDEN);
2132   if (value != target)
2133     emit_move_insn (target, value);
2134 }
2135
2136 /* Subtract DEC from TARGET.  */
2137
2138 void
2139 expand_dec (rtx target, rtx dec)
2140 {
2141   rtx value = expand_binop (GET_MODE (target), sub_optab,
2142                             target, dec,
2143                             target, 0, OPTAB_LIB_WIDEN);
2144   if (value != target)
2145     emit_move_insn (target, value);
2146 }
2147 \f
2148 /* Output a shift instruction for expression code CODE,
2149    with SHIFTED being the rtx for the value to shift,
2150    and AMOUNT the tree for the amount to shift by.
2151    Store the result in the rtx TARGET, if that is convenient.
2152    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2153    Return the rtx for where the value is.  */
2154
2155 rtx
2156 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2157               tree amount, rtx target, int unsignedp)
2158 {
2159   rtx op1, temp = 0;
2160   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2161   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2162   optab lshift_optab = ashl_optab;
2163   optab rshift_arith_optab = ashr_optab;
2164   optab rshift_uns_optab = lshr_optab;
2165   optab lrotate_optab = rotl_optab;
2166   optab rrotate_optab = rotr_optab;
2167   enum machine_mode op1_mode;
2168   int attempt;
2169   bool speed = optimize_insn_for_speed_p ();
2170
2171   op1 = expand_normal (amount);
2172   op1_mode = GET_MODE (op1);
2173
2174   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2175      shift amount is a vector, use the vector/vector shift patterns.  */
2176   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2177     {
2178       lshift_optab = vashl_optab;
2179       rshift_arith_optab = vashr_optab;
2180       rshift_uns_optab = vlshr_optab;
2181       lrotate_optab = vrotl_optab;
2182       rrotate_optab = vrotr_optab;
2183     }
2184
2185   /* Previously detected shift-counts computed by NEGATE_EXPR
2186      and shifted in the other direction; but that does not work
2187      on all machines.  */
2188
2189   if (SHIFT_COUNT_TRUNCATED)
2190     {
2191       if (CONST_INT_P (op1)
2192           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2193               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2194         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2195                        % GET_MODE_BITSIZE (mode));
2196       else if (GET_CODE (op1) == SUBREG
2197                && subreg_lowpart_p (op1)
2198                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2199         op1 = SUBREG_REG (op1);
2200     }
2201
2202   if (op1 == const0_rtx)
2203     return shifted;
2204
2205   /* Check whether its cheaper to implement a left shift by a constant
2206      bit count by a sequence of additions.  */
2207   if (code == LSHIFT_EXPR
2208       && CONST_INT_P (op1)
2209       && INTVAL (op1) > 0
2210       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2211       && INTVAL (op1) < MAX_BITS_PER_WORD
2212       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2213       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2214     {
2215       int i;
2216       for (i = 0; i < INTVAL (op1); i++)
2217         {
2218           temp = force_reg (mode, shifted);
2219           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2220                                   unsignedp, OPTAB_LIB_WIDEN);
2221         }
2222       return shifted;
2223     }
2224
2225   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2226     {
2227       enum optab_methods methods;
2228
2229       if (attempt == 0)
2230         methods = OPTAB_DIRECT;
2231       else if (attempt == 1)
2232         methods = OPTAB_WIDEN;
2233       else
2234         methods = OPTAB_LIB_WIDEN;
2235
2236       if (rotate)
2237         {
2238           /* Widening does not work for rotation.  */
2239           if (methods == OPTAB_WIDEN)
2240             continue;
2241           else if (methods == OPTAB_LIB_WIDEN)
2242             {
2243               /* If we have been unable to open-code this by a rotation,
2244                  do it as the IOR of two shifts.  I.e., to rotate A
2245                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2246                  where C is the bitsize of A.
2247
2248                  It is theoretically possible that the target machine might
2249                  not be able to perform either shift and hence we would
2250                  be making two libcalls rather than just the one for the
2251                  shift (similarly if IOR could not be done).  We will allow
2252                  this extremely unlikely lossage to avoid complicating the
2253                  code below.  */
2254
2255               rtx subtarget = target == shifted ? 0 : target;
2256               tree new_amount, other_amount;
2257               rtx temp1;
2258               tree type = TREE_TYPE (amount);
2259               if (GET_MODE (op1) != TYPE_MODE (type)
2260                   && GET_MODE (op1) != VOIDmode)
2261                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2262               new_amount = make_tree (type, op1);
2263               other_amount
2264                 = fold_build2 (MINUS_EXPR, type,
2265                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2266                                new_amount);
2267
2268               shifted = force_reg (mode, shifted);
2269
2270               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2271                                    mode, shifted, new_amount, 0, 1);
2272               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2273                                     mode, shifted, other_amount, subtarget, 1);
2274               return expand_binop (mode, ior_optab, temp, temp1, target,
2275                                    unsignedp, methods);
2276             }
2277
2278           temp = expand_binop (mode,
2279                                left ? lrotate_optab : rrotate_optab,
2280                                shifted, op1, target, unsignedp, methods);
2281         }
2282       else if (unsignedp)
2283         temp = expand_binop (mode,
2284                              left ? lshift_optab : rshift_uns_optab,
2285                              shifted, op1, target, unsignedp, methods);
2286
2287       /* Do arithmetic shifts.
2288          Also, if we are going to widen the operand, we can just as well
2289          use an arithmetic right-shift instead of a logical one.  */
2290       if (temp == 0 && ! rotate
2291           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2292         {
2293           enum optab_methods methods1 = methods;
2294
2295           /* If trying to widen a log shift to an arithmetic shift,
2296              don't accept an arithmetic shift of the same size.  */
2297           if (unsignedp)
2298             methods1 = OPTAB_MUST_WIDEN;
2299
2300           /* Arithmetic shift */
2301
2302           temp = expand_binop (mode,
2303                                left ? lshift_optab : rshift_arith_optab,
2304                                shifted, op1, target, unsignedp, methods1);
2305         }
2306
2307       /* We used to try extzv here for logical right shifts, but that was
2308          only useful for one machine, the VAX, and caused poor code
2309          generation there for lshrdi3, so the code was deleted and a
2310          define_expand for lshrsi3 was added to vax.md.  */
2311     }
2312
2313   gcc_assert (temp);
2314   return temp;
2315 }
2316 \f
2317 /* Indicates the type of fixup needed after a constant multiplication.
2318    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2319    the result should be negated, and ADD_VARIANT means that the
2320    multiplicand should be added to the result.  */
2321 enum mult_variant {basic_variant, negate_variant, add_variant};
2322
2323 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2324                         const struct mult_cost *, enum machine_mode mode);
2325 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2326                                  struct algorithm *, enum mult_variant *, int);
2327 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2328                               const struct algorithm *, enum mult_variant);
2329 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2330                                                  int, rtx *, int *, int *);
2331 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2332 static rtx extract_high_half (enum machine_mode, rtx);
2333 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2334 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2335                                        int, int);
2336 /* Compute and return the best algorithm for multiplying by T.
2337    The algorithm must cost less than cost_limit
2338    If retval.cost >= COST_LIMIT, no algorithm was found and all
2339    other field of the returned struct are undefined.
2340    MODE is the machine mode of the multiplication.  */
2341
2342 static void
2343 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2344             const struct mult_cost *cost_limit, enum machine_mode mode)
2345 {
2346   int m;
2347   struct algorithm *alg_in, *best_alg;
2348   struct mult_cost best_cost;
2349   struct mult_cost new_limit;
2350   int op_cost, op_latency;
2351   unsigned HOST_WIDE_INT orig_t = t;
2352   unsigned HOST_WIDE_INT q;
2353   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2354   int hash_index;
2355   bool cache_hit = false;
2356   enum alg_code cache_alg = alg_zero;
2357   bool speed = optimize_insn_for_speed_p ();
2358
2359   /* Indicate that no algorithm is yet found.  If no algorithm
2360      is found, this value will be returned and indicate failure.  */
2361   alg_out->cost.cost = cost_limit->cost + 1;
2362   alg_out->cost.latency = cost_limit->latency + 1;
2363
2364   if (cost_limit->cost < 0
2365       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2366     return;
2367
2368   /* Restrict the bits of "t" to the multiplication's mode.  */
2369   t &= GET_MODE_MASK (mode);
2370
2371   /* t == 1 can be done in zero cost.  */
2372   if (t == 1)
2373     {
2374       alg_out->ops = 1;
2375       alg_out->cost.cost = 0;
2376       alg_out->cost.latency = 0;
2377       alg_out->op[0] = alg_m;
2378       return;
2379     }
2380
2381   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2382      fail now.  */
2383   if (t == 0)
2384     {
2385       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2386         return;
2387       else
2388         {
2389           alg_out->ops = 1;
2390           alg_out->cost.cost = zero_cost[speed];
2391           alg_out->cost.latency = zero_cost[speed];
2392           alg_out->op[0] = alg_zero;
2393           return;
2394         }
2395     }
2396
2397   /* We'll be needing a couple extra algorithm structures now.  */
2398
2399   alg_in = XALLOCA (struct algorithm);
2400   best_alg = XALLOCA (struct algorithm);
2401   best_cost = *cost_limit;
2402
2403   /* Compute the hash index.  */
2404   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2405
2406   /* See if we already know what to do for T.  */
2407   if (alg_hash[hash_index].t == t
2408       && alg_hash[hash_index].mode == mode
2409       && alg_hash[hash_index].mode == mode
2410       && alg_hash[hash_index].speed == speed
2411       && alg_hash[hash_index].alg != alg_unknown)
2412     {
2413       cache_alg = alg_hash[hash_index].alg;
2414
2415       if (cache_alg == alg_impossible)
2416         {
2417           /* The cache tells us that it's impossible to synthesize
2418              multiplication by T within alg_hash[hash_index].cost.  */
2419           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2420             /* COST_LIMIT is at least as restrictive as the one
2421                recorded in the hash table, in which case we have no
2422                hope of synthesizing a multiplication.  Just
2423                return.  */
2424             return;
2425
2426           /* If we get here, COST_LIMIT is less restrictive than the
2427              one recorded in the hash table, so we may be able to
2428              synthesize a multiplication.  Proceed as if we didn't
2429              have the cache entry.  */
2430         }
2431       else
2432         {
2433           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2434             /* The cached algorithm shows that this multiplication
2435                requires more cost than COST_LIMIT.  Just return.  This
2436                way, we don't clobber this cache entry with
2437                alg_impossible but retain useful information.  */
2438             return;
2439
2440           cache_hit = true;
2441
2442           switch (cache_alg)
2443             {
2444             case alg_shift:
2445               goto do_alg_shift;
2446
2447             case alg_add_t_m2:
2448             case alg_sub_t_m2:
2449               goto do_alg_addsub_t_m2;
2450
2451             case alg_add_factor:
2452             case alg_sub_factor:
2453               goto do_alg_addsub_factor;
2454
2455             case alg_add_t2_m:
2456               goto do_alg_add_t2_m;
2457
2458             case alg_sub_t2_m:
2459               goto do_alg_sub_t2_m;
2460
2461             default:
2462               gcc_unreachable ();
2463             }
2464         }
2465     }
2466
2467   /* If we have a group of zero bits at the low-order part of T, try
2468      multiplying by the remaining bits and then doing a shift.  */
2469
2470   if ((t & 1) == 0)
2471     {
2472     do_alg_shift:
2473       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2474       if (m < maxm)
2475         {
2476           q = t >> m;
2477           /* The function expand_shift will choose between a shift and
2478              a sequence of additions, so the observed cost is given as
2479              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2480           op_cost = m * add_cost[speed][mode];
2481           if (shift_cost[speed][mode][m] < op_cost)
2482             op_cost = shift_cost[speed][mode][m];
2483           new_limit.cost = best_cost.cost - op_cost;
2484           new_limit.latency = best_cost.latency - op_cost;
2485           synth_mult (alg_in, q, &new_limit, mode);
2486
2487           alg_in->cost.cost += op_cost;
2488           alg_in->cost.latency += op_cost;
2489           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2490             {
2491               struct algorithm *x;
2492               best_cost = alg_in->cost;
2493               x = alg_in, alg_in = best_alg, best_alg = x;
2494               best_alg->log[best_alg->ops] = m;
2495               best_alg->op[best_alg->ops] = alg_shift;
2496             }
2497
2498           /* See if treating ORIG_T as a signed number yields a better
2499              sequence.  Try this sequence only for a negative ORIG_T
2500              as it would be useless for a non-negative ORIG_T.  */
2501           if ((HOST_WIDE_INT) orig_t < 0)
2502             {
2503               /* Shift ORIG_T as follows because a right shift of a
2504                  negative-valued signed type is implementation
2505                  defined.  */
2506               q = ~(~orig_t >> m);
2507               /* The function expand_shift will choose between a shift
2508                  and a sequence of additions, so the observed cost is
2509                  given as MIN (m * add_cost[speed][mode],
2510                  shift_cost[speed][mode][m]).  */
2511               op_cost = m * add_cost[speed][mode];
2512               if (shift_cost[speed][mode][m] < op_cost)
2513                 op_cost = shift_cost[speed][mode][m];
2514               new_limit.cost = best_cost.cost - op_cost;
2515               new_limit.latency = best_cost.latency - op_cost;
2516               synth_mult (alg_in, q, &new_limit, mode);
2517
2518               alg_in->cost.cost += op_cost;
2519               alg_in->cost.latency += op_cost;
2520               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2521                 {
2522                   struct algorithm *x;
2523                   best_cost = alg_in->cost;
2524                   x = alg_in, alg_in = best_alg, best_alg = x;
2525                   best_alg->log[best_alg->ops] = m;
2526                   best_alg->op[best_alg->ops] = alg_shift;
2527                 }
2528             }
2529         }
2530       if (cache_hit)
2531         goto done;
2532     }
2533
2534   /* If we have an odd number, add or subtract one.  */
2535   if ((t & 1) != 0)
2536     {
2537       unsigned HOST_WIDE_INT w;
2538
2539     do_alg_addsub_t_m2:
2540       for (w = 1; (w & t) != 0; w <<= 1)
2541         ;
2542       /* If T was -1, then W will be zero after the loop.  This is another
2543          case where T ends with ...111.  Handling this with (T + 1) and
2544          subtract 1 produces slightly better code and results in algorithm
2545          selection much faster than treating it like the ...0111 case
2546          below.  */
2547       if (w == 0
2548           || (w > 2
2549               /* Reject the case where t is 3.
2550                  Thus we prefer addition in that case.  */
2551               && t != 3))
2552         {
2553           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2554
2555           op_cost = add_cost[speed][mode];
2556           new_limit.cost = best_cost.cost - op_cost;
2557           new_limit.latency = best_cost.latency - op_cost;
2558           synth_mult (alg_in, t + 1, &new_limit, mode);
2559
2560           alg_in->cost.cost += op_cost;
2561           alg_in->cost.latency += op_cost;
2562           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2563             {
2564               struct algorithm *x;
2565               best_cost = alg_in->cost;
2566               x = alg_in, alg_in = best_alg, best_alg = x;
2567               best_alg->log[best_alg->ops] = 0;
2568               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2569             }
2570         }
2571       else
2572         {
2573           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2574
2575           op_cost = add_cost[speed][mode];
2576           new_limit.cost = best_cost.cost - op_cost;
2577           new_limit.latency = best_cost.latency - op_cost;
2578           synth_mult (alg_in, t - 1, &new_limit, mode);
2579
2580           alg_in->cost.cost += op_cost;
2581           alg_in->cost.latency += op_cost;
2582           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2583             {
2584               struct algorithm *x;
2585               best_cost = alg_in->cost;
2586               x = alg_in, alg_in = best_alg, best_alg = x;
2587               best_alg->log[best_alg->ops] = 0;
2588               best_alg->op[best_alg->ops] = alg_add_t_m2;
2589             }
2590         }
2591
2592       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2593          quickly with a - a * n for some appropriate constant n.  */
2594       m = exact_log2 (-orig_t + 1);
2595       if (m >= 0 && m < maxm)
2596         {
2597           op_cost = shiftsub1_cost[speed][mode][m];
2598           new_limit.cost = best_cost.cost - op_cost;
2599           new_limit.latency = best_cost.latency - op_cost;
2600           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2601
2602           alg_in->cost.cost += op_cost;
2603           alg_in->cost.latency += op_cost;
2604           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2605             {
2606               struct algorithm *x;
2607               best_cost = alg_in->cost;
2608               x = alg_in, alg_in = best_alg, best_alg = x;
2609               best_alg->log[best_alg->ops] = m;
2610               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2611             }
2612         }
2613
2614       if (cache_hit)
2615         goto done;
2616     }
2617
2618   /* Look for factors of t of the form
2619      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2620      If we find such a factor, we can multiply by t using an algorithm that
2621      multiplies by q, shift the result by m and add/subtract it to itself.
2622
2623      We search for large factors first and loop down, even if large factors
2624      are less probable than small; if we find a large factor we will find a
2625      good sequence quickly, and therefore be able to prune (by decreasing
2626      COST_LIMIT) the search.  */
2627
2628  do_alg_addsub_factor:
2629   for (m = floor_log2 (t - 1); m >= 2; m--)
2630     {
2631       unsigned HOST_WIDE_INT d;
2632
2633       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2634       if (t % d == 0 && t > d && m < maxm
2635           && (!cache_hit || cache_alg == alg_add_factor))
2636         {
2637           /* If the target has a cheap shift-and-add instruction use
2638              that in preference to a shift insn followed by an add insn.
2639              Assume that the shift-and-add is "atomic" with a latency
2640              equal to its cost, otherwise assume that on superscalar
2641              hardware the shift may be executed concurrently with the
2642              earlier steps in the algorithm.  */
2643           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2644           if (shiftadd_cost[speed][mode][m] < op_cost)
2645             {
2646               op_cost = shiftadd_cost[speed][mode][m];
2647               op_latency = op_cost;
2648             }
2649           else
2650             op_latency = add_cost[speed][mode];
2651
2652           new_limit.cost = best_cost.cost - op_cost;
2653           new_limit.latency = best_cost.latency - op_latency;
2654           synth_mult (alg_in, t / d, &new_limit, mode);
2655
2656           alg_in->cost.cost += op_cost;
2657           alg_in->cost.latency += op_latency;
2658           if (alg_in->cost.latency < op_cost)
2659             alg_in->cost.latency = op_cost;
2660           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2661             {
2662               struct algorithm *x;
2663               best_cost = alg_in->cost;
2664               x = alg_in, alg_in = best_alg, best_alg = x;
2665               best_alg->log[best_alg->ops] = m;
2666               best_alg->op[best_alg->ops] = alg_add_factor;
2667             }
2668           /* Other factors will have been taken care of in the recursion.  */
2669           break;
2670         }
2671
2672       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2673       if (t % d == 0 && t > d && m < maxm
2674           && (!cache_hit || cache_alg == alg_sub_factor))
2675         {
2676           /* If the target has a cheap shift-and-subtract insn use
2677              that in preference to a shift insn followed by a sub insn.
2678              Assume that the shift-and-sub is "atomic" with a latency
2679              equal to it's cost, otherwise assume that on superscalar
2680              hardware the shift may be executed concurrently with the
2681              earlier steps in the algorithm.  */
2682           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2683           if (shiftsub0_cost[speed][mode][m] < op_cost)
2684             {
2685               op_cost = shiftsub0_cost[speed][mode][m];
2686               op_latency = op_cost;
2687             }
2688           else
2689             op_latency = add_cost[speed][mode];
2690
2691           new_limit.cost = best_cost.cost - op_cost;
2692           new_limit.latency = best_cost.latency - op_latency;
2693           synth_mult (alg_in, t / d, &new_limit, mode);
2694
2695           alg_in->cost.cost += op_cost;
2696           alg_in->cost.latency += op_latency;
2697           if (alg_in->cost.latency < op_cost)
2698             alg_in->cost.latency = op_cost;
2699           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2700             {
2701               struct algorithm *x;
2702               best_cost = alg_in->cost;
2703               x = alg_in, alg_in = best_alg, best_alg = x;
2704               best_alg->log[best_alg->ops] = m;
2705               best_alg->op[best_alg->ops] = alg_sub_factor;
2706             }
2707           break;
2708         }
2709     }
2710   if (cache_hit)
2711     goto done;
2712
2713   /* Try shift-and-add (load effective address) instructions,
2714      i.e. do a*3, a*5, a*9.  */
2715   if ((t & 1) != 0)
2716     {
2717     do_alg_add_t2_m:
2718       q = t - 1;
2719       q = q & -q;
2720       m = exact_log2 (q);
2721       if (m >= 0 && m < maxm)
2722         {
2723           op_cost = shiftadd_cost[speed][mode][m];
2724           new_limit.cost = best_cost.cost - op_cost;
2725           new_limit.latency = best_cost.latency - op_cost;
2726           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2727
2728           alg_in->cost.cost += op_cost;
2729           alg_in->cost.latency += op_cost;
2730           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2731             {
2732               struct algorithm *x;
2733               best_cost = alg_in->cost;
2734               x = alg_in, alg_in = best_alg, best_alg = x;
2735               best_alg->log[best_alg->ops] = m;
2736               best_alg->op[best_alg->ops] = alg_add_t2_m;
2737             }
2738         }
2739       if (cache_hit)
2740         goto done;
2741
2742     do_alg_sub_t2_m:
2743       q = t + 1;
2744       q = q & -q;
2745       m = exact_log2 (q);
2746       if (m >= 0 && m < maxm)
2747         {
2748           op_cost = shiftsub0_cost[speed][mode][m];
2749           new_limit.cost = best_cost.cost - op_cost;
2750           new_limit.latency = best_cost.latency - op_cost;
2751           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2752
2753           alg_in->cost.cost += op_cost;
2754           alg_in->cost.latency += op_cost;
2755           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2756             {
2757               struct algorithm *x;
2758               best_cost = alg_in->cost;
2759               x = alg_in, alg_in = best_alg, best_alg = x;
2760               best_alg->log[best_alg->ops] = m;
2761               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2762             }
2763         }
2764       if (cache_hit)
2765         goto done;
2766     }
2767
2768  done:
2769   /* If best_cost has not decreased, we have not found any algorithm.  */
2770   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2771     {
2772       /* We failed to find an algorithm.  Record alg_impossible for
2773          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2774          we are asked to find an algorithm for T within the same or
2775          lower COST_LIMIT, we can immediately return to the
2776          caller.  */
2777       alg_hash[hash_index].t = t;
2778       alg_hash[hash_index].mode = mode;
2779       alg_hash[hash_index].speed = speed;
2780       alg_hash[hash_index].alg = alg_impossible;
2781       alg_hash[hash_index].cost = *cost_limit;
2782       return;
2783     }
2784
2785   /* Cache the result.  */
2786   if (!cache_hit)
2787     {
2788       alg_hash[hash_index].t = t;
2789       alg_hash[hash_index].mode = mode;
2790       alg_hash[hash_index].speed = speed;
2791       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2792       alg_hash[hash_index].cost.cost = best_cost.cost;
2793       alg_hash[hash_index].cost.latency = best_cost.latency;
2794     }
2795
2796   /* If we are getting a too long sequence for `struct algorithm'
2797      to record, make this search fail.  */
2798   if (best_alg->ops == MAX_BITS_PER_WORD)
2799     return;
2800
2801   /* Copy the algorithm from temporary space to the space at alg_out.
2802      We avoid using structure assignment because the majority of
2803      best_alg is normally undefined, and this is a critical function.  */
2804   alg_out->ops = best_alg->ops + 1;
2805   alg_out->cost = best_cost;
2806   memcpy (alg_out->op, best_alg->op,
2807           alg_out->ops * sizeof *alg_out->op);
2808   memcpy (alg_out->log, best_alg->log,
2809           alg_out->ops * sizeof *alg_out->log);
2810 }
2811 \f
2812 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2813    Try three variations:
2814
2815        - a shift/add sequence based on VAL itself
2816        - a shift/add sequence based on -VAL, followed by a negation
2817        - a shift/add sequence based on VAL - 1, followed by an addition.
2818
2819    Return true if the cheapest of these cost less than MULT_COST,
2820    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2821
2822 static bool
2823 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2824                      struct algorithm *alg, enum mult_variant *variant,
2825                      int mult_cost)
2826 {
2827   struct algorithm alg2;
2828   struct mult_cost limit;
2829   int op_cost;
2830   bool speed = optimize_insn_for_speed_p ();
2831
2832   /* Fail quickly for impossible bounds.  */
2833   if (mult_cost < 0)
2834     return false;
2835
2836   /* Ensure that mult_cost provides a reasonable upper bound.
2837      Any constant multiplication can be performed with less
2838      than 2 * bits additions.  */
2839   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2840   if (mult_cost > op_cost)
2841     mult_cost = op_cost;
2842
2843   *variant = basic_variant;
2844   limit.cost = mult_cost;
2845   limit.latency = mult_cost;
2846   synth_mult (alg, val, &limit, mode);
2847
2848   /* This works only if the inverted value actually fits in an
2849      `unsigned int' */
2850   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2851     {
2852       op_cost = neg_cost[speed][mode];
2853       if (MULT_COST_LESS (&alg->cost, mult_cost))
2854         {
2855           limit.cost = alg->cost.cost - op_cost;
2856           limit.latency = alg->cost.latency - op_cost;
2857         }
2858       else
2859         {
2860           limit.cost = mult_cost - op_cost;
2861           limit.latency = mult_cost - op_cost;
2862         }
2863
2864       synth_mult (&alg2, -val, &limit, mode);
2865       alg2.cost.cost += op_cost;
2866       alg2.cost.latency += op_cost;
2867       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2868         *alg = alg2, *variant = negate_variant;
2869     }
2870
2871   /* This proves very useful for division-by-constant.  */
2872   op_cost = add_cost[speed][mode];
2873   if (MULT_COST_LESS (&alg->cost, mult_cost))
2874     {
2875       limit.cost = alg->cost.cost - op_cost;
2876       limit.latency = alg->cost.latency - op_cost;
2877     }
2878   else
2879     {
2880       limit.cost = mult_cost - op_cost;
2881       limit.latency = mult_cost - op_cost;
2882     }
2883
2884   synth_mult (&alg2, val - 1, &limit, mode);
2885   alg2.cost.cost += op_cost;
2886   alg2.cost.latency += op_cost;
2887   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2888     *alg = alg2, *variant = add_variant;
2889
2890   return MULT_COST_LESS (&alg->cost, mult_cost);
2891 }
2892
2893 /* A subroutine of expand_mult, used for constant multiplications.
2894    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2895    convenient.  Use the shift/add sequence described by ALG and apply
2896    the final fixup specified by VARIANT.  */
2897
2898 static rtx
2899 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2900                    rtx target, const struct algorithm *alg,
2901                    enum mult_variant variant)
2902 {
2903   HOST_WIDE_INT val_so_far;
2904   rtx insn, accum, tem;
2905   int opno;
2906   enum machine_mode nmode;
2907
2908   /* Avoid referencing memory over and over and invalid sharing
2909      on SUBREGs.  */
2910   op0 = force_reg (mode, op0);
2911
2912   /* ACCUM starts out either as OP0 or as a zero, depending on
2913      the first operation.  */
2914
2915   if (alg->op[0] == alg_zero)
2916     {
2917       accum = copy_to_mode_reg (mode, const0_rtx);
2918       val_so_far = 0;
2919     }
2920   else if (alg->op[0] == alg_m)
2921     {
2922       accum = copy_to_mode_reg (mode, op0);
2923       val_so_far = 1;
2924     }
2925   else
2926     gcc_unreachable ();
2927
2928   for (opno = 1; opno < alg->ops; opno++)
2929     {
2930       int log = alg->log[opno];
2931       rtx shift_subtarget = optimize ? 0 : accum;
2932       rtx add_target
2933         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2934            && !optimize)
2935           ? target : 0;
2936       rtx accum_target = optimize ? 0 : accum;
2937
2938       switch (alg->op[opno])
2939         {
2940         case alg_shift:
2941           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2942                               build_int_cst (NULL_TREE, log),
2943                               NULL_RTX, 0);
2944           /* REG_EQUAL note will be attached to the following insn.  */
2945           emit_move_insn (accum, tem);
2946           val_so_far <<= log;
2947           break;
2948
2949         case alg_add_t_m2:
2950           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2951                               build_int_cst (NULL_TREE, log),
2952                               NULL_RTX, 0);
2953           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2954                                  add_target ? add_target : accum_target);
2955           val_so_far += (HOST_WIDE_INT) 1 << log;
2956           break;
2957
2958         case alg_sub_t_m2:
2959           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2960                               build_int_cst (NULL_TREE, log),
2961                               NULL_RTX, 0);
2962           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2963                                  add_target ? add_target : accum_target);
2964           val_so_far -= (HOST_WIDE_INT) 1 << log;
2965           break;
2966
2967         case alg_add_t2_m:
2968           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2969                                 build_int_cst (NULL_TREE, log),
2970                                 shift_subtarget,
2971                                 0);
2972           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2973                                  add_target ? add_target : accum_target);
2974           val_so_far = (val_so_far << log) + 1;
2975           break;
2976
2977         case alg_sub_t2_m:
2978           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2979                                 build_int_cst (NULL_TREE, log),
2980                                 shift_subtarget, 0);
2981           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2982                                  add_target ? add_target : accum_target);
2983           val_so_far = (val_so_far << log) - 1;
2984           break;
2985
2986         case alg_add_factor:
2987           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2988                               build_int_cst (NULL_TREE, log),
2989                               NULL_RTX, 0);
2990           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2991                                  add_target ? add_target : accum_target);
2992           val_so_far += val_so_far << log;
2993           break;
2994
2995         case alg_sub_factor:
2996           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2997                               build_int_cst (NULL_TREE, log),
2998                               NULL_RTX, 0);
2999           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3000                                  (add_target
3001                                   ? add_target : (optimize ? 0 : tem)));
3002           val_so_far = (val_so_far << log) - val_so_far;
3003           break;
3004
3005         default:
3006           gcc_unreachable ();
3007         }
3008
3009       /* Write a REG_EQUAL note on the last insn so that we can cse
3010          multiplication sequences.  Note that if ACCUM is a SUBREG,
3011          we've set the inner register and must properly indicate
3012          that.  */
3013
3014       tem = op0, nmode = mode;
3015       if (GET_CODE (accum) == SUBREG)
3016         {
3017           nmode = GET_MODE (SUBREG_REG (accum));
3018           tem = gen_lowpart (nmode, op0);
3019         }
3020
3021       insn = get_last_insn ();
3022       set_unique_reg_note (insn, REG_EQUAL,
3023                            gen_rtx_MULT (nmode, tem,
3024                                          GEN_INT (val_so_far)));
3025     }
3026
3027   if (variant == negate_variant)
3028     {
3029       val_so_far = -val_so_far;
3030       accum = expand_unop (mode, neg_optab, accum, target, 0);
3031     }
3032   else if (variant == add_variant)
3033     {
3034       val_so_far = val_so_far + 1;
3035       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3036     }
3037
3038   /* Compare only the bits of val and val_so_far that are significant
3039      in the result mode, to avoid sign-/zero-extension confusion.  */
3040   val &= GET_MODE_MASK (mode);
3041   val_so_far &= GET_MODE_MASK (mode);
3042   gcc_assert (val == val_so_far);
3043
3044   return accum;
3045 }
3046
3047 /* Perform a multiplication and return an rtx for the result.
3048    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3049    TARGET is a suggestion for where to store the result (an rtx).
3050
3051    We check specially for a constant integer as OP1.
3052    If you want this check for OP0 as well, then before calling
3053    you should swap the two operands if OP0 would be constant.  */
3054
3055 rtx
3056 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3057              int unsignedp)
3058 {
3059   enum mult_variant variant;
3060   struct algorithm algorithm;
3061   int max_cost;
3062   bool speed = optimize_insn_for_speed_p ();
3063
3064   /* Handling const0_rtx here allows us to use zero as a rogue value for
3065      coeff below.  */
3066   if (op1 == const0_rtx)
3067     return const0_rtx;
3068   if (op1 == const1_rtx)
3069     return op0;
3070   if (op1 == constm1_rtx)
3071     return expand_unop (mode,
3072                         GET_MODE_CLASS (mode) == MODE_INT
3073                         && !unsignedp && flag_trapv
3074                         ? negv_optab : neg_optab,
3075                         op0, target, 0);
3076
3077   /* These are the operations that are potentially turned into a sequence
3078      of shifts and additions.  */
3079   if (SCALAR_INT_MODE_P (mode)
3080       && (unsignedp || !flag_trapv))
3081     {
3082       HOST_WIDE_INT coeff = 0;
3083       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3084
3085       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3086          less than or equal in size to `unsigned int' this doesn't matter.
3087          If the mode is larger than `unsigned int', then synth_mult works
3088          only if the constant value exactly fits in an `unsigned int' without
3089          any truncation.  This means that multiplying by negative values does
3090          not work; results are off by 2^32 on a 32 bit machine.  */
3091
3092       if (CONST_INT_P (op1))
3093         {
3094           /* Attempt to handle multiplication of DImode values by negative
3095              coefficients, by performing the multiplication by a positive
3096              multiplier and then inverting the result.  */
3097           if (INTVAL (op1) < 0
3098               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3099             {
3100               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3101                  result is interpreted as an unsigned coefficient.
3102                  Exclude cost of op0 from max_cost to match the cost
3103                  calculation of the synth_mult.  */
3104               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3105                          - neg_cost[speed][mode];
3106               if (max_cost > 0
3107                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3108                                           &variant, max_cost))
3109                 {
3110                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3111                                                 NULL_RTX, &algorithm,
3112                                                 variant);
3113                   return expand_unop (mode, neg_optab, temp, target, 0);
3114                 }
3115             }
3116           else coeff = INTVAL (op1);
3117         }
3118       else if (GET_CODE (op1) == CONST_DOUBLE)
3119         {
3120           /* If we are multiplying in DImode, it may still be a win
3121              to try to work with shifts and adds.  */
3122           if (CONST_DOUBLE_HIGH (op1) == 0
3123               && CONST_DOUBLE_LOW (op1) > 0)
3124             coeff = CONST_DOUBLE_LOW (op1);
3125           else if (CONST_DOUBLE_LOW (op1) == 0
3126                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3127             {
3128               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3129                           + HOST_BITS_PER_WIDE_INT;
3130               return expand_shift (LSHIFT_EXPR, mode, op0,
3131                                    build_int_cst (NULL_TREE, shift),
3132                                    target, unsignedp);
3133             }
3134         }
3135
3136       /* We used to test optimize here, on the grounds that it's better to
3137          produce a smaller program when -O is not used.  But this causes
3138          such a terrible slowdown sometimes that it seems better to always
3139          use synth_mult.  */
3140       if (coeff != 0)
3141         {
3142           /* Special case powers of two.  */
3143           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3144             return expand_shift (LSHIFT_EXPR, mode, op0,
3145                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3146                                  target, unsignedp);
3147
3148           /* Exclude cost of op0 from max_cost to match the cost
3149              calculation of the synth_mult.  */
3150           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3151           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3152                                    max_cost))
3153             return expand_mult_const (mode, op0, coeff, target,
3154                                       &algorithm, variant);
3155         }
3156     }
3157
3158   if (GET_CODE (op0) == CONST_DOUBLE)
3159     {
3160       rtx temp = op0;
3161       op0 = op1;
3162       op1 = temp;
3163     }
3164
3165   /* Expand x*2.0 as x+x.  */
3166   if (GET_CODE (op1) == CONST_DOUBLE
3167       && SCALAR_FLOAT_MODE_P (mode))
3168     {
3169       REAL_VALUE_TYPE d;
3170       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3171
3172       if (REAL_VALUES_EQUAL (d, dconst2))
3173         {
3174           op0 = force_reg (GET_MODE (op0), op0);
3175           return expand_binop (mode, add_optab, op0, op0,
3176                                target, unsignedp, OPTAB_LIB_WIDEN);
3177         }
3178     }
3179
3180   /* This used to use umul_optab if unsigned, but for non-widening multiply
3181      there is no difference between signed and unsigned.  */
3182   op0 = expand_binop (mode,
3183                       ! unsignedp
3184                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3185                       ? smulv_optab : smul_optab,
3186                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3187   gcc_assert (op0);
3188   return op0;
3189 }
3190
3191 /* Perform a widening multiplication and return an rtx for the result.
3192    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3193    TARGET is a suggestion for where to store the result (an rtx).
3194    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3195    or smul_widen_optab.
3196
3197    We check specially for a constant integer as OP1, comparing the
3198    cost of a widening multiply against the cost of a sequence of shifts
3199    and adds.  */
3200
3201 rtx
3202 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3203                       int unsignedp, optab this_optab)
3204 {
3205   bool speed = optimize_insn_for_speed_p ();
3206   rtx cop1;
3207
3208   if (CONST_INT_P (op1)
3209       && GET_MODE (op0) != VOIDmode
3210       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3211                                 this_optab == umul_widen_optab))
3212       && CONST_INT_P (cop1)
3213       && (INTVAL (cop1) >= 0
3214           || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
3215     {
3216       HOST_WIDE_INT coeff = INTVAL (cop1);
3217       int max_cost;
3218       enum mult_variant variant;
3219       struct algorithm algorithm;
3220
3221       /* Special case powers of two.  */
3222       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3223         {
3224           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3225           return expand_shift (LSHIFT_EXPR, mode, op0,
3226                                build_int_cst (NULL_TREE, floor_log2 (coeff)),
3227                                target, unsignedp);
3228         }
3229
3230       /* Exclude cost of op0 from max_cost to match the cost
3231          calculation of the synth_mult.  */
3232       max_cost = mul_widen_cost[speed][mode];
3233       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3234                                max_cost))
3235         {
3236           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3237           return expand_mult_const (mode, op0, coeff, target,
3238                                     &algorithm, variant);
3239         }
3240     }
3241   return expand_binop (mode, this_optab, op0, op1, target,
3242                        unsignedp, OPTAB_LIB_WIDEN);
3243 }
3244 \f
3245 /* Return the smallest n such that 2**n >= X.  */
3246
3247 int
3248 ceil_log2 (unsigned HOST_WIDE_INT x)
3249 {
3250   return floor_log2 (x - 1) + 1;
3251 }
3252
3253 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3254    replace division by D, and put the least significant N bits of the result
3255    in *MULTIPLIER_PTR and return the most significant bit.
3256
3257    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3258    needed precision is in PRECISION (should be <= N).
3259
3260    PRECISION should be as small as possible so this function can choose
3261    multiplier more freely.
3262
3263    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3264    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3265
3266    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3267    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3268
3269 static
3270 unsigned HOST_WIDE_INT
3271 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3272                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3273 {
3274   HOST_WIDE_INT mhigh_hi, mlow_hi;
3275   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3276   int lgup, post_shift;
3277   int pow, pow2;
3278   unsigned HOST_WIDE_INT nl, dummy1;
3279   HOST_WIDE_INT nh, dummy2;
3280
3281   /* lgup = ceil(log2(divisor)); */
3282   lgup = ceil_log2 (d);
3283
3284   gcc_assert (lgup <= n);
3285
3286   pow = n + lgup;
3287   pow2 = n + lgup - precision;
3288
3289   /* We could handle this with some effort, but this case is much
3290      better handled directly with a scc insn, so rely on caller using
3291      that.  */
3292   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3293
3294   /* mlow = 2^(N + lgup)/d */
3295  if (pow >= HOST_BITS_PER_WIDE_INT)
3296     {
3297       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3298       nl = 0;
3299     }
3300   else
3301     {
3302       nh = 0;
3303       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3304     }
3305   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3306                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3307
3308   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3309   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3310     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3311   else
3312     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3313   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3314                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3315
3316   gcc_assert (!mhigh_hi || nh - d < d);
3317   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3318   /* Assert that mlow < mhigh.  */
3319   gcc_assert (mlow_hi < mhigh_hi
3320               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3321
3322   /* If precision == N, then mlow, mhigh exceed 2^N
3323      (but they do not exceed 2^(N+1)).  */
3324
3325   /* Reduce to lowest terms.  */
3326   for (post_shift = lgup; post_shift > 0; post_shift--)
3327     {
3328       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3329       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3330       if (ml_lo >= mh_lo)
3331         break;
3332
3333       mlow_hi = 0;
3334       mlow_lo = ml_lo;
3335       mhigh_hi = 0;
3336       mhigh_lo = mh_lo;
3337     }
3338
3339   *post_shift_ptr = post_shift;
3340   *lgup_ptr = lgup;
3341   if (n < HOST_BITS_PER_WIDE_INT)
3342     {
3343       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3344       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3345       return mhigh_lo >= mask;
3346     }
3347   else
3348     {
3349       *multiplier_ptr = GEN_INT (mhigh_lo);
3350       return mhigh_hi;
3351     }
3352 }
3353
3354 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3355    congruent to 1 (mod 2**N).  */
3356
3357 static unsigned HOST_WIDE_INT
3358 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3359 {
3360   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3361
3362   /* The algorithm notes that the choice y = x satisfies
3363      x*y == 1 mod 2^3, since x is assumed odd.
3364      Each iteration doubles the number of bits of significance in y.  */
3365
3366   unsigned HOST_WIDE_INT mask;
3367   unsigned HOST_WIDE_INT y = x;
3368   int nbit = 3;
3369
3370   mask = (n == HOST_BITS_PER_WIDE_INT
3371           ? ~(unsigned HOST_WIDE_INT) 0
3372           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3373
3374   while (nbit < n)
3375     {
3376       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3377       nbit *= 2;
3378     }
3379   return y;
3380 }
3381
3382 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3383    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3384    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3385    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3386    become signed.
3387
3388    The result is put in TARGET if that is convenient.
3389
3390    MODE is the mode of operation.  */
3391
3392 rtx
3393 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3394                              rtx op1, rtx target, int unsignedp)
3395 {
3396   rtx tem;
3397   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3398
3399   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3400                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3401                       NULL_RTX, 0);
3402   tem = expand_and (mode, tem, op1, NULL_RTX);
3403   adj_operand
3404     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3405                      adj_operand);
3406
3407   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3408                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3409                       NULL_RTX, 0);
3410   tem = expand_and (mode, tem, op0, NULL_RTX);
3411   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3412                           target);
3413
3414   return target;
3415 }
3416
3417 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3418
3419 static rtx
3420 extract_high_half (enum machine_mode mode, rtx op)
3421 {
3422   enum machine_mode wider_mode;
3423
3424   if (mode == word_mode)
3425     return gen_highpart (mode, op);
3426
3427   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3428
3429   wider_mode = GET_MODE_WIDER_MODE (mode);
3430   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3431                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3432   return convert_modes (mode, wider_mode, op, 0);
3433 }
3434
3435 /* Like expand_mult_highpart, but only consider using a multiplication
3436    optab.  OP1 is an rtx for the constant operand.  */
3437
3438 static rtx
3439 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3440                             rtx target, int unsignedp, int max_cost)
3441 {
3442   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3443   enum machine_mode wider_mode;
3444   optab moptab;
3445   rtx tem;
3446   int size;
3447   bool speed = optimize_insn_for_speed_p ();
3448
3449   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3450
3451   wider_mode = GET_MODE_WIDER_MODE (mode);
3452   size = GET_MODE_BITSIZE (mode);
3453
3454   /* Firstly, try using a multiplication insn that only generates the needed
3455      high part of the product, and in the sign flavor of unsignedp.  */
3456   if (mul_highpart_cost[speed][mode] < max_cost)
3457     {
3458       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3459       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3460                           unsignedp, OPTAB_DIRECT);
3461       if (tem)
3462         return tem;
3463     }
3464
3465   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3466      Need to adjust the result after the multiplication.  */
3467   if (size - 1 < BITS_PER_WORD
3468       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3469           + 4 * add_cost[speed][mode] < max_cost))
3470     {
3471       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3472       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3473                           unsignedp, OPTAB_DIRECT);
3474       if (tem)
3475         /* We used the wrong signedness.  Adjust the result.  */
3476         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3477                                             tem, unsignedp);
3478     }
3479
3480   /* Try widening multiplication.  */
3481   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3482   if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3483       && mul_widen_cost[speed][wider_mode] < max_cost)
3484     {
3485       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3486                           unsignedp, OPTAB_WIDEN);
3487       if (tem)
3488         return extract_high_half (mode, tem);
3489     }
3490
3491   /* Try widening the mode and perform a non-widening multiplication.  */
3492   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3493       && size - 1 < BITS_PER_WORD
3494       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3495     {
3496       rtx insns, wop0, wop1;
3497
3498       /* We need to widen the operands, for example to ensure the
3499          constant multiplier is correctly sign or zero extended.
3500          Use a sequence to clean-up any instructions emitted by
3501          the conversions if things don't work out.  */
3502       start_sequence ();
3503       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3504       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3505       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3506                           unsignedp, OPTAB_WIDEN);
3507       insns = get_insns ();
3508       end_sequence ();
3509
3510       if (tem)
3511         {
3512           emit_insn (insns);
3513           return extract_high_half (mode, tem);
3514         }
3515     }
3516
3517   /* Try widening multiplication of opposite signedness, and adjust.  */
3518   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3519   if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3520       && size - 1 < BITS_PER_WORD
3521       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3522           + 4 * add_cost[speed][mode] < max_cost))
3523     {
3524       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3525                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3526       if (tem != 0)
3527         {
3528           tem = extract_high_half (mode, tem);
3529           /* We used the wrong signedness.  Adjust the result.  */
3530           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3531                                               target, unsignedp);
3532         }
3533     }
3534
3535   return 0;
3536 }
3537
3538 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3539    putting the high half of the result in TARGET if that is convenient,
3540    and return where the result is.  If the operation can not be performed,
3541    0 is returned.
3542
3543    MODE is the mode of operation and result.
3544
3545    UNSIGNEDP nonzero means unsigned multiply.
3546
3547    MAX_COST is the total allowed cost for the expanded RTL.  */
3548
3549 static rtx
3550 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3551                       rtx target, int unsignedp, int max_cost)
3552 {
3553   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3554   unsigned HOST_WIDE_INT cnst1;
3555   int extra_cost;
3556   bool sign_adjust = false;
3557   enum mult_variant variant;
3558   struct algorithm alg;
3559   rtx tem;
3560   bool speed = optimize_insn_for_speed_p ();
3561
3562   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3563   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3564   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3565
3566   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3567
3568   /* We can't optimize modes wider than BITS_PER_WORD.
3569      ??? We might be able to perform double-word arithmetic if
3570      mode == word_mode, however all the cost calculations in
3571      synth_mult etc. assume single-word operations.  */
3572   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3573     return expand_mult_highpart_optab (mode, op0, op1, target,
3574                                        unsignedp, max_cost);
3575
3576   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3577
3578   /* Check whether we try to multiply by a negative constant.  */
3579   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3580     {
3581       sign_adjust = true;
3582       extra_cost += add_cost[speed][mode];
3583     }
3584
3585   /* See whether shift/add multiplication is cheap enough.  */
3586   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3587                            max_cost - extra_cost))
3588     {
3589       /* See whether the specialized multiplication optabs are
3590          cheaper than the shift/add version.  */
3591       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3592                                         alg.cost.cost + extra_cost);
3593       if (tem)
3594         return tem;
3595
3596       tem = convert_to_mode (wider_mode, op0, unsignedp);
3597       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3598       tem = extract_high_half (mode, tem);
3599
3600       /* Adjust result for signedness.  */
3601       if (sign_adjust)
3602         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3603
3604       return tem;
3605     }
3606   return expand_mult_highpart_optab (mode, op0, op1, target,
3607                                      unsignedp, max_cost);
3608 }
3609
3610
3611 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3612
3613 static rtx
3614 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3615 {
3616   unsigned HOST_WIDE_INT masklow, maskhigh;
3617   rtx result, temp, shift, label;
3618   int logd;
3619
3620   logd = floor_log2 (d);
3621   result = gen_reg_rtx (mode);
3622
3623   /* Avoid conditional branches when they're expensive.  */
3624   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3625       && optimize_insn_for_speed_p ())
3626     {
3627       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3628                                       mode, 0, -1);
3629       if (signmask)
3630         {
3631           signmask = force_reg (mode, signmask);
3632           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3633           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3634
3635           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3636              which instruction sequence to use.  If logical right shifts
3637              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3638              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3639
3640           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3641           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3642               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3643             {
3644               temp = expand_binop (mode, xor_optab, op0, signmask,
3645                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3646               temp = expand_binop (mode, sub_optab, temp, signmask,
3647                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3648               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3649                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3650               temp = expand_binop (mode, xor_optab, temp, signmask,
3651                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3652               temp = expand_binop (mode, sub_optab, temp, signmask,
3653                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654             }
3655           else
3656             {
3657               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3658                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3659               signmask = force_reg (mode, signmask);
3660
3661               temp = expand_binop (mode, add_optab, op0, signmask,
3662                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3663               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3664                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3665               temp = expand_binop (mode, sub_optab, temp, signmask,
3666                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3667             }
3668           return temp;
3669         }
3670     }
3671
3672   /* Mask contains the mode's signbit and the significant bits of the
3673      modulus.  By including the signbit in the operation, many targets
3674      can avoid an explicit compare operation in the following comparison
3675      against zero.  */
3676
3677   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3678   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3679     {
3680       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3681       maskhigh = -1;
3682     }
3683   else
3684     maskhigh = (HOST_WIDE_INT) -1
3685                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3686
3687   temp = expand_binop (mode, and_optab, op0,
3688                        immed_double_const (masklow, maskhigh, mode),
3689                        result, 1, OPTAB_LIB_WIDEN);
3690   if (temp != result)
3691     emit_move_insn (result, temp);
3692
3693   label = gen_label_rtx ();
3694   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3695
3696   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3697                        0, OPTAB_LIB_WIDEN);
3698   masklow = (HOST_WIDE_INT) -1 << logd;
3699   maskhigh = -1;
3700   temp = expand_binop (mode, ior_optab, temp,
3701                        immed_double_const (masklow, maskhigh, mode),
3702                        result, 1, OPTAB_LIB_WIDEN);
3703   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3704                        0, OPTAB_LIB_WIDEN);
3705   if (temp != result)
3706     emit_move_insn (result, temp);
3707   emit_label (label);
3708   return result;
3709 }
3710
3711 /* Expand signed division of OP0 by a power of two D in mode MODE.
3712    This routine is only called for positive values of D.  */
3713
3714 static rtx
3715 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3716 {
3717   rtx temp, label;
3718   tree shift;
3719   int logd;
3720
3721   logd = floor_log2 (d);
3722   shift = build_int_cst (NULL_TREE, logd);
3723
3724   if (d == 2
3725       && BRANCH_COST (optimize_insn_for_speed_p (),
3726                       false) >= 1)
3727     {
3728       temp = gen_reg_rtx (mode);
3729       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3730       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3731                            0, OPTAB_LIB_WIDEN);
3732       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3733     }
3734
3735 #ifdef HAVE_conditional_move
3736   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3737       >= 2)
3738     {
3739       rtx temp2;
3740
3741       /* ??? emit_conditional_move forces a stack adjustment via
3742          compare_from_rtx so, if the sequence is discarded, it will
3743          be lost.  Do it now instead.  */
3744       do_pending_stack_adjust ();
3745
3746       start_sequence ();
3747       temp2 = copy_to_mode_reg (mode, op0);
3748       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3749                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3750       temp = force_reg (mode, temp);
3751
3752       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3753       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3754                                      mode, temp, temp2, mode, 0);
3755       if (temp2)
3756         {
3757           rtx seq = get_insns ();
3758           end_sequence ();
3759           emit_insn (seq);
3760           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3761         }
3762       end_sequence ();
3763     }
3764 #endif
3765
3766   if (BRANCH_COST (optimize_insn_for_speed_p (),
3767                    false) >= 2)
3768     {
3769       int ushift = GET_MODE_BITSIZE (mode) - logd;
3770
3771       temp = gen_reg_rtx (mode);
3772       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3773       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3774         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3775                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3776       else
3777         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3778                              build_int_cst (NULL_TREE, ushift),
3779                              NULL_RTX, 1);
3780       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3781                            0, OPTAB_LIB_WIDEN);
3782       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3783     }
3784
3785   label = gen_label_rtx ();
3786   temp = copy_to_mode_reg (mode, op0);
3787   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3788   expand_inc (temp, GEN_INT (d - 1));
3789   emit_label (label);
3790   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3791 }
3792 \f
3793 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3794    if that is convenient, and returning where the result is.
3795    You may request either the quotient or the remainder as the result;
3796    specify REM_FLAG nonzero to get the remainder.
3797
3798    CODE is the expression code for which kind of division this is;
3799    it controls how rounding is done.  MODE is the machine mode to use.
3800    UNSIGNEDP nonzero means do unsigned division.  */
3801
3802 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3803    and then correct it by or'ing in missing high bits
3804    if result of ANDI is nonzero.
3805    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3806    This could optimize to a bfexts instruction.
3807    But C doesn't use these operations, so their optimizations are
3808    left for later.  */
3809 /* ??? For modulo, we don't actually need the highpart of the first product,
3810    the low part will do nicely.  And for small divisors, the second multiply
3811    can also be a low-part only multiply or even be completely left out.
3812    E.g. to calculate the remainder of a division by 3 with a 32 bit
3813    multiply, multiply with 0x55555556 and extract the upper two bits;
3814    the result is exact for inputs up to 0x1fffffff.
3815    The input range can be reduced by using cross-sum rules.
3816    For odd divisors >= 3, the following table gives right shift counts
3817    so that if a number is shifted by an integer multiple of the given
3818    amount, the remainder stays the same:
3819    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3820    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3821    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3822    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3823    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3824
3825    Cross-sum rules for even numbers can be derived by leaving as many bits
3826    to the right alone as the divisor has zeros to the right.
3827    E.g. if x is an unsigned 32 bit number:
3828    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3829    */
3830
3831 rtx
3832 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3833                rtx op0, rtx op1, rtx target, int unsignedp)
3834 {
3835   enum machine_mode compute_mode;
3836   rtx tquotient;
3837   rtx quotient = 0, remainder = 0;
3838   rtx last;
3839   int size;
3840   rtx insn, set;
3841   optab optab1, optab2;
3842   int op1_is_constant, op1_is_pow2 = 0;
3843   int max_cost, extra_cost;
3844   static HOST_WIDE_INT last_div_const = 0;
3845   static HOST_WIDE_INT ext_op1;
3846   bool speed = optimize_insn_for_speed_p ();
3847
3848   op1_is_constant = CONST_INT_P (op1);
3849   if (op1_is_constant)
3850     {
3851       ext_op1 = INTVAL (op1);
3852       if (unsignedp)
3853         ext_op1 &= GET_MODE_MASK (mode);
3854       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3855                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3856     }
3857
3858   /*
3859      This is the structure of expand_divmod:
3860
3861      First comes code to fix up the operands so we can perform the operations
3862      correctly and efficiently.
3863
3864      Second comes a switch statement with code specific for each rounding mode.
3865      For some special operands this code emits all RTL for the desired
3866      operation, for other cases, it generates only a quotient and stores it in
3867      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3868      to indicate that it has not done anything.
3869
3870      Last comes code that finishes the operation.  If QUOTIENT is set and
3871      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3872      QUOTIENT is not set, it is computed using trunc rounding.
3873
3874      We try to generate special code for division and remainder when OP1 is a
3875      constant.  If |OP1| = 2**n we can use shifts and some other fast
3876      operations.  For other values of OP1, we compute a carefully selected
3877      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3878      by m.
3879
3880      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3881      half of the product.  Different strategies for generating the product are
3882      implemented in expand_mult_highpart.
3883
3884      If what we actually want is the remainder, we generate that by another
3885      by-constant multiplication and a subtraction.  */
3886
3887   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3888      code below will malfunction if we are, so check here and handle
3889      the special case if so.  */
3890   if (op1 == const1_rtx)
3891     return rem_flag ? const0_rtx : op0;
3892
3893     /* When dividing by -1, we could get an overflow.
3894      negv_optab can handle overflows.  */
3895   if (! unsignedp && op1 == constm1_rtx)
3896     {
3897       if (rem_flag)
3898         return const0_rtx;
3899       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3900                           ? negv_optab : neg_optab, op0, target, 0);
3901     }
3902
3903   if (target
3904       /* Don't use the function value register as a target
3905          since we have to read it as well as write it,
3906          and function-inlining gets confused by this.  */
3907       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3908           /* Don't clobber an operand while doing a multi-step calculation.  */
3909           || ((rem_flag || op1_is_constant)
3910               && (reg_mentioned_p (target, op0)
3911                   || (MEM_P (op0) && MEM_P (target))))
3912           || reg_mentioned_p (target, op1)
3913           || (MEM_P (op1) && MEM_P (target))))
3914     target = 0;
3915
3916   /* Get the mode in which to perform this computation.  Normally it will
3917      be MODE, but sometimes we can't do the desired operation in MODE.
3918      If so, pick a wider mode in which we can do the operation.  Convert
3919      to that mode at the start to avoid repeated conversions.
3920
3921      First see what operations we need.  These depend on the expression
3922      we are evaluating.  (We assume that divxx3 insns exist under the
3923      same conditions that modxx3 insns and that these insns don't normally
3924      fail.  If these assumptions are not correct, we may generate less
3925      efficient code in some cases.)
3926
3927      Then see if we find a mode in which we can open-code that operation
3928      (either a division, modulus, or shift).  Finally, check for the smallest
3929      mode for which we can do the operation with a library call.  */
3930
3931   /* We might want to refine this now that we have division-by-constant
3932      optimization.  Since expand_mult_highpart tries so many variants, it is
3933      not straightforward to generalize this.  Maybe we should make an array
3934      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3935
3936   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3937             ? (unsignedp ? lshr_optab : ashr_optab)
3938             : (unsignedp ? udiv_optab : sdiv_optab));
3939   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3940             ? optab1
3941             : (unsignedp ? udivmod_optab : sdivmod_optab));
3942
3943   for (compute_mode = mode; compute_mode != VOIDmode;
3944        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3945     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3946         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3947       break;
3948
3949   if (compute_mode == VOIDmode)
3950     for (compute_mode = mode; compute_mode != VOIDmode;
3951          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3952       if (optab_libfunc (optab1, compute_mode)
3953           || optab_libfunc (optab2, compute_mode))
3954         break;
3955
3956   /* If we still couldn't find a mode, use MODE, but expand_binop will
3957      probably die.  */
3958   if (compute_mode == VOIDmode)
3959     compute_mode = mode;
3960
3961   if (target && GET_MODE (target) == compute_mode)
3962     tquotient = target;
3963   else
3964     tquotient = gen_reg_rtx (compute_mode);
3965
3966   size = GET_MODE_BITSIZE (compute_mode);
3967 #if 0
3968   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3969      (mode), and thereby get better code when OP1 is a constant.  Do that
3970      later.  It will require going over all usages of SIZE below.  */
3971   size = GET_MODE_BITSIZE (mode);
3972 #endif
3973
3974   /* Only deduct something for a REM if the last divide done was
3975      for a different constant.   Then set the constant of the last
3976      divide.  */
3977   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3978   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3979                      && INTVAL (op1) == last_div_const))
3980     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3981
3982   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3983
3984   /* Now convert to the best mode to use.  */
3985   if (compute_mode != mode)
3986     {
3987       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3988       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3989
3990       /* convert_modes may have placed op1 into a register, so we
3991          must recompute the following.  */
3992       op1_is_constant = CONST_INT_P (op1);
3993       op1_is_pow2 = (op1_is_constant
3994                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3995                           || (! unsignedp
3996                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3997     }
3998
3999   /* If one of the operands is a volatile MEM, copy it into a register.  */
4000
4001   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4002     op0 = force_reg (compute_mode, op0);
4003   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4004     op1 = force_reg (compute_mode, op1);
4005
4006   /* If we need the remainder or if OP1 is constant, we need to
4007      put OP0 in a register in case it has any queued subexpressions.  */
4008   if (rem_flag || op1_is_constant)
4009     op0 = force_reg (compute_mode, op0);
4010
4011   last = get_last_insn ();
4012
4013   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4014   if (unsignedp)
4015     {
4016       if (code == FLOOR_DIV_EXPR)
4017         code = TRUNC_DIV_EXPR;
4018       if (code == FLOOR_MOD_EXPR)
4019         code = TRUNC_MOD_EXPR;
4020       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4021         code = TRUNC_DIV_EXPR;
4022     }
4023
4024   if (op1 != const0_rtx)
4025     switch (code)
4026       {
4027       case TRUNC_MOD_EXPR:
4028       case TRUNC_DIV_EXPR:
4029         if (op1_is_constant)
4030           {
4031             if (unsignedp)
4032               {
4033                 unsigned HOST_WIDE_INT mh;
4034                 int pre_shift, post_shift;
4035                 int dummy;
4036                 rtx ml;
4037                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4038                                             & GET_MODE_MASK (compute_mode));
4039
4040                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4041                   {
4042                     pre_shift = floor_log2 (d);
4043                     if (rem_flag)
4044                       {
4045                         remainder
4046                           = expand_binop (compute_mode, and_optab, op0,
4047                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4048                                           remainder, 1,
4049                                           OPTAB_LIB_WIDEN);
4050                         if (remainder)
4051                           return gen_lowpart (mode, remainder);
4052                       }
4053                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4054                                              build_int_cst (NULL_TREE,
4055                                                             pre_shift),
4056                                              tquotient, 1);
4057                   }
4058                 else if (size <= HOST_BITS_PER_WIDE_INT)
4059                   {
4060                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4061                       {
4062                         /* Most significant bit of divisor is set; emit an scc
4063                            insn.  */
4064                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4065                                                           compute_mode, 1, 1);
4066                       }
4067                     else
4068                       {
4069                         /* Find a suitable multiplier and right shift count
4070                            instead of multiplying with D.  */
4071
4072                         mh = choose_multiplier (d, size, size,
4073                                                 &ml, &post_shift, &dummy);
4074
4075                         /* If the suggested multiplier is more than SIZE bits,
4076                            we can do better for even divisors, using an
4077                            initial right shift.  */
4078                         if (mh != 0 && (d & 1) == 0)
4079                           {
4080                             pre_shift = floor_log2 (d & -d);
4081                             mh = choose_multiplier (d >> pre_shift, size,
4082                                                     size - pre_shift,
4083                                                     &ml, &post_shift, &dummy);
4084                             gcc_assert (!mh);
4085                           }
4086                         else
4087                           pre_shift = 0;
4088
4089                         if (mh != 0)
4090                           {
4091                             rtx t1, t2, t3, t4;
4092
4093                             if (post_shift - 1 >= BITS_PER_WORD)
4094                               goto fail1;
4095
4096                             extra_cost
4097                               = (shift_cost[speed][compute_mode][post_shift - 1]
4098                                  + shift_cost[speed][compute_mode][1]
4099                                  + 2 * add_cost[speed][compute_mode]);
4100                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4101                                                        NULL_RTX, 1,
4102                                                        max_cost - extra_cost);
4103                             if (t1 == 0)
4104                               goto fail1;
4105                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4106                                                                op0, t1),
4107                                                 NULL_RTX);
4108                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4109                                                integer_one_node, NULL_RTX, 1);
4110                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4111                                                               t1, t3),
4112                                                 NULL_RTX);
4113                             quotient = expand_shift
4114                               (RSHIFT_EXPR, compute_mode, t4,
4115                                build_int_cst (NULL_TREE, post_shift - 1),
4116                                tquotient, 1);
4117                           }
4118                         else
4119                           {
4120                             rtx t1, t2;
4121
4122                             if (pre_shift >= BITS_PER_WORD
4123                                 || post_shift >= BITS_PER_WORD)
4124                               goto fail1;
4125
4126                             t1 = expand_shift
4127                               (RSHIFT_EXPR, compute_mode, op0,
4128                                build_int_cst (NULL_TREE, pre_shift),
4129                                NULL_RTX, 1);
4130                             extra_cost
4131                               = (shift_cost[speed][compute_mode][pre_shift]
4132                                  + shift_cost[speed][compute_mode][post_shift]);
4133                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4134                                                        NULL_RTX, 1,
4135                                                        max_cost - extra_cost);
4136                             if (t2 == 0)
4137                               goto fail1;
4138                             quotient = expand_shift
4139                               (RSHIFT_EXPR, compute_mode, t2,
4140                                build_int_cst (NULL_TREE, post_shift),
4141                                tquotient, 1);
4142                           }
4143                       }
4144                   }
4145                 else            /* Too wide mode to use tricky code */
4146                   break;
4147
4148                 insn = get_last_insn ();
4149                 if (insn != last
4150                     && (set = single_set (insn)) != 0
4151                     && SET_DEST (set) == quotient)
4152                   set_unique_reg_note (insn,
4153                                        REG_EQUAL,
4154                                        gen_rtx_UDIV (compute_mode, op0, op1));
4155               }
4156             else                /* TRUNC_DIV, signed */
4157               {
4158                 unsigned HOST_WIDE_INT ml;
4159                 int lgup, post_shift;
4160                 rtx mlr;
4161                 HOST_WIDE_INT d = INTVAL (op1);
4162                 unsigned HOST_WIDE_INT abs_d;
4163
4164                 /* Since d might be INT_MIN, we have to cast to
4165                    unsigned HOST_WIDE_INT before negating to avoid
4166                    undefined signed overflow.  */
4167                 abs_d = (d >= 0
4168                          ? (unsigned HOST_WIDE_INT) d
4169                          : - (unsigned HOST_WIDE_INT) d);
4170
4171                 /* n rem d = n rem -d */
4172                 if (rem_flag && d < 0)
4173                   {
4174                     d = abs_d;
4175                     op1 = gen_int_mode (abs_d, compute_mode);
4176                   }
4177
4178                 if (d == 1)
4179                   quotient = op0;
4180                 else if (d == -1)
4181                   quotient = expand_unop (compute_mode, neg_optab, op0,
4182                                           tquotient, 0);
4183                 else if (HOST_BITS_PER_WIDE_INT >= size
4184                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4185                   {
4186                     /* This case is not handled correctly below.  */
4187                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4188                                                 compute_mode, 1, 1);
4189                     if (quotient == 0)
4190                       goto fail1;
4191                   }
4192                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4193                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4194                                       : sdiv_pow2_cheap[speed][compute_mode])
4195                          /* We assume that cheap metric is true if the
4196                             optab has an expander for this mode.  */
4197                          && ((optab_handler ((rem_flag ? smod_optab
4198                                               : sdiv_optab),
4199                                              compute_mode)
4200                               != CODE_FOR_nothing)
4201                              || (optab_handler (sdivmod_optab,
4202                                                 compute_mode)
4203                                  != CODE_FOR_nothing)))
4204                   ;
4205                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4206                   {
4207                     if (rem_flag)
4208                       {
4209                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4210                         if (remainder)
4211                           return gen_lowpart (mode, remainder);
4212                       }
4213
4214                     if (sdiv_pow2_cheap[speed][compute_mode]
4215                         && ((optab_handler (sdiv_optab, compute_mode)
4216                              != CODE_FOR_nothing)
4217                             || (optab_handler (sdivmod_optab, compute_mode)
4218                                 != CODE_FOR_nothing)))
4219                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4220                                                 compute_mode, op0,
4221                                                 gen_int_mode (abs_d,
4222                                                               compute_mode),
4223                                                 NULL_RTX, 0);
4224                     else
4225                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4226
4227                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4228                        negate the quotient.  */
4229                     if (d < 0)
4230                       {
4231                         insn = get_last_insn ();
4232                         if (insn != last
4233                             && (set = single_set (insn)) != 0
4234                             && SET_DEST (set) == quotient
4235                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4236                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4237                           set_unique_reg_note (insn,
4238                                                REG_EQUAL,
4239                                                gen_rtx_DIV (compute_mode,
4240                                                             op0,
4241                                                             GEN_INT
4242                                                             (trunc_int_for_mode
4243                                                              (abs_d,
4244                                                               compute_mode))));
4245
4246                         quotient = expand_unop (compute_mode, neg_optab,
4247                                                 quotient, quotient, 0);
4248                       }
4249                   }
4250                 else if (size <= HOST_BITS_PER_WIDE_INT)
4251                   {
4252                     choose_multiplier (abs_d, size, size - 1,
4253                                        &mlr, &post_shift, &lgup);
4254                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4255                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4256                       {
4257                         rtx t1, t2, t3;
4258
4259                         if (post_shift >= BITS_PER_WORD
4260                             || size - 1 >= BITS_PER_WORD)
4261                           goto fail1;
4262
4263                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4264                                       + shift_cost[speed][compute_mode][size - 1]
4265                                       + add_cost[speed][compute_mode]);
4266                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4267                                                    NULL_RTX, 0,
4268                                                    max_cost - extra_cost);
4269                         if (t1 == 0)
4270                           goto fail1;
4271                         t2 = expand_shift
4272                           (RSHIFT_EXPR, compute_mode, t1,
4273                            build_int_cst (NULL_TREE, post_shift),
4274                            NULL_RTX, 0);
4275                         t3 = expand_shift
4276                           (RSHIFT_EXPR, compute_mode, op0,
4277                            build_int_cst (NULL_TREE, size - 1),
4278                            NULL_RTX, 0);
4279                         if (d < 0)
4280                           quotient
4281                             = force_operand (gen_rtx_MINUS (compute_mode,
4282                                                             t3, t2),
4283                                              tquotient);
4284                         else
4285                           quotient
4286                             = force_operand (gen_rtx_MINUS (compute_mode,
4287                                                             t2, t3),
4288                                              tquotient);
4289                       }
4290                     else
4291                       {
4292                         rtx t1, t2, t3, t4;
4293
4294                         if (post_shift >= BITS_PER_WORD
4295                             || size - 1 >= BITS_PER_WORD)
4296                           goto fail1;
4297
4298                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4299                         mlr = gen_int_mode (ml, compute_mode);
4300                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4301                                       + shift_cost[speed][compute_mode][size - 1]
4302                                       + 2 * add_cost[speed][compute_mode]);
4303                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4304                                                    NULL_RTX, 0,
4305                                                    max_cost - extra_cost);
4306                         if (t1 == 0)
4307                           goto fail1;
4308                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4309                                                           t1, op0),
4310                                             NULL_RTX);
4311                         t3 = expand_shift
4312                           (RSHIFT_EXPR, compute_mode, t2,
4313                            build_int_cst (NULL_TREE, post_shift),
4314                            NULL_RTX, 0);
4315                         t4 = expand_shift
4316                           (RSHIFT_EXPR, compute_mode, op0,
4317                            build_int_cst (NULL_TREE, size - 1),
4318                            NULL_RTX, 0);
4319                         if (d < 0)
4320                           quotient
4321                             = force_operand (gen_rtx_MINUS (compute_mode,
4322                                                             t4, t3),
4323                                              tquotient);
4324                         else
4325                           quotient
4326                             = force_operand (gen_rtx_MINUS (compute_mode,
4327                                                             t3, t4),
4328                                              tquotient);
4329                       }
4330                   }
4331                 else            /* Too wide mode to use tricky code */
4332                   break;
4333
4334                 insn = get_last_insn ();
4335                 if (insn != last
4336                     && (set = single_set (insn)) != 0
4337                     && SET_DEST (set) == quotient)
4338                   set_unique_reg_note (insn,
4339                                        REG_EQUAL,
4340                                        gen_rtx_DIV (compute_mode, op0, op1));
4341               }
4342             break;
4343           }
4344       fail1:
4345         delete_insns_since (last);
4346         break;
4347
4348       case FLOOR_DIV_EXPR:
4349       case FLOOR_MOD_EXPR:
4350       /* We will come here only for signed operations.  */
4351         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4352           {
4353             unsigned HOST_WIDE_INT mh;
4354             int pre_shift, lgup, post_shift;
4355             HOST_WIDE_INT d = INTVAL (op1);
4356             rtx ml;
4357
4358             if (d > 0)
4359               {
4360                 /* We could just as easily deal with negative constants here,
4361                    but it does not seem worth the trouble for GCC 2.6.  */
4362                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4363                   {
4364                     pre_shift = floor_log2 (d);
4365                     if (rem_flag)
4366                       {
4367                         remainder = expand_binop (compute_mode, and_optab, op0,
4368                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4369                                                   remainder, 0, OPTAB_LIB_WIDEN);
4370                         if (remainder)
4371                           return gen_lowpart (mode, remainder);
4372                       }
4373                     quotient = expand_shift
4374                       (RSHIFT_EXPR, compute_mode, op0,
4375                        build_int_cst (NULL_TREE, pre_shift),
4376                        tquotient, 0);
4377                   }
4378                 else
4379                   {
4380                     rtx t1, t2, t3, t4;
4381
4382                     mh = choose_multiplier (d, size, size - 1,
4383                                             &ml, &post_shift, &lgup);
4384                     gcc_assert (!mh);
4385
4386                     if (post_shift < BITS_PER_WORD
4387                         && size - 1 < BITS_PER_WORD)
4388                       {
4389                         t1 = expand_shift
4390                           (RSHIFT_EXPR, compute_mode, op0,
4391                            build_int_cst (NULL_TREE, size - 1),
4392                            NULL_RTX, 0);
4393                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4394                                            NULL_RTX, 0, OPTAB_WIDEN);
4395                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4396                                       + shift_cost[speed][compute_mode][size - 1]
4397                                       + 2 * add_cost[speed][compute_mode]);
4398                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4399                                                    NULL_RTX, 1,
4400                                                    max_cost - extra_cost);
4401                         if (t3 != 0)
4402                           {
4403                             t4 = expand_shift
4404                               (RSHIFT_EXPR, compute_mode, t3,
4405                                build_int_cst (NULL_TREE, post_shift),
4406                                NULL_RTX, 1);
4407                             quotient = expand_binop (compute_mode, xor_optab,
4408                                                      t4, t1, tquotient, 0,
4409                                                      OPTAB_WIDEN);
4410                           }
4411                       }
4412                   }
4413               }
4414             else
4415               {
4416                 rtx nsign, t1, t2, t3, t4;
4417                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4418                                                   op0, constm1_rtx), NULL_RTX);
4419                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4420                                    0, OPTAB_WIDEN);
4421                 nsign = expand_shift
4422                   (RSHIFT_EXPR, compute_mode, t2,
4423                    build_int_cst (NULL_TREE, size - 1),
4424                    NULL_RTX, 0);
4425                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4426                                     NULL_RTX);
4427                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4428                                     NULL_RTX, 0);
4429                 if (t4)
4430                   {
4431                     rtx t5;
4432                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4433                                       NULL_RTX, 0);
4434                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4435                                                             t4, t5),
4436                                               tquotient);
4437                   }
4438               }
4439           }
4440
4441         if (quotient != 0)
4442           break;
4443         delete_insns_since (last);
4444
4445         /* Try using an instruction that produces both the quotient and
4446            remainder, using truncation.  We can easily compensate the quotient
4447            or remainder to get floor rounding, once we have the remainder.
4448            Notice that we compute also the final remainder value here,
4449            and return the result right away.  */
4450         if (target == 0 || GET_MODE (target) != compute_mode)
4451           target = gen_reg_rtx (compute_mode);
4452
4453         if (rem_flag)
4454           {
4455             remainder
4456               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4457             quotient = gen_reg_rtx (compute_mode);
4458           }
4459         else
4460           {
4461             quotient
4462               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4463             remainder = gen_reg_rtx (compute_mode);
4464           }
4465
4466         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4467                                  quotient, remainder, 0))
4468           {
4469             /* This could be computed with a branch-less sequence.
4470                Save that for later.  */
4471             rtx tem;
4472             rtx label = gen_label_rtx ();
4473             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4474             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4475                                 NULL_RTX, 0, OPTAB_WIDEN);
4476             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4477             expand_dec (quotient, const1_rtx);
4478             expand_inc (remainder, op1);
4479             emit_label (label);
4480             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4481           }
4482
4483         /* No luck with division elimination or divmod.  Have to do it
4484            by conditionally adjusting op0 *and* the result.  */
4485         {
4486           rtx label1, label2, label3, label4, label5;
4487           rtx adjusted_op0;
4488           rtx tem;
4489
4490           quotient = gen_reg_rtx (compute_mode);
4491           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4492           label1 = gen_label_rtx ();
4493           label2 = gen_label_rtx ();
4494           label3 = gen_label_rtx ();
4495           label4 = gen_label_rtx ();
4496           label5 = gen_label_rtx ();
4497           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4498           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4499           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4500                               quotient, 0, OPTAB_LIB_WIDEN);
4501           if (tem != quotient)
4502             emit_move_insn (quotient, tem);
4503           emit_jump_insn (gen_jump (label5));
4504           emit_barrier ();
4505           emit_label (label1);
4506           expand_inc (adjusted_op0, const1_rtx);
4507           emit_jump_insn (gen_jump (label4));
4508           emit_barrier ();
4509           emit_label (label2);
4510           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4511           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4512                               quotient, 0, OPTAB_LIB_WIDEN);
4513           if (tem != quotient)
4514             emit_move_insn (quotient, tem);
4515           emit_jump_insn (gen_jump (label5));
4516           emit_barrier ();
4517           emit_label (label3);
4518           expand_dec (adjusted_op0, const1_rtx);
4519           emit_label (label4);
4520           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4521                               quotient, 0, OPTAB_LIB_WIDEN);
4522           if (tem != quotient)
4523             emit_move_insn (quotient, tem);
4524           expand_dec (quotient, const1_rtx);
4525           emit_label (label5);
4526         }
4527         break;
4528
4529       case CEIL_DIV_EXPR:
4530       case CEIL_MOD_EXPR:
4531         if (unsignedp)
4532           {
4533             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4534               {
4535                 rtx t1, t2, t3;
4536                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4537                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4538                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4539                                    tquotient, 1);
4540                 t2 = expand_binop (compute_mode, and_optab, op0,
4541                                    GEN_INT (d - 1),
4542                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4543                 t3 = gen_reg_rtx (compute_mode);
4544                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4545                                       compute_mode, 1, 1);
4546                 if (t3 == 0)
4547                   {
4548                     rtx lab;
4549                     lab = gen_label_rtx ();
4550                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4551                     expand_inc (t1, const1_rtx);
4552                     emit_label (lab);
4553                     quotient = t1;
4554                   }
4555                 else
4556                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4557                                                           t1, t3),
4558                                             tquotient);
4559                 break;
4560               }
4561
4562             /* Try using an instruction that produces both the quotient and
4563                remainder, using truncation.  We can easily compensate the
4564                quotient or remainder to get ceiling rounding, once we have the
4565                remainder.  Notice that we compute also the final remainder
4566                value here, and return the result right away.  */
4567             if (target == 0 || GET_MODE (target) != compute_mode)
4568               target = gen_reg_rtx (compute_mode);
4569
4570             if (rem_flag)
4571               {
4572                 remainder = (REG_P (target)
4573                              ? target : gen_reg_rtx (compute_mode));
4574                 quotient = gen_reg_rtx (compute_mode);
4575               }
4576             else
4577               {
4578                 quotient = (REG_P (target)
4579                             ? target : gen_reg_rtx (compute_mode));
4580                 remainder = gen_reg_rtx (compute_mode);
4581               }
4582
4583             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4584                                      remainder, 1))
4585               {
4586                 /* This could be computed with a branch-less sequence.
4587                    Save that for later.  */
4588                 rtx label = gen_label_rtx ();
4589                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4590                                  compute_mode, label);
4591                 expand_inc (quotient, const1_rtx);
4592                 expand_dec (remainder, op1);
4593                 emit_label (label);
4594                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4595               }
4596
4597             /* No luck with division elimination or divmod.  Have to do it
4598                by conditionally adjusting op0 *and* the result.  */
4599             {
4600               rtx label1, label2;
4601               rtx adjusted_op0, tem;
4602
4603               quotient = gen_reg_rtx (compute_mode);
4604               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4605               label1 = gen_label_rtx ();
4606               label2 = gen_label_rtx ();
4607               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4608                                compute_mode, label1);
4609               emit_move_insn  (quotient, const0_rtx);
4610               emit_jump_insn (gen_jump (label2));
4611               emit_barrier ();
4612               emit_label (label1);
4613               expand_dec (adjusted_op0, const1_rtx);
4614               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4615                                   quotient, 1, OPTAB_LIB_WIDEN);
4616               if (tem != quotient)
4617                 emit_move_insn (quotient, tem);
4618               expand_inc (quotient, const1_rtx);
4619               emit_label (label2);
4620             }
4621           }
4622         else /* signed */
4623           {
4624             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4625                 && INTVAL (op1) >= 0)
4626               {
4627                 /* This is extremely similar to the code for the unsigned case
4628                    above.  For 2.7 we should merge these variants, but for
4629                    2.6.1 I don't want to touch the code for unsigned since that
4630                    get used in C.  The signed case will only be used by other
4631                    languages (Ada).  */
4632
4633                 rtx t1, t2, t3;
4634                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4635                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4636                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4637                                    tquotient, 0);
4638                 t2 = expand_binop (compute_mode, and_optab, op0,
4639                                    GEN_INT (d - 1),
4640                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4641                 t3 = gen_reg_rtx (compute_mode);
4642                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4643                                       compute_mode, 1, 1);
4644                 if (t3 == 0)
4645                   {
4646                     rtx lab;
4647                     lab = gen_label_rtx ();
4648                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4649                     expand_inc (t1, const1_rtx);
4650                     emit_label (lab);
4651                     quotient = t1;
4652                   }
4653                 else
4654                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4655                                                           t1, t3),
4656                                             tquotient);
4657                 break;
4658               }
4659
4660             /* Try using an instruction that produces both the quotient and
4661                remainder, using truncation.  We can easily compensate the
4662                quotient or remainder to get ceiling rounding, once we have the
4663                remainder.  Notice that we compute also the final remainder
4664                value here, and return the result right away.  */
4665             if (target == 0 || GET_MODE (target) != compute_mode)
4666               target = gen_reg_rtx (compute_mode);
4667             if (rem_flag)
4668               {
4669                 remainder= (REG_P (target)
4670                             ? target : gen_reg_rtx (compute_mode));
4671                 quotient = gen_reg_rtx (compute_mode);
4672               }
4673             else
4674               {
4675                 quotient = (REG_P (target)
4676                             ? target : gen_reg_rtx (compute_mode));
4677                 remainder = gen_reg_rtx (compute_mode);
4678               }
4679
4680             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4681                                      remainder, 0))
4682               {
4683                 /* This could be computed with a branch-less sequence.
4684                    Save that for later.  */
4685                 rtx tem;
4686                 rtx label = gen_label_rtx ();
4687                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4688                                  compute_mode, label);
4689                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4690                                     NULL_RTX, 0, OPTAB_WIDEN);
4691                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4692                 expand_inc (quotient, const1_rtx);
4693                 expand_dec (remainder, op1);
4694                 emit_label (label);
4695                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4696               }
4697
4698             /* No luck with division elimination or divmod.  Have to do it
4699                by conditionally adjusting op0 *and* the result.  */
4700             {
4701               rtx label1, label2, label3, label4, label5;
4702               rtx adjusted_op0;
4703               rtx tem;
4704
4705               quotient = gen_reg_rtx (compute_mode);
4706               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4707               label1 = gen_label_rtx ();
4708               label2 = gen_label_rtx ();
4709               label3 = gen_label_rtx ();
4710               label4 = gen_label_rtx ();
4711               label5 = gen_label_rtx ();
4712               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4713               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4714                                compute_mode, label1);
4715               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4716                                   quotient, 0, OPTAB_LIB_WIDEN);
4717               if (tem != quotient)
4718                 emit_move_insn (quotient, tem);
4719               emit_jump_insn (gen_jump (label5));
4720               emit_barrier ();
4721               emit_label (label1);
4722               expand_dec (adjusted_op0, const1_rtx);
4723               emit_jump_insn (gen_jump (label4));
4724               emit_barrier ();
4725               emit_label (label2);
4726               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4727                                compute_mode, label3);
4728               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4729                                   quotient, 0, OPTAB_LIB_WIDEN);
4730               if (tem != quotient)
4731                 emit_move_insn (quotient, tem);
4732               emit_jump_insn (gen_jump (label5));
4733               emit_barrier ();
4734               emit_label (label3);
4735               expand_inc (adjusted_op0, const1_rtx);
4736               emit_label (label4);
4737               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4738                                   quotient, 0, OPTAB_LIB_WIDEN);
4739               if (tem != quotient)
4740                 emit_move_insn (quotient, tem);
4741               expand_inc (quotient, const1_rtx);
4742               emit_label (label5);
4743             }
4744           }
4745         break;
4746
4747       case EXACT_DIV_EXPR:
4748         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4749           {
4750             HOST_WIDE_INT d = INTVAL (op1);
4751             unsigned HOST_WIDE_INT ml;
4752             int pre_shift;
4753             rtx t1;
4754
4755             pre_shift = floor_log2 (d & -d);
4756             ml = invert_mod2n (d >> pre_shift, size);
4757             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4758                                build_int_cst (NULL_TREE, pre_shift),
4759                                NULL_RTX, unsignedp);
4760             quotient = expand_mult (compute_mode, t1,
4761                                     gen_int_mode (ml, compute_mode),
4762                                     NULL_RTX, 1);
4763
4764             insn = get_last_insn ();
4765             set_unique_reg_note (insn,
4766                                  REG_EQUAL,
4767                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4768                                                  compute_mode,
4769                                                  op0, op1));
4770           }
4771         break;
4772
4773       case ROUND_DIV_EXPR:
4774       case ROUND_MOD_EXPR:
4775         if (unsignedp)
4776           {
4777             rtx tem;
4778             rtx label;
4779             label = gen_label_rtx ();
4780             quotient = gen_reg_rtx (compute_mode);
4781             remainder = gen_reg_rtx (compute_mode);
4782             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4783               {
4784                 rtx tem;
4785                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4786                                          quotient, 1, OPTAB_LIB_WIDEN);
4787                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4788                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4789                                           remainder, 1, OPTAB_LIB_WIDEN);
4790               }
4791             tem = plus_constant (op1, -1);
4792             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4793                                 integer_one_node, NULL_RTX, 1);
4794             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4795             expand_inc (quotient, const1_rtx);
4796             expand_dec (remainder, op1);
4797             emit_label (label);
4798           }
4799         else
4800           {
4801             rtx abs_rem, abs_op1, tem, mask;
4802             rtx label;
4803             label = gen_label_rtx ();
4804             quotient = gen_reg_rtx (compute_mode);
4805             remainder = gen_reg_rtx (compute_mode);
4806             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4807               {
4808                 rtx tem;
4809                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4810                                          quotient, 0, OPTAB_LIB_WIDEN);
4811                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4812                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4813                                           remainder, 0, OPTAB_LIB_WIDEN);
4814               }
4815             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4816             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4817             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4818                                 integer_one_node, NULL_RTX, 1);
4819             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4820             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4821                                 NULL_RTX, 0, OPTAB_WIDEN);
4822             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4823                                  build_int_cst (NULL_TREE, size - 1),
4824                                  NULL_RTX, 0);
4825             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4826                                 NULL_RTX, 0, OPTAB_WIDEN);
4827             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4828                                 NULL_RTX, 0, OPTAB_WIDEN);
4829             expand_inc (quotient, tem);
4830             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4831                                 NULL_RTX, 0, OPTAB_WIDEN);
4832             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4833                                 NULL_RTX, 0, OPTAB_WIDEN);
4834             expand_dec (remainder, tem);
4835             emit_label (label);
4836           }
4837         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4838
4839       default:
4840         gcc_unreachable ();
4841       }
4842
4843   if (quotient == 0)
4844     {
4845       if (target && GET_MODE (target) != compute_mode)
4846         target = 0;
4847
4848       if (rem_flag)
4849         {
4850           /* Try to produce the remainder without producing the quotient.
4851              If we seem to have a divmod pattern that does not require widening,
4852              don't try widening here.  We should really have a WIDEN argument
4853              to expand_twoval_binop, since what we'd really like to do here is
4854              1) try a mod insn in compute_mode
4855              2) try a divmod insn in compute_mode
4856              3) try a div insn in compute_mode and multiply-subtract to get
4857                 remainder
4858              4) try the same things with widening allowed.  */
4859           remainder
4860             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4861                                  op0, op1, target,
4862                                  unsignedp,
4863                                  ((optab_handler (optab2, compute_mode)
4864                                    != CODE_FOR_nothing)
4865                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4866           if (remainder == 0)
4867             {
4868               /* No luck there.  Can we do remainder and divide at once
4869                  without a library call?  */
4870               remainder = gen_reg_rtx (compute_mode);
4871               if (! expand_twoval_binop ((unsignedp
4872                                           ? udivmod_optab
4873                                           : sdivmod_optab),
4874                                          op0, op1,
4875                                          NULL_RTX, remainder, unsignedp))
4876                 remainder = 0;
4877             }
4878
4879           if (remainder)
4880             return gen_lowpart (mode, remainder);
4881         }
4882
4883       /* Produce the quotient.  Try a quotient insn, but not a library call.
4884          If we have a divmod in this mode, use it in preference to widening
4885          the div (for this test we assume it will not fail). Note that optab2
4886          is set to the one of the two optabs that the call below will use.  */
4887       quotient
4888         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4889                              op0, op1, rem_flag ? NULL_RTX : target,
4890                              unsignedp,
4891                              ((optab_handler (optab2, compute_mode)
4892                                != CODE_FOR_nothing)
4893                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4894
4895       if (quotient == 0)
4896         {
4897           /* No luck there.  Try a quotient-and-remainder insn,
4898              keeping the quotient alone.  */
4899           quotient = gen_reg_rtx (compute_mode);
4900           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4901                                      op0, op1,
4902                                      quotient, NULL_RTX, unsignedp))
4903             {
4904               quotient = 0;
4905               if (! rem_flag)
4906                 /* Still no luck.  If we are not computing the remainder,
4907                    use a library call for the quotient.  */
4908                 quotient = sign_expand_binop (compute_mode,
4909                                               udiv_optab, sdiv_optab,
4910                                               op0, op1, target,
4911                                               unsignedp, OPTAB_LIB_WIDEN);
4912             }
4913         }
4914     }
4915
4916   if (rem_flag)
4917     {
4918       if (target && GET_MODE (target) != compute_mode)
4919         target = 0;
4920
4921       if (quotient == 0)
4922         {
4923           /* No divide instruction either.  Use library for remainder.  */
4924           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4925                                          op0, op1, target,
4926                                          unsignedp, OPTAB_LIB_WIDEN);
4927           /* No remainder function.  Try a quotient-and-remainder
4928              function, keeping the remainder.  */
4929           if (!remainder)
4930             {
4931               remainder = gen_reg_rtx (compute_mode);
4932               if (!expand_twoval_binop_libfunc
4933                   (unsignedp ? udivmod_optab : sdivmod_optab,
4934                    op0, op1,
4935                    NULL_RTX, remainder,
4936                    unsignedp ? UMOD : MOD))
4937                 remainder = NULL_RTX;
4938             }
4939         }
4940       else
4941         {
4942           /* We divided.  Now finish doing X - Y * (X / Y).  */
4943           remainder = expand_mult (compute_mode, quotient, op1,
4944                                    NULL_RTX, unsignedp);
4945           remainder = expand_binop (compute_mode, sub_optab, op0,
4946                                     remainder, target, unsignedp,
4947                                     OPTAB_LIB_WIDEN);
4948         }
4949     }
4950
4951   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4952 }
4953 \f
4954 /* Return a tree node with data type TYPE, describing the value of X.
4955    Usually this is an VAR_DECL, if there is no obvious better choice.
4956    X may be an expression, however we only support those expressions
4957    generated by loop.c.  */
4958
4959 tree
4960 make_tree (tree type, rtx x)
4961 {
4962   tree t;
4963
4964   switch (GET_CODE (x))
4965     {
4966     case CONST_INT:
4967       {
4968         HOST_WIDE_INT hi = 0;
4969
4970         if (INTVAL (x) < 0
4971             && !(TYPE_UNSIGNED (type)
4972                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4973                      < HOST_BITS_PER_WIDE_INT)))
4974           hi = -1;
4975
4976         t = build_int_cst_wide (type, INTVAL (x), hi);
4977
4978         return t;
4979       }
4980
4981     case CONST_DOUBLE:
4982       if (GET_MODE (x) == VOIDmode)
4983         t = build_int_cst_wide (type,
4984                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4985       else
4986         {
4987           REAL_VALUE_TYPE d;
4988
4989           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4990           t = build_real (type, d);
4991         }
4992
4993       return t;
4994
4995     case CONST_VECTOR:
4996       {
4997         int units = CONST_VECTOR_NUNITS (x);
4998         tree itype = TREE_TYPE (type);
4999         tree t = NULL_TREE;
5000         int i;
5001
5002
5003         /* Build a tree with vector elements.  */
5004         for (i = units - 1; i >= 0; --i)
5005           {
5006             rtx elt = CONST_VECTOR_ELT (x, i);
5007             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5008           }
5009
5010         return build_vector (type, t);
5011       }
5012
5013     case PLUS:
5014       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5015                           make_tree (type, XEXP (x, 1)));
5016
5017     case MINUS:
5018       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5019                           make_tree (type, XEXP (x, 1)));
5020
5021     case NEG:
5022       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5023
5024     case MULT:
5025       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5026                           make_tree (type, XEXP (x, 1)));
5027
5028     case ASHIFT:
5029       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5030                           make_tree (type, XEXP (x, 1)));
5031
5032     case LSHIFTRT:
5033       t = unsigned_type_for (type);
5034       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5035                                          make_tree (t, XEXP (x, 0)),
5036                                          make_tree (type, XEXP (x, 1))));
5037
5038     case ASHIFTRT:
5039       t = signed_type_for (type);
5040       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5041                                          make_tree (t, XEXP (x, 0)),
5042                                          make_tree (type, XEXP (x, 1))));
5043
5044     case DIV:
5045       if (TREE_CODE (type) != REAL_TYPE)
5046         t = signed_type_for (type);
5047       else
5048         t = type;
5049
5050       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5051                                          make_tree (t, XEXP (x, 0)),
5052                                          make_tree (t, XEXP (x, 1))));
5053     case UDIV:
5054       t = unsigned_type_for (type);
5055       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5056                                          make_tree (t, XEXP (x, 0)),
5057                                          make_tree (t, XEXP (x, 1))));
5058
5059     case SIGN_EXTEND:
5060     case ZERO_EXTEND:
5061       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5062                                           GET_CODE (x) == ZERO_EXTEND);
5063       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5064
5065     case CONST:
5066       return make_tree (type, XEXP (x, 0));
5067
5068     case SYMBOL_REF:
5069       t = SYMBOL_REF_DECL (x);
5070       if (t)
5071         return fold_convert (type, build_fold_addr_expr (t));
5072       /* else fall through.  */
5073
5074     default:
5075       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5076
5077       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5078          address mode to pointer mode.  */
5079       if (POINTER_TYPE_P (type))
5080         x = convert_memory_address_addr_space
5081               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5082
5083       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5084          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5085       t->decl_with_rtl.rtl = x;
5086
5087       return t;
5088     }
5089 }
5090 \f
5091 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5092    and returning TARGET.
5093
5094    If TARGET is 0, a pseudo-register or constant is returned.  */
5095
5096 rtx
5097 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5098 {
5099   rtx tem = 0;
5100
5101   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5102     tem = simplify_binary_operation (AND, mode, op0, op1);
5103   if (tem == 0)
5104     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5105
5106   if (target == 0)
5107     target = tem;
5108   else if (tem != target)
5109     emit_move_insn (target, tem);
5110   return target;
5111 }
5112
5113 /* Helper function for emit_store_flag.  */
5114 static rtx
5115 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5116              enum machine_mode mode, enum machine_mode compare_mode,
5117              int unsignedp, rtx x, rtx y, int normalizep,
5118              enum machine_mode target_mode)
5119 {
5120   rtx op0, last, comparison, subtarget, pattern;
5121   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5122
5123   last = get_last_insn ();
5124   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5125   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5126   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5127   if (!x || !y
5128       || !insn_data[icode].operand[2].predicate
5129           (x, insn_data[icode].operand[2].mode)
5130       || !insn_data[icode].operand[3].predicate
5131           (y, insn_data[icode].operand[3].mode)
5132       || !insn_data[icode].operand[1].predicate (comparison, VOIDmode))
5133     {
5134       delete_insns_since (last);
5135       return NULL_RTX;
5136     }
5137
5138   if (target_mode == VOIDmode)
5139     target_mode = result_mode;
5140   if (!target)
5141     target = gen_reg_rtx (target_mode);
5142
5143   if (optimize
5144       || !(insn_data[(int) icode].operand[0].predicate (target, result_mode)))
5145     subtarget = gen_reg_rtx (result_mode);
5146   else
5147     subtarget = target;
5148
5149   pattern = GEN_FCN (icode) (subtarget, comparison, x, y);
5150   if (!pattern)
5151     return NULL_RTX;
5152   emit_insn (pattern);
5153
5154   /* If we are converting to a wider mode, first convert to
5155      TARGET_MODE, then normalize.  This produces better combining
5156      opportunities on machines that have a SIGN_EXTRACT when we are
5157      testing a single bit.  This mostly benefits the 68k.
5158
5159      If STORE_FLAG_VALUE does not have the sign bit set when
5160      interpreted in MODE, we can do this conversion as unsigned, which
5161      is usually more efficient.  */
5162   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5163     {
5164       convert_move (target, subtarget,
5165                     (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5166                     && 0 == (STORE_FLAG_VALUE
5167                              & ((HOST_WIDE_INT) 1
5168                                 << (GET_MODE_BITSIZE (result_mode) -1))));
5169       op0 = target;
5170       result_mode = target_mode;
5171     }
5172   else
5173     op0 = subtarget;
5174
5175   /* If we want to keep subexpressions around, don't reuse our last
5176      target.  */
5177   if (optimize)
5178     subtarget = 0;
5179
5180   /* Now normalize to the proper value in MODE.  Sometimes we don't
5181      have to do anything.  */
5182   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5183     ;
5184   /* STORE_FLAG_VALUE might be the most negative number, so write
5185      the comparison this way to avoid a compiler-time warning.  */
5186   else if (- normalizep == STORE_FLAG_VALUE)
5187     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5188
5189   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5190      it hard to use a value of just the sign bit due to ANSI integer
5191      constant typing rules.  */
5192   else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5193            && (STORE_FLAG_VALUE
5194                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5195     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5196                         size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5197                         normalizep == 1);
5198   else
5199     {
5200       gcc_assert (STORE_FLAG_VALUE & 1);
5201
5202       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5203       if (normalizep == -1)
5204         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5205     }
5206
5207   /* If we were converting to a smaller mode, do the conversion now.  */
5208   if (target_mode != result_mode)
5209     {
5210       convert_move (target, op0, 0);
5211       return target;
5212     }
5213   else
5214     return op0;
5215 }
5216
5217
5218 /* A subroutine of emit_store_flag only including "tricks" that do not
5219    need a recursive call.  These are kept separate to avoid infinite
5220    loops.  */
5221
5222 static rtx
5223 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5224                    enum machine_mode mode, int unsignedp, int normalizep,
5225                    enum machine_mode target_mode)
5226 {
5227   rtx subtarget;
5228   enum insn_code icode;
5229   enum machine_mode compare_mode;
5230   enum mode_class mclass;
5231   enum rtx_code scode;
5232   rtx tem;
5233
5234   if (unsignedp)
5235     code = unsigned_condition (code);
5236   scode = swap_condition (code);
5237
5238   /* If one operand is constant, make it the second one.  Only do this
5239      if the other operand is not constant as well.  */
5240
5241   if (swap_commutative_operands_p (op0, op1))
5242     {
5243       tem = op0;
5244       op0 = op1;
5245       op1 = tem;
5246       code = swap_condition (code);
5247     }
5248
5249   if (mode == VOIDmode)
5250     mode = GET_MODE (op0);
5251
5252   /* For some comparisons with 1 and -1, we can convert this to
5253      comparisons with zero.  This will often produce more opportunities for
5254      store-flag insns.  */
5255
5256   switch (code)
5257     {
5258     case LT:
5259       if (op1 == const1_rtx)
5260         op1 = const0_rtx, code = LE;
5261       break;
5262     case LE:
5263       if (op1 == constm1_rtx)
5264         op1 = const0_rtx, code = LT;
5265       break;
5266     case GE:
5267       if (op1 == const1_rtx)
5268         op1 = const0_rtx, code = GT;
5269       break;
5270     case GT:
5271       if (op1 == constm1_rtx)
5272         op1 = const0_rtx, code = GE;
5273       break;
5274     case GEU:
5275       if (op1 == const1_rtx)
5276         op1 = const0_rtx, code = NE;
5277       break;
5278     case LTU:
5279       if (op1 == const1_rtx)
5280         op1 = const0_rtx, code = EQ;
5281       break;
5282     default:
5283       break;
5284     }
5285
5286   /* If we are comparing a double-word integer with zero or -1, we can
5287      convert the comparison into one involving a single word.  */
5288   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5289       && GET_MODE_CLASS (mode) == MODE_INT
5290       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5291     {
5292       if ((code == EQ || code == NE)
5293           && (op1 == const0_rtx || op1 == constm1_rtx))
5294         {
5295           rtx op00, op01;
5296
5297           /* Do a logical OR or AND of the two words and compare the
5298              result.  */
5299           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5300           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5301           tem = expand_binop (word_mode,
5302                               op1 == const0_rtx ? ior_optab : and_optab,
5303                               op00, op01, NULL_RTX, unsignedp,
5304                               OPTAB_DIRECT);
5305
5306           if (tem != 0)
5307             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5308                                    unsignedp, normalizep);
5309         }
5310       else if ((code == LT || code == GE) && op1 == const0_rtx)
5311         {
5312           rtx op0h;
5313
5314           /* If testing the sign bit, can just test on high word.  */
5315           op0h = simplify_gen_subreg (word_mode, op0, mode,
5316                                       subreg_highpart_offset (word_mode,
5317                                                               mode));
5318           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5319                                  unsignedp, normalizep);
5320         }
5321       else
5322         tem = NULL_RTX;
5323
5324       if (tem)
5325         {
5326           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5327             return tem;
5328           if (!target)
5329             target = gen_reg_rtx (target_mode);
5330
5331           convert_move (target, tem,
5332                         0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5333                               & ((HOST_WIDE_INT) 1
5334                                  << (GET_MODE_BITSIZE (word_mode) -1))));
5335           return target;
5336         }
5337     }
5338
5339   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5340      complement of A (for GE) and shifting the sign bit to the low bit.  */
5341   if (op1 == const0_rtx && (code == LT || code == GE)
5342       && GET_MODE_CLASS (mode) == MODE_INT
5343       && (normalizep || STORE_FLAG_VALUE == 1
5344           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5345               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5346                   == ((unsigned HOST_WIDE_INT) 1
5347                       << (GET_MODE_BITSIZE (mode) - 1))))))
5348     {
5349       subtarget = target;
5350
5351       if (!target)
5352         target_mode = mode;
5353
5354       /* If the result is to be wider than OP0, it is best to convert it
5355          first.  If it is to be narrower, it is *incorrect* to convert it
5356          first.  */
5357       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5358         {
5359           op0 = convert_modes (target_mode, mode, op0, 0);
5360           mode = target_mode;
5361         }
5362
5363       if (target_mode != mode)
5364         subtarget = 0;
5365
5366       if (code == GE)
5367         op0 = expand_unop (mode, one_cmpl_optab, op0,
5368                            ((STORE_FLAG_VALUE == 1 || normalizep)
5369                             ? 0 : subtarget), 0);
5370
5371       if (STORE_FLAG_VALUE == 1 || normalizep)
5372         /* If we are supposed to produce a 0/1 value, we want to do
5373            a logical shift from the sign bit to the low-order bit; for
5374            a -1/0 value, we do an arithmetic shift.  */
5375         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5376                             size_int (GET_MODE_BITSIZE (mode) - 1),
5377                             subtarget, normalizep != -1);
5378
5379       if (mode != target_mode)
5380         op0 = convert_modes (target_mode, mode, op0, 0);
5381
5382       return op0;
5383     }
5384
5385   mclass = GET_MODE_CLASS (mode);
5386   for (compare_mode = mode; compare_mode != VOIDmode;
5387        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5388     {
5389      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5390      icode = optab_handler (cstore_optab, optab_mode);
5391      if (icode != CODE_FOR_nothing)
5392         {
5393           do_pending_stack_adjust ();
5394           tem = emit_cstore (target, icode, code, mode, compare_mode,
5395                              unsignedp, op0, op1, normalizep, target_mode);
5396           if (tem)
5397             return tem;
5398
5399           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5400             {
5401               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5402                                  unsignedp, op1, op0, normalizep, target_mode);
5403               if (tem)
5404                 return tem;
5405             }
5406           break;
5407         }
5408     }
5409
5410   return 0;
5411 }
5412
5413 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5414    and storing in TARGET.  Normally return TARGET.
5415    Return 0 if that cannot be done.
5416
5417    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5418    it is VOIDmode, they cannot both be CONST_INT.
5419
5420    UNSIGNEDP is for the case where we have to widen the operands
5421    to perform the operation.  It says to use zero-extension.
5422
5423    NORMALIZEP is 1 if we should convert the result to be either zero
5424    or one.  Normalize is -1 if we should convert the result to be
5425    either zero or -1.  If NORMALIZEP is zero, the result will be left
5426    "raw" out of the scc insn.  */
5427
5428 rtx
5429 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5430                  enum machine_mode mode, int unsignedp, int normalizep)
5431 {
5432   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5433   enum rtx_code rcode;
5434   rtx subtarget;
5435   rtx tem, last, trueval;
5436
5437   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5438                            target_mode);
5439   if (tem)
5440     return tem;
5441
5442   /* If we reached here, we can't do this with a scc insn, however there
5443      are some comparisons that can be done in other ways.  Don't do any
5444      of these cases if branches are very cheap.  */
5445   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5446     return 0;
5447
5448   /* See what we need to return.  We can only return a 1, -1, or the
5449      sign bit.  */
5450
5451   if (normalizep == 0)
5452     {
5453       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5454         normalizep = STORE_FLAG_VALUE;
5455
5456       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5457                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5458                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5459         ;
5460       else
5461         return 0;
5462     }
5463
5464   last = get_last_insn ();
5465
5466   /* If optimizing, use different pseudo registers for each insn, instead
5467      of reusing the same pseudo.  This leads to better CSE, but slows
5468      down the compiler, since there are more pseudos */
5469   subtarget = (!optimize
5470                && (target_mode == mode)) ? target : NULL_RTX;
5471   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5472
5473   /* For floating-point comparisons, try the reverse comparison or try
5474      changing the "orderedness" of the comparison.  */
5475   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5476     {
5477       enum rtx_code first_code;
5478       bool and_them;
5479
5480       rcode = reverse_condition_maybe_unordered (code);
5481       if (can_compare_p (rcode, mode, ccp_store_flag)
5482           && (code == ORDERED || code == UNORDERED
5483               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5484               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5485         {
5486           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5487                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5488
5489           /* For the reverse comparison, use either an addition or a XOR.  */
5490           if (want_add
5491               && rtx_cost (GEN_INT (normalizep), PLUS,
5492                            optimize_insn_for_speed_p ()) == 0)
5493             {
5494               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5495                                        STORE_FLAG_VALUE, target_mode);
5496               if (tem)
5497                 return expand_binop (target_mode, add_optab, tem,
5498                                      GEN_INT (normalizep),
5499                                      target, 0, OPTAB_WIDEN);
5500             }
5501           else if (!want_add
5502                    && rtx_cost (trueval, XOR,
5503                                 optimize_insn_for_speed_p ()) == 0)
5504             {
5505               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5506                                        normalizep, target_mode);
5507               if (tem)
5508                 return expand_binop (target_mode, xor_optab, tem, trueval,
5509                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5510             }
5511         }
5512
5513       delete_insns_since (last);
5514
5515       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5516       if (code == ORDERED || code == UNORDERED)
5517         return 0;
5518
5519       and_them = split_comparison (code, mode, &first_code, &code);
5520
5521       /* If there are no NaNs, the first comparison should always fall through.
5522          Effectively change the comparison to the other one.  */
5523       if (!HONOR_NANS (mode))
5524         {
5525           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5526           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5527                                     target_mode);
5528         }
5529
5530 #ifdef HAVE_conditional_move
5531       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5532          conditional move.  */
5533       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5534                                normalizep, target_mode);
5535       if (tem == 0)
5536         return 0;
5537
5538       if (and_them)
5539         tem = emit_conditional_move (target, code, op0, op1, mode,
5540                                      tem, const0_rtx, GET_MODE (tem), 0);
5541       else
5542         tem = emit_conditional_move (target, code, op0, op1, mode,
5543                                      trueval, tem, GET_MODE (tem), 0);
5544
5545       if (tem == 0)
5546         delete_insns_since (last);
5547       return tem;
5548 #else
5549       return 0;
5550 #endif
5551     }
5552
5553   /* The remaining tricks only apply to integer comparisons.  */
5554
5555   if (GET_MODE_CLASS (mode) != MODE_INT)
5556     return 0;
5557
5558   /* If this is an equality comparison of integers, we can try to exclusive-or
5559      (or subtract) the two operands and use a recursive call to try the
5560      comparison with zero.  Don't do any of these cases if branches are
5561      very cheap.  */
5562
5563   if ((code == EQ || code == NE) && op1 != const0_rtx)
5564     {
5565       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5566                           OPTAB_WIDEN);
5567
5568       if (tem == 0)
5569         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5570                             OPTAB_WIDEN);
5571       if (tem != 0)
5572         tem = emit_store_flag (target, code, tem, const0_rtx,
5573                                mode, unsignedp, normalizep);
5574       if (tem != 0)
5575         return tem;
5576
5577       delete_insns_since (last);
5578     }
5579
5580   /* For integer comparisons, try the reverse comparison.  However, for
5581      small X and if we'd have anyway to extend, implementing "X != 0"
5582      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5583   rcode = reverse_condition (code);
5584   if (can_compare_p (rcode, mode, ccp_store_flag)
5585       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5586             && code == NE
5587             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5588             && op1 == const0_rtx))
5589     {
5590       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5591                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5592
5593       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5594       if (want_add
5595           && rtx_cost (GEN_INT (normalizep), PLUS,
5596                        optimize_insn_for_speed_p ()) == 0)
5597         {
5598           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5599                                    STORE_FLAG_VALUE, target_mode);
5600           if (tem != 0)
5601             tem = expand_binop (target_mode, add_optab, tem,
5602                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5603         }
5604       else if (!want_add
5605                && rtx_cost (trueval, XOR,
5606                             optimize_insn_for_speed_p ()) == 0)
5607         {
5608           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5609                                    normalizep, target_mode);
5610           if (tem != 0)
5611             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5612                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5613         }
5614
5615       if (tem != 0)
5616         return tem;
5617       delete_insns_since (last);
5618     }
5619
5620   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5621      the constant zero.  Reject all other comparisons at this point.  Only
5622      do LE and GT if branches are expensive since they are expensive on
5623      2-operand machines.  */
5624
5625   if (op1 != const0_rtx
5626       || (code != EQ && code != NE
5627           && (BRANCH_COST (optimize_insn_for_speed_p (),
5628                            false) <= 1 || (code != LE && code != GT))))
5629     return 0;
5630
5631   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5632      do the necessary operation below.  */
5633
5634   tem = 0;
5635
5636   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5637      the sign bit set.  */
5638
5639   if (code == LE)
5640     {
5641       /* This is destructive, so SUBTARGET can't be OP0.  */
5642       if (rtx_equal_p (subtarget, op0))
5643         subtarget = 0;
5644
5645       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5646                           OPTAB_WIDEN);
5647       if (tem)
5648         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5649                             OPTAB_WIDEN);
5650     }
5651
5652   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5653      number of bits in the mode of OP0, minus one.  */
5654
5655   if (code == GT)
5656     {
5657       if (rtx_equal_p (subtarget, op0))
5658         subtarget = 0;
5659
5660       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5661                           size_int (GET_MODE_BITSIZE (mode) - 1),
5662                           subtarget, 0);
5663       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5664                           OPTAB_WIDEN);
5665     }
5666
5667   if (code == EQ || code == NE)
5668     {
5669       /* For EQ or NE, one way to do the comparison is to apply an operation
5670          that converts the operand into a positive number if it is nonzero
5671          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5672          for NE we negate.  This puts the result in the sign bit.  Then we
5673          normalize with a shift, if needed.
5674
5675          Two operations that can do the above actions are ABS and FFS, so try
5676          them.  If that doesn't work, and MODE is smaller than a full word,
5677          we can use zero-extension to the wider mode (an unsigned conversion)
5678          as the operation.  */
5679
5680       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5681          that is compensated by the subsequent overflow when subtracting
5682          one / negating.  */
5683
5684       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5685         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5686       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5687         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5688       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5689         {
5690           tem = convert_modes (word_mode, mode, op0, 1);
5691           mode = word_mode;
5692         }
5693
5694       if (tem != 0)
5695         {
5696           if (code == EQ)
5697             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5698                                 0, OPTAB_WIDEN);
5699           else
5700             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5701         }
5702
5703       /* If we couldn't do it that way, for NE we can "or" the two's complement
5704          of the value with itself.  For EQ, we take the one's complement of
5705          that "or", which is an extra insn, so we only handle EQ if branches
5706          are expensive.  */
5707
5708       if (tem == 0
5709           && (code == NE
5710               || BRANCH_COST (optimize_insn_for_speed_p (),
5711                               false) > 1))
5712         {
5713           if (rtx_equal_p (subtarget, op0))
5714             subtarget = 0;
5715
5716           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5717           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5718                               OPTAB_WIDEN);
5719
5720           if (tem && code == EQ)
5721             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5722         }
5723     }
5724
5725   if (tem && normalizep)
5726     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5727                         size_int (GET_MODE_BITSIZE (mode) - 1),
5728                         subtarget, normalizep == 1);
5729
5730   if (tem)
5731     {
5732       if (!target)
5733         ;
5734       else if (GET_MODE (tem) != target_mode)
5735         {
5736           convert_move (target, tem, 0);
5737           tem = target;
5738         }
5739       else if (!subtarget)
5740         {
5741           emit_move_insn (target, tem);
5742           tem = target;
5743         }
5744     }
5745   else
5746     delete_insns_since (last);
5747
5748   return tem;
5749 }
5750
5751 /* Like emit_store_flag, but always succeeds.  */
5752
5753 rtx
5754 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5755                        enum machine_mode mode, int unsignedp, int normalizep)
5756 {
5757   rtx tem, label;
5758   rtx trueval, falseval;
5759
5760   /* First see if emit_store_flag can do the job.  */
5761   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5762   if (tem != 0)
5763     return tem;
5764
5765   if (!target)
5766     target = gen_reg_rtx (word_mode);
5767
5768   /* If this failed, we have to do this with set/compare/jump/set code.
5769      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5770   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5771   if (code == NE
5772       && GET_MODE_CLASS (mode) == MODE_INT
5773       && REG_P (target)
5774       && op0 == target
5775       && op1 == const0_rtx)
5776     {
5777       label = gen_label_rtx ();
5778       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5779                                mode, NULL_RTX, NULL_RTX, label, -1);
5780       emit_move_insn (target, trueval);
5781       emit_label (label);
5782       return target;
5783     }
5784
5785   if (!REG_P (target)
5786       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5787     target = gen_reg_rtx (GET_MODE (target));
5788
5789   /* Jump in the right direction if the target cannot implement CODE
5790      but can jump on its reverse condition.  */
5791   falseval = const0_rtx;
5792   if (! can_compare_p (code, mode, ccp_jump)
5793       && (! FLOAT_MODE_P (mode)
5794           || code == ORDERED || code == UNORDERED
5795           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5796           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5797     {
5798       enum rtx_code rcode;
5799       if (FLOAT_MODE_P (mode))
5800         rcode = reverse_condition_maybe_unordered (code);
5801       else
5802         rcode = reverse_condition (code);
5803
5804       /* Canonicalize to UNORDERED for the libcall.  */
5805       if (can_compare_p (rcode, mode, ccp_jump)
5806           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5807         {
5808           falseval = trueval;
5809           trueval = const0_rtx;
5810           code = rcode;
5811         }
5812     }
5813
5814   emit_move_insn (target, trueval);
5815   label = gen_label_rtx ();
5816   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5817                            NULL_RTX, label, -1);
5818
5819   emit_move_insn (target, falseval);
5820   emit_label (label);
5821
5822   return target;
5823 }
5824 \f
5825 /* Perform possibly multi-word comparison and conditional jump to LABEL
5826    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5827    now a thin wrapper around do_compare_rtx_and_jump.  */
5828
5829 static void
5830 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5831                  rtx label)
5832 {
5833   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5834   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5835                            NULL_RTX, NULL_RTX, label, -1);
5836 }