gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41
  42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  46                                    unsigned HOST_WIDE_INT, rtx);
  47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT,
  50                                     unsigned HOST_WIDE_INT, rtx, int);
  51 static rtx mask_rtx (enum machine_mode, int, int, int);
  52 static rtx lshift_value (enum machine_mode, rtx, int, int);
  53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  54                                     unsigned HOST_WIDE_INT, int);
  55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  58
  59 /* Test whether a value is zero of a power of two.  */
  60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  61
  62 /* Nonzero means divides or modulus operations are relatively cheap for
  63    powers of two, so don't use branches; emit the operation instead.
  64    Usually, this will mean that the MD file will emit non-branch
  65    sequences.  */
  66
  67 static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
  68 static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
  69
  70 #ifndef SLOW_UNALIGNED_ACCESS
  71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  72 #endif
  73
  74 /* For compilers that support multiple targets with different word sizes,
  75    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  76    is the H8/300(H) compiler.  */
  77
  78 #ifndef MAX_BITS_PER_WORD
  79 #define MAX_BITS_PER_WORD BITS_PER_WORD
  80 #endif
  81
  82 /* Reduce conditional compilation elsewhere.  */
  83 #ifndef HAVE_insv
  84 #define HAVE_insv       0
  85 #define CODE_FOR_insv   CODE_FOR_nothing
  86 #define gen_insv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extv
  89 #define HAVE_extv       0
  90 #define CODE_FOR_extv   CODE_FOR_nothing
  91 #define gen_extv(a,b,c,d) NULL_RTX
  92 #endif
  93 #ifndef HAVE_extzv
  94 #define HAVE_extzv      0
  95 #define CODE_FOR_extzv  CODE_FOR_nothing
  96 #define gen_extzv(a,b,c,d) NULL_RTX
  97 #endif
  98
  99 /* Cost of various pieces of RTL.  Note that some of these are indexed by
 100    shift count and some by mode.  */
 101 static int zero_cost[2];
 102 static int add_cost[2][NUM_MACHINE_MODES];
 103 static int neg_cost[2][NUM_MACHINE_MODES];
 104 static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 107 static int mul_cost[2][NUM_MACHINE_MODES];
 108 static int sdiv_cost[2][NUM_MACHINE_MODES];
 109 static int udiv_cost[2][NUM_MACHINE_MODES];
 110 static int mul_widen_cost[2][NUM_MACHINE_MODES];
 111 static int mul_highpart_cost[2][NUM_MACHINE_MODES];
 112
 113 void
 114 init_expmed (void)
 115 {
 116   struct
 117   {
 118     struct rtx_def reg;         rtunion reg_fld[2];
 119     struct rtx_def plus;        rtunion plus_fld1;
 120     struct rtx_def neg;
 121     struct rtx_def mult;        rtunion mult_fld1;
 122     struct rtx_def sdiv;        rtunion sdiv_fld1;
 123     struct rtx_def udiv;        rtunion udiv_fld1;
 124     struct rtx_def zext;
 125     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 126     struct rtx_def smod_32;     rtunion smod_32_fld1;
 127     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 128     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 129     struct rtx_def wide_trunc;
 130     struct rtx_def shift;       rtunion shift_fld1;
 131     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 132     struct rtx_def shift_add;   rtunion shift_add_fld1;
 133     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 134   } all;
 135
 136   rtx pow2[MAX_BITS_PER_WORD];
 137   rtx cint[MAX_BITS_PER_WORD];
 138   int m, n;
 139   enum machine_mode mode, wider_mode;
 140   int speed;
 141
 142
 143   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 144     {
 145       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 146       cint[m] = GEN_INT (m);
 147     }
 148   memset (&all, 0, sizeof all);
 149
 150   PUT_CODE (&all.reg, REG);
 151   /* Avoid using hard regs in ways which may be unsupported.  */
 152   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 153
 154   PUT_CODE (&all.plus, PLUS);
 155   XEXP (&all.plus, 0) = &all.reg;
 156   XEXP (&all.plus, 1) = &all.reg;
 157
 158   PUT_CODE (&all.neg, NEG);
 159   XEXP (&all.neg, 0) = &all.reg;
 160
 161   PUT_CODE (&all.mult, MULT);
 162   XEXP (&all.mult, 0) = &all.reg;
 163   XEXP (&all.mult, 1) = &all.reg;
 164
 165   PUT_CODE (&all.sdiv, DIV);
 166   XEXP (&all.sdiv, 0) = &all.reg;
 167   XEXP (&all.sdiv, 1) = &all.reg;
 168
 169   PUT_CODE (&all.udiv, UDIV);
 170   XEXP (&all.udiv, 0) = &all.reg;
 171   XEXP (&all.udiv, 1) = &all.reg;
 172
 173   PUT_CODE (&all.sdiv_32, DIV);
 174   XEXP (&all.sdiv_32, 0) = &all.reg;
 175   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 176
 177   PUT_CODE (&all.smod_32, MOD);
 178   XEXP (&all.smod_32, 0) = &all.reg;
 179   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 180
 181   PUT_CODE (&all.zext, ZERO_EXTEND);
 182   XEXP (&all.zext, 0) = &all.reg;
 183
 184   PUT_CODE (&all.wide_mult, MULT);
 185   XEXP (&all.wide_mult, 0) = &all.zext;
 186   XEXP (&all.wide_mult, 1) = &all.zext;
 187
 188   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 189   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 190
 191   PUT_CODE (&all.wide_trunc, TRUNCATE);
 192   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 193
 194   PUT_CODE (&all.shift, ASHIFT);
 195   XEXP (&all.shift, 0) = &all.reg;
 196
 197   PUT_CODE (&all.shift_mult, MULT);
 198   XEXP (&all.shift_mult, 0) = &all.reg;
 199
 200   PUT_CODE (&all.shift_add, PLUS);
 201   XEXP (&all.shift_add, 0) = &all.shift_mult;
 202   XEXP (&all.shift_add, 1) = &all.reg;
 203
 204   PUT_CODE (&all.shift_sub, MINUS);
 205   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 206   XEXP (&all.shift_sub, 1) = &all.reg;
 207
 208   for (speed = 0; speed < 2; speed++)
 209     {
 210       crtl->maybe_hot_insn_p = speed;
 211       zero_cost[speed] = rtx_cost (const0_rtx, 0, speed);
 212
 213       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 214            mode != VOIDmode;
 215            mode = GET_MODE_WIDER_MODE (mode))
 216         {
 217           PUT_MODE (&all.reg, mode);
 218           PUT_MODE (&all.plus, mode);
 219           PUT_MODE (&all.neg, mode);
 220           PUT_MODE (&all.mult, mode);
 221           PUT_MODE (&all.sdiv, mode);
 222           PUT_MODE (&all.udiv, mode);
 223           PUT_MODE (&all.sdiv_32, mode);
 224           PUT_MODE (&all.smod_32, mode);
 225           PUT_MODE (&all.wide_trunc, mode);
 226           PUT_MODE (&all.shift, mode);
 227           PUT_MODE (&all.shift_mult, mode);
 228           PUT_MODE (&all.shift_add, mode);
 229           PUT_MODE (&all.shift_sub, mode);
 230
 231           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 232           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 233           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 234           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 235           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 236
 237           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 238                                           <= 2 * add_cost[speed][mode]);
 239           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 240                                           <= 4 * add_cost[speed][mode]);
 241
 242           wider_mode = GET_MODE_WIDER_MODE (mode);
 243           if (wider_mode != VOIDmode)
 244             {
 245               PUT_MODE (&all.zext, wider_mode);
 246               PUT_MODE (&all.wide_mult, wider_mode);
 247               PUT_MODE (&all.wide_lshr, wider_mode);
 248               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 249
 250               mul_widen_cost[speed][wider_mode]
 251                 = rtx_cost (&all.wide_mult, SET, speed);
 252               mul_highpart_cost[speed][mode]
 253                 = rtx_cost (&all.wide_trunc, SET, speed);
 254             }
 255
 256           shift_cost[speed][mode][0] = 0;
 257           shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
 258             = add_cost[speed][mode];
 259
 260           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 261           for (m = 1; m < n; m++)
 262             {
 263               XEXP (&all.shift, 1) = cint[m];
 264               XEXP (&all.shift_mult, 1) = pow2[m];
 265
 266               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 267               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 268               shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
 269             }
 270         }
 271     }
 272   default_rtl_profile ();
 273 }
 274
 275 /* Return an rtx representing minus the value of X.
 276    MODE is the intended mode of the result,
 277    useful if X is a CONST_INT.  */
 278
 279 rtx
 280 negate_rtx (enum machine_mode mode, rtx x)
 281 {
 282   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 283
 284   if (result == 0)
 285     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 286
 287   return result;
 288 }
 289
 290 /* Report on the availability of insv/extv/extzv and the desired mode
 291    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 292    is false; else the mode of the specified operand.  If OPNO is -1,
 293    all the caller cares about is whether the insn is available.  */
 294 enum machine_mode
 295 mode_for_extraction (enum extraction_pattern pattern, int opno)
 296 {
 297   const struct insn_data *data;
 298
 299   switch (pattern)
 300     {
 301     case EP_insv:
 302       if (HAVE_insv)
 303         {
 304           data = &insn_data[CODE_FOR_insv];
 305           break;
 306         }
 307       return MAX_MACHINE_MODE;
 308
 309     case EP_extv:
 310       if (HAVE_extv)
 311         {
 312           data = &insn_data[CODE_FOR_extv];
 313           break;
 314         }
 315       return MAX_MACHINE_MODE;
 316
 317     case EP_extzv:
 318       if (HAVE_extzv)
 319         {
 320           data = &insn_data[CODE_FOR_extzv];
 321           break;
 322         }
 323       return MAX_MACHINE_MODE;
 324
 325     default:
 326       gcc_unreachable ();
 327     }
 328
 329   if (opno == -1)
 330     return VOIDmode;
 331
 332   /* Everyone who uses this function used to follow it with
 333      if (result == VOIDmode) result = word_mode; */
 334   if (data->operand[opno].mode == VOIDmode)
 335     return word_mode;
 336   return data->operand[opno].mode;
 337 }
 338
 339 /* Return true if X, of mode MODE, matches the predicate for operand
 340    OPNO of instruction ICODE.  Allow volatile memories, regardless of
 341    the ambient volatile_ok setting.  */
 342
 343 static bool
 344 check_predicate_volatile_ok (enum insn_code icode, int opno,
 345                              rtx x, enum machine_mode mode)
 346 {
 347   bool save_volatile_ok, result;
 348
 349   save_volatile_ok = volatile_ok;
 350   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 351   volatile_ok = save_volatile_ok;
 352   return result;
 353 }
 354 \f
 355 /* A subroutine of store_bit_field, with the same arguments.  Return true
 356    if the operation could be implemented.
 357
 358    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 359    no other way of implementing the operation.  If FALLBACK_P is false,
 360    return false instead.  */
 361
 362 static bool
 363 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 364                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 365                    rtx value, bool fallback_p)
 366 {
 367   unsigned int unit
 368     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 369   unsigned HOST_WIDE_INT offset, bitpos;
 370   rtx op0 = str_rtx;
 371   int byte_offset;
 372   rtx orig_value;
 373
 374   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 375
 376   while (GET_CODE (op0) == SUBREG)
 377     {
 378       /* The following line once was done only if WORDS_BIG_ENDIAN,
 379          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 380          meaningful at a much higher level; when structures are copied
 381          between memory and regs, the higher-numbered regs
 382          always get higher addresses.  */
 383       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 384       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 385
 386       byte_offset = 0;
 387
 388       /* Paradoxical subregs need special handling on big endian machines.  */
 389       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 390         {
 391           int difference = inner_mode_size - outer_mode_size;
 392
 393           if (WORDS_BIG_ENDIAN)
 394             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 395           if (BYTES_BIG_ENDIAN)
 396             byte_offset += difference % UNITS_PER_WORD;
 397         }
 398       else
 399         byte_offset = SUBREG_BYTE (op0);
 400
 401       bitnum += byte_offset * BITS_PER_UNIT;
 402       op0 = SUBREG_REG (op0);
 403     }
 404
 405   /* No action is needed if the target is a register and if the field
 406      lies completely outside that register.  This can occur if the source
 407      code contains an out-of-bounds access to a small array.  */
 408   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 409     return true;
 410
 411   /* Use vec_set patterns for inserting parts of vectors whenever
 412      available.  */
 413   if (VECTOR_MODE_P (GET_MODE (op0))
 414       && !MEM_P (op0)
 415       && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
 416           != CODE_FOR_nothing)
 417       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 418       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 419       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 420     {
 421       enum machine_mode outermode = GET_MODE (op0);
 422       enum machine_mode innermode = GET_MODE_INNER (outermode);
 423       int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
 424       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 425       rtx rtxpos = GEN_INT (pos);
 426       rtx src = value;
 427       rtx dest = op0;
 428       rtx pat, seq;
 429       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 430       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 431       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 432
 433       start_sequence ();
 434
 435       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 436         src = copy_to_mode_reg (mode1, src);
 437
 438       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 439         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 440
 441       /* We could handle this, but we should always be called with a pseudo
 442          for our targets and all insns should take them as outputs.  */
 443       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 444                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 445                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 446       pat = GEN_FCN (icode) (dest, src, rtxpos);
 447       seq = get_insns ();
 448       end_sequence ();
 449       if (pat)
 450         {
 451           emit_insn (seq);
 452           emit_insn (pat);
 453           return true;
 454         }
 455     }
 456
 457   /* If the target is a register, overwriting the entire object, or storing
 458      a full-word or multi-word field can be done with just a SUBREG.
 459
 460      If the target is memory, storing any naturally aligned field can be
 461      done with a simple store.  For targets that support fast unaligned
 462      memory, any naturally sized, unit aligned field can be done directly.  */
 463
 464   offset = bitnum / unit;
 465   bitpos = bitnum % unit;
 466   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 467                 + (offset * UNITS_PER_WORD);
 468
 469   if (bitpos == 0
 470       && bitsize == GET_MODE_BITSIZE (fieldmode)
 471       && (!MEM_P (op0)
 472           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 473              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 474              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 475           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 476              || (offset * BITS_PER_UNIT % bitsize == 0
 477                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 478     {
 479       if (MEM_P (op0))
 480         op0 = adjust_address (op0, fieldmode, offset);
 481       else if (GET_MODE (op0) != fieldmode)
 482         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 483                                    byte_offset);
 484       emit_move_insn (op0, value);
 485       return true;
 486     }
 487
 488   /* Make sure we are playing with integral modes.  Pun with subregs
 489      if we aren't.  This must come after the entire register case above,
 490      since that case is valid for any mode.  The following cases are only
 491      valid for integral modes.  */
 492   {
 493     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 494     if (imode != GET_MODE (op0))
 495       {
 496         if (MEM_P (op0))
 497           op0 = adjust_address (op0, imode, 0);
 498         else
 499           {
 500             gcc_assert (imode != BLKmode);
 501             op0 = gen_lowpart (imode, op0);
 502           }
 503       }
 504   }
 505
 506   /* We may be accessing data outside the field, which means
 507      we can alias adjacent data.  */
 508   if (MEM_P (op0))
 509     {
 510       op0 = shallow_copy_rtx (op0);
 511       set_mem_alias_set (op0, 0);
 512       set_mem_expr (op0, 0);
 513     }
 514
 515   /* If OP0 is a register, BITPOS must count within a word.
 516      But as we have it, it counts within whatever size OP0 now has.
 517      On a bigendian machine, these are not the same, so convert.  */
 518   if (BYTES_BIG_ENDIAN
 519       && !MEM_P (op0)
 520       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 521     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 522
 523   /* Storing an lsb-aligned field in a register
 524      can be done with a movestrict instruction.  */
 525
 526   if (!MEM_P (op0)
 527       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 528       && bitsize == GET_MODE_BITSIZE (fieldmode)
 529       && (optab_handler (movstrict_optab, fieldmode)->insn_code
 530           != CODE_FOR_nothing))
 531     {
 532       int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
 533       rtx insn;
 534       rtx start = get_last_insn ();
 535
 536       /* Get appropriate low part of the value being stored.  */
 537       if (GET_CODE (value) == CONST_INT || REG_P (value))
 538         value = gen_lowpart (fieldmode, value);
 539       else if (!(GET_CODE (value) == SYMBOL_REF
 540                  || GET_CODE (value) == LABEL_REF
 541                  || GET_CODE (value) == CONST))
 542         value = convert_to_mode (fieldmode, value, 0);
 543
 544       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 545         value = copy_to_mode_reg (fieldmode, value);
 546
 547       if (GET_CODE (op0) == SUBREG)
 548         {
 549           /* Else we've got some float mode source being extracted into
 550              a different float mode destination -- this combination of
 551              subregs results in Severe Tire Damage.  */
 552           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 553                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 554                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 555           op0 = SUBREG_REG (op0);
 556         }
 557
 558       insn = (GEN_FCN (icode)
 559                  (gen_rtx_SUBREG (fieldmode, op0,
 560                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 561                                   + (offset * UNITS_PER_WORD)),
 562                                   value));
 563       if (insn)
 564         {
 565           emit_insn (insn);
 566           return true;
 567         }
 568       delete_insns_since (start);
 569     }
 570
 571   /* Handle fields bigger than a word.  */
 572
 573   if (bitsize > BITS_PER_WORD)
 574     {
 575       /* Here we transfer the words of the field
 576          in the order least significant first.
 577          This is because the most significant word is the one which may
 578          be less than full.
 579          However, only do that if the value is not BLKmode.  */
 580
 581       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 582       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 583       unsigned int i;
 584       rtx last;
 585
 586       /* This is the mode we must force value to, so that there will be enough
 587          subwords to extract.  Note that fieldmode will often (always?) be
 588          VOIDmode, because that is what store_field uses to indicate that this
 589          is a bit field, but passing VOIDmode to operand_subword_force
 590          is not allowed.  */
 591       fieldmode = GET_MODE (value);
 592       if (fieldmode == VOIDmode)
 593         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 594
 595       last = get_last_insn ();
 596       for (i = 0; i < nwords; i++)
 597         {
 598           /* If I is 0, use the low-order word in both field and target;
 599              if I is 1, use the next to lowest word; and so on.  */
 600           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 601           unsigned int bit_offset = (backwards
 602                                      ? MAX ((int) bitsize - ((int) i + 1)
 603                                             * BITS_PER_WORD,
 604                                             0)
 605                                      : (int) i * BITS_PER_WORD);
 606           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 607
 608           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 609                                             bitsize - i * BITS_PER_WORD),
 610                                   bitnum + bit_offset, word_mode,
 611                                   value_word, fallback_p))
 612             {
 613               delete_insns_since (last);
 614               return false;
 615             }
 616         }
 617       return true;
 618     }
 619
 620   /* From here on we can assume that the field to be stored in is
 621      a full-word (whatever type that is), since it is shorter than a word.  */
 622
 623   /* OFFSET is the number of words or bytes (UNIT says which)
 624      from STR_RTX to the first word or byte containing part of the field.  */
 625
 626   if (!MEM_P (op0))
 627     {
 628       if (offset != 0
 629           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 630         {
 631           if (!REG_P (op0))
 632             {
 633               /* Since this is a destination (lvalue), we can't copy
 634                  it to a pseudo.  We can remove a SUBREG that does not
 635                  change the size of the operand.  Such a SUBREG may
 636                  have been added above.  */
 637               gcc_assert (GET_CODE (op0) == SUBREG
 638                           && (GET_MODE_SIZE (GET_MODE (op0))
 639                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 640               op0 = SUBREG_REG (op0);
 641             }
 642           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 643                                 op0, (offset * UNITS_PER_WORD));
 644         }
 645       offset = 0;
 646     }
 647
 648   /* If VALUE has a floating-point or complex mode, access it as an
 649      integer of the corresponding size.  This can occur on a machine
 650      with 64 bit registers that uses SFmode for float.  It can also
 651      occur for unaligned float or complex fields.  */
 652   orig_value = value;
 653   if (GET_MODE (value) != VOIDmode
 654       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 655       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 656     {
 657       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 658       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 659     }
 660
 661   /* Now OFFSET is nonzero only if OP0 is memory
 662      and is therefore always measured in bytes.  */
 663
 664   if (HAVE_insv
 665       && GET_MODE (value) != BLKmode
 666       && bitsize > 0
 667       && GET_MODE_BITSIZE (op_mode) >= bitsize
 668       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 669             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 670       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 671                                                         VOIDmode)
 672       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 673     {
 674       int xbitpos = bitpos;
 675       rtx value1;
 676       rtx xop0 = op0;
 677       rtx last = get_last_insn ();
 678       rtx pat;
 679
 680       /* Add OFFSET into OP0's address.  */
 681       if (MEM_P (xop0))
 682         xop0 = adjust_address (xop0, byte_mode, offset);
 683
 684       /* If xop0 is a register, we need it in OP_MODE
 685          to make it acceptable to the format of insv.  */
 686       if (GET_CODE (xop0) == SUBREG)
 687         /* We can't just change the mode, because this might clobber op0,
 688            and we will need the original value of op0 if insv fails.  */
 689         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 690       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 691         xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
 692
 693       /* On big-endian machines, we count bits from the most significant.
 694          If the bit field insn does not, we must invert.  */
 695
 696       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 697         xbitpos = unit - bitsize - xbitpos;
 698
 699       /* We have been counting XBITPOS within UNIT.
 700          Count instead within the size of the register.  */
 701       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 702         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 703
 704       unit = GET_MODE_BITSIZE (op_mode);
 705
 706       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 707       value1 = value;
 708       if (GET_MODE (value) != op_mode)
 709         {
 710           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 711             {
 712               /* Optimization: Don't bother really extending VALUE
 713                  if it has all the bits we will actually use.  However,
 714                  if we must narrow it, be sure we do it correctly.  */
 715
 716               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 717                 {
 718                   rtx tmp;
 719
 720                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 721                   if (! tmp)
 722                     tmp = simplify_gen_subreg (op_mode,
 723                                                force_reg (GET_MODE (value),
 724                                                           value1),
 725                                                GET_MODE (value), 0);
 726                   value1 = tmp;
 727                 }
 728               else
 729                 value1 = gen_lowpart (op_mode, value1);
 730             }
 731           else if (GET_CODE (value) == CONST_INT)
 732             value1 = gen_int_mode (INTVAL (value), op_mode);
 733           else
 734             /* Parse phase is supposed to make VALUE's data type
 735                match that of the component reference, which is a type
 736                at least as wide as the field; so VALUE should have
 737                a mode that corresponds to that type.  */
 738             gcc_assert (CONSTANT_P (value));
 739         }
 740
 741       /* If this machine's insv insists on a register,
 742          get VALUE1 into a register.  */
 743       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 744              (value1, op_mode)))
 745         value1 = force_reg (op_mode, value1);
 746
 747       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 748       if (pat)
 749         {
 750           emit_insn (pat);
 751           return true;
 752         }
 753       delete_insns_since (last);
 754     }
 755
 756   /* If OP0 is a memory, try copying it to a register and seeing if a
 757      cheap register alternative is available.  */
 758   if (HAVE_insv && MEM_P (op0))
 759     {
 760       enum machine_mode bestmode;
 761
 762       /* Get the mode to use for inserting into this field.  If OP0 is
 763          BLKmode, get the smallest mode consistent with the alignment. If
 764          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 765          mode. Otherwise, use the smallest mode containing the field.  */
 766
 767       if (GET_MODE (op0) == BLKmode
 768           || (op_mode != MAX_MACHINE_MODE
 769               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 770         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 771                                   (op_mode == MAX_MACHINE_MODE
 772                                    ? VOIDmode : op_mode),
 773                                   MEM_VOLATILE_P (op0));
 774       else
 775         bestmode = GET_MODE (op0);
 776
 777       if (bestmode != VOIDmode
 778           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 779           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 780                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 781         {
 782           rtx last, tempreg, xop0;
 783           unsigned HOST_WIDE_INT xoffset, xbitpos;
 784
 785           last = get_last_insn ();
 786
 787           /* Adjust address to point to the containing unit of
 788              that mode.  Compute the offset as a multiple of this unit,
 789              counting in bytes.  */
 790           unit = GET_MODE_BITSIZE (bestmode);
 791           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 792           xbitpos = bitnum % unit;
 793           xop0 = adjust_address (op0, bestmode, xoffset);
 794
 795           /* Fetch that unit, store the bitfield in it, then store
 796              the unit.  */
 797           tempreg = copy_to_reg (xop0);
 798           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 799                                  fieldmode, orig_value, false))
 800             {
 801               emit_move_insn (xop0, tempreg);
 802               return true;
 803             }
 804           delete_insns_since (last);
 805         }
 806     }
 807
 808   if (!fallback_p)
 809     return false;
 810
 811   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 812   return true;
 813 }
 814
 815 /* Generate code to store value from rtx VALUE
 816    into a bit-field within structure STR_RTX
 817    containing BITSIZE bits starting at bit BITNUM.
 818    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 819
 820 void
 821 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 822                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 823                  rtx value)
 824 {
 825   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 826     gcc_unreachable ();
 827 }
 828 \f
 829 /* Use shifts and boolean operations to store VALUE
 830    into a bit field of width BITSIZE
 831    in a memory location specified by OP0 except offset by OFFSET bytes.
 832      (OFFSET must be 0 if OP0 is a register.)
 833    The field starts at position BITPOS within the byte.
 834     (If OP0 is a register, it may be a full word or a narrower mode,
 835      but BITPOS still counts within a full word,
 836      which is significant on bigendian machines.)  */
 837
 838 static void
 839 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 840                        unsigned HOST_WIDE_INT bitsize,
 841                        unsigned HOST_WIDE_INT bitpos, rtx value)
 842 {
 843   enum machine_mode mode;
 844   unsigned int total_bits = BITS_PER_WORD;
 845   rtx temp;
 846   int all_zero = 0;
 847   int all_one = 0;
 848
 849   /* There is a case not handled here:
 850      a structure with a known alignment of just a halfword
 851      and a field split across two aligned halfwords within the structure.
 852      Or likewise a structure with a known alignment of just a byte
 853      and a field split across two bytes.
 854      Such cases are not supposed to be able to occur.  */
 855
 856   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 857     {
 858       gcc_assert (!offset);
 859       /* Special treatment for a bit field split across two registers.  */
 860       if (bitsize + bitpos > BITS_PER_WORD)
 861         {
 862           store_split_bit_field (op0, bitsize, bitpos, value);
 863           return;
 864         }
 865     }
 866   else
 867     {
 868       /* Get the proper mode to use for this field.  We want a mode that
 869          includes the entire field.  If such a mode would be larger than
 870          a word, we won't be doing the extraction the normal way.
 871          We don't want a mode bigger than the destination.  */
 872
 873       mode = GET_MODE (op0);
 874       if (GET_MODE_BITSIZE (mode) == 0
 875           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 876         mode = word_mode;
 877       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 878                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 879
 880       if (mode == VOIDmode)
 881         {
 882           /* The only way this should occur is if the field spans word
 883              boundaries.  */
 884           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 885                                  value);
 886           return;
 887         }
 888
 889       total_bits = GET_MODE_BITSIZE (mode);
 890
 891       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 892          be in the range 0 to total_bits-1, and put any excess bytes in
 893          OFFSET.  */
 894       if (bitpos >= total_bits)
 895         {
 896           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 897           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 898                      * BITS_PER_UNIT);
 899         }
 900
 901       /* Get ref to an aligned byte, halfword, or word containing the field.
 902          Adjust BITPOS to be position within a word,
 903          and OFFSET to be the offset of that word.
 904          Then alter OP0 to refer to that word.  */
 905       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 906       offset -= (offset % (total_bits / BITS_PER_UNIT));
 907       op0 = adjust_address (op0, mode, offset);
 908     }
 909
 910   mode = GET_MODE (op0);
 911
 912   /* Now MODE is either some integral mode for a MEM as OP0,
 913      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 914      The bit field is contained entirely within OP0.
 915      BITPOS is the starting bit number within OP0.
 916      (OP0's mode may actually be narrower than MODE.)  */
 917
 918   if (BYTES_BIG_ENDIAN)
 919       /* BITPOS is the distance between our msb
 920          and that of the containing datum.
 921          Convert it to the distance from the lsb.  */
 922       bitpos = total_bits - bitsize - bitpos;
 923
 924   /* Now BITPOS is always the distance between our lsb
 925      and that of OP0.  */
 926
 927   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 928      we must first convert its mode to MODE.  */
 929
 930   if (GET_CODE (value) == CONST_INT)
 931     {
 932       HOST_WIDE_INT v = INTVAL (value);
 933
 934       if (bitsize < HOST_BITS_PER_WIDE_INT)
 935         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 936
 937       if (v == 0)
 938         all_zero = 1;
 939       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 940                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 941                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 942         all_one = 1;
 943
 944       value = lshift_value (mode, value, bitpos, bitsize);
 945     }
 946   else
 947     {
 948       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 949                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 950
 951       if (GET_MODE (value) != mode)
 952         {
 953           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 954               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 955             value = gen_lowpart (mode, value);
 956           else
 957             value = convert_to_mode (mode, value, 1);
 958         }
 959
 960       if (must_and)
 961         value = expand_binop (mode, and_optab, value,
 962                               mask_rtx (mode, 0, bitsize, 0),
 963                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 964       if (bitpos > 0)
 965         value = expand_shift (LSHIFT_EXPR, mode, value,
 966                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 967     }
 968
 969   /* Now clear the chosen bits in OP0,
 970      except that if VALUE is -1 we need not bother.  */
 971   /* We keep the intermediates in registers to allow CSE to combine
 972      consecutive bitfield assignments.  */
 973
 974   temp = force_reg (mode, op0);
 975
 976   if (! all_one)
 977     {
 978       temp = expand_binop (mode, and_optab, temp,
 979                            mask_rtx (mode, bitpos, bitsize, 1),
 980                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 981       temp = force_reg (mode, temp);
 982     }
 983
 984   /* Now logical-or VALUE into OP0, unless it is zero.  */
 985
 986   if (! all_zero)
 987     {
 988       temp = expand_binop (mode, ior_optab, temp, value,
 989                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 990       temp = force_reg (mode, temp);
 991     }
 992
 993   if (op0 != temp)
 994     {
 995       op0 = copy_rtx (op0);
 996       emit_move_insn (op0, temp);
 997     }
 998 }
 999 \f
1000 /* Store a bit field that is split across multiple accessible memory objects.
1001
1002    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1003    BITSIZE is the field width; BITPOS the position of its first bit
1004    (within the word).
1005    VALUE is the value to store.
1006
1007    This does not yet handle fields wider than BITS_PER_WORD.  */
1008
1009 static void
1010 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1011                        unsigned HOST_WIDE_INT bitpos, rtx value)
1012 {
1013   unsigned int unit;
1014   unsigned int bitsdone = 0;
1015
1016   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1017      much at a time.  */
1018   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1019     unit = BITS_PER_WORD;
1020   else
1021     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1022
1023   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1024      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1025      that VALUE might be a floating-point constant.  */
1026   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
1027     {
1028       rtx word = gen_lowpart_common (word_mode, value);
1029
1030       if (word && (value != word))
1031         value = word;
1032       else
1033         value = gen_lowpart_common (word_mode,
1034                                     force_reg (GET_MODE (value) != VOIDmode
1035                                                ? GET_MODE (value)
1036                                                : word_mode, value));
1037     }
1038
1039   while (bitsdone < bitsize)
1040     {
1041       unsigned HOST_WIDE_INT thissize;
1042       rtx part, word;
1043       unsigned HOST_WIDE_INT thispos;
1044       unsigned HOST_WIDE_INT offset;
1045
1046       offset = (bitpos + bitsdone) / unit;
1047       thispos = (bitpos + bitsdone) % unit;
1048
1049       /* THISSIZE must not overrun a word boundary.  Otherwise,
1050          store_fixed_bit_field will call us again, and we will mutually
1051          recurse forever.  */
1052       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1053       thissize = MIN (thissize, unit - thispos);
1054
1055       if (BYTES_BIG_ENDIAN)
1056         {
1057           int total_bits;
1058
1059           /* We must do an endian conversion exactly the same way as it is
1060              done in extract_bit_field, so that the two calls to
1061              extract_fixed_bit_field will have comparable arguments.  */
1062           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1063             total_bits = BITS_PER_WORD;
1064           else
1065             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1066
1067           /* Fetch successively less significant portions.  */
1068           if (GET_CODE (value) == CONST_INT)
1069             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1070                              >> (bitsize - bitsdone - thissize))
1071                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1072           else
1073             /* The args are chosen so that the last part includes the
1074                lsb.  Give extract_bit_field the value it needs (with
1075                endianness compensation) to fetch the piece we want.  */
1076             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1077                                             total_bits - bitsize + bitsdone,
1078                                             NULL_RTX, 1);
1079         }
1080       else
1081         {
1082           /* Fetch successively more significant portions.  */
1083           if (GET_CODE (value) == CONST_INT)
1084             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1085                              >> bitsdone)
1086                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1087           else
1088             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1089                                             bitsdone, NULL_RTX, 1);
1090         }
1091
1092       /* If OP0 is a register, then handle OFFSET here.
1093
1094          When handling multiword bitfields, extract_bit_field may pass
1095          down a word_mode SUBREG of a larger REG for a bitfield that actually
1096          crosses a word boundary.  Thus, for a SUBREG, we must find
1097          the current word starting from the base register.  */
1098       if (GET_CODE (op0) == SUBREG)
1099         {
1100           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1101           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1102                                         GET_MODE (SUBREG_REG (op0)));
1103           offset = 0;
1104         }
1105       else if (REG_P (op0))
1106         {
1107           word = operand_subword_force (op0, offset, GET_MODE (op0));
1108           offset = 0;
1109         }
1110       else
1111         word = op0;
1112
1113       /* OFFSET is in UNITs, and UNIT is in bits.
1114          store_fixed_bit_field wants offset in bytes.  */
1115       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1116                              thispos, part);
1117       bitsdone += thissize;
1118     }
1119 }
1120 \f
1121 /* A subroutine of extract_bit_field_1 that converts return value X
1122    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1123    to extract_bit_field.  */
1124
1125 static rtx
1126 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1127                              enum machine_mode tmode, bool unsignedp)
1128 {
1129   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1130     return x;
1131
1132   /* If the x mode is not a scalar integral, first convert to the
1133      integer mode of that size and then access it as a floating-point
1134      value via a SUBREG.  */
1135   if (!SCALAR_INT_MODE_P (tmode))
1136     {
1137       enum machine_mode smode;
1138
1139       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1140       x = convert_to_mode (smode, x, unsignedp);
1141       x = force_reg (smode, x);
1142       return gen_lowpart (tmode, x);
1143     }
1144
1145   return convert_to_mode (tmode, x, unsignedp);
1146 }
1147
1148 /* A subroutine of extract_bit_field, with the same arguments.
1149    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1150    if we can find no other means of implementing the operation.
1151    if FALLBACK_P is false, return NULL instead.  */
1152
1153 static rtx
1154 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1155                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1156                      enum machine_mode mode, enum machine_mode tmode,
1157                      bool fallback_p)
1158 {
1159   unsigned int unit
1160     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1161   unsigned HOST_WIDE_INT offset, bitpos;
1162   rtx op0 = str_rtx;
1163   enum machine_mode int_mode;
1164   enum machine_mode ext_mode;
1165   enum machine_mode mode1;
1166   enum insn_code icode;
1167   int byte_offset;
1168
1169   if (tmode == VOIDmode)
1170     tmode = mode;
1171
1172   while (GET_CODE (op0) == SUBREG)
1173     {
1174       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1175       op0 = SUBREG_REG (op0);
1176     }
1177
1178   /* If we have an out-of-bounds access to a register, just return an
1179      uninitialized register of the required mode.  This can occur if the
1180      source code contains an out-of-bounds access to a small array.  */
1181   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1182     return gen_reg_rtx (tmode);
1183
1184   if (REG_P (op0)
1185       && mode == GET_MODE (op0)
1186       && bitnum == 0
1187       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1188     {
1189       /* We're trying to extract a full register from itself.  */
1190       return op0;
1191     }
1192
1193   /* See if we can get a better vector mode before extracting.  */
1194   if (VECTOR_MODE_P (GET_MODE (op0))
1195       && !MEM_P (op0)
1196       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1197     {
1198       enum machine_mode new_mode;
1199       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1200
1201       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1202         new_mode = MIN_MODE_VECTOR_FLOAT;
1203       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1204         new_mode = MIN_MODE_VECTOR_FRACT;
1205       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1206         new_mode = MIN_MODE_VECTOR_UFRACT;
1207       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1208         new_mode = MIN_MODE_VECTOR_ACCUM;
1209       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1210         new_mode = MIN_MODE_VECTOR_UACCUM;
1211       else
1212         new_mode = MIN_MODE_VECTOR_INT;
1213
1214       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1215         if (GET_MODE_NUNITS (new_mode) == nunits
1216             && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1217             && targetm.vector_mode_supported_p (new_mode))
1218           break;
1219       if (new_mode != VOIDmode)
1220         op0 = gen_lowpart (new_mode, op0);
1221     }
1222
1223   /* Use vec_extract patterns for extracting parts of vectors whenever
1224      available.  */
1225   if (VECTOR_MODE_P (GET_MODE (op0))
1226       && !MEM_P (op0)
1227       && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1228           != CODE_FOR_nothing)
1229       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1230           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1231     {
1232       enum machine_mode outermode = GET_MODE (op0);
1233       enum machine_mode innermode = GET_MODE_INNER (outermode);
1234       int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1235       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1236       rtx rtxpos = GEN_INT (pos);
1237       rtx src = op0;
1238       rtx dest = NULL, pat, seq;
1239       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1240       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1241       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1242
1243       if (innermode == tmode || innermode == mode)
1244         dest = target;
1245
1246       if (!dest)
1247         dest = gen_reg_rtx (innermode);
1248
1249       start_sequence ();
1250
1251       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1252         dest = copy_to_mode_reg (mode0, dest);
1253
1254       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1255         src = copy_to_mode_reg (mode1, src);
1256
1257       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1258         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1259
1260       /* We could handle this, but we should always be called with a pseudo
1261          for our targets and all insns should take them as outputs.  */
1262       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1263                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1264                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1265
1266       pat = GEN_FCN (icode) (dest, src, rtxpos);
1267       seq = get_insns ();
1268       end_sequence ();
1269       if (pat)
1270         {
1271           emit_insn (seq);
1272           emit_insn (pat);
1273           if (mode0 != mode)
1274             return gen_lowpart (tmode, dest);
1275           return dest;
1276         }
1277     }
1278
1279   /* Make sure we are playing with integral modes.  Pun with subregs
1280      if we aren't.  */
1281   {
1282     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1283     if (imode != GET_MODE (op0))
1284       {
1285         if (MEM_P (op0))
1286           op0 = adjust_address (op0, imode, 0);
1287         else
1288           {
1289             gcc_assert (imode != BLKmode);
1290             op0 = gen_lowpart (imode, op0);
1291
1292             /* If we got a SUBREG, force it into a register since we
1293                aren't going to be able to do another SUBREG on it.  */
1294             if (GET_CODE (op0) == SUBREG)
1295               op0 = force_reg (imode, op0);
1296           }
1297       }
1298   }
1299
1300   /* We may be accessing data outside the field, which means
1301      we can alias adjacent data.  */
1302   if (MEM_P (op0))
1303     {
1304       op0 = shallow_copy_rtx (op0);
1305       set_mem_alias_set (op0, 0);
1306       set_mem_expr (op0, 0);
1307     }
1308
1309   /* Extraction of a full-word or multi-word value from a structure
1310      in a register or aligned memory can be done with just a SUBREG.
1311      A subword value in the least significant part of a register
1312      can also be extracted with a SUBREG.  For this, we need the
1313      byte offset of the value in op0.  */
1314
1315   bitpos = bitnum % unit;
1316   offset = bitnum / unit;
1317   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1318
1319   /* If OP0 is a register, BITPOS must count within a word.
1320      But as we have it, it counts within whatever size OP0 now has.
1321      On a bigendian machine, these are not the same, so convert.  */
1322   if (BYTES_BIG_ENDIAN
1323       && !MEM_P (op0)
1324       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1325     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1326
1327   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1328      If that's wrong, the solution is to test for it and set TARGET to 0
1329      if needed.  */
1330
1331   /* Only scalar integer modes can be converted via subregs.  There is an
1332      additional problem for FP modes here in that they can have a precision
1333      which is different from the size.  mode_for_size uses precision, but
1334      we want a mode based on the size, so we must avoid calling it for FP
1335      modes.  */
1336   mode1  = (SCALAR_INT_MODE_P (tmode)
1337             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1338             : mode);
1339
1340   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1341         && bitpos % BITS_PER_WORD == 0)
1342        || (mode1 != BLKmode
1343            /* ??? The big endian test here is wrong.  This is correct
1344               if the value is in a register, and if mode_for_size is not
1345               the same mode as op0.  This causes us to get unnecessarily
1346               inefficient code from the Thumb port when -mbig-endian.  */
1347            && (BYTES_BIG_ENDIAN
1348                ? bitpos + bitsize == BITS_PER_WORD
1349                : bitpos == 0)))
1350       && ((!MEM_P (op0)
1351            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1352                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1353            && GET_MODE_SIZE (mode1) != 0
1354            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1355           || (MEM_P (op0)
1356               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1357                   || (offset * BITS_PER_UNIT % bitsize == 0
1358                       && MEM_ALIGN (op0) % bitsize == 0)))))
1359     {
1360       if (MEM_P (op0))
1361         op0 = adjust_address (op0, mode1, offset);
1362       else if (mode1 != GET_MODE (op0))
1363         {
1364           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1365                                          byte_offset);
1366           if (sub == NULL)
1367             goto no_subreg_mode_swap;
1368           op0 = sub;
1369         }
1370       if (mode1 != mode)
1371         return convert_to_mode (tmode, op0, unsignedp);
1372       return op0;
1373     }
1374  no_subreg_mode_swap:
1375
1376   /* Handle fields bigger than a word.  */
1377
1378   if (bitsize > BITS_PER_WORD)
1379     {
1380       /* Here we transfer the words of the field
1381          in the order least significant first.
1382          This is because the most significant word is the one which may
1383          be less than full.  */
1384
1385       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1386       unsigned int i;
1387
1388       if (target == 0 || !REG_P (target))
1389         target = gen_reg_rtx (mode);
1390
1391       /* Indicate for flow that the entire target reg is being set.  */
1392       emit_clobber (target);
1393
1394       for (i = 0; i < nwords; i++)
1395         {
1396           /* If I is 0, use the low-order word in both field and target;
1397              if I is 1, use the next to lowest word; and so on.  */
1398           /* Word number in TARGET to use.  */
1399           unsigned int wordnum
1400             = (WORDS_BIG_ENDIAN
1401                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1402                : i);
1403           /* Offset from start of field in OP0.  */
1404           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1405                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1406                                                 * (int) BITS_PER_WORD))
1407                                      : (int) i * BITS_PER_WORD);
1408           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1409           rtx result_part
1410             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1411                                            bitsize - i * BITS_PER_WORD),
1412                                  bitnum + bit_offset, 1, target_part, mode,
1413                                  word_mode);
1414
1415           gcc_assert (target_part);
1416
1417           if (result_part != target_part)
1418             emit_move_insn (target_part, result_part);
1419         }
1420
1421       if (unsignedp)
1422         {
1423           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1424              need to be zero'd out.  */
1425           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1426             {
1427               unsigned int i, total_words;
1428
1429               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1430               for (i = nwords; i < total_words; i++)
1431                 emit_move_insn
1432                   (operand_subword (target,
1433                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1434                                     1, VOIDmode),
1435                    const0_rtx);
1436             }
1437           return target;
1438         }
1439
1440       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1441       target = expand_shift (LSHIFT_EXPR, mode, target,
1442                              build_int_cst (NULL_TREE,
1443                                             GET_MODE_BITSIZE (mode) - bitsize),
1444                              NULL_RTX, 0);
1445       return expand_shift (RSHIFT_EXPR, mode, target,
1446                            build_int_cst (NULL_TREE,
1447                                           GET_MODE_BITSIZE (mode) - bitsize),
1448                            NULL_RTX, 0);
1449     }
1450
1451   /* From here on we know the desired field is smaller than a word.  */
1452
1453   /* Check if there is a correspondingly-sized integer field, so we can
1454      safely extract it as one size of integer, if necessary; then
1455      truncate or extend to the size that is wanted; then use SUBREGs or
1456      convert_to_mode to get one of the modes we really wanted.  */
1457
1458   int_mode = int_mode_for_mode (tmode);
1459   if (int_mode == BLKmode)
1460     int_mode = int_mode_for_mode (mode);
1461   /* Should probably push op0 out to memory and then do a load.  */
1462   gcc_assert (int_mode != BLKmode);
1463
1464   /* OFFSET is the number of words or bytes (UNIT says which)
1465      from STR_RTX to the first word or byte containing part of the field.  */
1466   if (!MEM_P (op0))
1467     {
1468       if (offset != 0
1469           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1470         {
1471           if (!REG_P (op0))
1472             op0 = copy_to_reg (op0);
1473           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1474                                 op0, (offset * UNITS_PER_WORD));
1475         }
1476       offset = 0;
1477     }
1478
1479   /* Now OFFSET is nonzero only for memory operands.  */
1480   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1481   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1482   if (ext_mode != MAX_MACHINE_MODE
1483       && bitsize > 0
1484       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1485       /* If op0 is a register, we need it in EXT_MODE to make it
1486          acceptable to the format of ext(z)v.  */
1487       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1488       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1489            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1490       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1491     {
1492       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1493       rtx bitsize_rtx, bitpos_rtx;
1494       rtx last = get_last_insn ();
1495       rtx xop0 = op0;
1496       rtx xtarget = target;
1497       rtx xspec_target = target;
1498       rtx xspec_target_subreg = 0;
1499       rtx pat;
1500
1501       /* If op0 is a register, we need it in EXT_MODE to make it
1502          acceptable to the format of ext(z)v.  */
1503       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1504         xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
1505       if (MEM_P (xop0))
1506         /* Get ref to first byte containing part of the field.  */
1507         xop0 = adjust_address (xop0, byte_mode, xoffset);
1508
1509       /* On big-endian machines, we count bits from the most significant.
1510          If the bit field insn does not, we must invert.  */
1511       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1512         xbitpos = unit - bitsize - xbitpos;
1513
1514       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1515       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1516         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1517
1518       unit = GET_MODE_BITSIZE (ext_mode);
1519
1520       if (xtarget == 0)
1521         xtarget = xspec_target = gen_reg_rtx (tmode);
1522
1523       if (GET_MODE (xtarget) != ext_mode)
1524         {
1525           if (REG_P (xtarget))
1526             {
1527               xtarget = gen_lowpart (ext_mode, xtarget);
1528               if (GET_MODE_SIZE (ext_mode)
1529                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1530                 xspec_target_subreg = xtarget;
1531             }
1532           else
1533             xtarget = gen_reg_rtx (ext_mode);
1534         }
1535
1536       /* If this machine's ext(z)v insists on a register target,
1537          make sure we have one.  */
1538       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1539         xtarget = gen_reg_rtx (ext_mode);
1540
1541       bitsize_rtx = GEN_INT (bitsize);
1542       bitpos_rtx = GEN_INT (xbitpos);
1543
1544       pat = (unsignedp
1545              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1546              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1547       if (pat)
1548         {
1549           emit_insn (pat);
1550           if (xtarget == xspec_target)
1551             return xtarget;
1552           if (xtarget == xspec_target_subreg)
1553             return xspec_target;
1554           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1555         }
1556       delete_insns_since (last);
1557     }
1558
1559   /* If OP0 is a memory, try copying it to a register and seeing if a
1560      cheap register alternative is available.  */
1561   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1562     {
1563       enum machine_mode bestmode;
1564
1565       /* Get the mode to use for inserting into this field.  If
1566          OP0 is BLKmode, get the smallest mode consistent with the
1567          alignment. If OP0 is a non-BLKmode object that is no
1568          wider than EXT_MODE, use its mode. Otherwise, use the
1569          smallest mode containing the field.  */
1570
1571       if (GET_MODE (op0) == BLKmode
1572           || (ext_mode != MAX_MACHINE_MODE
1573               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1574         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1575                                   (ext_mode == MAX_MACHINE_MODE
1576                                    ? VOIDmode : ext_mode),
1577                                   MEM_VOLATILE_P (op0));
1578       else
1579         bestmode = GET_MODE (op0);
1580
1581       if (bestmode != VOIDmode
1582           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1583                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1584         {
1585           unsigned HOST_WIDE_INT xoffset, xbitpos;
1586
1587           /* Compute the offset as a multiple of this unit,
1588              counting in bytes.  */
1589           unit = GET_MODE_BITSIZE (bestmode);
1590           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1591           xbitpos = bitnum % unit;
1592
1593           /* Make sure the register is big enough for the whole field.  */
1594           if (xoffset * BITS_PER_UNIT + unit
1595               >= offset * BITS_PER_UNIT + bitsize)
1596             {
1597               rtx last, result, xop0;
1598
1599               last = get_last_insn ();
1600
1601               /* Fetch it to a register in that size.  */
1602               xop0 = adjust_address (op0, bestmode, xoffset);
1603               xop0 = force_reg (bestmode, xop0);
1604               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1605                                             unsignedp, target,
1606                                             mode, tmode, false);
1607               if (result)
1608                 return result;
1609
1610               delete_insns_since (last);
1611             }
1612         }
1613     }
1614
1615   if (!fallback_p)
1616     return NULL;
1617
1618   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1619                                     bitpos, target, unsignedp);
1620   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1621 }
1622
1623 /* Generate code to extract a byte-field from STR_RTX
1624    containing BITSIZE bits, starting at BITNUM,
1625    and put it in TARGET if possible (if TARGET is nonzero).
1626    Regardless of TARGET, we return the rtx for where the value is placed.
1627
1628    STR_RTX is the structure containing the byte (a REG or MEM).
1629    UNSIGNEDP is nonzero if this is an unsigned bit field.
1630    MODE is the natural mode of the field value once extracted.
1631    TMODE is the mode the caller would like the value to have;
1632    but the value may be returned with type MODE instead.
1633
1634    If a TARGET is specified and we can store in it at no extra cost,
1635    we do so, and return TARGET.
1636    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1637    if they are equally easy.  */
1638
1639 rtx
1640 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1641                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1642                    enum machine_mode mode, enum machine_mode tmode)
1643 {
1644   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1645                               target, mode, tmode, true);
1646 }
1647 \f
1648 /* Extract a bit field using shifts and boolean operations
1649    Returns an rtx to represent the value.
1650    OP0 addresses a register (word) or memory (byte).
1651    BITPOS says which bit within the word or byte the bit field starts in.
1652    OFFSET says how many bytes farther the bit field starts;
1653     it is 0 if OP0 is a register.
1654    BITSIZE says how many bits long the bit field is.
1655     (If OP0 is a register, it may be narrower than a full word,
1656      but BITPOS still counts within a full word,
1657      which is significant on bigendian machines.)
1658
1659    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1660    If TARGET is nonzero, attempts to store the value there
1661    and return TARGET, but this is not guaranteed.
1662    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1663
1664 static rtx
1665 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1666                          unsigned HOST_WIDE_INT offset,
1667                          unsigned HOST_WIDE_INT bitsize,
1668                          unsigned HOST_WIDE_INT bitpos, rtx target,
1669                          int unsignedp)
1670 {
1671   unsigned int total_bits = BITS_PER_WORD;
1672   enum machine_mode mode;
1673
1674   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1675     {
1676       /* Special treatment for a bit field split across two registers.  */
1677       if (bitsize + bitpos > BITS_PER_WORD)
1678         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1679     }
1680   else
1681     {
1682       /* Get the proper mode to use for this field.  We want a mode that
1683          includes the entire field.  If such a mode would be larger than
1684          a word, we won't be doing the extraction the normal way.  */
1685
1686       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1687                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1688
1689       if (mode == VOIDmode)
1690         /* The only way this should occur is if the field spans word
1691            boundaries.  */
1692         return extract_split_bit_field (op0, bitsize,
1693                                         bitpos + offset * BITS_PER_UNIT,
1694                                         unsignedp);
1695
1696       total_bits = GET_MODE_BITSIZE (mode);
1697
1698       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1699          be in the range 0 to total_bits-1, and put any excess bytes in
1700          OFFSET.  */
1701       if (bitpos >= total_bits)
1702         {
1703           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1704           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1705                      * BITS_PER_UNIT);
1706         }
1707
1708       /* Get ref to an aligned byte, halfword, or word containing the field.
1709          Adjust BITPOS to be position within a word,
1710          and OFFSET to be the offset of that word.
1711          Then alter OP0 to refer to that word.  */
1712       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1713       offset -= (offset % (total_bits / BITS_PER_UNIT));
1714       op0 = adjust_address (op0, mode, offset);
1715     }
1716
1717   mode = GET_MODE (op0);
1718
1719   if (BYTES_BIG_ENDIAN)
1720     /* BITPOS is the distance between our msb and that of OP0.
1721        Convert it to the distance from the lsb.  */
1722     bitpos = total_bits - bitsize - bitpos;
1723
1724   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1725      We have reduced the big-endian case to the little-endian case.  */
1726
1727   if (unsignedp)
1728     {
1729       if (bitpos)
1730         {
1731           /* If the field does not already start at the lsb,
1732              shift it so it does.  */
1733           tree amount = build_int_cst (NULL_TREE, bitpos);
1734           /* Maybe propagate the target for the shift.  */
1735           /* But not if we will return it--could confuse integrate.c.  */
1736           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1737           if (tmode != mode) subtarget = 0;
1738           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1739         }
1740       /* Convert the value to the desired mode.  */
1741       if (mode != tmode)
1742         op0 = convert_to_mode (tmode, op0, 1);
1743
1744       /* Unless the msb of the field used to be the msb when we shifted,
1745          mask out the upper bits.  */
1746
1747       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1748         return expand_binop (GET_MODE (op0), and_optab, op0,
1749                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1750                              target, 1, OPTAB_LIB_WIDEN);
1751       return op0;
1752     }
1753
1754   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1755      then arithmetic-shift its lsb to the lsb of the word.  */
1756   op0 = force_reg (mode, op0);
1757   if (mode != tmode)
1758     target = 0;
1759
1760   /* Find the narrowest integer mode that contains the field.  */
1761
1762   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1763        mode = GET_MODE_WIDER_MODE (mode))
1764     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1765       {
1766         op0 = convert_to_mode (mode, op0, 0);
1767         break;
1768       }
1769
1770   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1771     {
1772       tree amount
1773         = build_int_cst (NULL_TREE,
1774                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1775       /* Maybe propagate the target for the shift.  */
1776       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1777       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1778     }
1779
1780   return expand_shift (RSHIFT_EXPR, mode, op0,
1781                        build_int_cst (NULL_TREE,
1782                                       GET_MODE_BITSIZE (mode) - bitsize),
1783                        target, 0);
1784 }
1785 \f
1786 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1787    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1788    complement of that if COMPLEMENT.  The mask is truncated if
1789    necessary to the width of mode MODE.  The mask is zero-extended if
1790    BITSIZE+BITPOS is too small for MODE.  */
1791
1792 static rtx
1793 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1794 {
1795   HOST_WIDE_INT masklow, maskhigh;
1796
1797   if (bitsize == 0)
1798     masklow = 0;
1799   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1800     masklow = (HOST_WIDE_INT) -1 << bitpos;
1801   else
1802     masklow = 0;
1803
1804   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1805     masklow &= ((unsigned HOST_WIDE_INT) -1
1806                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1807
1808   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1809     maskhigh = -1;
1810   else
1811     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1812
1813   if (bitsize == 0)
1814     maskhigh = 0;
1815   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1816     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1817                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1818   else
1819     maskhigh = 0;
1820
1821   if (complement)
1822     {
1823       maskhigh = ~maskhigh;
1824       masklow = ~masklow;
1825     }
1826
1827   return immed_double_const (masklow, maskhigh, mode);
1828 }
1829
1830 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1831    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1832
1833 static rtx
1834 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1835 {
1836   unsigned HOST_WIDE_INT v = INTVAL (value);
1837   HOST_WIDE_INT low, high;
1838
1839   if (bitsize < HOST_BITS_PER_WIDE_INT)
1840     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1841
1842   if (bitpos < HOST_BITS_PER_WIDE_INT)
1843     {
1844       low = v << bitpos;
1845       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1846     }
1847   else
1848     {
1849       low = 0;
1850       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1851     }
1852
1853   return immed_double_const (low, high, mode);
1854 }
1855 \f
1856 /* Extract a bit field that is split across two words
1857    and return an RTX for the result.
1858
1859    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1860    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1861    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1862
1863 static rtx
1864 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1865                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1866 {
1867   unsigned int unit;
1868   unsigned int bitsdone = 0;
1869   rtx result = NULL_RTX;
1870   int first = 1;
1871
1872   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1873      much at a time.  */
1874   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1875     unit = BITS_PER_WORD;
1876   else
1877     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1878
1879   while (bitsdone < bitsize)
1880     {
1881       unsigned HOST_WIDE_INT thissize;
1882       rtx part, word;
1883       unsigned HOST_WIDE_INT thispos;
1884       unsigned HOST_WIDE_INT offset;
1885
1886       offset = (bitpos + bitsdone) / unit;
1887       thispos = (bitpos + bitsdone) % unit;
1888
1889       /* THISSIZE must not overrun a word boundary.  Otherwise,
1890          extract_fixed_bit_field will call us again, and we will mutually
1891          recurse forever.  */
1892       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1893       thissize = MIN (thissize, unit - thispos);
1894
1895       /* If OP0 is a register, then handle OFFSET here.
1896
1897          When handling multiword bitfields, extract_bit_field may pass
1898          down a word_mode SUBREG of a larger REG for a bitfield that actually
1899          crosses a word boundary.  Thus, for a SUBREG, we must find
1900          the current word starting from the base register.  */
1901       if (GET_CODE (op0) == SUBREG)
1902         {
1903           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1904           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1905                                         GET_MODE (SUBREG_REG (op0)));
1906           offset = 0;
1907         }
1908       else if (REG_P (op0))
1909         {
1910           word = operand_subword_force (op0, offset, GET_MODE (op0));
1911           offset = 0;
1912         }
1913       else
1914         word = op0;
1915
1916       /* Extract the parts in bit-counting order,
1917          whose meaning is determined by BYTES_PER_UNIT.
1918          OFFSET is in UNITs, and UNIT is in bits.
1919          extract_fixed_bit_field wants offset in bytes.  */
1920       part = extract_fixed_bit_field (word_mode, word,
1921                                       offset * unit / BITS_PER_UNIT,
1922                                       thissize, thispos, 0, 1);
1923       bitsdone += thissize;
1924
1925       /* Shift this part into place for the result.  */
1926       if (BYTES_BIG_ENDIAN)
1927         {
1928           if (bitsize != bitsdone)
1929             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1930                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1931                                  0, 1);
1932         }
1933       else
1934         {
1935           if (bitsdone != thissize)
1936             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1937                                  build_int_cst (NULL_TREE,
1938                                                 bitsdone - thissize), 0, 1);
1939         }
1940
1941       if (first)
1942         result = part;
1943       else
1944         /* Combine the parts with bitwise or.  This works
1945            because we extracted each part as an unsigned bit field.  */
1946         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1947                                OPTAB_LIB_WIDEN);
1948
1949       first = 0;
1950     }
1951
1952   /* Unsigned bit field: we are done.  */
1953   if (unsignedp)
1954     return result;
1955   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1956   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1957                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1958                          NULL_RTX, 0);
1959   return expand_shift (RSHIFT_EXPR, word_mode, result,
1960                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1961                        NULL_RTX, 0);
1962 }
1963 \f
1964 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1965    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1966    MODE, fill the upper bits with zeros.  Fail if the layout of either
1967    mode is unknown (as for CC modes) or if the extraction would involve
1968    unprofitable mode punning.  Return the value on success, otherwise
1969    return null.
1970
1971    This is different from gen_lowpart* in these respects:
1972
1973      - the returned value must always be considered an rvalue
1974
1975      - when MODE is wider than SRC_MODE, the extraction involves
1976        a zero extension
1977
1978      - when MODE is smaller than SRC_MODE, the extraction involves
1979        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1980
1981    In other words, this routine performs a computation, whereas the
1982    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1983    operations.  */
1984
1985 rtx
1986 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1987 {
1988   enum machine_mode int_mode, src_int_mode;
1989
1990   if (mode == src_mode)
1991     return src;
1992
1993   if (CONSTANT_P (src))
1994     return simplify_gen_subreg (mode, src, src_mode,
1995                                 subreg_lowpart_offset (mode, src_mode));
1996
1997   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
1998     return NULL_RTX;
1999
2000   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2001       && MODES_TIEABLE_P (mode, src_mode))
2002     {
2003       rtx x = gen_lowpart_common (mode, src);
2004       if (x)
2005         return x;
2006     }
2007
2008   src_int_mode = int_mode_for_mode (src_mode);
2009   int_mode = int_mode_for_mode (mode);
2010   if (src_int_mode == BLKmode || int_mode == BLKmode)
2011     return NULL_RTX;
2012
2013   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2014     return NULL_RTX;
2015   if (!MODES_TIEABLE_P (int_mode, mode))
2016     return NULL_RTX;
2017
2018   src = gen_lowpart (src_int_mode, src);
2019   src = convert_modes (int_mode, src_int_mode, src, true);
2020   src = gen_lowpart (mode, src);
2021   return src;
2022 }
2023 \f
2024 /* Add INC into TARGET.  */
2025
2026 void
2027 expand_inc (rtx target, rtx inc)
2028 {
2029   rtx value = expand_binop (GET_MODE (target), add_optab,
2030                             target, inc,
2031                             target, 0, OPTAB_LIB_WIDEN);
2032   if (value != target)
2033     emit_move_insn (target, value);
2034 }
2035
2036 /* Subtract DEC from TARGET.  */
2037
2038 void
2039 expand_dec (rtx target, rtx dec)
2040 {
2041   rtx value = expand_binop (GET_MODE (target), sub_optab,
2042                             target, dec,
2043                             target, 0, OPTAB_LIB_WIDEN);
2044   if (value != target)
2045     emit_move_insn (target, value);
2046 }
2047 \f
2048 /* Output a shift instruction for expression code CODE,
2049    with SHIFTED being the rtx for the value to shift,
2050    and AMOUNT the tree for the amount to shift by.
2051    Store the result in the rtx TARGET, if that is convenient.
2052    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2053    Return the rtx for where the value is.  */
2054
2055 rtx
2056 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2057               tree amount, rtx target, int unsignedp)
2058 {
2059   rtx op1, temp = 0;
2060   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2061   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2062   optab lshift_optab = ashl_optab;
2063   optab rshift_arith_optab = ashr_optab;
2064   optab rshift_uns_optab = lshr_optab;
2065   optab lrotate_optab = rotl_optab;
2066   optab rrotate_optab = rotr_optab;
2067   enum machine_mode op1_mode;
2068   int attempt;
2069   bool speed = optimize_insn_for_speed_p ();
2070
2071   op1 = expand_normal (amount);
2072   op1_mode = GET_MODE (op1);
2073
2074   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2075      shift amount is a vector, use the vector/vector shift patterns.  */
2076   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2077     {
2078       lshift_optab = vashl_optab;
2079       rshift_arith_optab = vashr_optab;
2080       rshift_uns_optab = vlshr_optab;
2081       lrotate_optab = vrotl_optab;
2082       rrotate_optab = vrotr_optab;
2083     }
2084
2085   /* Previously detected shift-counts computed by NEGATE_EXPR
2086      and shifted in the other direction; but that does not work
2087      on all machines.  */
2088
2089   if (SHIFT_COUNT_TRUNCATED)
2090     {
2091       if (GET_CODE (op1) == CONST_INT
2092           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2093               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2094         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2095                        % GET_MODE_BITSIZE (mode));
2096       else if (GET_CODE (op1) == SUBREG
2097                && subreg_lowpart_p (op1))
2098         op1 = SUBREG_REG (op1);
2099     }
2100
2101   if (op1 == const0_rtx)
2102     return shifted;
2103
2104   /* Check whether its cheaper to implement a left shift by a constant
2105      bit count by a sequence of additions.  */
2106   if (code == LSHIFT_EXPR
2107       && GET_CODE (op1) == CONST_INT
2108       && INTVAL (op1) > 0
2109       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2110       && INTVAL (op1) < MAX_BITS_PER_WORD
2111       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2112       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2113     {
2114       int i;
2115       for (i = 0; i < INTVAL (op1); i++)
2116         {
2117           temp = force_reg (mode, shifted);
2118           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2119                                   unsignedp, OPTAB_LIB_WIDEN);
2120         }
2121       return shifted;
2122     }
2123
2124   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2125     {
2126       enum optab_methods methods;
2127
2128       if (attempt == 0)
2129         methods = OPTAB_DIRECT;
2130       else if (attempt == 1)
2131         methods = OPTAB_WIDEN;
2132       else
2133         methods = OPTAB_LIB_WIDEN;
2134
2135       if (rotate)
2136         {
2137           /* Widening does not work for rotation.  */
2138           if (methods == OPTAB_WIDEN)
2139             continue;
2140           else if (methods == OPTAB_LIB_WIDEN)
2141             {
2142               /* If we have been unable to open-code this by a rotation,
2143                  do it as the IOR of two shifts.  I.e., to rotate A
2144                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2145                  where C is the bitsize of A.
2146
2147                  It is theoretically possible that the target machine might
2148                  not be able to perform either shift and hence we would
2149                  be making two libcalls rather than just the one for the
2150                  shift (similarly if IOR could not be done).  We will allow
2151                  this extremely unlikely lossage to avoid complicating the
2152                  code below.  */
2153
2154               rtx subtarget = target == shifted ? 0 : target;
2155               tree new_amount, other_amount;
2156               rtx temp1;
2157               tree type = TREE_TYPE (amount);
2158               if (GET_MODE (op1) != TYPE_MODE (type)
2159                   && GET_MODE (op1) != VOIDmode)
2160                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2161               new_amount = make_tree (type, op1);
2162               other_amount
2163                 = fold_build2 (MINUS_EXPR, type,
2164                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2165                                new_amount);
2166
2167               shifted = force_reg (mode, shifted);
2168
2169               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2170                                    mode, shifted, new_amount, 0, 1);
2171               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2172                                     mode, shifted, other_amount, subtarget, 1);
2173               return expand_binop (mode, ior_optab, temp, temp1, target,
2174                                    unsignedp, methods);
2175             }
2176
2177           temp = expand_binop (mode,
2178                                left ? lrotate_optab : rrotate_optab,
2179                                shifted, op1, target, unsignedp, methods);
2180         }
2181       else if (unsignedp)
2182         temp = expand_binop (mode,
2183                              left ? lshift_optab : rshift_uns_optab,
2184                              shifted, op1, target, unsignedp, methods);
2185
2186       /* Do arithmetic shifts.
2187          Also, if we are going to widen the operand, we can just as well
2188          use an arithmetic right-shift instead of a logical one.  */
2189       if (temp == 0 && ! rotate
2190           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2191         {
2192           enum optab_methods methods1 = methods;
2193
2194           /* If trying to widen a log shift to an arithmetic shift,
2195              don't accept an arithmetic shift of the same size.  */
2196           if (unsignedp)
2197             methods1 = OPTAB_MUST_WIDEN;
2198
2199           /* Arithmetic shift */
2200
2201           temp = expand_binop (mode,
2202                                left ? lshift_optab : rshift_arith_optab,
2203                                shifted, op1, target, unsignedp, methods1);
2204         }
2205
2206       /* We used to try extzv here for logical right shifts, but that was
2207          only useful for one machine, the VAX, and caused poor code
2208          generation there for lshrdi3, so the code was deleted and a
2209          define_expand for lshrsi3 was added to vax.md.  */
2210     }
2211
2212   gcc_assert (temp);
2213   return temp;
2214 }
2215 \f
2216 enum alg_code {
2217   alg_unknown,
2218   alg_zero,
2219   alg_m, alg_shift,
2220   alg_add_t_m2,
2221   alg_sub_t_m2,
2222   alg_add_factor,
2223   alg_sub_factor,
2224   alg_add_t2_m,
2225   alg_sub_t2_m,
2226   alg_impossible
2227 };
2228
2229 /* This structure holds the "cost" of a multiply sequence.  The
2230    "cost" field holds the total rtx_cost of every operator in the
2231    synthetic multiplication sequence, hence cost(a op b) is defined
2232    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2233    The "latency" field holds the minimum possible latency of the
2234    synthetic multiply, on a hypothetical infinitely parallel CPU.
2235    This is the critical path, or the maximum height, of the expression
2236    tree which is the sum of rtx_costs on the most expensive path from
2237    any leaf to the root.  Hence latency(a op b) is defined as zero for
2238    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2239
2240 struct mult_cost {
2241   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2242   short latency;  /* The latency of the multiplication sequence.  */
2243 };
2244
2245 /* This macro is used to compare a pointer to a mult_cost against an
2246    single integer "rtx_cost" value.  This is equivalent to the macro
2247    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2248 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2249                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2250
2251 /* This macro is used to compare two pointers to mult_costs against
2252    each other.  The macro returns true if X is cheaper than Y.
2253    Currently, the cheaper of two mult_costs is the one with the
2254    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2255 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2256                                  || ((X)->cost == (Y)->cost     \
2257                                      && (X)->latency < (Y)->latency))
2258
2259 /* This structure records a sequence of operations.
2260    `ops' is the number of operations recorded.
2261    `cost' is their total cost.
2262    The operations are stored in `op' and the corresponding
2263    logarithms of the integer coefficients in `log'.
2264
2265    These are the operations:
2266    alg_zero             total := 0;
2267    alg_m                total := multiplicand;
2268    alg_shift            total := total * coeff
2269    alg_add_t_m2         total := total + multiplicand * coeff;
2270    alg_sub_t_m2         total := total - multiplicand * coeff;
2271    alg_add_factor       total := total * coeff + total;
2272    alg_sub_factor       total := total * coeff - total;
2273    alg_add_t2_m         total := total * coeff + multiplicand;
2274    alg_sub_t2_m         total := total * coeff - multiplicand;
2275
2276    The first operand must be either alg_zero or alg_m.  */
2277
2278 struct algorithm
2279 {
2280   struct mult_cost cost;
2281   short ops;
2282   /* The size of the OP and LOG fields are not directly related to the
2283      word size, but the worst-case algorithms will be if we have few
2284      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2285      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2286      in total wordsize operations.  */
2287   enum alg_code op[MAX_BITS_PER_WORD];
2288   char log[MAX_BITS_PER_WORD];
2289 };
2290
2291 /* The entry for our multiplication cache/hash table.  */
2292 struct alg_hash_entry {
2293   /* The number we are multiplying by.  */
2294   unsigned HOST_WIDE_INT t;
2295
2296   /* The mode in which we are multiplying something by T.  */
2297   enum machine_mode mode;
2298
2299   /* The best multiplication algorithm for t.  */
2300   enum alg_code alg;
2301
2302   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2303      Otherwise, the cost within which multiplication by T is
2304      impossible.  */
2305   struct mult_cost cost;
2306
2307   /* OPtimized for speed? */
2308   bool speed;
2309 };
2310
2311 /* The number of cache/hash entries.  */
2312 #if HOST_BITS_PER_WIDE_INT == 64
2313 #define NUM_ALG_HASH_ENTRIES 1031
2314 #else
2315 #define NUM_ALG_HASH_ENTRIES 307
2316 #endif
2317
2318 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2319    actually a hash table.  If we have a collision, that the older
2320    entry is kicked out.  */
2321 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2322
2323 /* Indicates the type of fixup needed after a constant multiplication.
2324    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2325    the result should be negated, and ADD_VARIANT means that the
2326    multiplicand should be added to the result.  */
2327 enum mult_variant {basic_variant, negate_variant, add_variant};
2328
2329 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2330                         const struct mult_cost *, enum machine_mode mode);
2331 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2332                                  struct algorithm *, enum mult_variant *, int);
2333 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2334                               const struct algorithm *, enum mult_variant);
2335 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2336                                                  int, rtx *, int *, int *);
2337 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2338 static rtx extract_high_half (enum machine_mode, rtx);
2339 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2340 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2341                                        int, int);
2342 /* Compute and return the best algorithm for multiplying by T.
2343    The algorithm must cost less than cost_limit
2344    If retval.cost >= COST_LIMIT, no algorithm was found and all
2345    other field of the returned struct are undefined.
2346    MODE is the machine mode of the multiplication.  */
2347
2348 static void
2349 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2350             const struct mult_cost *cost_limit, enum machine_mode mode)
2351 {
2352   int m;
2353   struct algorithm *alg_in, *best_alg;
2354   struct mult_cost best_cost;
2355   struct mult_cost new_limit;
2356   int op_cost, op_latency;
2357   unsigned HOST_WIDE_INT q;
2358   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2359   int hash_index;
2360   bool cache_hit = false;
2361   enum alg_code cache_alg = alg_zero;
2362   bool speed = optimize_insn_for_speed_p ();
2363
2364   /* Indicate that no algorithm is yet found.  If no algorithm
2365      is found, this value will be returned and indicate failure.  */
2366   alg_out->cost.cost = cost_limit->cost + 1;
2367   alg_out->cost.latency = cost_limit->latency + 1;
2368
2369   if (cost_limit->cost < 0
2370       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2371     return;
2372
2373   /* Restrict the bits of "t" to the multiplication's mode.  */
2374   t &= GET_MODE_MASK (mode);
2375
2376   /* t == 1 can be done in zero cost.  */
2377   if (t == 1)
2378     {
2379       alg_out->ops = 1;
2380       alg_out->cost.cost = 0;
2381       alg_out->cost.latency = 0;
2382       alg_out->op[0] = alg_m;
2383       return;
2384     }
2385
2386   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2387      fail now.  */
2388   if (t == 0)
2389     {
2390       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2391         return;
2392       else
2393         {
2394           alg_out->ops = 1;
2395           alg_out->cost.cost = zero_cost[speed];
2396           alg_out->cost.latency = zero_cost[speed];
2397           alg_out->op[0] = alg_zero;
2398           return;
2399         }
2400     }
2401
2402   /* We'll be needing a couple extra algorithm structures now.  */
2403
2404   alg_in = XALLOCA (struct algorithm);
2405   best_alg = XALLOCA (struct algorithm);
2406   best_cost = *cost_limit;
2407
2408   /* Compute the hash index.  */
2409   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2410
2411   /* See if we already know what to do for T.  */
2412   if (alg_hash[hash_index].t == t
2413       && alg_hash[hash_index].mode == mode
2414       && alg_hash[hash_index].mode == mode
2415       && alg_hash[hash_index].speed == speed
2416       && alg_hash[hash_index].alg != alg_unknown)
2417     {
2418       cache_alg = alg_hash[hash_index].alg;
2419
2420       if (cache_alg == alg_impossible)
2421         {
2422           /* The cache tells us that it's impossible to synthesize
2423              multiplication by T within alg_hash[hash_index].cost.  */
2424           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2425             /* COST_LIMIT is at least as restrictive as the one
2426                recorded in the hash table, in which case we have no
2427                hope of synthesizing a multiplication.  Just
2428                return.  */
2429             return;
2430
2431           /* If we get here, COST_LIMIT is less restrictive than the
2432              one recorded in the hash table, so we may be able to
2433              synthesize a multiplication.  Proceed as if we didn't
2434              have the cache entry.  */
2435         }
2436       else
2437         {
2438           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2439             /* The cached algorithm shows that this multiplication
2440                requires more cost than COST_LIMIT.  Just return.  This
2441                way, we don't clobber this cache entry with
2442                alg_impossible but retain useful information.  */
2443             return;
2444
2445           cache_hit = true;
2446
2447           switch (cache_alg)
2448             {
2449             case alg_shift:
2450               goto do_alg_shift;
2451
2452             case alg_add_t_m2:
2453             case alg_sub_t_m2:
2454               goto do_alg_addsub_t_m2;
2455
2456             case alg_add_factor:
2457             case alg_sub_factor:
2458               goto do_alg_addsub_factor;
2459
2460             case alg_add_t2_m:
2461               goto do_alg_add_t2_m;
2462
2463             case alg_sub_t2_m:
2464               goto do_alg_sub_t2_m;
2465
2466             default:
2467               gcc_unreachable ();
2468             }
2469         }
2470     }
2471
2472   /* If we have a group of zero bits at the low-order part of T, try
2473      multiplying by the remaining bits and then doing a shift.  */
2474
2475   if ((t & 1) == 0)
2476     {
2477     do_alg_shift:
2478       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2479       if (m < maxm)
2480         {
2481           q = t >> m;
2482           /* The function expand_shift will choose between a shift and
2483              a sequence of additions, so the observed cost is given as
2484              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2485           op_cost = m * add_cost[speed][mode];
2486           if (shift_cost[speed][mode][m] < op_cost)
2487             op_cost = shift_cost[speed][mode][m];
2488           new_limit.cost = best_cost.cost - op_cost;
2489           new_limit.latency = best_cost.latency - op_cost;
2490           synth_mult (alg_in, q, &new_limit, mode);
2491
2492           alg_in->cost.cost += op_cost;
2493           alg_in->cost.latency += op_cost;
2494           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2495             {
2496               struct algorithm *x;
2497               best_cost = alg_in->cost;
2498               x = alg_in, alg_in = best_alg, best_alg = x;
2499               best_alg->log[best_alg->ops] = m;
2500               best_alg->op[best_alg->ops] = alg_shift;
2501             }
2502         }
2503       if (cache_hit)
2504         goto done;
2505     }
2506
2507   /* If we have an odd number, add or subtract one.  */
2508   if ((t & 1) != 0)
2509     {
2510       unsigned HOST_WIDE_INT w;
2511
2512     do_alg_addsub_t_m2:
2513       for (w = 1; (w & t) != 0; w <<= 1)
2514         ;
2515       /* If T was -1, then W will be zero after the loop.  This is another
2516          case where T ends with ...111.  Handling this with (T + 1) and
2517          subtract 1 produces slightly better code and results in algorithm
2518          selection much faster than treating it like the ...0111 case
2519          below.  */
2520       if (w == 0
2521           || (w > 2
2522               /* Reject the case where t is 3.
2523                  Thus we prefer addition in that case.  */
2524               && t != 3))
2525         {
2526           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2527
2528           op_cost = add_cost[speed][mode];
2529           new_limit.cost = best_cost.cost - op_cost;
2530           new_limit.latency = best_cost.latency - op_cost;
2531           synth_mult (alg_in, t + 1, &new_limit, mode);
2532
2533           alg_in->cost.cost += op_cost;
2534           alg_in->cost.latency += op_cost;
2535           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2536             {
2537               struct algorithm *x;
2538               best_cost = alg_in->cost;
2539               x = alg_in, alg_in = best_alg, best_alg = x;
2540               best_alg->log[best_alg->ops] = 0;
2541               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2542             }
2543         }
2544       else
2545         {
2546           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2547
2548           op_cost = add_cost[speed][mode];
2549           new_limit.cost = best_cost.cost - op_cost;
2550           new_limit.latency = best_cost.latency - op_cost;
2551           synth_mult (alg_in, t - 1, &new_limit, mode);
2552
2553           alg_in->cost.cost += op_cost;
2554           alg_in->cost.latency += op_cost;
2555           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2556             {
2557               struct algorithm *x;
2558               best_cost = alg_in->cost;
2559               x = alg_in, alg_in = best_alg, best_alg = x;
2560               best_alg->log[best_alg->ops] = 0;
2561               best_alg->op[best_alg->ops] = alg_add_t_m2;
2562             }
2563         }
2564       if (cache_hit)
2565         goto done;
2566     }
2567
2568   /* Look for factors of t of the form
2569      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2570      If we find such a factor, we can multiply by t using an algorithm that
2571      multiplies by q, shift the result by m and add/subtract it to itself.
2572
2573      We search for large factors first and loop down, even if large factors
2574      are less probable than small; if we find a large factor we will find a
2575      good sequence quickly, and therefore be able to prune (by decreasing
2576      COST_LIMIT) the search.  */
2577
2578  do_alg_addsub_factor:
2579   for (m = floor_log2 (t - 1); m >= 2; m--)
2580     {
2581       unsigned HOST_WIDE_INT d;
2582
2583       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2584       if (t % d == 0 && t > d && m < maxm
2585           && (!cache_hit || cache_alg == alg_add_factor))
2586         {
2587           /* If the target has a cheap shift-and-add instruction use
2588              that in preference to a shift insn followed by an add insn.
2589              Assume that the shift-and-add is "atomic" with a latency
2590              equal to its cost, otherwise assume that on superscalar
2591              hardware the shift may be executed concurrently with the
2592              earlier steps in the algorithm.  */
2593           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2594           if (shiftadd_cost[speed][mode][m] < op_cost)
2595             {
2596               op_cost = shiftadd_cost[speed][mode][m];
2597               op_latency = op_cost;
2598             }
2599           else
2600             op_latency = add_cost[speed][mode];
2601
2602           new_limit.cost = best_cost.cost - op_cost;
2603           new_limit.latency = best_cost.latency - op_latency;
2604           synth_mult (alg_in, t / d, &new_limit, mode);
2605
2606           alg_in->cost.cost += op_cost;
2607           alg_in->cost.latency += op_latency;
2608           if (alg_in->cost.latency < op_cost)
2609             alg_in->cost.latency = op_cost;
2610           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2611             {
2612               struct algorithm *x;
2613               best_cost = alg_in->cost;
2614               x = alg_in, alg_in = best_alg, best_alg = x;
2615               best_alg->log[best_alg->ops] = m;
2616               best_alg->op[best_alg->ops] = alg_add_factor;
2617             }
2618           /* Other factors will have been taken care of in the recursion.  */
2619           break;
2620         }
2621
2622       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2623       if (t % d == 0 && t > d && m < maxm
2624           && (!cache_hit || cache_alg == alg_sub_factor))
2625         {
2626           /* If the target has a cheap shift-and-subtract insn use
2627              that in preference to a shift insn followed by a sub insn.
2628              Assume that the shift-and-sub is "atomic" with a latency
2629              equal to it's cost, otherwise assume that on superscalar
2630              hardware the shift may be executed concurrently with the
2631              earlier steps in the algorithm.  */
2632           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2633           if (shiftsub_cost[speed][mode][m] < op_cost)
2634             {
2635               op_cost = shiftsub_cost[speed][mode][m];
2636               op_latency = op_cost;
2637             }
2638           else
2639             op_latency = add_cost[speed][mode];
2640
2641           new_limit.cost = best_cost.cost - op_cost;
2642           new_limit.latency = best_cost.latency - op_latency;
2643           synth_mult (alg_in, t / d, &new_limit, mode);
2644
2645           alg_in->cost.cost += op_cost;
2646           alg_in->cost.latency += op_latency;
2647           if (alg_in->cost.latency < op_cost)
2648             alg_in->cost.latency = op_cost;
2649           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2650             {
2651               struct algorithm *x;
2652               best_cost = alg_in->cost;
2653               x = alg_in, alg_in = best_alg, best_alg = x;
2654               best_alg->log[best_alg->ops] = m;
2655               best_alg->op[best_alg->ops] = alg_sub_factor;
2656             }
2657           break;
2658         }
2659     }
2660   if (cache_hit)
2661     goto done;
2662
2663   /* Try shift-and-add (load effective address) instructions,
2664      i.e. do a*3, a*5, a*9.  */
2665   if ((t & 1) != 0)
2666     {
2667     do_alg_add_t2_m:
2668       q = t - 1;
2669       q = q & -q;
2670       m = exact_log2 (q);
2671       if (m >= 0 && m < maxm)
2672         {
2673           op_cost = shiftadd_cost[speed][mode][m];
2674           new_limit.cost = best_cost.cost - op_cost;
2675           new_limit.latency = best_cost.latency - op_cost;
2676           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2677
2678           alg_in->cost.cost += op_cost;
2679           alg_in->cost.latency += op_cost;
2680           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2681             {
2682               struct algorithm *x;
2683               best_cost = alg_in->cost;
2684               x = alg_in, alg_in = best_alg, best_alg = x;
2685               best_alg->log[best_alg->ops] = m;
2686               best_alg->op[best_alg->ops] = alg_add_t2_m;
2687             }
2688         }
2689       if (cache_hit)
2690         goto done;
2691
2692     do_alg_sub_t2_m:
2693       q = t + 1;
2694       q = q & -q;
2695       m = exact_log2 (q);
2696       if (m >= 0 && m < maxm)
2697         {
2698           op_cost = shiftsub_cost[speed][mode][m];
2699           new_limit.cost = best_cost.cost - op_cost;
2700           new_limit.latency = best_cost.latency - op_cost;
2701           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2702
2703           alg_in->cost.cost += op_cost;
2704           alg_in->cost.latency += op_cost;
2705           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2706             {
2707               struct algorithm *x;
2708               best_cost = alg_in->cost;
2709               x = alg_in, alg_in = best_alg, best_alg = x;
2710               best_alg->log[best_alg->ops] = m;
2711               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2712             }
2713         }
2714       if (cache_hit)
2715         goto done;
2716     }
2717
2718  done:
2719   /* If best_cost has not decreased, we have not found any algorithm.  */
2720   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2721     {
2722       /* We failed to find an algorithm.  Record alg_impossible for
2723          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2724          we are asked to find an algorithm for T within the same or
2725          lower COST_LIMIT, we can immediately return to the
2726          caller.  */
2727       alg_hash[hash_index].t = t;
2728       alg_hash[hash_index].mode = mode;
2729       alg_hash[hash_index].speed = speed;
2730       alg_hash[hash_index].alg = alg_impossible;
2731       alg_hash[hash_index].cost = *cost_limit;
2732       return;
2733     }
2734
2735   /* Cache the result.  */
2736   if (!cache_hit)
2737     {
2738       alg_hash[hash_index].t = t;
2739       alg_hash[hash_index].mode = mode;
2740       alg_hash[hash_index].speed = speed;
2741       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2742       alg_hash[hash_index].cost.cost = best_cost.cost;
2743       alg_hash[hash_index].cost.latency = best_cost.latency;
2744     }
2745
2746   /* If we are getting a too long sequence for `struct algorithm'
2747      to record, make this search fail.  */
2748   if (best_alg->ops == MAX_BITS_PER_WORD)
2749     return;
2750
2751   /* Copy the algorithm from temporary space to the space at alg_out.
2752      We avoid using structure assignment because the majority of
2753      best_alg is normally undefined, and this is a critical function.  */
2754   alg_out->ops = best_alg->ops + 1;
2755   alg_out->cost = best_cost;
2756   memcpy (alg_out->op, best_alg->op,
2757           alg_out->ops * sizeof *alg_out->op);
2758   memcpy (alg_out->log, best_alg->log,
2759           alg_out->ops * sizeof *alg_out->log);
2760 }
2761 \f
2762 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2763    Try three variations:
2764
2765        - a shift/add sequence based on VAL itself
2766        - a shift/add sequence based on -VAL, followed by a negation
2767        - a shift/add sequence based on VAL - 1, followed by an addition.
2768
2769    Return true if the cheapest of these cost less than MULT_COST,
2770    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2771
2772 static bool
2773 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2774                      struct algorithm *alg, enum mult_variant *variant,
2775                      int mult_cost)
2776 {
2777   struct algorithm alg2;
2778   struct mult_cost limit;
2779   int op_cost;
2780   bool speed = optimize_insn_for_speed_p ();
2781
2782   /* Fail quickly for impossible bounds.  */
2783   if (mult_cost < 0)
2784     return false;
2785
2786   /* Ensure that mult_cost provides a reasonable upper bound.
2787      Any constant multiplication can be performed with less
2788      than 2 * bits additions.  */
2789   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2790   if (mult_cost > op_cost)
2791     mult_cost = op_cost;
2792
2793   *variant = basic_variant;
2794   limit.cost = mult_cost;
2795   limit.latency = mult_cost;
2796   synth_mult (alg, val, &limit, mode);
2797
2798   /* This works only if the inverted value actually fits in an
2799      `unsigned int' */
2800   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2801     {
2802       op_cost = neg_cost[speed][mode];
2803       if (MULT_COST_LESS (&alg->cost, mult_cost))
2804         {
2805           limit.cost = alg->cost.cost - op_cost;
2806           limit.latency = alg->cost.latency - op_cost;
2807         }
2808       else
2809         {
2810           limit.cost = mult_cost - op_cost;
2811           limit.latency = mult_cost - op_cost;
2812         }
2813
2814       synth_mult (&alg2, -val, &limit, mode);
2815       alg2.cost.cost += op_cost;
2816       alg2.cost.latency += op_cost;
2817       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2818         *alg = alg2, *variant = negate_variant;
2819     }
2820
2821   /* This proves very useful for division-by-constant.  */
2822   op_cost = add_cost[speed][mode];
2823   if (MULT_COST_LESS (&alg->cost, mult_cost))
2824     {
2825       limit.cost = alg->cost.cost - op_cost;
2826       limit.latency = alg->cost.latency - op_cost;
2827     }
2828   else
2829     {
2830       limit.cost = mult_cost - op_cost;
2831       limit.latency = mult_cost - op_cost;
2832     }
2833
2834   synth_mult (&alg2, val - 1, &limit, mode);
2835   alg2.cost.cost += op_cost;
2836   alg2.cost.latency += op_cost;
2837   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2838     *alg = alg2, *variant = add_variant;
2839
2840   return MULT_COST_LESS (&alg->cost, mult_cost);
2841 }
2842
2843 /* A subroutine of expand_mult, used for constant multiplications.
2844    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2845    convenient.  Use the shift/add sequence described by ALG and apply
2846    the final fixup specified by VARIANT.  */
2847
2848 static rtx
2849 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2850                    rtx target, const struct algorithm *alg,
2851                    enum mult_variant variant)
2852 {
2853   HOST_WIDE_INT val_so_far;
2854   rtx insn, accum, tem;
2855   int opno;
2856   enum machine_mode nmode;
2857
2858   /* Avoid referencing memory over and over and invalid sharing
2859      on SUBREGs.  */
2860   op0 = force_reg (mode, op0);
2861
2862   /* ACCUM starts out either as OP0 or as a zero, depending on
2863      the first operation.  */
2864
2865   if (alg->op[0] == alg_zero)
2866     {
2867       accum = copy_to_mode_reg (mode, const0_rtx);
2868       val_so_far = 0;
2869     }
2870   else if (alg->op[0] == alg_m)
2871     {
2872       accum = copy_to_mode_reg (mode, op0);
2873       val_so_far = 1;
2874     }
2875   else
2876     gcc_unreachable ();
2877
2878   for (opno = 1; opno < alg->ops; opno++)
2879     {
2880       int log = alg->log[opno];
2881       rtx shift_subtarget = optimize ? 0 : accum;
2882       rtx add_target
2883         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2884            && !optimize)
2885           ? target : 0;
2886       rtx accum_target = optimize ? 0 : accum;
2887
2888       switch (alg->op[opno])
2889         {
2890         case alg_shift:
2891           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2892                                 build_int_cst (NULL_TREE, log),
2893                                 NULL_RTX, 0);
2894           val_so_far <<= log;
2895           break;
2896
2897         case alg_add_t_m2:
2898           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2899                               build_int_cst (NULL_TREE, log),
2900                               NULL_RTX, 0);
2901           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2902                                  add_target ? add_target : accum_target);
2903           val_so_far += (HOST_WIDE_INT) 1 << log;
2904           break;
2905
2906         case alg_sub_t_m2:
2907           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2908                               build_int_cst (NULL_TREE, log),
2909                               NULL_RTX, 0);
2910           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2911                                  add_target ? add_target : accum_target);
2912           val_so_far -= (HOST_WIDE_INT) 1 << log;
2913           break;
2914
2915         case alg_add_t2_m:
2916           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2917                                 build_int_cst (NULL_TREE, log),
2918                                 shift_subtarget,
2919                                 0);
2920           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2921                                  add_target ? add_target : accum_target);
2922           val_so_far = (val_so_far << log) + 1;
2923           break;
2924
2925         case alg_sub_t2_m:
2926           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2927                                 build_int_cst (NULL_TREE, log),
2928                                 shift_subtarget, 0);
2929           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2930                                  add_target ? add_target : accum_target);
2931           val_so_far = (val_so_far << log) - 1;
2932           break;
2933
2934         case alg_add_factor:
2935           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2936                               build_int_cst (NULL_TREE, log),
2937                               NULL_RTX, 0);
2938           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2939                                  add_target ? add_target : accum_target);
2940           val_so_far += val_so_far << log;
2941           break;
2942
2943         case alg_sub_factor:
2944           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2945                               build_int_cst (NULL_TREE, log),
2946                               NULL_RTX, 0);
2947           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2948                                  (add_target
2949                                   ? add_target : (optimize ? 0 : tem)));
2950           val_so_far = (val_so_far << log) - val_so_far;
2951           break;
2952
2953         default:
2954           gcc_unreachable ();
2955         }
2956
2957       /* Write a REG_EQUAL note on the last insn so that we can cse
2958          multiplication sequences.  Note that if ACCUM is a SUBREG,
2959          we've set the inner register and must properly indicate
2960          that.  */
2961
2962       tem = op0, nmode = mode;
2963       if (GET_CODE (accum) == SUBREG)
2964         {
2965           nmode = GET_MODE (SUBREG_REG (accum));
2966           tem = gen_lowpart (nmode, op0);
2967         }
2968
2969       insn = get_last_insn ();
2970       set_unique_reg_note (insn, REG_EQUAL,
2971                            gen_rtx_MULT (nmode, tem,
2972                                          GEN_INT (val_so_far)));
2973     }
2974
2975   if (variant == negate_variant)
2976     {
2977       val_so_far = -val_so_far;
2978       accum = expand_unop (mode, neg_optab, accum, target, 0);
2979     }
2980   else if (variant == add_variant)
2981     {
2982       val_so_far = val_so_far + 1;
2983       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2984     }
2985
2986   /* Compare only the bits of val and val_so_far that are significant
2987      in the result mode, to avoid sign-/zero-extension confusion.  */
2988   val &= GET_MODE_MASK (mode);
2989   val_so_far &= GET_MODE_MASK (mode);
2990   gcc_assert (val == val_so_far);
2991
2992   return accum;
2993 }
2994
2995 /* Perform a multiplication and return an rtx for the result.
2996    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2997    TARGET is a suggestion for where to store the result (an rtx).
2998
2999    We check specially for a constant integer as OP1.
3000    If you want this check for OP0 as well, then before calling
3001    you should swap the two operands if OP0 would be constant.  */
3002
3003 rtx
3004 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3005              int unsignedp)
3006 {
3007   enum mult_variant variant;
3008   struct algorithm algorithm;
3009   int max_cost;
3010   bool speed = optimize_insn_for_speed_p ();
3011
3012   /* Handling const0_rtx here allows us to use zero as a rogue value for
3013      coeff below.  */
3014   if (op1 == const0_rtx)
3015     return const0_rtx;
3016   if (op1 == const1_rtx)
3017     return op0;
3018   if (op1 == constm1_rtx)
3019     return expand_unop (mode,
3020                         GET_MODE_CLASS (mode) == MODE_INT
3021                         && !unsignedp && flag_trapv
3022                         ? negv_optab : neg_optab,
3023                         op0, target, 0);
3024
3025   /* These are the operations that are potentially turned into a sequence
3026      of shifts and additions.  */
3027   if (SCALAR_INT_MODE_P (mode)
3028       && (unsignedp || !flag_trapv))
3029     {
3030       HOST_WIDE_INT coeff = 0;
3031       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3032
3033       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3034          less than or equal in size to `unsigned int' this doesn't matter.
3035          If the mode is larger than `unsigned int', then synth_mult works
3036          only if the constant value exactly fits in an `unsigned int' without
3037          any truncation.  This means that multiplying by negative values does
3038          not work; results are off by 2^32 on a 32 bit machine.  */
3039
3040       if (GET_CODE (op1) == CONST_INT)
3041         {
3042           /* Attempt to handle multiplication of DImode values by negative
3043              coefficients, by performing the multiplication by a positive
3044              multiplier and then inverting the result.  */
3045           if (INTVAL (op1) < 0
3046               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3047             {
3048               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3049                  result is interpreted as an unsigned coefficient.
3050                  Exclude cost of op0 from max_cost to match the cost
3051                  calculation of the synth_mult.  */
3052               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3053                          - neg_cost[speed][mode];
3054               if (max_cost > 0
3055                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3056                                           &variant, max_cost))
3057                 {
3058                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3059                                                 NULL_RTX, &algorithm,
3060                                                 variant);
3061                   return expand_unop (mode, neg_optab, temp, target, 0);
3062                 }
3063             }
3064           else coeff = INTVAL (op1);
3065         }
3066       else if (GET_CODE (op1) == CONST_DOUBLE)
3067         {
3068           /* If we are multiplying in DImode, it may still be a win
3069              to try to work with shifts and adds.  */
3070           if (CONST_DOUBLE_HIGH (op1) == 0)
3071             coeff = CONST_DOUBLE_LOW (op1);
3072           else if (CONST_DOUBLE_LOW (op1) == 0
3073                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3074             {
3075               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3076                           + HOST_BITS_PER_WIDE_INT;
3077               return expand_shift (LSHIFT_EXPR, mode, op0,
3078                                    build_int_cst (NULL_TREE, shift),
3079                                    target, unsignedp);
3080             }
3081         }
3082
3083       /* We used to test optimize here, on the grounds that it's better to
3084          produce a smaller program when -O is not used.  But this causes
3085          such a terrible slowdown sometimes that it seems better to always
3086          use synth_mult.  */
3087       if (coeff != 0)
3088         {
3089           /* Special case powers of two.  */
3090           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3091             return expand_shift (LSHIFT_EXPR, mode, op0,
3092                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3093                                  target, unsignedp);
3094
3095           /* Exclude cost of op0 from max_cost to match the cost
3096              calculation of the synth_mult.  */
3097           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3098           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3099                                    max_cost))
3100             return expand_mult_const (mode, op0, coeff, target,
3101                                       &algorithm, variant);
3102         }
3103     }
3104
3105   if (GET_CODE (op0) == CONST_DOUBLE)
3106     {
3107       rtx temp = op0;
3108       op0 = op1;
3109       op1 = temp;
3110     }
3111
3112   /* Expand x*2.0 as x+x.  */
3113   if (GET_CODE (op1) == CONST_DOUBLE
3114       && SCALAR_FLOAT_MODE_P (mode))
3115     {
3116       REAL_VALUE_TYPE d;
3117       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3118
3119       if (REAL_VALUES_EQUAL (d, dconst2))
3120         {
3121           op0 = force_reg (GET_MODE (op0), op0);
3122           return expand_binop (mode, add_optab, op0, op0,
3123                                target, unsignedp, OPTAB_LIB_WIDEN);
3124         }
3125     }
3126
3127   /* This used to use umul_optab if unsigned, but for non-widening multiply
3128      there is no difference between signed and unsigned.  */
3129   op0 = expand_binop (mode,
3130                       ! unsignedp
3131                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3132                       ? smulv_optab : smul_optab,
3133                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3134   gcc_assert (op0);
3135   return op0;
3136 }
3137 \f
3138 /* Return the smallest n such that 2**n >= X.  */
3139
3140 int
3141 ceil_log2 (unsigned HOST_WIDE_INT x)
3142 {
3143   return floor_log2 (x - 1) + 1;
3144 }
3145
3146 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3147    replace division by D, and put the least significant N bits of the result
3148    in *MULTIPLIER_PTR and return the most significant bit.
3149
3150    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3151    needed precision is in PRECISION (should be <= N).
3152
3153    PRECISION should be as small as possible so this function can choose
3154    multiplier more freely.
3155
3156    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3157    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3158
3159    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3160    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3161
3162 static
3163 unsigned HOST_WIDE_INT
3164 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3165                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3166 {
3167   HOST_WIDE_INT mhigh_hi, mlow_hi;
3168   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3169   int lgup, post_shift;
3170   int pow, pow2;
3171   unsigned HOST_WIDE_INT nl, dummy1;
3172   HOST_WIDE_INT nh, dummy2;
3173
3174   /* lgup = ceil(log2(divisor)); */
3175   lgup = ceil_log2 (d);
3176
3177   gcc_assert (lgup <= n);
3178
3179   pow = n + lgup;
3180   pow2 = n + lgup - precision;
3181
3182   /* We could handle this with some effort, but this case is much
3183      better handled directly with a scc insn, so rely on caller using
3184      that.  */
3185   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3186
3187   /* mlow = 2^(N + lgup)/d */
3188  if (pow >= HOST_BITS_PER_WIDE_INT)
3189     {
3190       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3191       nl = 0;
3192     }
3193   else
3194     {
3195       nh = 0;
3196       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3197     }
3198   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3199                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3200
3201   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3202   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3203     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3204   else
3205     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3206   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3207                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3208
3209   gcc_assert (!mhigh_hi || nh - d < d);
3210   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3211   /* Assert that mlow < mhigh.  */
3212   gcc_assert (mlow_hi < mhigh_hi
3213               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3214
3215   /* If precision == N, then mlow, mhigh exceed 2^N
3216      (but they do not exceed 2^(N+1)).  */
3217
3218   /* Reduce to lowest terms.  */
3219   for (post_shift = lgup; post_shift > 0; post_shift--)
3220     {
3221       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3222       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3223       if (ml_lo >= mh_lo)
3224         break;
3225
3226       mlow_hi = 0;
3227       mlow_lo = ml_lo;
3228       mhigh_hi = 0;
3229       mhigh_lo = mh_lo;
3230     }
3231
3232   *post_shift_ptr = post_shift;
3233   *lgup_ptr = lgup;
3234   if (n < HOST_BITS_PER_WIDE_INT)
3235     {
3236       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3237       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3238       return mhigh_lo >= mask;
3239     }
3240   else
3241     {
3242       *multiplier_ptr = GEN_INT (mhigh_lo);
3243       return mhigh_hi;
3244     }
3245 }
3246
3247 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3248    congruent to 1 (mod 2**N).  */
3249
3250 static unsigned HOST_WIDE_INT
3251 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3252 {
3253   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3254
3255   /* The algorithm notes that the choice y = x satisfies
3256      x*y == 1 mod 2^3, since x is assumed odd.
3257      Each iteration doubles the number of bits of significance in y.  */
3258
3259   unsigned HOST_WIDE_INT mask;
3260   unsigned HOST_WIDE_INT y = x;
3261   int nbit = 3;
3262
3263   mask = (n == HOST_BITS_PER_WIDE_INT
3264           ? ~(unsigned HOST_WIDE_INT) 0
3265           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3266
3267   while (nbit < n)
3268     {
3269       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3270       nbit *= 2;
3271     }
3272   return y;
3273 }
3274
3275 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3276    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3277    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3278    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3279    become signed.
3280
3281    The result is put in TARGET if that is convenient.
3282
3283    MODE is the mode of operation.  */
3284
3285 rtx
3286 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3287                              rtx op1, rtx target, int unsignedp)
3288 {
3289   rtx tem;
3290   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3291
3292   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3293                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3294                       NULL_RTX, 0);
3295   tem = expand_and (mode, tem, op1, NULL_RTX);
3296   adj_operand
3297     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3298                      adj_operand);
3299
3300   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3301                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3302                       NULL_RTX, 0);
3303   tem = expand_and (mode, tem, op0, NULL_RTX);
3304   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3305                           target);
3306
3307   return target;
3308 }
3309
3310 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3311
3312 static rtx
3313 extract_high_half (enum machine_mode mode, rtx op)
3314 {
3315   enum machine_mode wider_mode;
3316
3317   if (mode == word_mode)
3318     return gen_highpart (mode, op);
3319
3320   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3321
3322   wider_mode = GET_MODE_WIDER_MODE (mode);
3323   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3324                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3325   return convert_modes (mode, wider_mode, op, 0);
3326 }
3327
3328 /* Like expand_mult_highpart, but only consider using a multiplication
3329    optab.  OP1 is an rtx for the constant operand.  */
3330
3331 static rtx
3332 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3333                             rtx target, int unsignedp, int max_cost)
3334 {
3335   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3336   enum machine_mode wider_mode;
3337   optab moptab;
3338   rtx tem;
3339   int size;
3340   bool speed = optimize_insn_for_speed_p ();
3341
3342   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3343
3344   wider_mode = GET_MODE_WIDER_MODE (mode);
3345   size = GET_MODE_BITSIZE (mode);
3346
3347   /* Firstly, try using a multiplication insn that only generates the needed
3348      high part of the product, and in the sign flavor of unsignedp.  */
3349   if (mul_highpart_cost[speed][mode] < max_cost)
3350     {
3351       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3352       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3353                           unsignedp, OPTAB_DIRECT);
3354       if (tem)
3355         return tem;
3356     }
3357
3358   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3359      Need to adjust the result after the multiplication.  */
3360   if (size - 1 < BITS_PER_WORD
3361       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3362           + 4 * add_cost[speed][mode] < max_cost))
3363     {
3364       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3365       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3366                           unsignedp, OPTAB_DIRECT);
3367       if (tem)
3368         /* We used the wrong signedness.  Adjust the result.  */
3369         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3370                                             tem, unsignedp);
3371     }
3372
3373   /* Try widening multiplication.  */
3374   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3375   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3376       && mul_widen_cost[speed][wider_mode] < max_cost)
3377     {
3378       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3379                           unsignedp, OPTAB_WIDEN);
3380       if (tem)
3381         return extract_high_half (mode, tem);
3382     }
3383
3384   /* Try widening the mode and perform a non-widening multiplication.  */
3385   if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3386       && size - 1 < BITS_PER_WORD
3387       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3388     {
3389       rtx insns, wop0, wop1;
3390
3391       /* We need to widen the operands, for example to ensure the
3392          constant multiplier is correctly sign or zero extended.
3393          Use a sequence to clean-up any instructions emitted by
3394          the conversions if things don't work out.  */
3395       start_sequence ();
3396       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3397       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3398       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3399                           unsignedp, OPTAB_WIDEN);
3400       insns = get_insns ();
3401       end_sequence ();
3402
3403       if (tem)
3404         {
3405           emit_insn (insns);
3406           return extract_high_half (mode, tem);
3407         }
3408     }
3409
3410   /* Try widening multiplication of opposite signedness, and adjust.  */
3411   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3412   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3413       && size - 1 < BITS_PER_WORD
3414       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3415           + 4 * add_cost[speed][mode] < max_cost))
3416     {
3417       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3418                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3419       if (tem != 0)
3420         {
3421           tem = extract_high_half (mode, tem);
3422           /* We used the wrong signedness.  Adjust the result.  */
3423           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3424                                               target, unsignedp);
3425         }
3426     }
3427
3428   return 0;
3429 }
3430
3431 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3432    putting the high half of the result in TARGET if that is convenient,
3433    and return where the result is.  If the operation can not be performed,
3434    0 is returned.
3435
3436    MODE is the mode of operation and result.
3437
3438    UNSIGNEDP nonzero means unsigned multiply.
3439
3440    MAX_COST is the total allowed cost for the expanded RTL.  */
3441
3442 static rtx
3443 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3444                       rtx target, int unsignedp, int max_cost)
3445 {
3446   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3447   unsigned HOST_WIDE_INT cnst1;
3448   int extra_cost;
3449   bool sign_adjust = false;
3450   enum mult_variant variant;
3451   struct algorithm alg;
3452   rtx tem;
3453   bool speed = optimize_insn_for_speed_p ();
3454
3455   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3456   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3457   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3458
3459   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3460
3461   /* We can't optimize modes wider than BITS_PER_WORD.
3462      ??? We might be able to perform double-word arithmetic if
3463      mode == word_mode, however all the cost calculations in
3464      synth_mult etc. assume single-word operations.  */
3465   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3466     return expand_mult_highpart_optab (mode, op0, op1, target,
3467                                        unsignedp, max_cost);
3468
3469   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3470
3471   /* Check whether we try to multiply by a negative constant.  */
3472   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3473     {
3474       sign_adjust = true;
3475       extra_cost += add_cost[speed][mode];
3476     }
3477
3478   /* See whether shift/add multiplication is cheap enough.  */
3479   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3480                            max_cost - extra_cost))
3481     {
3482       /* See whether the specialized multiplication optabs are
3483          cheaper than the shift/add version.  */
3484       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3485                                         alg.cost.cost + extra_cost);
3486       if (tem)
3487         return tem;
3488
3489       tem = convert_to_mode (wider_mode, op0, unsignedp);
3490       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3491       tem = extract_high_half (mode, tem);
3492
3493       /* Adjust result for signedness.  */
3494       if (sign_adjust)
3495         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3496
3497       return tem;
3498     }
3499   return expand_mult_highpart_optab (mode, op0, op1, target,
3500                                      unsignedp, max_cost);
3501 }
3502
3503
3504 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3505
3506 static rtx
3507 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3508 {
3509   unsigned HOST_WIDE_INT masklow, maskhigh;
3510   rtx result, temp, shift, label;
3511   int logd;
3512
3513   logd = floor_log2 (d);
3514   result = gen_reg_rtx (mode);
3515
3516   /* Avoid conditional branches when they're expensive.  */
3517   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3518       && optimize_insn_for_speed_p ())
3519     {
3520       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3521                                       mode, 0, -1);
3522       if (signmask)
3523         {
3524           signmask = force_reg (mode, signmask);
3525           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3526           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3527
3528           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3529              which instruction sequence to use.  If logical right shifts
3530              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3531              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3532
3533           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3534           if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3535               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3536             {
3537               temp = expand_binop (mode, xor_optab, op0, signmask,
3538                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3539               temp = expand_binop (mode, sub_optab, temp, signmask,
3540                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3541               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3542                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3543               temp = expand_binop (mode, xor_optab, temp, signmask,
3544                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3545               temp = expand_binop (mode, sub_optab, temp, signmask,
3546                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3547             }
3548           else
3549             {
3550               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3551                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3552               signmask = force_reg (mode, signmask);
3553
3554               temp = expand_binop (mode, add_optab, op0, signmask,
3555                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3556               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3557                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3558               temp = expand_binop (mode, sub_optab, temp, signmask,
3559                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3560             }
3561           return temp;
3562         }
3563     }
3564
3565   /* Mask contains the mode's signbit and the significant bits of the
3566      modulus.  By including the signbit in the operation, many targets
3567      can avoid an explicit compare operation in the following comparison
3568      against zero.  */
3569
3570   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3571   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3572     {
3573       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3574       maskhigh = -1;
3575     }
3576   else
3577     maskhigh = (HOST_WIDE_INT) -1
3578                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3579
3580   temp = expand_binop (mode, and_optab, op0,
3581                        immed_double_const (masklow, maskhigh, mode),
3582                        result, 1, OPTAB_LIB_WIDEN);
3583   if (temp != result)
3584     emit_move_insn (result, temp);
3585
3586   label = gen_label_rtx ();
3587   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3588
3589   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3590                        0, OPTAB_LIB_WIDEN);
3591   masklow = (HOST_WIDE_INT) -1 << logd;
3592   maskhigh = -1;
3593   temp = expand_binop (mode, ior_optab, temp,
3594                        immed_double_const (masklow, maskhigh, mode),
3595                        result, 1, OPTAB_LIB_WIDEN);
3596   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3597                        0, OPTAB_LIB_WIDEN);
3598   if (temp != result)
3599     emit_move_insn (result, temp);
3600   emit_label (label);
3601   return result;
3602 }
3603
3604 /* Expand signed division of OP0 by a power of two D in mode MODE.
3605    This routine is only called for positive values of D.  */
3606
3607 static rtx
3608 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3609 {
3610   rtx temp, label;
3611   tree shift;
3612   int logd;
3613
3614   logd = floor_log2 (d);
3615   shift = build_int_cst (NULL_TREE, logd);
3616
3617   if (d == 2
3618       && BRANCH_COST (optimize_insn_for_speed_p (),
3619                       false) >= 1)
3620     {
3621       temp = gen_reg_rtx (mode);
3622       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3623       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3624                            0, OPTAB_LIB_WIDEN);
3625       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3626     }
3627
3628 #ifdef HAVE_conditional_move
3629   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3630       >= 2)
3631     {
3632       rtx temp2;
3633
3634       /* ??? emit_conditional_move forces a stack adjustment via
3635          compare_from_rtx so, if the sequence is discarded, it will
3636          be lost.  Do it now instead.  */
3637       do_pending_stack_adjust ();
3638
3639       start_sequence ();
3640       temp2 = copy_to_mode_reg (mode, op0);
3641       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3642                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3643       temp = force_reg (mode, temp);
3644
3645       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3646       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3647                                      mode, temp, temp2, mode, 0);
3648       if (temp2)
3649         {
3650           rtx seq = get_insns ();
3651           end_sequence ();
3652           emit_insn (seq);
3653           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3654         }
3655       end_sequence ();
3656     }
3657 #endif
3658
3659   if (BRANCH_COST (optimize_insn_for_speed_p (),
3660                    false) >= 2)
3661     {
3662       int ushift = GET_MODE_BITSIZE (mode) - logd;
3663
3664       temp = gen_reg_rtx (mode);
3665       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3666       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3667         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3668                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3669       else
3670         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3671                              build_int_cst (NULL_TREE, ushift),
3672                              NULL_RTX, 1);
3673       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3674                            0, OPTAB_LIB_WIDEN);
3675       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3676     }
3677
3678   label = gen_label_rtx ();
3679   temp = copy_to_mode_reg (mode, op0);
3680   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3681   expand_inc (temp, GEN_INT (d - 1));
3682   emit_label (label);
3683   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3684 }
3685 \f
3686 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3687    if that is convenient, and returning where the result is.
3688    You may request either the quotient or the remainder as the result;
3689    specify REM_FLAG nonzero to get the remainder.
3690
3691    CODE is the expression code for which kind of division this is;
3692    it controls how rounding is done.  MODE is the machine mode to use.
3693    UNSIGNEDP nonzero means do unsigned division.  */
3694
3695 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3696    and then correct it by or'ing in missing high bits
3697    if result of ANDI is nonzero.
3698    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3699    This could optimize to a bfexts instruction.
3700    But C doesn't use these operations, so their optimizations are
3701    left for later.  */
3702 /* ??? For modulo, we don't actually need the highpart of the first product,
3703    the low part will do nicely.  And for small divisors, the second multiply
3704    can also be a low-part only multiply or even be completely left out.
3705    E.g. to calculate the remainder of a division by 3 with a 32 bit
3706    multiply, multiply with 0x55555556 and extract the upper two bits;
3707    the result is exact for inputs up to 0x1fffffff.
3708    The input range can be reduced by using cross-sum rules.
3709    For odd divisors >= 3, the following table gives right shift counts
3710    so that if a number is shifted by an integer multiple of the given
3711    amount, the remainder stays the same:
3712    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3713    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3714    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3715    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3716    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3717
3718    Cross-sum rules for even numbers can be derived by leaving as many bits
3719    to the right alone as the divisor has zeros to the right.
3720    E.g. if x is an unsigned 32 bit number:
3721    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3722    */
3723
3724 rtx
3725 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3726                rtx op0, rtx op1, rtx target, int unsignedp)
3727 {
3728   enum machine_mode compute_mode;
3729   rtx tquotient;
3730   rtx quotient = 0, remainder = 0;
3731   rtx last;
3732   int size;
3733   rtx insn, set;
3734   optab optab1, optab2;
3735   int op1_is_constant, op1_is_pow2 = 0;
3736   int max_cost, extra_cost;
3737   static HOST_WIDE_INT last_div_const = 0;
3738   static HOST_WIDE_INT ext_op1;
3739   bool speed = optimize_insn_for_speed_p ();
3740
3741   op1_is_constant = GET_CODE (op1) == CONST_INT;
3742   if (op1_is_constant)
3743     {
3744       ext_op1 = INTVAL (op1);
3745       if (unsignedp)
3746         ext_op1 &= GET_MODE_MASK (mode);
3747       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3748                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3749     }
3750
3751   /*
3752      This is the structure of expand_divmod:
3753
3754      First comes code to fix up the operands so we can perform the operations
3755      correctly and efficiently.
3756
3757      Second comes a switch statement with code specific for each rounding mode.
3758      For some special operands this code emits all RTL for the desired
3759      operation, for other cases, it generates only a quotient and stores it in
3760      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3761      to indicate that it has not done anything.
3762
3763      Last comes code that finishes the operation.  If QUOTIENT is set and
3764      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3765      QUOTIENT is not set, it is computed using trunc rounding.
3766
3767      We try to generate special code for division and remainder when OP1 is a
3768      constant.  If |OP1| = 2**n we can use shifts and some other fast
3769      operations.  For other values of OP1, we compute a carefully selected
3770      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3771      by m.
3772
3773      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3774      half of the product.  Different strategies for generating the product are
3775      implemented in expand_mult_highpart.
3776
3777      If what we actually want is the remainder, we generate that by another
3778      by-constant multiplication and a subtraction.  */
3779
3780   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3781      code below will malfunction if we are, so check here and handle
3782      the special case if so.  */
3783   if (op1 == const1_rtx)
3784     return rem_flag ? const0_rtx : op0;
3785
3786     /* When dividing by -1, we could get an overflow.
3787      negv_optab can handle overflows.  */
3788   if (! unsignedp && op1 == constm1_rtx)
3789     {
3790       if (rem_flag)
3791         return const0_rtx;
3792       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3793                           ? negv_optab : neg_optab, op0, target, 0);
3794     }
3795
3796   if (target
3797       /* Don't use the function value register as a target
3798          since we have to read it as well as write it,
3799          and function-inlining gets confused by this.  */
3800       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3801           /* Don't clobber an operand while doing a multi-step calculation.  */
3802           || ((rem_flag || op1_is_constant)
3803               && (reg_mentioned_p (target, op0)
3804                   || (MEM_P (op0) && MEM_P (target))))
3805           || reg_mentioned_p (target, op1)
3806           || (MEM_P (op1) && MEM_P (target))))
3807     target = 0;
3808
3809   /* Get the mode in which to perform this computation.  Normally it will
3810      be MODE, but sometimes we can't do the desired operation in MODE.
3811      If so, pick a wider mode in which we can do the operation.  Convert
3812      to that mode at the start to avoid repeated conversions.
3813
3814      First see what operations we need.  These depend on the expression
3815      we are evaluating.  (We assume that divxx3 insns exist under the
3816      same conditions that modxx3 insns and that these insns don't normally
3817      fail.  If these assumptions are not correct, we may generate less
3818      efficient code in some cases.)
3819
3820      Then see if we find a mode in which we can open-code that operation
3821      (either a division, modulus, or shift).  Finally, check for the smallest
3822      mode for which we can do the operation with a library call.  */
3823
3824   /* We might want to refine this now that we have division-by-constant
3825      optimization.  Since expand_mult_highpart tries so many variants, it is
3826      not straightforward to generalize this.  Maybe we should make an array
3827      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3828
3829   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3830             ? (unsignedp ? lshr_optab : ashr_optab)
3831             : (unsignedp ? udiv_optab : sdiv_optab));
3832   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3833             ? optab1
3834             : (unsignedp ? udivmod_optab : sdivmod_optab));
3835
3836   for (compute_mode = mode; compute_mode != VOIDmode;
3837        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3838     if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3839         || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3840       break;
3841
3842   if (compute_mode == VOIDmode)
3843     for (compute_mode = mode; compute_mode != VOIDmode;
3844          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3845       if (optab_libfunc (optab1, compute_mode)
3846           || optab_libfunc (optab2, compute_mode))
3847         break;
3848
3849   /* If we still couldn't find a mode, use MODE, but expand_binop will
3850      probably die.  */
3851   if (compute_mode == VOIDmode)
3852     compute_mode = mode;
3853
3854   if (target && GET_MODE (target) == compute_mode)
3855     tquotient = target;
3856   else
3857     tquotient = gen_reg_rtx (compute_mode);
3858
3859   size = GET_MODE_BITSIZE (compute_mode);
3860 #if 0
3861   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3862      (mode), and thereby get better code when OP1 is a constant.  Do that
3863      later.  It will require going over all usages of SIZE below.  */
3864   size = GET_MODE_BITSIZE (mode);
3865 #endif
3866
3867   /* Only deduct something for a REM if the last divide done was
3868      for a different constant.   Then set the constant of the last
3869      divide.  */
3870   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3871   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3872                      && INTVAL (op1) == last_div_const))
3873     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3874
3875   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3876
3877   /* Now convert to the best mode to use.  */
3878   if (compute_mode != mode)
3879     {
3880       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3881       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3882
3883       /* convert_modes may have placed op1 into a register, so we
3884          must recompute the following.  */
3885       op1_is_constant = GET_CODE (op1) == CONST_INT;
3886       op1_is_pow2 = (op1_is_constant
3887                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3888                           || (! unsignedp
3889                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3890     }
3891
3892   /* If one of the operands is a volatile MEM, copy it into a register.  */
3893
3894   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3895     op0 = force_reg (compute_mode, op0);
3896   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3897     op1 = force_reg (compute_mode, op1);
3898
3899   /* If we need the remainder or if OP1 is constant, we need to
3900      put OP0 in a register in case it has any queued subexpressions.  */
3901   if (rem_flag || op1_is_constant)
3902     op0 = force_reg (compute_mode, op0);
3903
3904   last = get_last_insn ();
3905
3906   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3907   if (unsignedp)
3908     {
3909       if (code == FLOOR_DIV_EXPR)
3910         code = TRUNC_DIV_EXPR;
3911       if (code == FLOOR_MOD_EXPR)
3912         code = TRUNC_MOD_EXPR;
3913       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3914         code = TRUNC_DIV_EXPR;
3915     }
3916
3917   if (op1 != const0_rtx)
3918     switch (code)
3919       {
3920       case TRUNC_MOD_EXPR:
3921       case TRUNC_DIV_EXPR:
3922         if (op1_is_constant)
3923           {
3924             if (unsignedp)
3925               {
3926                 unsigned HOST_WIDE_INT mh;
3927                 int pre_shift, post_shift;
3928                 int dummy;
3929                 rtx ml;
3930                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3931                                             & GET_MODE_MASK (compute_mode));
3932
3933                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3934                   {
3935                     pre_shift = floor_log2 (d);
3936                     if (rem_flag)
3937                       {
3938                         remainder
3939                           = expand_binop (compute_mode, and_optab, op0,
3940                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3941                                           remainder, 1,
3942                                           OPTAB_LIB_WIDEN);
3943                         if (remainder)
3944                           return gen_lowpart (mode, remainder);
3945                       }
3946                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3947                                              build_int_cst (NULL_TREE,
3948                                                             pre_shift),
3949                                              tquotient, 1);
3950                   }
3951                 else if (size <= HOST_BITS_PER_WIDE_INT)
3952                   {
3953                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3954                       {
3955                         /* Most significant bit of divisor is set; emit an scc
3956                            insn.  */
3957                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3958                                                     compute_mode, 1, 1);
3959                         if (quotient == 0)
3960                           goto fail1;
3961                       }
3962                     else
3963                       {
3964                         /* Find a suitable multiplier and right shift count
3965                            instead of multiplying with D.  */
3966
3967                         mh = choose_multiplier (d, size, size,
3968                                                 &ml, &post_shift, &dummy);
3969
3970                         /* If the suggested multiplier is more than SIZE bits,
3971                            we can do better for even divisors, using an
3972                            initial right shift.  */
3973                         if (mh != 0 && (d & 1) == 0)
3974                           {
3975                             pre_shift = floor_log2 (d & -d);
3976                             mh = choose_multiplier (d >> pre_shift, size,
3977                                                     size - pre_shift,
3978                                                     &ml, &post_shift, &dummy);
3979                             gcc_assert (!mh);
3980                           }
3981                         else
3982                           pre_shift = 0;
3983
3984                         if (mh != 0)
3985                           {
3986                             rtx t1, t2, t3, t4;
3987
3988                             if (post_shift - 1 >= BITS_PER_WORD)
3989                               goto fail1;
3990
3991                             extra_cost
3992                               = (shift_cost[speed][compute_mode][post_shift - 1]
3993                                  + shift_cost[speed][compute_mode][1]
3994                                  + 2 * add_cost[speed][compute_mode]);
3995                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3996                                                        NULL_RTX, 1,
3997                                                        max_cost - extra_cost);
3998                             if (t1 == 0)
3999                               goto fail1;
4000                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4001                                                                op0, t1),
4002                                                 NULL_RTX);
4003                             t3 = expand_shift
4004                               (RSHIFT_EXPR, compute_mode, t2,
4005                                build_int_cst (NULL_TREE, 1),
4006                                NULL_RTX,1);
4007                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4008                                                               t1, t3),
4009                                                 NULL_RTX);
4010                             quotient = expand_shift
4011                               (RSHIFT_EXPR, compute_mode, t4,
4012                                build_int_cst (NULL_TREE, post_shift - 1),
4013                                tquotient, 1);
4014                           }
4015                         else
4016                           {
4017                             rtx t1, t2;
4018
4019                             if (pre_shift >= BITS_PER_WORD
4020                                 || post_shift >= BITS_PER_WORD)
4021                               goto fail1;
4022
4023                             t1 = expand_shift
4024                               (RSHIFT_EXPR, compute_mode, op0,
4025                                build_int_cst (NULL_TREE, pre_shift),
4026                                NULL_RTX, 1);
4027                             extra_cost
4028                               = (shift_cost[speed][compute_mode][pre_shift]
4029                                  + shift_cost[speed][compute_mode][post_shift]);
4030                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4031                                                        NULL_RTX, 1,
4032                                                        max_cost - extra_cost);
4033                             if (t2 == 0)
4034                               goto fail1;
4035                             quotient = expand_shift
4036                               (RSHIFT_EXPR, compute_mode, t2,
4037                                build_int_cst (NULL_TREE, post_shift),
4038                                tquotient, 1);
4039                           }
4040                       }
4041                   }
4042                 else            /* Too wide mode to use tricky code */
4043                   break;
4044
4045                 insn = get_last_insn ();
4046                 if (insn != last
4047                     && (set = single_set (insn)) != 0
4048                     && SET_DEST (set) == quotient)
4049                   set_unique_reg_note (insn,
4050                                        REG_EQUAL,
4051                                        gen_rtx_UDIV (compute_mode, op0, op1));
4052               }
4053             else                /* TRUNC_DIV, signed */
4054               {
4055                 unsigned HOST_WIDE_INT ml;
4056                 int lgup, post_shift;
4057                 rtx mlr;
4058                 HOST_WIDE_INT d = INTVAL (op1);
4059                 unsigned HOST_WIDE_INT abs_d;
4060
4061                 /* Since d might be INT_MIN, we have to cast to
4062                    unsigned HOST_WIDE_INT before negating to avoid
4063                    undefined signed overflow.  */
4064                 abs_d = (d >= 0
4065                          ? (unsigned HOST_WIDE_INT) d
4066                          : - (unsigned HOST_WIDE_INT) d);
4067
4068                 /* n rem d = n rem -d */
4069                 if (rem_flag && d < 0)
4070                   {
4071                     d = abs_d;
4072                     op1 = gen_int_mode (abs_d, compute_mode);
4073                   }
4074
4075                 if (d == 1)
4076                   quotient = op0;
4077                 else if (d == -1)
4078                   quotient = expand_unop (compute_mode, neg_optab, op0,
4079                                           tquotient, 0);
4080                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4081                   {
4082                     /* This case is not handled correctly below.  */
4083                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4084                                                 compute_mode, 1, 1);
4085                     if (quotient == 0)
4086                       goto fail1;
4087                   }
4088                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4089                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4090                                       : sdiv_pow2_cheap[compute_mode])
4091                          /* We assume that cheap metric is true if the
4092                             optab has an expander for this mode.  */
4093                          && ((optab_handler ((rem_flag ? smod_optab
4094                                               : sdiv_optab),
4095                                               compute_mode)->insn_code
4096                               != CODE_FOR_nothing)
4097                              || (optab_handler(sdivmod_optab,
4098                                                compute_mode)
4099                                  ->insn_code != CODE_FOR_nothing)))
4100                   ;
4101                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4102                   {
4103                     if (rem_flag)
4104                       {
4105                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4106                         if (remainder)
4107                           return gen_lowpart (mode, remainder);
4108                       }
4109
4110                     if (sdiv_pow2_cheap[compute_mode]
4111                         && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4112                              != CODE_FOR_nothing)
4113                             || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4114                                 != CODE_FOR_nothing)))
4115                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4116                                                 compute_mode, op0,
4117                                                 gen_int_mode (abs_d,
4118                                                               compute_mode),
4119                                                 NULL_RTX, 0);
4120                     else
4121                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4122
4123                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4124                        negate the quotient.  */
4125                     if (d < 0)
4126                       {
4127                         insn = get_last_insn ();
4128                         if (insn != last
4129                             && (set = single_set (insn)) != 0
4130                             && SET_DEST (set) == quotient
4131                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4132                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4133                           set_unique_reg_note (insn,
4134                                                REG_EQUAL,
4135                                                gen_rtx_DIV (compute_mode,
4136                                                             op0,
4137                                                             GEN_INT
4138                                                             (trunc_int_for_mode
4139                                                              (abs_d,
4140                                                               compute_mode))));
4141
4142                         quotient = expand_unop (compute_mode, neg_optab,
4143                                                 quotient, quotient, 0);
4144                       }
4145                   }
4146                 else if (size <= HOST_BITS_PER_WIDE_INT)
4147                   {
4148                     choose_multiplier (abs_d, size, size - 1,
4149                                        &mlr, &post_shift, &lgup);
4150                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4151                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4152                       {
4153                         rtx t1, t2, t3;
4154
4155                         if (post_shift >= BITS_PER_WORD
4156                             || size - 1 >= BITS_PER_WORD)
4157                           goto fail1;
4158
4159                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4160                                       + shift_cost[speed][compute_mode][size - 1]
4161                                       + add_cost[speed][compute_mode]);
4162                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4163                                                    NULL_RTX, 0,
4164                                                    max_cost - extra_cost);
4165                         if (t1 == 0)
4166                           goto fail1;
4167                         t2 = expand_shift
4168                           (RSHIFT_EXPR, compute_mode, t1,
4169                            build_int_cst (NULL_TREE, post_shift),
4170                            NULL_RTX, 0);
4171                         t3 = expand_shift
4172                           (RSHIFT_EXPR, compute_mode, op0,
4173                            build_int_cst (NULL_TREE, size - 1),
4174                            NULL_RTX, 0);
4175                         if (d < 0)
4176                           quotient
4177                             = force_operand (gen_rtx_MINUS (compute_mode,
4178                                                             t3, t2),
4179                                              tquotient);
4180                         else
4181                           quotient
4182                             = force_operand (gen_rtx_MINUS (compute_mode,
4183                                                             t2, t3),
4184                                              tquotient);
4185                       }
4186                     else
4187                       {
4188                         rtx t1, t2, t3, t4;
4189
4190                         if (post_shift >= BITS_PER_WORD
4191                             || size - 1 >= BITS_PER_WORD)
4192                           goto fail1;
4193
4194                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4195                         mlr = gen_int_mode (ml, compute_mode);
4196                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4197                                       + shift_cost[speed][compute_mode][size - 1]
4198                                       + 2 * add_cost[speed][compute_mode]);
4199                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4200                                                    NULL_RTX, 0,
4201                                                    max_cost - extra_cost);
4202                         if (t1 == 0)
4203                           goto fail1;
4204                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4205                                                           t1, op0),
4206                                             NULL_RTX);
4207                         t3 = expand_shift
4208                           (RSHIFT_EXPR, compute_mode, t2,
4209                            build_int_cst (NULL_TREE, post_shift),
4210                            NULL_RTX, 0);
4211                         t4 = expand_shift
4212                           (RSHIFT_EXPR, compute_mode, op0,
4213                            build_int_cst (NULL_TREE, size - 1),
4214                            NULL_RTX, 0);
4215                         if (d < 0)
4216                           quotient
4217                             = force_operand (gen_rtx_MINUS (compute_mode,
4218                                                             t4, t3),
4219                                              tquotient);
4220                         else
4221                           quotient
4222                             = force_operand (gen_rtx_MINUS (compute_mode,
4223                                                             t3, t4),
4224                                              tquotient);
4225                       }
4226                   }
4227                 else            /* Too wide mode to use tricky code */
4228                   break;
4229
4230                 insn = get_last_insn ();
4231                 if (insn != last
4232                     && (set = single_set (insn)) != 0
4233                     && SET_DEST (set) == quotient)
4234                   set_unique_reg_note (insn,
4235                                        REG_EQUAL,
4236                                        gen_rtx_DIV (compute_mode, op0, op1));
4237               }
4238             break;
4239           }
4240       fail1:
4241         delete_insns_since (last);
4242         break;
4243
4244       case FLOOR_DIV_EXPR:
4245       case FLOOR_MOD_EXPR:
4246       /* We will come here only for signed operations.  */
4247         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4248           {
4249             unsigned HOST_WIDE_INT mh;
4250             int pre_shift, lgup, post_shift;
4251             HOST_WIDE_INT d = INTVAL (op1);
4252             rtx ml;
4253
4254             if (d > 0)
4255               {
4256                 /* We could just as easily deal with negative constants here,
4257                    but it does not seem worth the trouble for GCC 2.6.  */
4258                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4259                   {
4260                     pre_shift = floor_log2 (d);
4261                     if (rem_flag)
4262                       {
4263                         remainder = expand_binop (compute_mode, and_optab, op0,
4264                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4265                                                   remainder, 0, OPTAB_LIB_WIDEN);
4266                         if (remainder)
4267                           return gen_lowpart (mode, remainder);
4268                       }
4269                     quotient = expand_shift
4270                       (RSHIFT_EXPR, compute_mode, op0,
4271                        build_int_cst (NULL_TREE, pre_shift),
4272                        tquotient, 0);
4273                   }
4274                 else
4275                   {
4276                     rtx t1, t2, t3, t4;
4277
4278                     mh = choose_multiplier (d, size, size - 1,
4279                                             &ml, &post_shift, &lgup);
4280                     gcc_assert (!mh);
4281
4282                     if (post_shift < BITS_PER_WORD
4283                         && size - 1 < BITS_PER_WORD)
4284                       {
4285                         t1 = expand_shift
4286                           (RSHIFT_EXPR, compute_mode, op0,
4287                            build_int_cst (NULL_TREE, size - 1),
4288                            NULL_RTX, 0);
4289                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4290                                            NULL_RTX, 0, OPTAB_WIDEN);
4291                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4292                                       + shift_cost[speed][compute_mode][size - 1]
4293                                       + 2 * add_cost[speed][compute_mode]);
4294                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4295                                                    NULL_RTX, 1,
4296                                                    max_cost - extra_cost);
4297                         if (t3 != 0)
4298                           {
4299                             t4 = expand_shift
4300                               (RSHIFT_EXPR, compute_mode, t3,
4301                                build_int_cst (NULL_TREE, post_shift),
4302                                NULL_RTX, 1);
4303                             quotient = expand_binop (compute_mode, xor_optab,
4304                                                      t4, t1, tquotient, 0,
4305                                                      OPTAB_WIDEN);
4306                           }
4307                       }
4308                   }
4309               }
4310             else
4311               {
4312                 rtx nsign, t1, t2, t3, t4;
4313                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4314                                                   op0, constm1_rtx), NULL_RTX);
4315                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4316                                    0, OPTAB_WIDEN);
4317                 nsign = expand_shift
4318                   (RSHIFT_EXPR, compute_mode, t2,
4319                    build_int_cst (NULL_TREE, size - 1),
4320                    NULL_RTX, 0);
4321                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4322                                     NULL_RTX);
4323                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4324                                     NULL_RTX, 0);
4325                 if (t4)
4326                   {
4327                     rtx t5;
4328                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4329                                       NULL_RTX, 0);
4330                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4331                                                             t4, t5),
4332                                               tquotient);
4333                   }
4334               }
4335           }
4336
4337         if (quotient != 0)
4338           break;
4339         delete_insns_since (last);
4340
4341         /* Try using an instruction that produces both the quotient and
4342            remainder, using truncation.  We can easily compensate the quotient
4343            or remainder to get floor rounding, once we have the remainder.
4344            Notice that we compute also the final remainder value here,
4345            and return the result right away.  */
4346         if (target == 0 || GET_MODE (target) != compute_mode)
4347           target = gen_reg_rtx (compute_mode);
4348
4349         if (rem_flag)
4350           {
4351             remainder
4352               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4353             quotient = gen_reg_rtx (compute_mode);
4354           }
4355         else
4356           {
4357             quotient
4358               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4359             remainder = gen_reg_rtx (compute_mode);
4360           }
4361
4362         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4363                                  quotient, remainder, 0))
4364           {
4365             /* This could be computed with a branch-less sequence.
4366                Save that for later.  */
4367             rtx tem;
4368             rtx label = gen_label_rtx ();
4369             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4370             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4371                                 NULL_RTX, 0, OPTAB_WIDEN);
4372             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4373             expand_dec (quotient, const1_rtx);
4374             expand_inc (remainder, op1);
4375             emit_label (label);
4376             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4377           }
4378
4379         /* No luck with division elimination or divmod.  Have to do it
4380            by conditionally adjusting op0 *and* the result.  */
4381         {
4382           rtx label1, label2, label3, label4, label5;
4383           rtx adjusted_op0;
4384           rtx tem;
4385
4386           quotient = gen_reg_rtx (compute_mode);
4387           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4388           label1 = gen_label_rtx ();
4389           label2 = gen_label_rtx ();
4390           label3 = gen_label_rtx ();
4391           label4 = gen_label_rtx ();
4392           label5 = gen_label_rtx ();
4393           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4394           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4395           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4396                               quotient, 0, OPTAB_LIB_WIDEN);
4397           if (tem != quotient)
4398             emit_move_insn (quotient, tem);
4399           emit_jump_insn (gen_jump (label5));
4400           emit_barrier ();
4401           emit_label (label1);
4402           expand_inc (adjusted_op0, const1_rtx);
4403           emit_jump_insn (gen_jump (label4));
4404           emit_barrier ();
4405           emit_label (label2);
4406           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4407           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4408                               quotient, 0, OPTAB_LIB_WIDEN);
4409           if (tem != quotient)
4410             emit_move_insn (quotient, tem);
4411           emit_jump_insn (gen_jump (label5));
4412           emit_barrier ();
4413           emit_label (label3);
4414           expand_dec (adjusted_op0, const1_rtx);
4415           emit_label (label4);
4416           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4417                               quotient, 0, OPTAB_LIB_WIDEN);
4418           if (tem != quotient)
4419             emit_move_insn (quotient, tem);
4420           expand_dec (quotient, const1_rtx);
4421           emit_label (label5);
4422         }
4423         break;
4424
4425       case CEIL_DIV_EXPR:
4426       case CEIL_MOD_EXPR:
4427         if (unsignedp)
4428           {
4429             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4430               {
4431                 rtx t1, t2, t3;
4432                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4433                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4434                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4435                                    tquotient, 1);
4436                 t2 = expand_binop (compute_mode, and_optab, op0,
4437                                    GEN_INT (d - 1),
4438                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4439                 t3 = gen_reg_rtx (compute_mode);
4440                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4441                                       compute_mode, 1, 1);
4442                 if (t3 == 0)
4443                   {
4444                     rtx lab;
4445                     lab = gen_label_rtx ();
4446                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4447                     expand_inc (t1, const1_rtx);
4448                     emit_label (lab);
4449                     quotient = t1;
4450                   }
4451                 else
4452                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4453                                                           t1, t3),
4454                                             tquotient);
4455                 break;
4456               }
4457
4458             /* Try using an instruction that produces both the quotient and
4459                remainder, using truncation.  We can easily compensate the
4460                quotient or remainder to get ceiling rounding, once we have the
4461                remainder.  Notice that we compute also the final remainder
4462                value here, and return the result right away.  */
4463             if (target == 0 || GET_MODE (target) != compute_mode)
4464               target = gen_reg_rtx (compute_mode);
4465
4466             if (rem_flag)
4467               {
4468                 remainder = (REG_P (target)
4469                              ? target : gen_reg_rtx (compute_mode));
4470                 quotient = gen_reg_rtx (compute_mode);
4471               }
4472             else
4473               {
4474                 quotient = (REG_P (target)
4475                             ? target : gen_reg_rtx (compute_mode));
4476                 remainder = gen_reg_rtx (compute_mode);
4477               }
4478
4479             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4480                                      remainder, 1))
4481               {
4482                 /* This could be computed with a branch-less sequence.
4483                    Save that for later.  */
4484                 rtx label = gen_label_rtx ();
4485                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4486                                  compute_mode, label);
4487                 expand_inc (quotient, const1_rtx);
4488                 expand_dec (remainder, op1);
4489                 emit_label (label);
4490                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4491               }
4492
4493             /* No luck with division elimination or divmod.  Have to do it
4494                by conditionally adjusting op0 *and* the result.  */
4495             {
4496               rtx label1, label2;
4497               rtx adjusted_op0, tem;
4498
4499               quotient = gen_reg_rtx (compute_mode);
4500               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4501               label1 = gen_label_rtx ();
4502               label2 = gen_label_rtx ();
4503               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4504                                compute_mode, label1);
4505               emit_move_insn  (quotient, const0_rtx);
4506               emit_jump_insn (gen_jump (label2));
4507               emit_barrier ();
4508               emit_label (label1);
4509               expand_dec (adjusted_op0, const1_rtx);
4510               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4511                                   quotient, 1, OPTAB_LIB_WIDEN);
4512               if (tem != quotient)
4513                 emit_move_insn (quotient, tem);
4514               expand_inc (quotient, const1_rtx);
4515               emit_label (label2);
4516             }
4517           }
4518         else /* signed */
4519           {
4520             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4521                 && INTVAL (op1) >= 0)
4522               {
4523                 /* This is extremely similar to the code for the unsigned case
4524                    above.  For 2.7 we should merge these variants, but for
4525                    2.6.1 I don't want to touch the code for unsigned since that
4526                    get used in C.  The signed case will only be used by other
4527                    languages (Ada).  */
4528
4529                 rtx t1, t2, t3;
4530                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4531                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4532                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4533                                    tquotient, 0);
4534                 t2 = expand_binop (compute_mode, and_optab, op0,
4535                                    GEN_INT (d - 1),
4536                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4537                 t3 = gen_reg_rtx (compute_mode);
4538                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4539                                       compute_mode, 1, 1);
4540                 if (t3 == 0)
4541                   {
4542                     rtx lab;
4543                     lab = gen_label_rtx ();
4544                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4545                     expand_inc (t1, const1_rtx);
4546                     emit_label (lab);
4547                     quotient = t1;
4548                   }
4549                 else
4550                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4551                                                           t1, t3),
4552                                             tquotient);
4553                 break;
4554               }
4555
4556             /* Try using an instruction that produces both the quotient and
4557                remainder, using truncation.  We can easily compensate the
4558                quotient or remainder to get ceiling rounding, once we have the
4559                remainder.  Notice that we compute also the final remainder
4560                value here, and return the result right away.  */
4561             if (target == 0 || GET_MODE (target) != compute_mode)
4562               target = gen_reg_rtx (compute_mode);
4563             if (rem_flag)
4564               {
4565                 remainder= (REG_P (target)
4566                             ? target : gen_reg_rtx (compute_mode));
4567                 quotient = gen_reg_rtx (compute_mode);
4568               }
4569             else
4570               {
4571                 quotient = (REG_P (target)
4572                             ? target : gen_reg_rtx (compute_mode));
4573                 remainder = gen_reg_rtx (compute_mode);
4574               }
4575
4576             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4577                                      remainder, 0))
4578               {
4579                 /* This could be computed with a branch-less sequence.
4580                    Save that for later.  */
4581                 rtx tem;
4582                 rtx label = gen_label_rtx ();
4583                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4584                                  compute_mode, label);
4585                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4586                                     NULL_RTX, 0, OPTAB_WIDEN);
4587                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4588                 expand_inc (quotient, const1_rtx);
4589                 expand_dec (remainder, op1);
4590                 emit_label (label);
4591                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4592               }
4593
4594             /* No luck with division elimination or divmod.  Have to do it
4595                by conditionally adjusting op0 *and* the result.  */
4596             {
4597               rtx label1, label2, label3, label4, label5;
4598               rtx adjusted_op0;
4599               rtx tem;
4600
4601               quotient = gen_reg_rtx (compute_mode);
4602               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4603               label1 = gen_label_rtx ();
4604               label2 = gen_label_rtx ();
4605               label3 = gen_label_rtx ();
4606               label4 = gen_label_rtx ();
4607               label5 = gen_label_rtx ();
4608               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4609               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4610                                compute_mode, label1);
4611               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4612                                   quotient, 0, OPTAB_LIB_WIDEN);
4613               if (tem != quotient)
4614                 emit_move_insn (quotient, tem);
4615               emit_jump_insn (gen_jump (label5));
4616               emit_barrier ();
4617               emit_label (label1);
4618               expand_dec (adjusted_op0, const1_rtx);
4619               emit_jump_insn (gen_jump (label4));
4620               emit_barrier ();
4621               emit_label (label2);
4622               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4623                                compute_mode, label3);
4624               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4625                                   quotient, 0, OPTAB_LIB_WIDEN);
4626               if (tem != quotient)
4627                 emit_move_insn (quotient, tem);
4628               emit_jump_insn (gen_jump (label5));
4629               emit_barrier ();
4630               emit_label (label3);
4631               expand_inc (adjusted_op0, const1_rtx);
4632               emit_label (label4);
4633               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4634                                   quotient, 0, OPTAB_LIB_WIDEN);
4635               if (tem != quotient)
4636                 emit_move_insn (quotient, tem);
4637               expand_inc (quotient, const1_rtx);
4638               emit_label (label5);
4639             }
4640           }
4641         break;
4642
4643       case EXACT_DIV_EXPR:
4644         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4645           {
4646             HOST_WIDE_INT d = INTVAL (op1);
4647             unsigned HOST_WIDE_INT ml;
4648             int pre_shift;
4649             rtx t1;
4650
4651             pre_shift = floor_log2 (d & -d);
4652             ml = invert_mod2n (d >> pre_shift, size);
4653             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4654                                build_int_cst (NULL_TREE, pre_shift),
4655                                NULL_RTX, unsignedp);
4656             quotient = expand_mult (compute_mode, t1,
4657                                     gen_int_mode (ml, compute_mode),
4658                                     NULL_RTX, 1);
4659
4660             insn = get_last_insn ();
4661             set_unique_reg_note (insn,
4662                                  REG_EQUAL,
4663                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4664                                                  compute_mode,
4665                                                  op0, op1));
4666           }
4667         break;
4668
4669       case ROUND_DIV_EXPR:
4670       case ROUND_MOD_EXPR:
4671         if (unsignedp)
4672           {
4673             rtx tem;
4674             rtx label;
4675             label = gen_label_rtx ();
4676             quotient = gen_reg_rtx (compute_mode);
4677             remainder = gen_reg_rtx (compute_mode);
4678             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4679               {
4680                 rtx tem;
4681                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4682                                          quotient, 1, OPTAB_LIB_WIDEN);
4683                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4684                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4685                                           remainder, 1, OPTAB_LIB_WIDEN);
4686               }
4687             tem = plus_constant (op1, -1);
4688             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4689                                 build_int_cst (NULL_TREE, 1),
4690                                 NULL_RTX, 1);
4691             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4692             expand_inc (quotient, const1_rtx);
4693             expand_dec (remainder, op1);
4694             emit_label (label);
4695           }
4696         else
4697           {
4698             rtx abs_rem, abs_op1, tem, mask;
4699             rtx label;
4700             label = gen_label_rtx ();
4701             quotient = gen_reg_rtx (compute_mode);
4702             remainder = gen_reg_rtx (compute_mode);
4703             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4704               {
4705                 rtx tem;
4706                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4707                                          quotient, 0, OPTAB_LIB_WIDEN);
4708                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4709                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4710                                           remainder, 0, OPTAB_LIB_WIDEN);
4711               }
4712             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4713             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4714             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4715                                 build_int_cst (NULL_TREE, 1),
4716                                 NULL_RTX, 1);
4717             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4718             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4719                                 NULL_RTX, 0, OPTAB_WIDEN);
4720             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4721                                  build_int_cst (NULL_TREE, size - 1),
4722                                  NULL_RTX, 0);
4723             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4724                                 NULL_RTX, 0, OPTAB_WIDEN);
4725             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4726                                 NULL_RTX, 0, OPTAB_WIDEN);
4727             expand_inc (quotient, tem);
4728             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4729                                 NULL_RTX, 0, OPTAB_WIDEN);
4730             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4731                                 NULL_RTX, 0, OPTAB_WIDEN);
4732             expand_dec (remainder, tem);
4733             emit_label (label);
4734           }
4735         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4736
4737       default:
4738         gcc_unreachable ();
4739       }
4740
4741   if (quotient == 0)
4742     {
4743       if (target && GET_MODE (target) != compute_mode)
4744         target = 0;
4745
4746       if (rem_flag)
4747         {
4748           /* Try to produce the remainder without producing the quotient.
4749              If we seem to have a divmod pattern that does not require widening,
4750              don't try widening here.  We should really have a WIDEN argument
4751              to expand_twoval_binop, since what we'd really like to do here is
4752              1) try a mod insn in compute_mode
4753              2) try a divmod insn in compute_mode
4754              3) try a div insn in compute_mode and multiply-subtract to get
4755                 remainder
4756              4) try the same things with widening allowed.  */
4757           remainder
4758             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4759                                  op0, op1, target,
4760                                  unsignedp,
4761                                  ((optab_handler (optab2, compute_mode)->insn_code
4762                                    != CODE_FOR_nothing)
4763                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4764           if (remainder == 0)
4765             {
4766               /* No luck there.  Can we do remainder and divide at once
4767                  without a library call?  */
4768               remainder = gen_reg_rtx (compute_mode);
4769               if (! expand_twoval_binop ((unsignedp
4770                                           ? udivmod_optab
4771                                           : sdivmod_optab),
4772                                          op0, op1,
4773                                          NULL_RTX, remainder, unsignedp))
4774                 remainder = 0;
4775             }
4776
4777           if (remainder)
4778             return gen_lowpart (mode, remainder);
4779         }
4780
4781       /* Produce the quotient.  Try a quotient insn, but not a library call.
4782          If we have a divmod in this mode, use it in preference to widening
4783          the div (for this test we assume it will not fail). Note that optab2
4784          is set to the one of the two optabs that the call below will use.  */
4785       quotient
4786         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4787                              op0, op1, rem_flag ? NULL_RTX : target,
4788                              unsignedp,
4789                              ((optab_handler (optab2, compute_mode)->insn_code
4790                                != CODE_FOR_nothing)
4791                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4792
4793       if (quotient == 0)
4794         {
4795           /* No luck there.  Try a quotient-and-remainder insn,
4796              keeping the quotient alone.  */
4797           quotient = gen_reg_rtx (compute_mode);
4798           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4799                                      op0, op1,
4800                                      quotient, NULL_RTX, unsignedp))
4801             {
4802               quotient = 0;
4803               if (! rem_flag)
4804                 /* Still no luck.  If we are not computing the remainder,
4805                    use a library call for the quotient.  */
4806                 quotient = sign_expand_binop (compute_mode,
4807                                               udiv_optab, sdiv_optab,
4808                                               op0, op1, target,
4809                                               unsignedp, OPTAB_LIB_WIDEN);
4810             }
4811         }
4812     }
4813
4814   if (rem_flag)
4815     {
4816       if (target && GET_MODE (target) != compute_mode)
4817         target = 0;
4818
4819       if (quotient == 0)
4820         {
4821           /* No divide instruction either.  Use library for remainder.  */
4822           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4823                                          op0, op1, target,
4824                                          unsignedp, OPTAB_LIB_WIDEN);
4825           /* No remainder function.  Try a quotient-and-remainder
4826              function, keeping the remainder.  */
4827           if (!remainder)
4828             {
4829               remainder = gen_reg_rtx (compute_mode);
4830               if (!expand_twoval_binop_libfunc
4831                   (unsignedp ? udivmod_optab : sdivmod_optab,
4832                    op0, op1,
4833                    NULL_RTX, remainder,
4834                    unsignedp ? UMOD : MOD))
4835                 remainder = NULL_RTX;
4836             }
4837         }
4838       else
4839         {
4840           /* We divided.  Now finish doing X - Y * (X / Y).  */
4841           remainder = expand_mult (compute_mode, quotient, op1,
4842                                    NULL_RTX, unsignedp);
4843           remainder = expand_binop (compute_mode, sub_optab, op0,
4844                                     remainder, target, unsignedp,
4845                                     OPTAB_LIB_WIDEN);
4846         }
4847     }
4848
4849   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4850 }
4851 \f
4852 /* Return a tree node with data type TYPE, describing the value of X.
4853    Usually this is an VAR_DECL, if there is no obvious better choice.
4854    X may be an expression, however we only support those expressions
4855    generated by loop.c.  */
4856
4857 tree
4858 make_tree (tree type, rtx x)
4859 {
4860   tree t;
4861
4862   switch (GET_CODE (x))
4863     {
4864     case CONST_INT:
4865       {
4866         HOST_WIDE_INT hi = 0;
4867
4868         if (INTVAL (x) < 0
4869             && !(TYPE_UNSIGNED (type)
4870                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4871                      < HOST_BITS_PER_WIDE_INT)))
4872           hi = -1;
4873
4874         t = build_int_cst_wide (type, INTVAL (x), hi);
4875
4876         return t;
4877       }
4878
4879     case CONST_DOUBLE:
4880       if (GET_MODE (x) == VOIDmode)
4881         t = build_int_cst_wide (type,
4882                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4883       else
4884         {
4885           REAL_VALUE_TYPE d;
4886
4887           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4888           t = build_real (type, d);
4889         }
4890
4891       return t;
4892
4893     case CONST_VECTOR:
4894       {
4895         int units = CONST_VECTOR_NUNITS (x);
4896         tree itype = TREE_TYPE (type);
4897         tree t = NULL_TREE;
4898         int i;
4899
4900
4901         /* Build a tree with vector elements.  */
4902         for (i = units - 1; i >= 0; --i)
4903           {
4904             rtx elt = CONST_VECTOR_ELT (x, i);
4905             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4906           }
4907
4908         return build_vector (type, t);
4909       }
4910
4911     case PLUS:
4912       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4913                           make_tree (type, XEXP (x, 1)));
4914
4915     case MINUS:
4916       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4917                           make_tree (type, XEXP (x, 1)));
4918
4919     case NEG:
4920       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4921
4922     case MULT:
4923       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4924                           make_tree (type, XEXP (x, 1)));
4925
4926     case ASHIFT:
4927       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4928                           make_tree (type, XEXP (x, 1)));
4929
4930     case LSHIFTRT:
4931       t = unsigned_type_for (type);
4932       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4933                                          make_tree (t, XEXP (x, 0)),
4934                                          make_tree (type, XEXP (x, 1))));
4935
4936     case ASHIFTRT:
4937       t = signed_type_for (type);
4938       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4939                                          make_tree (t, XEXP (x, 0)),
4940                                          make_tree (type, XEXP (x, 1))));
4941
4942     case DIV:
4943       if (TREE_CODE (type) != REAL_TYPE)
4944         t = signed_type_for (type);
4945       else
4946         t = type;
4947
4948       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4949                                          make_tree (t, XEXP (x, 0)),
4950                                          make_tree (t, XEXP (x, 1))));
4951     case UDIV:
4952       t = unsigned_type_for (type);
4953       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4954                                          make_tree (t, XEXP (x, 0)),
4955                                          make_tree (t, XEXP (x, 1))));
4956
4957     case SIGN_EXTEND:
4958     case ZERO_EXTEND:
4959       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4960                                           GET_CODE (x) == ZERO_EXTEND);
4961       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4962
4963     case CONST:
4964       return make_tree (type, XEXP (x, 0));
4965
4966     case SYMBOL_REF:
4967       t = SYMBOL_REF_DECL (x);
4968       if (t)
4969         return fold_convert (type, build_fold_addr_expr (t));
4970       /* else fall through.  */
4971
4972     default:
4973       t = build_decl (VAR_DECL, NULL_TREE, type);
4974
4975       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4976          ptr_mode.  So convert.  */
4977       if (POINTER_TYPE_P (type))
4978         x = convert_memory_address (TYPE_MODE (type), x);
4979
4980       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4981          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4982       t->decl_with_rtl.rtl = x;
4983
4984       return t;
4985     }
4986 }
4987 \f
4988 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4989    and returning TARGET.
4990
4991    If TARGET is 0, a pseudo-register or constant is returned.  */
4992
4993 rtx
4994 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4995 {
4996   rtx tem = 0;
4997
4998   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4999     tem = simplify_binary_operation (AND, mode, op0, op1);
5000   if (tem == 0)
5001     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5002
5003   if (target == 0)
5004     target = tem;
5005   else if (tem != target)
5006     emit_move_insn (target, tem);
5007   return target;
5008 }
5009 \f
5010 /* Helper function for emit_store_flag.  */
5011 static rtx
5012 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
5013                    int normalizep)
5014 {
5015   rtx op0;
5016   enum machine_mode target_mode = GET_MODE (target);
5017
5018   /* If we are converting to a wider mode, first convert to
5019      TARGET_MODE, then normalize.  This produces better combining
5020      opportunities on machines that have a SIGN_EXTRACT when we are
5021      testing a single bit.  This mostly benefits the 68k.
5022
5023      If STORE_FLAG_VALUE does not have the sign bit set when
5024      interpreted in MODE, we can do this conversion as unsigned, which
5025      is usually more efficient.  */
5026   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5027     {
5028       convert_move (target, subtarget,
5029                     (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5030                     && 0 == (STORE_FLAG_VALUE
5031                              & ((HOST_WIDE_INT) 1
5032                                 << (GET_MODE_BITSIZE (mode) -1))));
5033       op0 = target;
5034       mode = target_mode;
5035     }
5036   else
5037     op0 = subtarget;
5038
5039   /* If we want to keep subexpressions around, don't reuse our last
5040      target.  */
5041   if (optimize)
5042     subtarget = 0;
5043
5044   /* Now normalize to the proper value in MODE.  Sometimes we don't
5045      have to do anything.  */
5046   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5047     ;
5048   /* STORE_FLAG_VALUE might be the most negative number, so write
5049      the comparison this way to avoid a compiler-time warning.  */
5050   else if (- normalizep == STORE_FLAG_VALUE)
5051     op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5052
5053   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5054      it hard to use a value of just the sign bit due to ANSI integer
5055      constant typing rules.  */
5056   else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5057            && (STORE_FLAG_VALUE
5058                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
5059     op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5060                         size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
5061                         normalizep == 1);
5062   else
5063     {
5064       gcc_assert (STORE_FLAG_VALUE & 1);
5065
5066       op0 = expand_and (mode, op0, const1_rtx, subtarget);
5067       if (normalizep == -1)
5068         op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5069     }
5070
5071   /* If we were converting to a smaller mode, do the conversion now.  */
5072   if (target_mode != mode)
5073     {
5074       convert_move (target, op0, 0);
5075       return target;
5076     }
5077   else
5078     return op0;
5079 }
5080
5081 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5082    and storing in TARGET.  Normally return TARGET.
5083    Return 0 if that cannot be done.
5084
5085    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5086    it is VOIDmode, they cannot both be CONST_INT.
5087
5088    UNSIGNEDP is for the case where we have to widen the operands
5089    to perform the operation.  It says to use zero-extension.
5090
5091    NORMALIZEP is 1 if we should convert the result to be either zero
5092    or one.  Normalize is -1 if we should convert the result to be
5093    either zero or -1.  If NORMALIZEP is zero, the result will be left
5094    "raw" out of the scc insn.  */
5095
5096 rtx
5097 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5098                  enum machine_mode mode, int unsignedp, int normalizep)
5099 {
5100   rtx subtarget;
5101   enum insn_code icode;
5102   enum machine_mode compare_mode;
5103   enum machine_mode target_mode = GET_MODE (target);
5104   rtx tem;
5105   rtx last = get_last_insn ();
5106   rtx pattern, comparison;
5107
5108   if (unsignedp)
5109     code = unsigned_condition (code);
5110
5111   /* If one operand is constant, make it the second one.  Only do this
5112      if the other operand is not constant as well.  */
5113
5114   if (swap_commutative_operands_p (op0, op1))
5115     {
5116       tem = op0;
5117       op0 = op1;
5118       op1 = tem;
5119       code = swap_condition (code);
5120     }
5121
5122   if (mode == VOIDmode)
5123     mode = GET_MODE (op0);
5124
5125   /* For some comparisons with 1 and -1, we can convert this to
5126      comparisons with zero.  This will often produce more opportunities for
5127      store-flag insns.  */
5128
5129   switch (code)
5130     {
5131     case LT:
5132       if (op1 == const1_rtx)
5133         op1 = const0_rtx, code = LE;
5134       break;
5135     case LE:
5136       if (op1 == constm1_rtx)
5137         op1 = const0_rtx, code = LT;
5138       break;
5139     case GE:
5140       if (op1 == const1_rtx)
5141         op1 = const0_rtx, code = GT;
5142       break;
5143     case GT:
5144       if (op1 == constm1_rtx)
5145         op1 = const0_rtx, code = GE;
5146       break;
5147     case GEU:
5148       if (op1 == const1_rtx)
5149         op1 = const0_rtx, code = NE;
5150       break;
5151     case LTU:
5152       if (op1 == const1_rtx)
5153         op1 = const0_rtx, code = EQ;
5154       break;
5155     default:
5156       break;
5157     }
5158
5159   /* If we are comparing a double-word integer with zero or -1, we can
5160      convert the comparison into one involving a single word.  */
5161   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5162       && GET_MODE_CLASS (mode) == MODE_INT
5163       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5164     {
5165       if ((code == EQ || code == NE)
5166           && (op1 == const0_rtx || op1 == constm1_rtx))
5167         {
5168           rtx op00, op01, op0both;
5169
5170           /* Do a logical OR or AND of the two words and compare the
5171              result.  */
5172           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5173           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5174           op0both = expand_binop (word_mode,
5175                                   op1 == const0_rtx ? ior_optab : and_optab,
5176                                   op00, op01, NULL_RTX, unsignedp,
5177                                   OPTAB_DIRECT);
5178
5179           if (op0both != 0)
5180             return emit_store_flag (target, code, op0both, op1, word_mode,
5181                                     unsignedp, normalizep);
5182         }
5183       else if ((code == LT || code == GE) && op1 == const0_rtx)
5184         {
5185           rtx op0h;
5186
5187           /* If testing the sign bit, can just test on high word.  */
5188           op0h = simplify_gen_subreg (word_mode, op0, mode,
5189                                       subreg_highpart_offset (word_mode,
5190                                                               mode));
5191           return emit_store_flag (target, code, op0h, op1, word_mode,
5192                                   unsignedp, normalizep);
5193         }
5194     }
5195
5196   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5197      complement of A (for GE) and shifting the sign bit to the low bit.  */
5198   if (op1 == const0_rtx && (code == LT || code == GE)
5199       && GET_MODE_CLASS (mode) == MODE_INT
5200       && (normalizep || STORE_FLAG_VALUE == 1
5201           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5202               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5203                   == ((unsigned HOST_WIDE_INT) 1
5204                       << (GET_MODE_BITSIZE (mode) - 1))))))
5205     {
5206       subtarget = target;
5207
5208       /* If the result is to be wider than OP0, it is best to convert it
5209          first.  If it is to be narrower, it is *incorrect* to convert it
5210          first.  */
5211       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5212         {
5213           op0 = convert_modes (target_mode, mode, op0, 0);
5214           mode = target_mode;
5215         }
5216
5217       if (target_mode != mode)
5218         subtarget = 0;
5219
5220       if (code == GE)
5221         op0 = expand_unop (mode, one_cmpl_optab, op0,
5222                            ((STORE_FLAG_VALUE == 1 || normalizep)
5223                             ? 0 : subtarget), 0);
5224
5225       if (STORE_FLAG_VALUE == 1 || normalizep)
5226         /* If we are supposed to produce a 0/1 value, we want to do
5227            a logical shift from the sign bit to the low-order bit; for
5228            a -1/0 value, we do an arithmetic shift.  */
5229         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5230                             size_int (GET_MODE_BITSIZE (mode) - 1),
5231                             subtarget, normalizep != -1);
5232
5233       if (mode != target_mode)
5234         op0 = convert_modes (target_mode, mode, op0, 0);
5235
5236       return op0;
5237     }
5238
5239   icode = setcc_gen_code[(int) code];
5240
5241   if (icode != CODE_FOR_nothing)
5242     {
5243       insn_operand_predicate_fn pred;
5244
5245       /* We think we may be able to do this with a scc insn.  Emit the
5246          comparison and then the scc insn.  */
5247
5248       do_pending_stack_adjust ();
5249       last = get_last_insn ();
5250
5251       comparison
5252         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5253       if (CONSTANT_P (comparison))
5254         {
5255           switch (GET_CODE (comparison))
5256             {
5257             case CONST_INT:
5258               if (comparison == const0_rtx)
5259                 return const0_rtx;
5260               break;
5261
5262 #ifdef FLOAT_STORE_FLAG_VALUE
5263             case CONST_DOUBLE:
5264               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5265                 return const0_rtx;
5266               break;
5267 #endif
5268             default:
5269               gcc_unreachable ();
5270             }
5271
5272           if (normalizep == 1)
5273             return const1_rtx;
5274           if (normalizep == -1)
5275             return constm1_rtx;
5276           return const_true_rtx;
5277         }
5278
5279       /* The code of COMPARISON may not match CODE if compare_from_rtx
5280          decided to swap its operands and reverse the original code.
5281
5282          We know that compare_from_rtx returns either a CONST_INT or
5283          a new comparison code, so it is safe to just extract the
5284          code from COMPARISON.  */
5285       code = GET_CODE (comparison);
5286
5287       /* Get a reference to the target in the proper mode for this insn.  */
5288       compare_mode = insn_data[(int) icode].operand[0].mode;
5289       subtarget = target;
5290       pred = insn_data[(int) icode].operand[0].predicate;
5291       if (optimize || ! (*pred) (subtarget, compare_mode))
5292         subtarget = gen_reg_rtx (compare_mode);
5293
5294       pattern = GEN_FCN (icode) (subtarget);
5295       if (pattern)
5296         {
5297           emit_insn (pattern);
5298           return emit_store_flag_1 (target, subtarget, compare_mode,
5299                                     normalizep);
5300         }
5301     }
5302   else
5303     {
5304       /* We don't have an scc insn, so try a cstore insn.  */
5305
5306       for (compare_mode = mode; compare_mode != VOIDmode;
5307            compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5308         {
5309           icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5310           if (icode != CODE_FOR_nothing)
5311             break;
5312         }
5313
5314       if (icode != CODE_FOR_nothing)
5315         {
5316           enum machine_mode result_mode
5317             = insn_data[(int) icode].operand[0].mode;
5318           rtx cstore_op0 = op0;
5319           rtx cstore_op1 = op1;
5320
5321           do_pending_stack_adjust ();
5322           last = get_last_insn ();
5323
5324           if (compare_mode != mode)
5325             {
5326               cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5327                                           unsignedp);
5328               cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
5329                                           unsignedp);
5330             }
5331
5332           if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5333                                                             compare_mode))
5334             cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5335
5336           if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5337                                                             compare_mode))
5338             cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5339
5340           comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5341                                        cstore_op1);
5342           subtarget = target;
5343
5344           if (optimize || !(insn_data[(int) icode].operand[0].predicate
5345                             (subtarget, result_mode)))
5346             subtarget = gen_reg_rtx (result_mode);
5347
5348           pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5349                                      cstore_op1);
5350
5351           if (pattern)
5352             {
5353               emit_insn (pattern);
5354               return emit_store_flag_1 (target, subtarget, result_mode,
5355                                         normalizep);
5356             }
5357         }
5358     }
5359
5360   delete_insns_since (last);
5361
5362   /* If optimizing, use different pseudo registers for each insn, instead
5363      of reusing the same pseudo.  This leads to better CSE, but slows
5364      down the compiler, since there are more pseudos */
5365   subtarget = (!optimize
5366                && (target_mode == mode)) ? target : NULL_RTX;
5367
5368   /* If we reached here, we can't do this with a scc insn.  However, there
5369      are some comparisons that can be done directly.  For example, if
5370      this is an equality comparison of integers, we can try to exclusive-or
5371      (or subtract) the two operands and use a recursive call to try the
5372      comparison with zero.  Don't do any of these cases if branches are
5373      very cheap.  */
5374
5375   if (BRANCH_COST (optimize_insn_for_speed_p (),
5376                    false) > 0
5377       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5378       && op1 != const0_rtx)
5379     {
5380       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5381                           OPTAB_WIDEN);
5382
5383       if (tem == 0)
5384         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5385                             OPTAB_WIDEN);
5386       if (tem != 0)
5387         tem = emit_store_flag (target, code, tem, const0_rtx,
5388                                mode, unsignedp, normalizep);
5389       if (tem == 0)
5390         delete_insns_since (last);
5391       return tem;
5392     }
5393
5394   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5395      the constant zero.  Reject all other comparisons at this point.  Only
5396      do LE and GT if branches are expensive since they are expensive on
5397      2-operand machines.  */
5398
5399   if (BRANCH_COST (optimize_insn_for_speed_p (),
5400                    false) == 0
5401       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5402       || (code != EQ && code != NE
5403           && (BRANCH_COST (optimize_insn_for_speed_p (),
5404                            false) <= 1 || (code != LE && code != GT))))
5405     return 0;
5406
5407   /* See what we need to return.  We can only return a 1, -1, or the
5408      sign bit.  */
5409
5410   if (normalizep == 0)
5411     {
5412       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5413         normalizep = STORE_FLAG_VALUE;
5414
5415       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5416                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5417                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5418         ;
5419       else
5420         return 0;
5421     }
5422
5423   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5424      do the necessary operation below.  */
5425
5426   tem = 0;
5427
5428   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5429      the sign bit set.  */
5430
5431   if (code == LE)
5432     {
5433       /* This is destructive, so SUBTARGET can't be OP0.  */
5434       if (rtx_equal_p (subtarget, op0))
5435         subtarget = 0;
5436
5437       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5438                           OPTAB_WIDEN);
5439       if (tem)
5440         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5441                             OPTAB_WIDEN);
5442     }
5443
5444   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5445      number of bits in the mode of OP0, minus one.  */
5446
5447   if (code == GT)
5448     {
5449       if (rtx_equal_p (subtarget, op0))
5450         subtarget = 0;
5451
5452       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5453                           size_int (GET_MODE_BITSIZE (mode) - 1),
5454                           subtarget, 0);
5455       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5456                           OPTAB_WIDEN);
5457     }
5458
5459   if (code == EQ || code == NE)
5460     {
5461       /* For EQ or NE, one way to do the comparison is to apply an operation
5462          that converts the operand into a positive number if it is nonzero
5463          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5464          for NE we negate.  This puts the result in the sign bit.  Then we
5465          normalize with a shift, if needed.
5466
5467          Two operations that can do the above actions are ABS and FFS, so try
5468          them.  If that doesn't work, and MODE is smaller than a full word,
5469          we can use zero-extension to the wider mode (an unsigned conversion)
5470          as the operation.  */
5471
5472       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5473          that is compensated by the subsequent overflow when subtracting
5474          one / negating.  */
5475
5476       if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5477         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5478       else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5479         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5480       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5481         {
5482           tem = convert_modes (word_mode, mode, op0, 1);
5483           mode = word_mode;
5484         }
5485
5486       if (tem != 0)
5487         {
5488           if (code == EQ)
5489             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5490                                 0, OPTAB_WIDEN);
5491           else
5492             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5493         }
5494
5495       /* If we couldn't do it that way, for NE we can "or" the two's complement
5496          of the value with itself.  For EQ, we take the one's complement of
5497          that "or", which is an extra insn, so we only handle EQ if branches
5498          are expensive.  */
5499
5500       if (tem == 0
5501           && (code == NE
5502               || BRANCH_COST (optimize_insn_for_speed_p (),
5503                               false) > 1))
5504         {
5505           if (rtx_equal_p (subtarget, op0))
5506             subtarget = 0;
5507
5508           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5509           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5510                               OPTAB_WIDEN);
5511
5512           if (tem && code == EQ)
5513             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5514         }
5515     }
5516
5517   if (tem && normalizep)
5518     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5519                         size_int (GET_MODE_BITSIZE (mode) - 1),
5520                         subtarget, normalizep == 1);
5521
5522   if (tem)
5523     {
5524       if (GET_MODE (tem) != target_mode)
5525         {
5526           convert_move (target, tem, 0);
5527           tem = target;
5528         }
5529       else if (!subtarget)
5530         {
5531           emit_move_insn (target, tem);
5532           tem = target;
5533         }
5534     }
5535   else
5536     delete_insns_since (last);
5537
5538   return tem;
5539 }
5540
5541 /* Like emit_store_flag, but always succeeds.  */
5542
5543 rtx
5544 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5545                        enum machine_mode mode, int unsignedp, int normalizep)
5546 {
5547   rtx tem, label;
5548
5549   /* First see if emit_store_flag can do the job.  */
5550   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5551   if (tem != 0)
5552     return tem;
5553
5554   if (normalizep == 0)
5555     normalizep = 1;
5556
5557   /* If this failed, we have to do this with set/compare/jump/set code.  */
5558
5559   if (!REG_P (target)
5560       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5561     target = gen_reg_rtx (GET_MODE (target));
5562
5563   emit_move_insn (target, const1_rtx);
5564   label = gen_label_rtx ();
5565   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5566                            NULL_RTX, label);
5567
5568   emit_move_insn (target, const0_rtx);
5569   emit_label (label);
5570
5571   return target;
5572 }
5573 \f
5574 /* Perform possibly multi-word comparison and conditional jump to LABEL
5575    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5576    now a thin wrapper around do_compare_rtx_and_jump.  */
5577
5578 static void
5579 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5580                  rtx label)
5581 {
5582   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5583   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5584                            NULL_RTX, NULL_RTX, label);
5585 }