gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41
  42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  46                                    unsigned HOST_WIDE_INT, rtx);
  47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT,
  50                                     unsigned HOST_WIDE_INT, rtx, int);
  51 static rtx mask_rtx (enum machine_mode, int, int, int);
  52 static rtx lshift_value (enum machine_mode, rtx, int, int);
  53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  54                                     unsigned HOST_WIDE_INT, int);
  55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  58
  59 /* Test whether a value is zero of a power of two.  */
  60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  61
  62 /* Nonzero means divides or modulus operations are relatively cheap for
  63    powers of two, so don't use branches; emit the operation instead.
  64    Usually, this will mean that the MD file will emit non-branch
  65    sequences.  */
  66
  67 static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
  68 static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
  69
  70 #ifndef SLOW_UNALIGNED_ACCESS
  71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  72 #endif
  73
  74 /* For compilers that support multiple targets with different word sizes,
  75    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  76    is the H8/300(H) compiler.  */
  77
  78 #ifndef MAX_BITS_PER_WORD
  79 #define MAX_BITS_PER_WORD BITS_PER_WORD
  80 #endif
  81
  82 /* Reduce conditional compilation elsewhere.  */
  83 #ifndef HAVE_insv
  84 #define HAVE_insv       0
  85 #define CODE_FOR_insv   CODE_FOR_nothing
  86 #define gen_insv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extv
  89 #define HAVE_extv       0
  90 #define CODE_FOR_extv   CODE_FOR_nothing
  91 #define gen_extv(a,b,c,d) NULL_RTX
  92 #endif
  93 #ifndef HAVE_extzv
  94 #define HAVE_extzv      0
  95 #define CODE_FOR_extzv  CODE_FOR_nothing
  96 #define gen_extzv(a,b,c,d) NULL_RTX
  97 #endif
  98
  99 /* Cost of various pieces of RTL.  Note that some of these are indexed by
 100    shift count and some by mode.  */
 101 static int zero_cost[2];
 102 static int add_cost[2][NUM_MACHINE_MODES];
 103 static int neg_cost[2][NUM_MACHINE_MODES];
 104 static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 107 static int mul_cost[2][NUM_MACHINE_MODES];
 108 static int sdiv_cost[2][NUM_MACHINE_MODES];
 109 static int udiv_cost[2][NUM_MACHINE_MODES];
 110 static int mul_widen_cost[2][NUM_MACHINE_MODES];
 111 static int mul_highpart_cost[2][NUM_MACHINE_MODES];
 112
 113 void
 114 init_expmed (void)
 115 {
 116   struct
 117   {
 118     struct rtx_def reg;         rtunion reg_fld[2];
 119     struct rtx_def plus;        rtunion plus_fld1;
 120     struct rtx_def neg;
 121     struct rtx_def mult;        rtunion mult_fld1;
 122     struct rtx_def sdiv;        rtunion sdiv_fld1;
 123     struct rtx_def udiv;        rtunion udiv_fld1;
 124     struct rtx_def zext;
 125     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 126     struct rtx_def smod_32;     rtunion smod_32_fld1;
 127     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 128     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 129     struct rtx_def wide_trunc;
 130     struct rtx_def shift;       rtunion shift_fld1;
 131     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 132     struct rtx_def shift_add;   rtunion shift_add_fld1;
 133     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 134   } all;
 135
 136   rtx pow2[MAX_BITS_PER_WORD];
 137   rtx cint[MAX_BITS_PER_WORD];
 138   int m, n;
 139   enum machine_mode mode, wider_mode;
 140   int speed;
 141
 142
 143   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 144     {
 145       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 146       cint[m] = GEN_INT (m);
 147     }
 148   memset (&all, 0, sizeof all);
 149
 150   PUT_CODE (&all.reg, REG);
 151   /* Avoid using hard regs in ways which may be unsupported.  */
 152   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 153
 154   PUT_CODE (&all.plus, PLUS);
 155   XEXP (&all.plus, 0) = &all.reg;
 156   XEXP (&all.plus, 1) = &all.reg;
 157
 158   PUT_CODE (&all.neg, NEG);
 159   XEXP (&all.neg, 0) = &all.reg;
 160
 161   PUT_CODE (&all.mult, MULT);
 162   XEXP (&all.mult, 0) = &all.reg;
 163   XEXP (&all.mult, 1) = &all.reg;
 164
 165   PUT_CODE (&all.sdiv, DIV);
 166   XEXP (&all.sdiv, 0) = &all.reg;
 167   XEXP (&all.sdiv, 1) = &all.reg;
 168
 169   PUT_CODE (&all.udiv, UDIV);
 170   XEXP (&all.udiv, 0) = &all.reg;
 171   XEXP (&all.udiv, 1) = &all.reg;
 172
 173   PUT_CODE (&all.sdiv_32, DIV);
 174   XEXP (&all.sdiv_32, 0) = &all.reg;
 175   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 176
 177   PUT_CODE (&all.smod_32, MOD);
 178   XEXP (&all.smod_32, 0) = &all.reg;
 179   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 180
 181   PUT_CODE (&all.zext, ZERO_EXTEND);
 182   XEXP (&all.zext, 0) = &all.reg;
 183
 184   PUT_CODE (&all.wide_mult, MULT);
 185   XEXP (&all.wide_mult, 0) = &all.zext;
 186   XEXP (&all.wide_mult, 1) = &all.zext;
 187
 188   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 189   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 190
 191   PUT_CODE (&all.wide_trunc, TRUNCATE);
 192   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 193
 194   PUT_CODE (&all.shift, ASHIFT);
 195   XEXP (&all.shift, 0) = &all.reg;
 196
 197   PUT_CODE (&all.shift_mult, MULT);
 198   XEXP (&all.shift_mult, 0) = &all.reg;
 199
 200   PUT_CODE (&all.shift_add, PLUS);
 201   XEXP (&all.shift_add, 0) = &all.shift_mult;
 202   XEXP (&all.shift_add, 1) = &all.reg;
 203
 204   PUT_CODE (&all.shift_sub, MINUS);
 205   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 206   XEXP (&all.shift_sub, 1) = &all.reg;
 207
 208   for (speed = 0; speed < 2; speed++)
 209     {
 210       crtl->maybe_hot_insn_p = speed;
 211       zero_cost[speed] = rtx_cost (const0_rtx, 0, speed);
 212
 213       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 214            mode != VOIDmode;
 215            mode = GET_MODE_WIDER_MODE (mode))
 216         {
 217           PUT_MODE (&all.reg, mode);
 218           PUT_MODE (&all.plus, mode);
 219           PUT_MODE (&all.neg, mode);
 220           PUT_MODE (&all.mult, mode);
 221           PUT_MODE (&all.sdiv, mode);
 222           PUT_MODE (&all.udiv, mode);
 223           PUT_MODE (&all.sdiv_32, mode);
 224           PUT_MODE (&all.smod_32, mode);
 225           PUT_MODE (&all.wide_trunc, mode);
 226           PUT_MODE (&all.shift, mode);
 227           PUT_MODE (&all.shift_mult, mode);
 228           PUT_MODE (&all.shift_add, mode);
 229           PUT_MODE (&all.shift_sub, mode);
 230
 231           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 232           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 233           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 234           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 235           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 236
 237           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 238                                           <= 2 * add_cost[speed][mode]);
 239           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 240                                           <= 4 * add_cost[speed][mode]);
 241
 242           wider_mode = GET_MODE_WIDER_MODE (mode);
 243           if (wider_mode != VOIDmode)
 244             {
 245               PUT_MODE (&all.zext, wider_mode);
 246               PUT_MODE (&all.wide_mult, wider_mode);
 247               PUT_MODE (&all.wide_lshr, wider_mode);
 248               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 249
 250               mul_widen_cost[speed][wider_mode]
 251                 = rtx_cost (&all.wide_mult, SET, speed);
 252               mul_highpart_cost[speed][mode]
 253                 = rtx_cost (&all.wide_trunc, SET, speed);
 254             }
 255
 256           shift_cost[speed][mode][0] = 0;
 257           shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
 258             = add_cost[speed][mode];
 259
 260           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 261           for (m = 1; m < n; m++)
 262             {
 263               XEXP (&all.shift, 1) = cint[m];
 264               XEXP (&all.shift_mult, 1) = pow2[m];
 265
 266               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 267               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 268               shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
 269             }
 270         }
 271     }
 272   default_rtl_profile ();
 273 }
 274
 275 /* Return an rtx representing minus the value of X.
 276    MODE is the intended mode of the result,
 277    useful if X is a CONST_INT.  */
 278
 279 rtx
 280 negate_rtx (enum machine_mode mode, rtx x)
 281 {
 282   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 283
 284   if (result == 0)
 285     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 286
 287   return result;
 288 }
 289
 290 /* Report on the availability of insv/extv/extzv and the desired mode
 291    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 292    is false; else the mode of the specified operand.  If OPNO is -1,
 293    all the caller cares about is whether the insn is available.  */
 294 enum machine_mode
 295 mode_for_extraction (enum extraction_pattern pattern, int opno)
 296 {
 297   const struct insn_data *data;
 298
 299   switch (pattern)
 300     {
 301     case EP_insv:
 302       if (HAVE_insv)
 303         {
 304           data = &insn_data[CODE_FOR_insv];
 305           break;
 306         }
 307       return MAX_MACHINE_MODE;
 308
 309     case EP_extv:
 310       if (HAVE_extv)
 311         {
 312           data = &insn_data[CODE_FOR_extv];
 313           break;
 314         }
 315       return MAX_MACHINE_MODE;
 316
 317     case EP_extzv:
 318       if (HAVE_extzv)
 319         {
 320           data = &insn_data[CODE_FOR_extzv];
 321           break;
 322         }
 323       return MAX_MACHINE_MODE;
 324
 325     default:
 326       gcc_unreachable ();
 327     }
 328
 329   if (opno == -1)
 330     return VOIDmode;
 331
 332   /* Everyone who uses this function used to follow it with
 333      if (result == VOIDmode) result = word_mode; */
 334   if (data->operand[opno].mode == VOIDmode)
 335     return word_mode;
 336   return data->operand[opno].mode;
 337 }
 338
 339 /* Return true if X, of mode MODE, matches the predicate for operand
 340    OPNO of instruction ICODE.  Allow volatile memories, regardless of
 341    the ambient volatile_ok setting.  */
 342
 343 static bool
 344 check_predicate_volatile_ok (enum insn_code icode, int opno,
 345                              rtx x, enum machine_mode mode)
 346 {
 347   bool save_volatile_ok, result;
 348
 349   save_volatile_ok = volatile_ok;
 350   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 351   volatile_ok = save_volatile_ok;
 352   return result;
 353 }
 354 \f
 355 /* A subroutine of store_bit_field, with the same arguments.  Return true
 356    if the operation could be implemented.
 357
 358    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 359    no other way of implementing the operation.  If FALLBACK_P is false,
 360    return false instead.  */
 361
 362 static bool
 363 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 364                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 365                    rtx value, bool fallback_p)
 366 {
 367   unsigned int unit
 368     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 369   unsigned HOST_WIDE_INT offset, bitpos;
 370   rtx op0 = str_rtx;
 371   int byte_offset;
 372   rtx orig_value;
 373
 374   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 375
 376   while (GET_CODE (op0) == SUBREG)
 377     {
 378       /* The following line once was done only if WORDS_BIG_ENDIAN,
 379          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 380          meaningful at a much higher level; when structures are copied
 381          between memory and regs, the higher-numbered regs
 382          always get higher addresses.  */
 383       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 384       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 385
 386       byte_offset = 0;
 387
 388       /* Paradoxical subregs need special handling on big endian machines.  */
 389       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 390         {
 391           int difference = inner_mode_size - outer_mode_size;
 392
 393           if (WORDS_BIG_ENDIAN)
 394             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 395           if (BYTES_BIG_ENDIAN)
 396             byte_offset += difference % UNITS_PER_WORD;
 397         }
 398       else
 399         byte_offset = SUBREG_BYTE (op0);
 400
 401       bitnum += byte_offset * BITS_PER_UNIT;
 402       op0 = SUBREG_REG (op0);
 403     }
 404
 405   /* No action is needed if the target is a register and if the field
 406      lies completely outside that register.  This can occur if the source
 407      code contains an out-of-bounds access to a small array.  */
 408   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 409     return true;
 410
 411   /* Use vec_set patterns for inserting parts of vectors whenever
 412      available.  */
 413   if (VECTOR_MODE_P (GET_MODE (op0))
 414       && !MEM_P (op0)
 415       && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
 416           != CODE_FOR_nothing)
 417       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 418       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 419       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 420     {
 421       enum machine_mode outermode = GET_MODE (op0);
 422       enum machine_mode innermode = GET_MODE_INNER (outermode);
 423       int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
 424       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 425       rtx rtxpos = GEN_INT (pos);
 426       rtx src = value;
 427       rtx dest = op0;
 428       rtx pat, seq;
 429       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 430       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 431       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 432
 433       start_sequence ();
 434
 435       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 436         src = copy_to_mode_reg (mode1, src);
 437
 438       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 439         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 440
 441       /* We could handle this, but we should always be called with a pseudo
 442          for our targets and all insns should take them as outputs.  */
 443       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 444                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 445                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 446       pat = GEN_FCN (icode) (dest, src, rtxpos);
 447       seq = get_insns ();
 448       end_sequence ();
 449       if (pat)
 450         {
 451           emit_insn (seq);
 452           emit_insn (pat);
 453           return true;
 454         }
 455     }
 456
 457   /* If the target is a register, overwriting the entire object, or storing
 458      a full-word or multi-word field can be done with just a SUBREG.
 459
 460      If the target is memory, storing any naturally aligned field can be
 461      done with a simple store.  For targets that support fast unaligned
 462      memory, any naturally sized, unit aligned field can be done directly.  */
 463
 464   offset = bitnum / unit;
 465   bitpos = bitnum % unit;
 466   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 467                 + (offset * UNITS_PER_WORD);
 468
 469   if (bitpos == 0
 470       && bitsize == GET_MODE_BITSIZE (fieldmode)
 471       && (!MEM_P (op0)
 472           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 473              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 474              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 475           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 476              || (offset * BITS_PER_UNIT % bitsize == 0
 477                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 478     {
 479       if (MEM_P (op0))
 480         op0 = adjust_address (op0, fieldmode, offset);
 481       else if (GET_MODE (op0) != fieldmode)
 482         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 483                                    byte_offset);
 484       emit_move_insn (op0, value);
 485       return true;
 486     }
 487
 488   /* Make sure we are playing with integral modes.  Pun with subregs
 489      if we aren't.  This must come after the entire register case above,
 490      since that case is valid for any mode.  The following cases are only
 491      valid for integral modes.  */
 492   {
 493     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 494     if (imode != GET_MODE (op0))
 495       {
 496         if (MEM_P (op0))
 497           op0 = adjust_address (op0, imode, 0);
 498         else
 499           {
 500             gcc_assert (imode != BLKmode);
 501             op0 = gen_lowpart (imode, op0);
 502           }
 503       }
 504   }
 505
 506   /* We may be accessing data outside the field, which means
 507      we can alias adjacent data.  */
 508   if (MEM_P (op0))
 509     {
 510       op0 = shallow_copy_rtx (op0);
 511       set_mem_alias_set (op0, 0);
 512       set_mem_expr (op0, 0);
 513     }
 514
 515   /* If OP0 is a register, BITPOS must count within a word.
 516      But as we have it, it counts within whatever size OP0 now has.
 517      On a bigendian machine, these are not the same, so convert.  */
 518   if (BYTES_BIG_ENDIAN
 519       && !MEM_P (op0)
 520       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 521     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 522
 523   /* Storing an lsb-aligned field in a register
 524      can be done with a movestrict instruction.  */
 525
 526   if (!MEM_P (op0)
 527       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 528       && bitsize == GET_MODE_BITSIZE (fieldmode)
 529       && (optab_handler (movstrict_optab, fieldmode)->insn_code
 530           != CODE_FOR_nothing))
 531     {
 532       int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
 533       rtx insn;
 534       rtx start = get_last_insn ();
 535
 536       /* Get appropriate low part of the value being stored.  */
 537       if (GET_CODE (value) == CONST_INT || REG_P (value))
 538         value = gen_lowpart (fieldmode, value);
 539       else if (!(GET_CODE (value) == SYMBOL_REF
 540                  || GET_CODE (value) == LABEL_REF
 541                  || GET_CODE (value) == CONST))
 542         value = convert_to_mode (fieldmode, value, 0);
 543
 544       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 545         value = copy_to_mode_reg (fieldmode, value);
 546
 547       if (GET_CODE (op0) == SUBREG)
 548         {
 549           /* Else we've got some float mode source being extracted into
 550              a different float mode destination -- this combination of
 551              subregs results in Severe Tire Damage.  */
 552           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 553                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 554                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 555           op0 = SUBREG_REG (op0);
 556         }
 557
 558       insn = (GEN_FCN (icode)
 559                  (gen_rtx_SUBREG (fieldmode, op0,
 560                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 561                                   + (offset * UNITS_PER_WORD)),
 562                                   value));
 563       if (insn)
 564         {
 565           emit_insn (insn);
 566           return true;
 567         }
 568       delete_insns_since (start);
 569     }
 570
 571   /* Handle fields bigger than a word.  */
 572
 573   if (bitsize > BITS_PER_WORD)
 574     {
 575       /* Here we transfer the words of the field
 576          in the order least significant first.
 577          This is because the most significant word is the one which may
 578          be less than full.
 579          However, only do that if the value is not BLKmode.  */
 580
 581       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 582       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 583       unsigned int i;
 584       rtx last;
 585
 586       /* This is the mode we must force value to, so that there will be enough
 587          subwords to extract.  Note that fieldmode will often (always?) be
 588          VOIDmode, because that is what store_field uses to indicate that this
 589          is a bit field, but passing VOIDmode to operand_subword_force
 590          is not allowed.  */
 591       fieldmode = GET_MODE (value);
 592       if (fieldmode == VOIDmode)
 593         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 594
 595       last = get_last_insn ();
 596       for (i = 0; i < nwords; i++)
 597         {
 598           /* If I is 0, use the low-order word in both field and target;
 599              if I is 1, use the next to lowest word; and so on.  */
 600           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 601           unsigned int bit_offset = (backwards
 602                                      ? MAX ((int) bitsize - ((int) i + 1)
 603                                             * BITS_PER_WORD,
 604                                             0)
 605                                      : (int) i * BITS_PER_WORD);
 606           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 607
 608           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 609                                             bitsize - i * BITS_PER_WORD),
 610                                   bitnum + bit_offset, word_mode,
 611                                   value_word, fallback_p))
 612             {
 613               delete_insns_since (last);
 614               return false;
 615             }
 616         }
 617       return true;
 618     }
 619
 620   /* From here on we can assume that the field to be stored in is
 621      a full-word (whatever type that is), since it is shorter than a word.  */
 622
 623   /* OFFSET is the number of words or bytes (UNIT says which)
 624      from STR_RTX to the first word or byte containing part of the field.  */
 625
 626   if (!MEM_P (op0))
 627     {
 628       if (offset != 0
 629           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 630         {
 631           if (!REG_P (op0))
 632             {
 633               /* Since this is a destination (lvalue), we can't copy
 634                  it to a pseudo.  We can remove a SUBREG that does not
 635                  change the size of the operand.  Such a SUBREG may
 636                  have been added above.  */
 637               gcc_assert (GET_CODE (op0) == SUBREG
 638                           && (GET_MODE_SIZE (GET_MODE (op0))
 639                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 640               op0 = SUBREG_REG (op0);
 641             }
 642           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 643                                 op0, (offset * UNITS_PER_WORD));
 644         }
 645       offset = 0;
 646     }
 647
 648   /* If VALUE has a floating-point or complex mode, access it as an
 649      integer of the corresponding size.  This can occur on a machine
 650      with 64 bit registers that uses SFmode for float.  It can also
 651      occur for unaligned float or complex fields.  */
 652   orig_value = value;
 653   if (GET_MODE (value) != VOIDmode
 654       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 655       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 656     {
 657       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 658       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 659     }
 660
 661   /* Now OFFSET is nonzero only if OP0 is memory
 662      and is therefore always measured in bytes.  */
 663
 664   if (HAVE_insv
 665       && GET_MODE (value) != BLKmode
 666       && bitsize > 0
 667       && GET_MODE_BITSIZE (op_mode) >= bitsize
 668       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 669             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 670       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 671                                                         VOIDmode)
 672       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 673     {
 674       int xbitpos = bitpos;
 675       rtx value1;
 676       rtx xop0 = op0;
 677       rtx last = get_last_insn ();
 678       rtx pat;
 679
 680       /* Add OFFSET into OP0's address.  */
 681       if (MEM_P (xop0))
 682         xop0 = adjust_address (xop0, byte_mode, offset);
 683
 684       /* If xop0 is a register, we need it in OP_MODE
 685          to make it acceptable to the format of insv.  */
 686       if (GET_CODE (xop0) == SUBREG)
 687         /* We can't just change the mode, because this might clobber op0,
 688            and we will need the original value of op0 if insv fails.  */
 689         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 690       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 691         xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
 692
 693       /* On big-endian machines, we count bits from the most significant.
 694          If the bit field insn does not, we must invert.  */
 695
 696       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 697         xbitpos = unit - bitsize - xbitpos;
 698
 699       /* We have been counting XBITPOS within UNIT.
 700          Count instead within the size of the register.  */
 701       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 702         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 703
 704       unit = GET_MODE_BITSIZE (op_mode);
 705
 706       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 707       value1 = value;
 708       if (GET_MODE (value) != op_mode)
 709         {
 710           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 711             {
 712               /* Optimization: Don't bother really extending VALUE
 713                  if it has all the bits we will actually use.  However,
 714                  if we must narrow it, be sure we do it correctly.  */
 715
 716               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 717                 {
 718                   rtx tmp;
 719
 720                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 721                   if (! tmp)
 722                     tmp = simplify_gen_subreg (op_mode,
 723                                                force_reg (GET_MODE (value),
 724                                                           value1),
 725                                                GET_MODE (value), 0);
 726                   value1 = tmp;
 727                 }
 728               else
 729                 value1 = gen_lowpart (op_mode, value1);
 730             }
 731           else if (GET_CODE (value) == CONST_INT)
 732             value1 = gen_int_mode (INTVAL (value), op_mode);
 733           else
 734             /* Parse phase is supposed to make VALUE's data type
 735                match that of the component reference, which is a type
 736                at least as wide as the field; so VALUE should have
 737                a mode that corresponds to that type.  */
 738             gcc_assert (CONSTANT_P (value));
 739         }
 740
 741       /* If this machine's insv insists on a register,
 742          get VALUE1 into a register.  */
 743       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 744              (value1, op_mode)))
 745         value1 = force_reg (op_mode, value1);
 746
 747       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 748       if (pat)
 749         {
 750           emit_insn (pat);
 751           return true;
 752         }
 753       delete_insns_since (last);
 754     }
 755
 756   /* If OP0 is a memory, try copying it to a register and seeing if a
 757      cheap register alternative is available.  */
 758   if (HAVE_insv && MEM_P (op0))
 759     {
 760       enum machine_mode bestmode;
 761
 762       /* Get the mode to use for inserting into this field.  If OP0 is
 763          BLKmode, get the smallest mode consistent with the alignment. If
 764          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 765          mode. Otherwise, use the smallest mode containing the field.  */
 766
 767       if (GET_MODE (op0) == BLKmode
 768           || (op_mode != MAX_MACHINE_MODE
 769               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 770         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 771                                   (op_mode == MAX_MACHINE_MODE
 772                                    ? VOIDmode : op_mode),
 773                                   MEM_VOLATILE_P (op0));
 774       else
 775         bestmode = GET_MODE (op0);
 776
 777       if (bestmode != VOIDmode
 778           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 779           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 780                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 781         {
 782           rtx last, tempreg, xop0;
 783           unsigned HOST_WIDE_INT xoffset, xbitpos;
 784
 785           last = get_last_insn ();
 786
 787           /* Adjust address to point to the containing unit of
 788              that mode.  Compute the offset as a multiple of this unit,
 789              counting in bytes.  */
 790           unit = GET_MODE_BITSIZE (bestmode);
 791           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 792           xbitpos = bitnum % unit;
 793           xop0 = adjust_address (op0, bestmode, xoffset);
 794
 795           /* Fetch that unit, store the bitfield in it, then store
 796              the unit.  */
 797           tempreg = copy_to_reg (xop0);
 798           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 799                                  fieldmode, orig_value, false))
 800             {
 801               emit_move_insn (xop0, tempreg);
 802               return true;
 803             }
 804           delete_insns_since (last);
 805         }
 806     }
 807
 808   if (!fallback_p)
 809     return false;
 810
 811   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 812   return true;
 813 }
 814
 815 /* Generate code to store value from rtx VALUE
 816    into a bit-field within structure STR_RTX
 817    containing BITSIZE bits starting at bit BITNUM.
 818    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 819
 820 void
 821 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 822                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 823                  rtx value)
 824 {
 825   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 826     gcc_unreachable ();
 827 }
 828 \f
 829 /* Use shifts and boolean operations to store VALUE
 830    into a bit field of width BITSIZE
 831    in a memory location specified by OP0 except offset by OFFSET bytes.
 832      (OFFSET must be 0 if OP0 is a register.)
 833    The field starts at position BITPOS within the byte.
 834     (If OP0 is a register, it may be a full word or a narrower mode,
 835      but BITPOS still counts within a full word,
 836      which is significant on bigendian machines.)  */
 837
 838 static void
 839 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 840                        unsigned HOST_WIDE_INT bitsize,
 841                        unsigned HOST_WIDE_INT bitpos, rtx value)
 842 {
 843   enum machine_mode mode;
 844   unsigned int total_bits = BITS_PER_WORD;
 845   rtx temp;
 846   int all_zero = 0;
 847   int all_one = 0;
 848
 849   /* There is a case not handled here:
 850      a structure with a known alignment of just a halfword
 851      and a field split across two aligned halfwords within the structure.
 852      Or likewise a structure with a known alignment of just a byte
 853      and a field split across two bytes.
 854      Such cases are not supposed to be able to occur.  */
 855
 856   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 857     {
 858       gcc_assert (!offset);
 859       /* Special treatment for a bit field split across two registers.  */
 860       if (bitsize + bitpos > BITS_PER_WORD)
 861         {
 862           store_split_bit_field (op0, bitsize, bitpos, value);
 863           return;
 864         }
 865     }
 866   else
 867     {
 868       /* Get the proper mode to use for this field.  We want a mode that
 869          includes the entire field.  If such a mode would be larger than
 870          a word, we won't be doing the extraction the normal way.
 871          We don't want a mode bigger than the destination.  */
 872
 873       mode = GET_MODE (op0);
 874       if (GET_MODE_BITSIZE (mode) == 0
 875           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 876         mode = word_mode;
 877       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 878                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 879
 880       if (mode == VOIDmode)
 881         {
 882           /* The only way this should occur is if the field spans word
 883              boundaries.  */
 884           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 885                                  value);
 886           return;
 887         }
 888
 889       total_bits = GET_MODE_BITSIZE (mode);
 890
 891       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 892          be in the range 0 to total_bits-1, and put any excess bytes in
 893          OFFSET.  */
 894       if (bitpos >= total_bits)
 895         {
 896           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 897           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 898                      * BITS_PER_UNIT);
 899         }
 900
 901       /* Get ref to an aligned byte, halfword, or word containing the field.
 902          Adjust BITPOS to be position within a word,
 903          and OFFSET to be the offset of that word.
 904          Then alter OP0 to refer to that word.  */
 905       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 906       offset -= (offset % (total_bits / BITS_PER_UNIT));
 907       op0 = adjust_address (op0, mode, offset);
 908     }
 909
 910   mode = GET_MODE (op0);
 911
 912   /* Now MODE is either some integral mode for a MEM as OP0,
 913      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 914      The bit field is contained entirely within OP0.
 915      BITPOS is the starting bit number within OP0.
 916      (OP0's mode may actually be narrower than MODE.)  */
 917
 918   if (BYTES_BIG_ENDIAN)
 919       /* BITPOS is the distance between our msb
 920          and that of the containing datum.
 921          Convert it to the distance from the lsb.  */
 922       bitpos = total_bits - bitsize - bitpos;
 923
 924   /* Now BITPOS is always the distance between our lsb
 925      and that of OP0.  */
 926
 927   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 928      we must first convert its mode to MODE.  */
 929
 930   if (GET_CODE (value) == CONST_INT)
 931     {
 932       HOST_WIDE_INT v = INTVAL (value);
 933
 934       if (bitsize < HOST_BITS_PER_WIDE_INT)
 935         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 936
 937       if (v == 0)
 938         all_zero = 1;
 939       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 940                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 941                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 942         all_one = 1;
 943
 944       value = lshift_value (mode, value, bitpos, bitsize);
 945     }
 946   else
 947     {
 948       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 949                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 950
 951       if (GET_MODE (value) != mode)
 952         {
 953           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 954               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 955             value = gen_lowpart (mode, value);
 956           else
 957             value = convert_to_mode (mode, value, 1);
 958         }
 959
 960       if (must_and)
 961         value = expand_binop (mode, and_optab, value,
 962                               mask_rtx (mode, 0, bitsize, 0),
 963                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 964       if (bitpos > 0)
 965         value = expand_shift (LSHIFT_EXPR, mode, value,
 966                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 967     }
 968
 969   /* Now clear the chosen bits in OP0,
 970      except that if VALUE is -1 we need not bother.  */
 971   /* We keep the intermediates in registers to allow CSE to combine
 972      consecutive bitfield assignments.  */
 973
 974   temp = force_reg (mode, op0);
 975
 976   if (! all_one)
 977     {
 978       temp = expand_binop (mode, and_optab, temp,
 979                            mask_rtx (mode, bitpos, bitsize, 1),
 980                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 981       temp = force_reg (mode, temp);
 982     }
 983
 984   /* Now logical-or VALUE into OP0, unless it is zero.  */
 985
 986   if (! all_zero)
 987     {
 988       temp = expand_binop (mode, ior_optab, temp, value,
 989                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 990       temp = force_reg (mode, temp);
 991     }
 992
 993   if (op0 != temp)
 994     {
 995       op0 = copy_rtx (op0);
 996       emit_move_insn (op0, temp);
 997     }
 998 }
 999 \f
1000 /* Store a bit field that is split across multiple accessible memory objects.
1001
1002    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1003    BITSIZE is the field width; BITPOS the position of its first bit
1004    (within the word).
1005    VALUE is the value to store.
1006
1007    This does not yet handle fields wider than BITS_PER_WORD.  */
1008
1009 static void
1010 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1011                        unsigned HOST_WIDE_INT bitpos, rtx value)
1012 {
1013   unsigned int unit;
1014   unsigned int bitsdone = 0;
1015
1016   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1017      much at a time.  */
1018   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1019     unit = BITS_PER_WORD;
1020   else
1021     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1022
1023   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1024      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1025      that VALUE might be a floating-point constant.  */
1026   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
1027     {
1028       rtx word = gen_lowpart_common (word_mode, value);
1029
1030       if (word && (value != word))
1031         value = word;
1032       else
1033         value = gen_lowpart_common (word_mode,
1034                                     force_reg (GET_MODE (value) != VOIDmode
1035                                                ? GET_MODE (value)
1036                                                : word_mode, value));
1037     }
1038
1039   while (bitsdone < bitsize)
1040     {
1041       unsigned HOST_WIDE_INT thissize;
1042       rtx part, word;
1043       unsigned HOST_WIDE_INT thispos;
1044       unsigned HOST_WIDE_INT offset;
1045
1046       offset = (bitpos + bitsdone) / unit;
1047       thispos = (bitpos + bitsdone) % unit;
1048
1049       /* THISSIZE must not overrun a word boundary.  Otherwise,
1050          store_fixed_bit_field will call us again, and we will mutually
1051          recurse forever.  */
1052       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1053       thissize = MIN (thissize, unit - thispos);
1054
1055       if (BYTES_BIG_ENDIAN)
1056         {
1057           int total_bits;
1058
1059           /* We must do an endian conversion exactly the same way as it is
1060              done in extract_bit_field, so that the two calls to
1061              extract_fixed_bit_field will have comparable arguments.  */
1062           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1063             total_bits = BITS_PER_WORD;
1064           else
1065             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1066
1067           /* Fetch successively less significant portions.  */
1068           if (GET_CODE (value) == CONST_INT)
1069             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1070                              >> (bitsize - bitsdone - thissize))
1071                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1072           else
1073             /* The args are chosen so that the last part includes the
1074                lsb.  Give extract_bit_field the value it needs (with
1075                endianness compensation) to fetch the piece we want.  */
1076             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1077                                             total_bits - bitsize + bitsdone,
1078                                             NULL_RTX, 1);
1079         }
1080       else
1081         {
1082           /* Fetch successively more significant portions.  */
1083           if (GET_CODE (value) == CONST_INT)
1084             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1085                              >> bitsdone)
1086                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1087           else
1088             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1089                                             bitsdone, NULL_RTX, 1);
1090         }
1091
1092       /* If OP0 is a register, then handle OFFSET here.
1093
1094          When handling multiword bitfields, extract_bit_field may pass
1095          down a word_mode SUBREG of a larger REG for a bitfield that actually
1096          crosses a word boundary.  Thus, for a SUBREG, we must find
1097          the current word starting from the base register.  */
1098       if (GET_CODE (op0) == SUBREG)
1099         {
1100           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1101           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1102                                         GET_MODE (SUBREG_REG (op0)));
1103           offset = 0;
1104         }
1105       else if (REG_P (op0))
1106         {
1107           word = operand_subword_force (op0, offset, GET_MODE (op0));
1108           offset = 0;
1109         }
1110       else
1111         word = op0;
1112
1113       /* OFFSET is in UNITs, and UNIT is in bits.
1114          store_fixed_bit_field wants offset in bytes.  */
1115       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1116                              thispos, part);
1117       bitsdone += thissize;
1118     }
1119 }
1120 \f
1121 /* A subroutine of extract_bit_field_1 that converts return value X
1122    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1123    to extract_bit_field.  */
1124
1125 static rtx
1126 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1127                              enum machine_mode tmode, bool unsignedp)
1128 {
1129   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1130     return x;
1131
1132   /* If the x mode is not a scalar integral, first convert to the
1133      integer mode of that size and then access it as a floating-point
1134      value via a SUBREG.  */
1135   if (!SCALAR_INT_MODE_P (tmode))
1136     {
1137       enum machine_mode smode;
1138
1139       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1140       x = convert_to_mode (smode, x, unsignedp);
1141       x = force_reg (smode, x);
1142       return gen_lowpart (tmode, x);
1143     }
1144
1145   return convert_to_mode (tmode, x, unsignedp);
1146 }
1147
1148 /* A subroutine of extract_bit_field, with the same arguments.
1149    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1150    if we can find no other means of implementing the operation.
1151    if FALLBACK_P is false, return NULL instead.  */
1152
1153 static rtx
1154 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1155                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1156                      enum machine_mode mode, enum machine_mode tmode,
1157                      bool fallback_p)
1158 {
1159   unsigned int unit
1160     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1161   unsigned HOST_WIDE_INT offset, bitpos;
1162   rtx op0 = str_rtx;
1163   enum machine_mode int_mode;
1164   enum machine_mode ext_mode;
1165   enum machine_mode mode1;
1166   enum insn_code icode;
1167   int byte_offset;
1168
1169   if (tmode == VOIDmode)
1170     tmode = mode;
1171
1172   while (GET_CODE (op0) == SUBREG)
1173     {
1174       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1175       op0 = SUBREG_REG (op0);
1176     }
1177
1178   /* If we have an out-of-bounds access to a register, just return an
1179      uninitialized register of the required mode.  This can occur if the
1180      source code contains an out-of-bounds access to a small array.  */
1181   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1182     return gen_reg_rtx (tmode);
1183
1184   if (REG_P (op0)
1185       && mode == GET_MODE (op0)
1186       && bitnum == 0
1187       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1188     {
1189       /* We're trying to extract a full register from itself.  */
1190       return op0;
1191     }
1192
1193   /* See if we can get a better vector mode before extracting.  */
1194   if (VECTOR_MODE_P (GET_MODE (op0))
1195       && !MEM_P (op0)
1196       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1197     {
1198       enum machine_mode new_mode;
1199       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1200
1201       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1202         new_mode = MIN_MODE_VECTOR_FLOAT;
1203       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1204         new_mode = MIN_MODE_VECTOR_FRACT;
1205       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1206         new_mode = MIN_MODE_VECTOR_UFRACT;
1207       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1208         new_mode = MIN_MODE_VECTOR_ACCUM;
1209       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1210         new_mode = MIN_MODE_VECTOR_UACCUM;
1211       else
1212         new_mode = MIN_MODE_VECTOR_INT;
1213
1214       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1215         if (GET_MODE_NUNITS (new_mode) == nunits
1216             && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1217             && targetm.vector_mode_supported_p (new_mode))
1218           break;
1219       if (new_mode != VOIDmode)
1220         op0 = gen_lowpart (new_mode, op0);
1221     }
1222
1223   /* Use vec_extract patterns for extracting parts of vectors whenever
1224      available.  */
1225   if (VECTOR_MODE_P (GET_MODE (op0))
1226       && !MEM_P (op0)
1227       && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1228           != CODE_FOR_nothing)
1229       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1230           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1231     {
1232       enum machine_mode outermode = GET_MODE (op0);
1233       enum machine_mode innermode = GET_MODE_INNER (outermode);
1234       int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1235       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1236       rtx rtxpos = GEN_INT (pos);
1237       rtx src = op0;
1238       rtx dest = NULL, pat, seq;
1239       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1240       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1241       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1242
1243       if (innermode == tmode || innermode == mode)
1244         dest = target;
1245
1246       if (!dest)
1247         dest = gen_reg_rtx (innermode);
1248
1249       start_sequence ();
1250
1251       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1252         dest = copy_to_mode_reg (mode0, dest);
1253
1254       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1255         src = copy_to_mode_reg (mode1, src);
1256
1257       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1258         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1259
1260       /* We could handle this, but we should always be called with a pseudo
1261          for our targets and all insns should take them as outputs.  */
1262       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1263                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1264                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1265
1266       pat = GEN_FCN (icode) (dest, src, rtxpos);
1267       seq = get_insns ();
1268       end_sequence ();
1269       if (pat)
1270         {
1271           emit_insn (seq);
1272           emit_insn (pat);
1273           if (mode0 != mode)
1274             return gen_lowpart (tmode, dest);
1275           return dest;
1276         }
1277     }
1278
1279   /* Make sure we are playing with integral modes.  Pun with subregs
1280      if we aren't.  */
1281   {
1282     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1283     if (imode != GET_MODE (op0))
1284       {
1285         if (MEM_P (op0))
1286           op0 = adjust_address (op0, imode, 0);
1287         else
1288           {
1289             gcc_assert (imode != BLKmode);
1290             op0 = gen_lowpart (imode, op0);
1291
1292             /* If we got a SUBREG, force it into a register since we
1293                aren't going to be able to do another SUBREG on it.  */
1294             if (GET_CODE (op0) == SUBREG)
1295               op0 = force_reg (imode, op0);
1296           }
1297       }
1298   }
1299
1300   /* We may be accessing data outside the field, which means
1301      we can alias adjacent data.  */
1302   if (MEM_P (op0))
1303     {
1304       op0 = shallow_copy_rtx (op0);
1305       set_mem_alias_set (op0, 0);
1306       set_mem_expr (op0, 0);
1307     }
1308
1309   /* Extraction of a full-word or multi-word value from a structure
1310      in a register or aligned memory can be done with just a SUBREG.
1311      A subword value in the least significant part of a register
1312      can also be extracted with a SUBREG.  For this, we need the
1313      byte offset of the value in op0.  */
1314
1315   bitpos = bitnum % unit;
1316   offset = bitnum / unit;
1317   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1318
1319   /* If OP0 is a register, BITPOS must count within a word.
1320      But as we have it, it counts within whatever size OP0 now has.
1321      On a bigendian machine, these are not the same, so convert.  */
1322   if (BYTES_BIG_ENDIAN
1323       && !MEM_P (op0)
1324       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1325     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1326
1327   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1328      If that's wrong, the solution is to test for it and set TARGET to 0
1329      if needed.  */
1330
1331   /* Only scalar integer modes can be converted via subregs.  There is an
1332      additional problem for FP modes here in that they can have a precision
1333      which is different from the size.  mode_for_size uses precision, but
1334      we want a mode based on the size, so we must avoid calling it for FP
1335      modes.  */
1336   mode1  = (SCALAR_INT_MODE_P (tmode)
1337             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1338             : mode);
1339
1340   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1341         && bitpos % BITS_PER_WORD == 0)
1342        || (mode1 != BLKmode
1343            /* ??? The big endian test here is wrong.  This is correct
1344               if the value is in a register, and if mode_for_size is not
1345               the same mode as op0.  This causes us to get unnecessarily
1346               inefficient code from the Thumb port when -mbig-endian.  */
1347            && (BYTES_BIG_ENDIAN
1348                ? bitpos + bitsize == BITS_PER_WORD
1349                : bitpos == 0)))
1350       && ((!MEM_P (op0)
1351            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1352                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1353            && GET_MODE_SIZE (mode1) != 0
1354            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1355           || (MEM_P (op0)
1356               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1357                   || (offset * BITS_PER_UNIT % bitsize == 0
1358                       && MEM_ALIGN (op0) % bitsize == 0)))))
1359     {
1360       if (MEM_P (op0))
1361         op0 = adjust_address (op0, mode1, offset);
1362       else if (mode1 != GET_MODE (op0))
1363         {
1364           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1365                                          byte_offset);
1366           if (sub == NULL)
1367             goto no_subreg_mode_swap;
1368           op0 = sub;
1369         }
1370       if (mode1 != mode)
1371         return convert_to_mode (tmode, op0, unsignedp);
1372       return op0;
1373     }
1374  no_subreg_mode_swap:
1375
1376   /* Handle fields bigger than a word.  */
1377
1378   if (bitsize > BITS_PER_WORD)
1379     {
1380       /* Here we transfer the words of the field
1381          in the order least significant first.
1382          This is because the most significant word is the one which may
1383          be less than full.  */
1384
1385       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1386       unsigned int i;
1387
1388       if (target == 0 || !REG_P (target))
1389         target = gen_reg_rtx (mode);
1390
1391       /* Indicate for flow that the entire target reg is being set.  */
1392       emit_clobber (target);
1393
1394       for (i = 0; i < nwords; i++)
1395         {
1396           /* If I is 0, use the low-order word in both field and target;
1397              if I is 1, use the next to lowest word; and so on.  */
1398           /* Word number in TARGET to use.  */
1399           unsigned int wordnum
1400             = (WORDS_BIG_ENDIAN
1401                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1402                : i);
1403           /* Offset from start of field in OP0.  */
1404           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1405                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1406                                                 * (int) BITS_PER_WORD))
1407                                      : (int) i * BITS_PER_WORD);
1408           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1409           rtx result_part
1410             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1411                                            bitsize - i * BITS_PER_WORD),
1412                                  bitnum + bit_offset, 1, target_part, mode,
1413                                  word_mode);
1414
1415           gcc_assert (target_part);
1416
1417           if (result_part != target_part)
1418             emit_move_insn (target_part, result_part);
1419         }
1420
1421       if (unsignedp)
1422         {
1423           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1424              need to be zero'd out.  */
1425           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1426             {
1427               unsigned int i, total_words;
1428
1429               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1430               for (i = nwords; i < total_words; i++)
1431                 emit_move_insn
1432                   (operand_subword (target,
1433                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1434                                     1, VOIDmode),
1435                    const0_rtx);
1436             }
1437           return target;
1438         }
1439
1440       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1441       target = expand_shift (LSHIFT_EXPR, mode, target,
1442                              build_int_cst (NULL_TREE,
1443                                             GET_MODE_BITSIZE (mode) - bitsize),
1444                              NULL_RTX, 0);
1445       return expand_shift (RSHIFT_EXPR, mode, target,
1446                            build_int_cst (NULL_TREE,
1447                                           GET_MODE_BITSIZE (mode) - bitsize),
1448                            NULL_RTX, 0);
1449     }
1450
1451   /* From here on we know the desired field is smaller than a word.  */
1452
1453   /* Check if there is a correspondingly-sized integer field, so we can
1454      safely extract it as one size of integer, if necessary; then
1455      truncate or extend to the size that is wanted; then use SUBREGs or
1456      convert_to_mode to get one of the modes we really wanted.  */
1457
1458   int_mode = int_mode_for_mode (tmode);
1459   if (int_mode == BLKmode)
1460     int_mode = int_mode_for_mode (mode);
1461   /* Should probably push op0 out to memory and then do a load.  */
1462   gcc_assert (int_mode != BLKmode);
1463
1464   /* OFFSET is the number of words or bytes (UNIT says which)
1465      from STR_RTX to the first word or byte containing part of the field.  */
1466   if (!MEM_P (op0))
1467     {
1468       if (offset != 0
1469           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1470         {
1471           if (!REG_P (op0))
1472             op0 = copy_to_reg (op0);
1473           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1474                                 op0, (offset * UNITS_PER_WORD));
1475         }
1476       offset = 0;
1477     }
1478
1479   /* Now OFFSET is nonzero only for memory operands.  */
1480   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1481   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1482   if (ext_mode != MAX_MACHINE_MODE
1483       && bitsize > 0
1484       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1485       /* If op0 is a register, we need it in EXT_MODE to make it
1486          acceptable to the format of ext(z)v.  */
1487       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1488       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1489            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1490       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1491     {
1492       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1493       rtx bitsize_rtx, bitpos_rtx;
1494       rtx last = get_last_insn ();
1495       rtx xop0 = op0;
1496       rtx xtarget = target;
1497       rtx xspec_target = target;
1498       rtx xspec_target_subreg = 0;
1499       rtx pat;
1500
1501       /* If op0 is a register, we need it in EXT_MODE to make it
1502          acceptable to the format of ext(z)v.  */
1503       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1504         xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
1505       if (MEM_P (xop0))
1506         /* Get ref to first byte containing part of the field.  */
1507         xop0 = adjust_address (xop0, byte_mode, xoffset);
1508
1509       /* On big-endian machines, we count bits from the most significant.
1510          If the bit field insn does not, we must invert.  */
1511       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1512         xbitpos = unit - bitsize - xbitpos;
1513
1514       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1515       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1516         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1517
1518       unit = GET_MODE_BITSIZE (ext_mode);
1519
1520       if (xtarget == 0)
1521         xtarget = xspec_target = gen_reg_rtx (tmode);
1522
1523       if (GET_MODE (xtarget) != ext_mode)
1524         {
1525           if (REG_P (xtarget))
1526             {
1527               xtarget = gen_lowpart (ext_mode, xtarget);
1528               if (GET_MODE_SIZE (ext_mode)
1529                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1530                 xspec_target_subreg = xtarget;
1531             }
1532           else
1533             xtarget = gen_reg_rtx (ext_mode);
1534         }
1535
1536       /* If this machine's ext(z)v insists on a register target,
1537          make sure we have one.  */
1538       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1539         xtarget = gen_reg_rtx (ext_mode);
1540
1541       bitsize_rtx = GEN_INT (bitsize);
1542       bitpos_rtx = GEN_INT (xbitpos);
1543
1544       pat = (unsignedp
1545              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1546              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1547       if (pat)
1548         {
1549           emit_insn (pat);
1550           if (xtarget == xspec_target)
1551             return xtarget;
1552           if (xtarget == xspec_target_subreg)
1553             return xspec_target;
1554           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1555         }
1556       delete_insns_since (last);
1557     }
1558
1559   /* If OP0 is a memory, try copying it to a register and seeing if a
1560      cheap register alternative is available.  */
1561   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1562     {
1563       enum machine_mode bestmode;
1564
1565       /* Get the mode to use for inserting into this field.  If
1566          OP0 is BLKmode, get the smallest mode consistent with the
1567          alignment. If OP0 is a non-BLKmode object that is no
1568          wider than EXT_MODE, use its mode. Otherwise, use the
1569          smallest mode containing the field.  */
1570
1571       if (GET_MODE (op0) == BLKmode
1572           || (ext_mode != MAX_MACHINE_MODE
1573               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1574         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1575                                   (ext_mode == MAX_MACHINE_MODE
1576                                    ? VOIDmode : ext_mode),
1577                                   MEM_VOLATILE_P (op0));
1578       else
1579         bestmode = GET_MODE (op0);
1580
1581       if (bestmode != VOIDmode
1582           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1583                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1584         {
1585           unsigned HOST_WIDE_INT xoffset, xbitpos;
1586
1587           /* Compute the offset as a multiple of this unit,
1588              counting in bytes.  */
1589           unit = GET_MODE_BITSIZE (bestmode);
1590           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1591           xbitpos = bitnum % unit;
1592
1593           /* Make sure the register is big enough for the whole field.  */
1594           if (xoffset * BITS_PER_UNIT + unit
1595               >= offset * BITS_PER_UNIT + bitsize)
1596             {
1597               rtx last, result, xop0;
1598
1599               last = get_last_insn ();
1600
1601               /* Fetch it to a register in that size.  */
1602               xop0 = adjust_address (op0, bestmode, xoffset);
1603               xop0 = force_reg (bestmode, xop0);
1604               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1605                                             unsignedp, target,
1606                                             mode, tmode, false);
1607               if (result)
1608                 return result;
1609
1610               delete_insns_since (last);
1611             }
1612         }
1613     }
1614
1615   if (!fallback_p)
1616     return NULL;
1617
1618   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1619                                     bitpos, target, unsignedp);
1620   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1621 }
1622
1623 /* Generate code to extract a byte-field from STR_RTX
1624    containing BITSIZE bits, starting at BITNUM,
1625    and put it in TARGET if possible (if TARGET is nonzero).
1626    Regardless of TARGET, we return the rtx for where the value is placed.
1627
1628    STR_RTX is the structure containing the byte (a REG or MEM).
1629    UNSIGNEDP is nonzero if this is an unsigned bit field.
1630    MODE is the natural mode of the field value once extracted.
1631    TMODE is the mode the caller would like the value to have;
1632    but the value may be returned with type MODE instead.
1633
1634    If a TARGET is specified and we can store in it at no extra cost,
1635    we do so, and return TARGET.
1636    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1637    if they are equally easy.  */
1638
1639 rtx
1640 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1641                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1642                    enum machine_mode mode, enum machine_mode tmode)
1643 {
1644   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1645                               target, mode, tmode, true);
1646 }
1647 \f
1648 /* Extract a bit field using shifts and boolean operations
1649    Returns an rtx to represent the value.
1650    OP0 addresses a register (word) or memory (byte).
1651    BITPOS says which bit within the word or byte the bit field starts in.
1652    OFFSET says how many bytes farther the bit field starts;
1653     it is 0 if OP0 is a register.
1654    BITSIZE says how many bits long the bit field is.
1655     (If OP0 is a register, it may be narrower than a full word,
1656      but BITPOS still counts within a full word,
1657      which is significant on bigendian machines.)
1658
1659    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1660    If TARGET is nonzero, attempts to store the value there
1661    and return TARGET, but this is not guaranteed.
1662    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1663
1664 static rtx
1665 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1666                          unsigned HOST_WIDE_INT offset,
1667                          unsigned HOST_WIDE_INT bitsize,
1668                          unsigned HOST_WIDE_INT bitpos, rtx target,
1669                          int unsignedp)
1670 {
1671   unsigned int total_bits = BITS_PER_WORD;
1672   enum machine_mode mode;
1673
1674   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1675     {
1676       /* Special treatment for a bit field split across two registers.  */
1677       if (bitsize + bitpos > BITS_PER_WORD)
1678         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1679     }
1680   else
1681     {
1682       /* Get the proper mode to use for this field.  We want a mode that
1683          includes the entire field.  If such a mode would be larger than
1684          a word, we won't be doing the extraction the normal way.  */
1685
1686       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1687                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1688
1689       if (mode == VOIDmode)
1690         /* The only way this should occur is if the field spans word
1691            boundaries.  */
1692         return extract_split_bit_field (op0, bitsize,
1693                                         bitpos + offset * BITS_PER_UNIT,
1694                                         unsignedp);
1695
1696       total_bits = GET_MODE_BITSIZE (mode);
1697
1698       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1699          be in the range 0 to total_bits-1, and put any excess bytes in
1700          OFFSET.  */
1701       if (bitpos >= total_bits)
1702         {
1703           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1704           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1705                      * BITS_PER_UNIT);
1706         }
1707
1708       /* Get ref to an aligned byte, halfword, or word containing the field.
1709          Adjust BITPOS to be position within a word,
1710          and OFFSET to be the offset of that word.
1711          Then alter OP0 to refer to that word.  */
1712       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1713       offset -= (offset % (total_bits / BITS_PER_UNIT));
1714       op0 = adjust_address (op0, mode, offset);
1715     }
1716
1717   mode = GET_MODE (op0);
1718
1719   if (BYTES_BIG_ENDIAN)
1720     /* BITPOS is the distance between our msb and that of OP0.
1721        Convert it to the distance from the lsb.  */
1722     bitpos = total_bits - bitsize - bitpos;
1723
1724   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1725      We have reduced the big-endian case to the little-endian case.  */
1726
1727   if (unsignedp)
1728     {
1729       if (bitpos)
1730         {
1731           /* If the field does not already start at the lsb,
1732              shift it so it does.  */
1733           tree amount = build_int_cst (NULL_TREE, bitpos);
1734           /* Maybe propagate the target for the shift.  */
1735           /* But not if we will return it--could confuse integrate.c.  */
1736           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1737           if (tmode != mode) subtarget = 0;
1738           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1739         }
1740       /* Convert the value to the desired mode.  */
1741       if (mode != tmode)
1742         op0 = convert_to_mode (tmode, op0, 1);
1743
1744       /* Unless the msb of the field used to be the msb when we shifted,
1745          mask out the upper bits.  */
1746
1747       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1748         return expand_binop (GET_MODE (op0), and_optab, op0,
1749                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1750                              target, 1, OPTAB_LIB_WIDEN);
1751       return op0;
1752     }
1753
1754   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1755      then arithmetic-shift its lsb to the lsb of the word.  */
1756   op0 = force_reg (mode, op0);
1757   if (mode != tmode)
1758     target = 0;
1759
1760   /* Find the narrowest integer mode that contains the field.  */
1761
1762   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1763        mode = GET_MODE_WIDER_MODE (mode))
1764     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1765       {
1766         op0 = convert_to_mode (mode, op0, 0);
1767         break;
1768       }
1769
1770   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1771     {
1772       tree amount
1773         = build_int_cst (NULL_TREE,
1774                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1775       /* Maybe propagate the target for the shift.  */
1776       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1777       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1778     }
1779
1780   return expand_shift (RSHIFT_EXPR, mode, op0,
1781                        build_int_cst (NULL_TREE,
1782                                       GET_MODE_BITSIZE (mode) - bitsize),
1783                        target, 0);
1784 }
1785 \f
1786 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1787    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1788    complement of that if COMPLEMENT.  The mask is truncated if
1789    necessary to the width of mode MODE.  The mask is zero-extended if
1790    BITSIZE+BITPOS is too small for MODE.  */
1791
1792 static rtx
1793 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1794 {
1795   HOST_WIDE_INT masklow, maskhigh;
1796
1797   if (bitsize == 0)
1798     masklow = 0;
1799   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1800     masklow = (HOST_WIDE_INT) -1 << bitpos;
1801   else
1802     masklow = 0;
1803
1804   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1805     masklow &= ((unsigned HOST_WIDE_INT) -1
1806                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1807
1808   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1809     maskhigh = -1;
1810   else
1811     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1812
1813   if (bitsize == 0)
1814     maskhigh = 0;
1815   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1816     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1817                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1818   else
1819     maskhigh = 0;
1820
1821   if (complement)
1822     {
1823       maskhigh = ~maskhigh;
1824       masklow = ~masklow;
1825     }
1826
1827   return immed_double_const (masklow, maskhigh, mode);
1828 }
1829
1830 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1831    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1832
1833 static rtx
1834 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1835 {
1836   unsigned HOST_WIDE_INT v = INTVAL (value);
1837   HOST_WIDE_INT low, high;
1838
1839   if (bitsize < HOST_BITS_PER_WIDE_INT)
1840     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1841
1842   if (bitpos < HOST_BITS_PER_WIDE_INT)
1843     {
1844       low = v << bitpos;
1845       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1846     }
1847   else
1848     {
1849       low = 0;
1850       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1851     }
1852
1853   return immed_double_const (low, high, mode);
1854 }
1855 \f
1856 /* Extract a bit field that is split across two words
1857    and return an RTX for the result.
1858
1859    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1860    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1861    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1862
1863 static rtx
1864 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1865                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1866 {
1867   unsigned int unit;
1868   unsigned int bitsdone = 0;
1869   rtx result = NULL_RTX;
1870   int first = 1;
1871
1872   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1873      much at a time.  */
1874   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1875     unit = BITS_PER_WORD;
1876   else
1877     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1878
1879   while (bitsdone < bitsize)
1880     {
1881       unsigned HOST_WIDE_INT thissize;
1882       rtx part, word;
1883       unsigned HOST_WIDE_INT thispos;
1884       unsigned HOST_WIDE_INT offset;
1885
1886       offset = (bitpos + bitsdone) / unit;
1887       thispos = (bitpos + bitsdone) % unit;
1888
1889       /* THISSIZE must not overrun a word boundary.  Otherwise,
1890          extract_fixed_bit_field will call us again, and we will mutually
1891          recurse forever.  */
1892       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1893       thissize = MIN (thissize, unit - thispos);
1894
1895       /* If OP0 is a register, then handle OFFSET here.
1896
1897          When handling multiword bitfields, extract_bit_field may pass
1898          down a word_mode SUBREG of a larger REG for a bitfield that actually
1899          crosses a word boundary.  Thus, for a SUBREG, we must find
1900          the current word starting from the base register.  */
1901       if (GET_CODE (op0) == SUBREG)
1902         {
1903           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1904           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1905                                         GET_MODE (SUBREG_REG (op0)));
1906           offset = 0;
1907         }
1908       else if (REG_P (op0))
1909         {
1910           word = operand_subword_force (op0, offset, GET_MODE (op0));
1911           offset = 0;
1912         }
1913       else
1914         word = op0;
1915
1916       /* Extract the parts in bit-counting order,
1917          whose meaning is determined by BYTES_PER_UNIT.
1918          OFFSET is in UNITs, and UNIT is in bits.
1919          extract_fixed_bit_field wants offset in bytes.  */
1920       part = extract_fixed_bit_field (word_mode, word,
1921                                       offset * unit / BITS_PER_UNIT,
1922                                       thissize, thispos, 0, 1);
1923       bitsdone += thissize;
1924
1925       /* Shift this part into place for the result.  */
1926       if (BYTES_BIG_ENDIAN)
1927         {
1928           if (bitsize != bitsdone)
1929             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1930                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1931                                  0, 1);
1932         }
1933       else
1934         {
1935           if (bitsdone != thissize)
1936             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1937                                  build_int_cst (NULL_TREE,
1938                                                 bitsdone - thissize), 0, 1);
1939         }
1940
1941       if (first)
1942         result = part;
1943       else
1944         /* Combine the parts with bitwise or.  This works
1945            because we extracted each part as an unsigned bit field.  */
1946         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1947                                OPTAB_LIB_WIDEN);
1948
1949       first = 0;
1950     }
1951
1952   /* Unsigned bit field: we are done.  */
1953   if (unsignedp)
1954     return result;
1955   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1956   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1957                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1958                          NULL_RTX, 0);
1959   return expand_shift (RSHIFT_EXPR, word_mode, result,
1960                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1961                        NULL_RTX, 0);
1962 }
1963 \f
1964 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1965    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1966    MODE, fill the upper bits with zeros.  Fail if the layout of either
1967    mode is unknown (as for CC modes) or if the extraction would involve
1968    unprofitable mode punning.  Return the value on success, otherwise
1969    return null.
1970
1971    This is different from gen_lowpart* in these respects:
1972
1973      - the returned value must always be considered an rvalue
1974
1975      - when MODE is wider than SRC_MODE, the extraction involves
1976        a zero extension
1977
1978      - when MODE is smaller than SRC_MODE, the extraction involves
1979        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1980
1981    In other words, this routine performs a computation, whereas the
1982    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1983    operations.  */
1984
1985 rtx
1986 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1987 {
1988   enum machine_mode int_mode, src_int_mode;
1989
1990   if (mode == src_mode)
1991     return src;
1992
1993   if (CONSTANT_P (src))
1994     {
1995       /* simplify_gen_subreg can't be used here, as if simplify_subreg
1996          fails, it will happily create (subreg (symbol_ref)) or similar
1997          invalid SUBREGs.  */
1998       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1999       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2000       if (ret)
2001         return ret;
2002
2003       if (GET_MODE (src) == VOIDmode
2004           || !validate_subreg (mode, src_mode, src, byte))
2005         return NULL_RTX;
2006
2007       src = force_reg (GET_MODE (src), src);
2008       return gen_rtx_SUBREG (mode, src, byte);
2009     }
2010
2011   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2012     return NULL_RTX;
2013
2014   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2015       && MODES_TIEABLE_P (mode, src_mode))
2016     {
2017       rtx x = gen_lowpart_common (mode, src);
2018       if (x)
2019         return x;
2020     }
2021
2022   src_int_mode = int_mode_for_mode (src_mode);
2023   int_mode = int_mode_for_mode (mode);
2024   if (src_int_mode == BLKmode || int_mode == BLKmode)
2025     return NULL_RTX;
2026
2027   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2028     return NULL_RTX;
2029   if (!MODES_TIEABLE_P (int_mode, mode))
2030     return NULL_RTX;
2031
2032   src = gen_lowpart (src_int_mode, src);
2033   src = convert_modes (int_mode, src_int_mode, src, true);
2034   src = gen_lowpart (mode, src);
2035   return src;
2036 }
2037 \f
2038 /* Add INC into TARGET.  */
2039
2040 void
2041 expand_inc (rtx target, rtx inc)
2042 {
2043   rtx value = expand_binop (GET_MODE (target), add_optab,
2044                             target, inc,
2045                             target, 0, OPTAB_LIB_WIDEN);
2046   if (value != target)
2047     emit_move_insn (target, value);
2048 }
2049
2050 /* Subtract DEC from TARGET.  */
2051
2052 void
2053 expand_dec (rtx target, rtx dec)
2054 {
2055   rtx value = expand_binop (GET_MODE (target), sub_optab,
2056                             target, dec,
2057                             target, 0, OPTAB_LIB_WIDEN);
2058   if (value != target)
2059     emit_move_insn (target, value);
2060 }
2061 \f
2062 /* Output a shift instruction for expression code CODE,
2063    with SHIFTED being the rtx for the value to shift,
2064    and AMOUNT the tree for the amount to shift by.
2065    Store the result in the rtx TARGET, if that is convenient.
2066    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2067    Return the rtx for where the value is.  */
2068
2069 rtx
2070 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2071               tree amount, rtx target, int unsignedp)
2072 {
2073   rtx op1, temp = 0;
2074   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2075   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2076   optab lshift_optab = ashl_optab;
2077   optab rshift_arith_optab = ashr_optab;
2078   optab rshift_uns_optab = lshr_optab;
2079   optab lrotate_optab = rotl_optab;
2080   optab rrotate_optab = rotr_optab;
2081   enum machine_mode op1_mode;
2082   int attempt;
2083   bool speed = optimize_insn_for_speed_p ();
2084
2085   op1 = expand_normal (amount);
2086   op1_mode = GET_MODE (op1);
2087
2088   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2089      shift amount is a vector, use the vector/vector shift patterns.  */
2090   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2091     {
2092       lshift_optab = vashl_optab;
2093       rshift_arith_optab = vashr_optab;
2094       rshift_uns_optab = vlshr_optab;
2095       lrotate_optab = vrotl_optab;
2096       rrotate_optab = vrotr_optab;
2097     }
2098
2099   /* Previously detected shift-counts computed by NEGATE_EXPR
2100      and shifted in the other direction; but that does not work
2101      on all machines.  */
2102
2103   if (SHIFT_COUNT_TRUNCATED)
2104     {
2105       if (GET_CODE (op1) == CONST_INT
2106           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2107               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2108         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2109                        % GET_MODE_BITSIZE (mode));
2110       else if (GET_CODE (op1) == SUBREG
2111                && subreg_lowpart_p (op1))
2112         op1 = SUBREG_REG (op1);
2113     }
2114
2115   if (op1 == const0_rtx)
2116     return shifted;
2117
2118   /* Check whether its cheaper to implement a left shift by a constant
2119      bit count by a sequence of additions.  */
2120   if (code == LSHIFT_EXPR
2121       && GET_CODE (op1) == CONST_INT
2122       && INTVAL (op1) > 0
2123       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2124       && INTVAL (op1) < MAX_BITS_PER_WORD
2125       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2126       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2127     {
2128       int i;
2129       for (i = 0; i < INTVAL (op1); i++)
2130         {
2131           temp = force_reg (mode, shifted);
2132           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2133                                   unsignedp, OPTAB_LIB_WIDEN);
2134         }
2135       return shifted;
2136     }
2137
2138   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2139     {
2140       enum optab_methods methods;
2141
2142       if (attempt == 0)
2143         methods = OPTAB_DIRECT;
2144       else if (attempt == 1)
2145         methods = OPTAB_WIDEN;
2146       else
2147         methods = OPTAB_LIB_WIDEN;
2148
2149       if (rotate)
2150         {
2151           /* Widening does not work for rotation.  */
2152           if (methods == OPTAB_WIDEN)
2153             continue;
2154           else if (methods == OPTAB_LIB_WIDEN)
2155             {
2156               /* If we have been unable to open-code this by a rotation,
2157                  do it as the IOR of two shifts.  I.e., to rotate A
2158                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2159                  where C is the bitsize of A.
2160
2161                  It is theoretically possible that the target machine might
2162                  not be able to perform either shift and hence we would
2163                  be making two libcalls rather than just the one for the
2164                  shift (similarly if IOR could not be done).  We will allow
2165                  this extremely unlikely lossage to avoid complicating the
2166                  code below.  */
2167
2168               rtx subtarget = target == shifted ? 0 : target;
2169               tree new_amount, other_amount;
2170               rtx temp1;
2171               tree type = TREE_TYPE (amount);
2172               if (GET_MODE (op1) != TYPE_MODE (type)
2173                   && GET_MODE (op1) != VOIDmode)
2174                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2175               new_amount = make_tree (type, op1);
2176               other_amount
2177                 = fold_build2 (MINUS_EXPR, type,
2178                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2179                                new_amount);
2180
2181               shifted = force_reg (mode, shifted);
2182
2183               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2184                                    mode, shifted, new_amount, 0, 1);
2185               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2186                                     mode, shifted, other_amount, subtarget, 1);
2187               return expand_binop (mode, ior_optab, temp, temp1, target,
2188                                    unsignedp, methods);
2189             }
2190
2191           temp = expand_binop (mode,
2192                                left ? lrotate_optab : rrotate_optab,
2193                                shifted, op1, target, unsignedp, methods);
2194         }
2195       else if (unsignedp)
2196         temp = expand_binop (mode,
2197                              left ? lshift_optab : rshift_uns_optab,
2198                              shifted, op1, target, unsignedp, methods);
2199
2200       /* Do arithmetic shifts.
2201          Also, if we are going to widen the operand, we can just as well
2202          use an arithmetic right-shift instead of a logical one.  */
2203       if (temp == 0 && ! rotate
2204           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2205         {
2206           enum optab_methods methods1 = methods;
2207
2208           /* If trying to widen a log shift to an arithmetic shift,
2209              don't accept an arithmetic shift of the same size.  */
2210           if (unsignedp)
2211             methods1 = OPTAB_MUST_WIDEN;
2212
2213           /* Arithmetic shift */
2214
2215           temp = expand_binop (mode,
2216                                left ? lshift_optab : rshift_arith_optab,
2217                                shifted, op1, target, unsignedp, methods1);
2218         }
2219
2220       /* We used to try extzv here for logical right shifts, but that was
2221          only useful for one machine, the VAX, and caused poor code
2222          generation there for lshrdi3, so the code was deleted and a
2223          define_expand for lshrsi3 was added to vax.md.  */
2224     }
2225
2226   gcc_assert (temp);
2227   return temp;
2228 }
2229 \f
2230 enum alg_code {
2231   alg_unknown,
2232   alg_zero,
2233   alg_m, alg_shift,
2234   alg_add_t_m2,
2235   alg_sub_t_m2,
2236   alg_add_factor,
2237   alg_sub_factor,
2238   alg_add_t2_m,
2239   alg_sub_t2_m,
2240   alg_impossible
2241 };
2242
2243 /* This structure holds the "cost" of a multiply sequence.  The
2244    "cost" field holds the total rtx_cost of every operator in the
2245    synthetic multiplication sequence, hence cost(a op b) is defined
2246    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2247    The "latency" field holds the minimum possible latency of the
2248    synthetic multiply, on a hypothetical infinitely parallel CPU.
2249    This is the critical path, or the maximum height, of the expression
2250    tree which is the sum of rtx_costs on the most expensive path from
2251    any leaf to the root.  Hence latency(a op b) is defined as zero for
2252    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2253
2254 struct mult_cost {
2255   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2256   short latency;  /* The latency of the multiplication sequence.  */
2257 };
2258
2259 /* This macro is used to compare a pointer to a mult_cost against an
2260    single integer "rtx_cost" value.  This is equivalent to the macro
2261    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2262 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2263                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2264
2265 /* This macro is used to compare two pointers to mult_costs against
2266    each other.  The macro returns true if X is cheaper than Y.
2267    Currently, the cheaper of two mult_costs is the one with the
2268    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2269 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2270                                  || ((X)->cost == (Y)->cost     \
2271                                      && (X)->latency < (Y)->latency))
2272
2273 /* This structure records a sequence of operations.
2274    `ops' is the number of operations recorded.
2275    `cost' is their total cost.
2276    The operations are stored in `op' and the corresponding
2277    logarithms of the integer coefficients in `log'.
2278
2279    These are the operations:
2280    alg_zero             total := 0;
2281    alg_m                total := multiplicand;
2282    alg_shift            total := total * coeff
2283    alg_add_t_m2         total := total + multiplicand * coeff;
2284    alg_sub_t_m2         total := total - multiplicand * coeff;
2285    alg_add_factor       total := total * coeff + total;
2286    alg_sub_factor       total := total * coeff - total;
2287    alg_add_t2_m         total := total * coeff + multiplicand;
2288    alg_sub_t2_m         total := total * coeff - multiplicand;
2289
2290    The first operand must be either alg_zero or alg_m.  */
2291
2292 struct algorithm
2293 {
2294   struct mult_cost cost;
2295   short ops;
2296   /* The size of the OP and LOG fields are not directly related to the
2297      word size, but the worst-case algorithms will be if we have few
2298      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2299      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2300      in total wordsize operations.  */
2301   enum alg_code op[MAX_BITS_PER_WORD];
2302   char log[MAX_BITS_PER_WORD];
2303 };
2304
2305 /* The entry for our multiplication cache/hash table.  */
2306 struct alg_hash_entry {
2307   /* The number we are multiplying by.  */
2308   unsigned HOST_WIDE_INT t;
2309
2310   /* The mode in which we are multiplying something by T.  */
2311   enum machine_mode mode;
2312
2313   /* The best multiplication algorithm for t.  */
2314   enum alg_code alg;
2315
2316   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2317      Otherwise, the cost within which multiplication by T is
2318      impossible.  */
2319   struct mult_cost cost;
2320
2321   /* OPtimized for speed? */
2322   bool speed;
2323 };
2324
2325 /* The number of cache/hash entries.  */
2326 #if HOST_BITS_PER_WIDE_INT == 64
2327 #define NUM_ALG_HASH_ENTRIES 1031
2328 #else
2329 #define NUM_ALG_HASH_ENTRIES 307
2330 #endif
2331
2332 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2333    actually a hash table.  If we have a collision, that the older
2334    entry is kicked out.  */
2335 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2336
2337 /* Indicates the type of fixup needed after a constant multiplication.
2338    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2339    the result should be negated, and ADD_VARIANT means that the
2340    multiplicand should be added to the result.  */
2341 enum mult_variant {basic_variant, negate_variant, add_variant};
2342
2343 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2344                         const struct mult_cost *, enum machine_mode mode);
2345 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2346                                  struct algorithm *, enum mult_variant *, int);
2347 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2348                               const struct algorithm *, enum mult_variant);
2349 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2350                                                  int, rtx *, int *, int *);
2351 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2352 static rtx extract_high_half (enum machine_mode, rtx);
2353 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2354 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2355                                        int, int);
2356 /* Compute and return the best algorithm for multiplying by T.
2357    The algorithm must cost less than cost_limit
2358    If retval.cost >= COST_LIMIT, no algorithm was found and all
2359    other field of the returned struct are undefined.
2360    MODE is the machine mode of the multiplication.  */
2361
2362 static void
2363 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2364             const struct mult_cost *cost_limit, enum machine_mode mode)
2365 {
2366   int m;
2367   struct algorithm *alg_in, *best_alg;
2368   struct mult_cost best_cost;
2369   struct mult_cost new_limit;
2370   int op_cost, op_latency;
2371   unsigned HOST_WIDE_INT q;
2372   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2373   int hash_index;
2374   bool cache_hit = false;
2375   enum alg_code cache_alg = alg_zero;
2376   bool speed = optimize_insn_for_speed_p ();
2377
2378   /* Indicate that no algorithm is yet found.  If no algorithm
2379      is found, this value will be returned and indicate failure.  */
2380   alg_out->cost.cost = cost_limit->cost + 1;
2381   alg_out->cost.latency = cost_limit->latency + 1;
2382
2383   if (cost_limit->cost < 0
2384       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2385     return;
2386
2387   /* Restrict the bits of "t" to the multiplication's mode.  */
2388   t &= GET_MODE_MASK (mode);
2389
2390   /* t == 1 can be done in zero cost.  */
2391   if (t == 1)
2392     {
2393       alg_out->ops = 1;
2394       alg_out->cost.cost = 0;
2395       alg_out->cost.latency = 0;
2396       alg_out->op[0] = alg_m;
2397       return;
2398     }
2399
2400   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2401      fail now.  */
2402   if (t == 0)
2403     {
2404       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2405         return;
2406       else
2407         {
2408           alg_out->ops = 1;
2409           alg_out->cost.cost = zero_cost[speed];
2410           alg_out->cost.latency = zero_cost[speed];
2411           alg_out->op[0] = alg_zero;
2412           return;
2413         }
2414     }
2415
2416   /* We'll be needing a couple extra algorithm structures now.  */
2417
2418   alg_in = XALLOCA (struct algorithm);
2419   best_alg = XALLOCA (struct algorithm);
2420   best_cost = *cost_limit;
2421
2422   /* Compute the hash index.  */
2423   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2424
2425   /* See if we already know what to do for T.  */
2426   if (alg_hash[hash_index].t == t
2427       && alg_hash[hash_index].mode == mode
2428       && alg_hash[hash_index].mode == mode
2429       && alg_hash[hash_index].speed == speed
2430       && alg_hash[hash_index].alg != alg_unknown)
2431     {
2432       cache_alg = alg_hash[hash_index].alg;
2433
2434       if (cache_alg == alg_impossible)
2435         {
2436           /* The cache tells us that it's impossible to synthesize
2437              multiplication by T within alg_hash[hash_index].cost.  */
2438           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2439             /* COST_LIMIT is at least as restrictive as the one
2440                recorded in the hash table, in which case we have no
2441                hope of synthesizing a multiplication.  Just
2442                return.  */
2443             return;
2444
2445           /* If we get here, COST_LIMIT is less restrictive than the
2446              one recorded in the hash table, so we may be able to
2447              synthesize a multiplication.  Proceed as if we didn't
2448              have the cache entry.  */
2449         }
2450       else
2451         {
2452           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2453             /* The cached algorithm shows that this multiplication
2454                requires more cost than COST_LIMIT.  Just return.  This
2455                way, we don't clobber this cache entry with
2456                alg_impossible but retain useful information.  */
2457             return;
2458
2459           cache_hit = true;
2460
2461           switch (cache_alg)
2462             {
2463             case alg_shift:
2464               goto do_alg_shift;
2465
2466             case alg_add_t_m2:
2467             case alg_sub_t_m2:
2468               goto do_alg_addsub_t_m2;
2469
2470             case alg_add_factor:
2471             case alg_sub_factor:
2472               goto do_alg_addsub_factor;
2473
2474             case alg_add_t2_m:
2475               goto do_alg_add_t2_m;
2476
2477             case alg_sub_t2_m:
2478               goto do_alg_sub_t2_m;
2479
2480             default:
2481               gcc_unreachable ();
2482             }
2483         }
2484     }
2485
2486   /* If we have a group of zero bits at the low-order part of T, try
2487      multiplying by the remaining bits and then doing a shift.  */
2488
2489   if ((t & 1) == 0)
2490     {
2491     do_alg_shift:
2492       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2493       if (m < maxm)
2494         {
2495           q = t >> m;
2496           /* The function expand_shift will choose between a shift and
2497              a sequence of additions, so the observed cost is given as
2498              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2499           op_cost = m * add_cost[speed][mode];
2500           if (shift_cost[speed][mode][m] < op_cost)
2501             op_cost = shift_cost[speed][mode][m];
2502           new_limit.cost = best_cost.cost - op_cost;
2503           new_limit.latency = best_cost.latency - op_cost;
2504           synth_mult (alg_in, q, &new_limit, mode);
2505
2506           alg_in->cost.cost += op_cost;
2507           alg_in->cost.latency += op_cost;
2508           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2509             {
2510               struct algorithm *x;
2511               best_cost = alg_in->cost;
2512               x = alg_in, alg_in = best_alg, best_alg = x;
2513               best_alg->log[best_alg->ops] = m;
2514               best_alg->op[best_alg->ops] = alg_shift;
2515             }
2516         }
2517       if (cache_hit)
2518         goto done;
2519     }
2520
2521   /* If we have an odd number, add or subtract one.  */
2522   if ((t & 1) != 0)
2523     {
2524       unsigned HOST_WIDE_INT w;
2525
2526     do_alg_addsub_t_m2:
2527       for (w = 1; (w & t) != 0; w <<= 1)
2528         ;
2529       /* If T was -1, then W will be zero after the loop.  This is another
2530          case where T ends with ...111.  Handling this with (T + 1) and
2531          subtract 1 produces slightly better code and results in algorithm
2532          selection much faster than treating it like the ...0111 case
2533          below.  */
2534       if (w == 0
2535           || (w > 2
2536               /* Reject the case where t is 3.
2537                  Thus we prefer addition in that case.  */
2538               && t != 3))
2539         {
2540           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2541
2542           op_cost = add_cost[speed][mode];
2543           new_limit.cost = best_cost.cost - op_cost;
2544           new_limit.latency = best_cost.latency - op_cost;
2545           synth_mult (alg_in, t + 1, &new_limit, mode);
2546
2547           alg_in->cost.cost += op_cost;
2548           alg_in->cost.latency += op_cost;
2549           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2550             {
2551               struct algorithm *x;
2552               best_cost = alg_in->cost;
2553               x = alg_in, alg_in = best_alg, best_alg = x;
2554               best_alg->log[best_alg->ops] = 0;
2555               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2556             }
2557         }
2558       else
2559         {
2560           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2561
2562           op_cost = add_cost[speed][mode];
2563           new_limit.cost = best_cost.cost - op_cost;
2564           new_limit.latency = best_cost.latency - op_cost;
2565           synth_mult (alg_in, t - 1, &new_limit, mode);
2566
2567           alg_in->cost.cost += op_cost;
2568           alg_in->cost.latency += op_cost;
2569           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2570             {
2571               struct algorithm *x;
2572               best_cost = alg_in->cost;
2573               x = alg_in, alg_in = best_alg, best_alg = x;
2574               best_alg->log[best_alg->ops] = 0;
2575               best_alg->op[best_alg->ops] = alg_add_t_m2;
2576             }
2577         }
2578       if (cache_hit)
2579         goto done;
2580     }
2581
2582   /* Look for factors of t of the form
2583      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2584      If we find such a factor, we can multiply by t using an algorithm that
2585      multiplies by q, shift the result by m and add/subtract it to itself.
2586
2587      We search for large factors first and loop down, even if large factors
2588      are less probable than small; if we find a large factor we will find a
2589      good sequence quickly, and therefore be able to prune (by decreasing
2590      COST_LIMIT) the search.  */
2591
2592  do_alg_addsub_factor:
2593   for (m = floor_log2 (t - 1); m >= 2; m--)
2594     {
2595       unsigned HOST_WIDE_INT d;
2596
2597       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2598       if (t % d == 0 && t > d && m < maxm
2599           && (!cache_hit || cache_alg == alg_add_factor))
2600         {
2601           /* If the target has a cheap shift-and-add instruction use
2602              that in preference to a shift insn followed by an add insn.
2603              Assume that the shift-and-add is "atomic" with a latency
2604              equal to its cost, otherwise assume that on superscalar
2605              hardware the shift may be executed concurrently with the
2606              earlier steps in the algorithm.  */
2607           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2608           if (shiftadd_cost[speed][mode][m] < op_cost)
2609             {
2610               op_cost = shiftadd_cost[speed][mode][m];
2611               op_latency = op_cost;
2612             }
2613           else
2614             op_latency = add_cost[speed][mode];
2615
2616           new_limit.cost = best_cost.cost - op_cost;
2617           new_limit.latency = best_cost.latency - op_latency;
2618           synth_mult (alg_in, t / d, &new_limit, mode);
2619
2620           alg_in->cost.cost += op_cost;
2621           alg_in->cost.latency += op_latency;
2622           if (alg_in->cost.latency < op_cost)
2623             alg_in->cost.latency = op_cost;
2624           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2625             {
2626               struct algorithm *x;
2627               best_cost = alg_in->cost;
2628               x = alg_in, alg_in = best_alg, best_alg = x;
2629               best_alg->log[best_alg->ops] = m;
2630               best_alg->op[best_alg->ops] = alg_add_factor;
2631             }
2632           /* Other factors will have been taken care of in the recursion.  */
2633           break;
2634         }
2635
2636       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2637       if (t % d == 0 && t > d && m < maxm
2638           && (!cache_hit || cache_alg == alg_sub_factor))
2639         {
2640           /* If the target has a cheap shift-and-subtract insn use
2641              that in preference to a shift insn followed by a sub insn.
2642              Assume that the shift-and-sub is "atomic" with a latency
2643              equal to it's cost, otherwise assume that on superscalar
2644              hardware the shift may be executed concurrently with the
2645              earlier steps in the algorithm.  */
2646           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2647           if (shiftsub_cost[speed][mode][m] < op_cost)
2648             {
2649               op_cost = shiftsub_cost[speed][mode][m];
2650               op_latency = op_cost;
2651             }
2652           else
2653             op_latency = add_cost[speed][mode];
2654
2655           new_limit.cost = best_cost.cost - op_cost;
2656           new_limit.latency = best_cost.latency - op_latency;
2657           synth_mult (alg_in, t / d, &new_limit, mode);
2658
2659           alg_in->cost.cost += op_cost;
2660           alg_in->cost.latency += op_latency;
2661           if (alg_in->cost.latency < op_cost)
2662             alg_in->cost.latency = op_cost;
2663           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2664             {
2665               struct algorithm *x;
2666               best_cost = alg_in->cost;
2667               x = alg_in, alg_in = best_alg, best_alg = x;
2668               best_alg->log[best_alg->ops] = m;
2669               best_alg->op[best_alg->ops] = alg_sub_factor;
2670             }
2671           break;
2672         }
2673     }
2674   if (cache_hit)
2675     goto done;
2676
2677   /* Try shift-and-add (load effective address) instructions,
2678      i.e. do a*3, a*5, a*9.  */
2679   if ((t & 1) != 0)
2680     {
2681     do_alg_add_t2_m:
2682       q = t - 1;
2683       q = q & -q;
2684       m = exact_log2 (q);
2685       if (m >= 0 && m < maxm)
2686         {
2687           op_cost = shiftadd_cost[speed][mode][m];
2688           new_limit.cost = best_cost.cost - op_cost;
2689           new_limit.latency = best_cost.latency - op_cost;
2690           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2691
2692           alg_in->cost.cost += op_cost;
2693           alg_in->cost.latency += op_cost;
2694           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2695             {
2696               struct algorithm *x;
2697               best_cost = alg_in->cost;
2698               x = alg_in, alg_in = best_alg, best_alg = x;
2699               best_alg->log[best_alg->ops] = m;
2700               best_alg->op[best_alg->ops] = alg_add_t2_m;
2701             }
2702         }
2703       if (cache_hit)
2704         goto done;
2705
2706     do_alg_sub_t2_m:
2707       q = t + 1;
2708       q = q & -q;
2709       m = exact_log2 (q);
2710       if (m >= 0 && m < maxm)
2711         {
2712           op_cost = shiftsub_cost[speed][mode][m];
2713           new_limit.cost = best_cost.cost - op_cost;
2714           new_limit.latency = best_cost.latency - op_cost;
2715           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2716
2717           alg_in->cost.cost += op_cost;
2718           alg_in->cost.latency += op_cost;
2719           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2720             {
2721               struct algorithm *x;
2722               best_cost = alg_in->cost;
2723               x = alg_in, alg_in = best_alg, best_alg = x;
2724               best_alg->log[best_alg->ops] = m;
2725               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2726             }
2727         }
2728       if (cache_hit)
2729         goto done;
2730     }
2731
2732  done:
2733   /* If best_cost has not decreased, we have not found any algorithm.  */
2734   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2735     {
2736       /* We failed to find an algorithm.  Record alg_impossible for
2737          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2738          we are asked to find an algorithm for T within the same or
2739          lower COST_LIMIT, we can immediately return to the
2740          caller.  */
2741       alg_hash[hash_index].t = t;
2742       alg_hash[hash_index].mode = mode;
2743       alg_hash[hash_index].speed = speed;
2744       alg_hash[hash_index].alg = alg_impossible;
2745       alg_hash[hash_index].cost = *cost_limit;
2746       return;
2747     }
2748
2749   /* Cache the result.  */
2750   if (!cache_hit)
2751     {
2752       alg_hash[hash_index].t = t;
2753       alg_hash[hash_index].mode = mode;
2754       alg_hash[hash_index].speed = speed;
2755       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2756       alg_hash[hash_index].cost.cost = best_cost.cost;
2757       alg_hash[hash_index].cost.latency = best_cost.latency;
2758     }
2759
2760   /* If we are getting a too long sequence for `struct algorithm'
2761      to record, make this search fail.  */
2762   if (best_alg->ops == MAX_BITS_PER_WORD)
2763     return;
2764
2765   /* Copy the algorithm from temporary space to the space at alg_out.
2766      We avoid using structure assignment because the majority of
2767      best_alg is normally undefined, and this is a critical function.  */
2768   alg_out->ops = best_alg->ops + 1;
2769   alg_out->cost = best_cost;
2770   memcpy (alg_out->op, best_alg->op,
2771           alg_out->ops * sizeof *alg_out->op);
2772   memcpy (alg_out->log, best_alg->log,
2773           alg_out->ops * sizeof *alg_out->log);
2774 }
2775 \f
2776 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2777    Try three variations:
2778
2779        - a shift/add sequence based on VAL itself
2780        - a shift/add sequence based on -VAL, followed by a negation
2781        - a shift/add sequence based on VAL - 1, followed by an addition.
2782
2783    Return true if the cheapest of these cost less than MULT_COST,
2784    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2785
2786 static bool
2787 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2788                      struct algorithm *alg, enum mult_variant *variant,
2789                      int mult_cost)
2790 {
2791   struct algorithm alg2;
2792   struct mult_cost limit;
2793   int op_cost;
2794   bool speed = optimize_insn_for_speed_p ();
2795
2796   /* Fail quickly for impossible bounds.  */
2797   if (mult_cost < 0)
2798     return false;
2799
2800   /* Ensure that mult_cost provides a reasonable upper bound.
2801      Any constant multiplication can be performed with less
2802      than 2 * bits additions.  */
2803   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2804   if (mult_cost > op_cost)
2805     mult_cost = op_cost;
2806
2807   *variant = basic_variant;
2808   limit.cost = mult_cost;
2809   limit.latency = mult_cost;
2810   synth_mult (alg, val, &limit, mode);
2811
2812   /* This works only if the inverted value actually fits in an
2813      `unsigned int' */
2814   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2815     {
2816       op_cost = neg_cost[speed][mode];
2817       if (MULT_COST_LESS (&alg->cost, mult_cost))
2818         {
2819           limit.cost = alg->cost.cost - op_cost;
2820           limit.latency = alg->cost.latency - op_cost;
2821         }
2822       else
2823         {
2824           limit.cost = mult_cost - op_cost;
2825           limit.latency = mult_cost - op_cost;
2826         }
2827
2828       synth_mult (&alg2, -val, &limit, mode);
2829       alg2.cost.cost += op_cost;
2830       alg2.cost.latency += op_cost;
2831       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2832         *alg = alg2, *variant = negate_variant;
2833     }
2834
2835   /* This proves very useful for division-by-constant.  */
2836   op_cost = add_cost[speed][mode];
2837   if (MULT_COST_LESS (&alg->cost, mult_cost))
2838     {
2839       limit.cost = alg->cost.cost - op_cost;
2840       limit.latency = alg->cost.latency - op_cost;
2841     }
2842   else
2843     {
2844       limit.cost = mult_cost - op_cost;
2845       limit.latency = mult_cost - op_cost;
2846     }
2847
2848   synth_mult (&alg2, val - 1, &limit, mode);
2849   alg2.cost.cost += op_cost;
2850   alg2.cost.latency += op_cost;
2851   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2852     *alg = alg2, *variant = add_variant;
2853
2854   return MULT_COST_LESS (&alg->cost, mult_cost);
2855 }
2856
2857 /* A subroutine of expand_mult, used for constant multiplications.
2858    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2859    convenient.  Use the shift/add sequence described by ALG and apply
2860    the final fixup specified by VARIANT.  */
2861
2862 static rtx
2863 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2864                    rtx target, const struct algorithm *alg,
2865                    enum mult_variant variant)
2866 {
2867   HOST_WIDE_INT val_so_far;
2868   rtx insn, accum, tem;
2869   int opno;
2870   enum machine_mode nmode;
2871
2872   /* Avoid referencing memory over and over and invalid sharing
2873      on SUBREGs.  */
2874   op0 = force_reg (mode, op0);
2875
2876   /* ACCUM starts out either as OP0 or as a zero, depending on
2877      the first operation.  */
2878
2879   if (alg->op[0] == alg_zero)
2880     {
2881       accum = copy_to_mode_reg (mode, const0_rtx);
2882       val_so_far = 0;
2883     }
2884   else if (alg->op[0] == alg_m)
2885     {
2886       accum = copy_to_mode_reg (mode, op0);
2887       val_so_far = 1;
2888     }
2889   else
2890     gcc_unreachable ();
2891
2892   for (opno = 1; opno < alg->ops; opno++)
2893     {
2894       int log = alg->log[opno];
2895       rtx shift_subtarget = optimize ? 0 : accum;
2896       rtx add_target
2897         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2898            && !optimize)
2899           ? target : 0;
2900       rtx accum_target = optimize ? 0 : accum;
2901
2902       switch (alg->op[opno])
2903         {
2904         case alg_shift:
2905           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2906                                 build_int_cst (NULL_TREE, log),
2907                                 NULL_RTX, 0);
2908           val_so_far <<= log;
2909           break;
2910
2911         case alg_add_t_m2:
2912           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2913                               build_int_cst (NULL_TREE, log),
2914                               NULL_RTX, 0);
2915           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2916                                  add_target ? add_target : accum_target);
2917           val_so_far += (HOST_WIDE_INT) 1 << log;
2918           break;
2919
2920         case alg_sub_t_m2:
2921           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2922                               build_int_cst (NULL_TREE, log),
2923                               NULL_RTX, 0);
2924           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2925                                  add_target ? add_target : accum_target);
2926           val_so_far -= (HOST_WIDE_INT) 1 << log;
2927           break;
2928
2929         case alg_add_t2_m:
2930           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2931                                 build_int_cst (NULL_TREE, log),
2932                                 shift_subtarget,
2933                                 0);
2934           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2935                                  add_target ? add_target : accum_target);
2936           val_so_far = (val_so_far << log) + 1;
2937           break;
2938
2939         case alg_sub_t2_m:
2940           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2941                                 build_int_cst (NULL_TREE, log),
2942                                 shift_subtarget, 0);
2943           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2944                                  add_target ? add_target : accum_target);
2945           val_so_far = (val_so_far << log) - 1;
2946           break;
2947
2948         case alg_add_factor:
2949           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2950                               build_int_cst (NULL_TREE, log),
2951                               NULL_RTX, 0);
2952           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2953                                  add_target ? add_target : accum_target);
2954           val_so_far += val_so_far << log;
2955           break;
2956
2957         case alg_sub_factor:
2958           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2959                               build_int_cst (NULL_TREE, log),
2960                               NULL_RTX, 0);
2961           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2962                                  (add_target
2963                                   ? add_target : (optimize ? 0 : tem)));
2964           val_so_far = (val_so_far << log) - val_so_far;
2965           break;
2966
2967         default:
2968           gcc_unreachable ();
2969         }
2970
2971       /* Write a REG_EQUAL note on the last insn so that we can cse
2972          multiplication sequences.  Note that if ACCUM is a SUBREG,
2973          we've set the inner register and must properly indicate
2974          that.  */
2975
2976       tem = op0, nmode = mode;
2977       if (GET_CODE (accum) == SUBREG)
2978         {
2979           nmode = GET_MODE (SUBREG_REG (accum));
2980           tem = gen_lowpart (nmode, op0);
2981         }
2982
2983       insn = get_last_insn ();
2984       set_unique_reg_note (insn, REG_EQUAL,
2985                            gen_rtx_MULT (nmode, tem,
2986                                          GEN_INT (val_so_far)));
2987     }
2988
2989   if (variant == negate_variant)
2990     {
2991       val_so_far = -val_so_far;
2992       accum = expand_unop (mode, neg_optab, accum, target, 0);
2993     }
2994   else if (variant == add_variant)
2995     {
2996       val_so_far = val_so_far + 1;
2997       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2998     }
2999
3000   /* Compare only the bits of val and val_so_far that are significant
3001      in the result mode, to avoid sign-/zero-extension confusion.  */
3002   val &= GET_MODE_MASK (mode);
3003   val_so_far &= GET_MODE_MASK (mode);
3004   gcc_assert (val == val_so_far);
3005
3006   return accum;
3007 }
3008
3009 /* Perform a multiplication and return an rtx for the result.
3010    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3011    TARGET is a suggestion for where to store the result (an rtx).
3012
3013    We check specially for a constant integer as OP1.
3014    If you want this check for OP0 as well, then before calling
3015    you should swap the two operands if OP0 would be constant.  */
3016
3017 rtx
3018 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3019              int unsignedp)
3020 {
3021   enum mult_variant variant;
3022   struct algorithm algorithm;
3023   int max_cost;
3024   bool speed = optimize_insn_for_speed_p ();
3025
3026   /* Handling const0_rtx here allows us to use zero as a rogue value for
3027      coeff below.  */
3028   if (op1 == const0_rtx)
3029     return const0_rtx;
3030   if (op1 == const1_rtx)
3031     return op0;
3032   if (op1 == constm1_rtx)
3033     return expand_unop (mode,
3034                         GET_MODE_CLASS (mode) == MODE_INT
3035                         && !unsignedp && flag_trapv
3036                         ? negv_optab : neg_optab,
3037                         op0, target, 0);
3038
3039   /* These are the operations that are potentially turned into a sequence
3040      of shifts and additions.  */
3041   if (SCALAR_INT_MODE_P (mode)
3042       && (unsignedp || !flag_trapv))
3043     {
3044       HOST_WIDE_INT coeff = 0;
3045       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3046
3047       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3048          less than or equal in size to `unsigned int' this doesn't matter.
3049          If the mode is larger than `unsigned int', then synth_mult works
3050          only if the constant value exactly fits in an `unsigned int' without
3051          any truncation.  This means that multiplying by negative values does
3052          not work; results are off by 2^32 on a 32 bit machine.  */
3053
3054       if (GET_CODE (op1) == CONST_INT)
3055         {
3056           /* Attempt to handle multiplication of DImode values by negative
3057              coefficients, by performing the multiplication by a positive
3058              multiplier and then inverting the result.  */
3059           if (INTVAL (op1) < 0
3060               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3061             {
3062               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3063                  result is interpreted as an unsigned coefficient.
3064                  Exclude cost of op0 from max_cost to match the cost
3065                  calculation of the synth_mult.  */
3066               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3067                          - neg_cost[speed][mode];
3068               if (max_cost > 0
3069                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3070                                           &variant, max_cost))
3071                 {
3072                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3073                                                 NULL_RTX, &algorithm,
3074                                                 variant);
3075                   return expand_unop (mode, neg_optab, temp, target, 0);
3076                 }
3077             }
3078           else coeff = INTVAL (op1);
3079         }
3080       else if (GET_CODE (op1) == CONST_DOUBLE)
3081         {
3082           /* If we are multiplying in DImode, it may still be a win
3083              to try to work with shifts and adds.  */
3084           if (CONST_DOUBLE_HIGH (op1) == 0)
3085             coeff = CONST_DOUBLE_LOW (op1);
3086           else if (CONST_DOUBLE_LOW (op1) == 0
3087                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3088             {
3089               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3090                           + HOST_BITS_PER_WIDE_INT;
3091               return expand_shift (LSHIFT_EXPR, mode, op0,
3092                                    build_int_cst (NULL_TREE, shift),
3093                                    target, unsignedp);
3094             }
3095         }
3096
3097       /* We used to test optimize here, on the grounds that it's better to
3098          produce a smaller program when -O is not used.  But this causes
3099          such a terrible slowdown sometimes that it seems better to always
3100          use synth_mult.  */
3101       if (coeff != 0)
3102         {
3103           /* Special case powers of two.  */
3104           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3105             return expand_shift (LSHIFT_EXPR, mode, op0,
3106                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3107                                  target, unsignedp);
3108
3109           /* Exclude cost of op0 from max_cost to match the cost
3110              calculation of the synth_mult.  */
3111           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3112           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3113                                    max_cost))
3114             return expand_mult_const (mode, op0, coeff, target,
3115                                       &algorithm, variant);
3116         }
3117     }
3118
3119   if (GET_CODE (op0) == CONST_DOUBLE)
3120     {
3121       rtx temp = op0;
3122       op0 = op1;
3123       op1 = temp;
3124     }
3125
3126   /* Expand x*2.0 as x+x.  */
3127   if (GET_CODE (op1) == CONST_DOUBLE
3128       && SCALAR_FLOAT_MODE_P (mode))
3129     {
3130       REAL_VALUE_TYPE d;
3131       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3132
3133       if (REAL_VALUES_EQUAL (d, dconst2))
3134         {
3135           op0 = force_reg (GET_MODE (op0), op0);
3136           return expand_binop (mode, add_optab, op0, op0,
3137                                target, unsignedp, OPTAB_LIB_WIDEN);
3138         }
3139     }
3140
3141   /* This used to use umul_optab if unsigned, but for non-widening multiply
3142      there is no difference between signed and unsigned.  */
3143   op0 = expand_binop (mode,
3144                       ! unsignedp
3145                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3146                       ? smulv_optab : smul_optab,
3147                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3148   gcc_assert (op0);
3149   return op0;
3150 }
3151 \f
3152 /* Return the smallest n such that 2**n >= X.  */
3153
3154 int
3155 ceil_log2 (unsigned HOST_WIDE_INT x)
3156 {
3157   return floor_log2 (x - 1) + 1;
3158 }
3159
3160 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3161    replace division by D, and put the least significant N bits of the result
3162    in *MULTIPLIER_PTR and return the most significant bit.
3163
3164    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3165    needed precision is in PRECISION (should be <= N).
3166
3167    PRECISION should be as small as possible so this function can choose
3168    multiplier more freely.
3169
3170    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3171    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3172
3173    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3174    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3175
3176 static
3177 unsigned HOST_WIDE_INT
3178 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3179                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3180 {
3181   HOST_WIDE_INT mhigh_hi, mlow_hi;
3182   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3183   int lgup, post_shift;
3184   int pow, pow2;
3185   unsigned HOST_WIDE_INT nl, dummy1;
3186   HOST_WIDE_INT nh, dummy2;
3187
3188   /* lgup = ceil(log2(divisor)); */
3189   lgup = ceil_log2 (d);
3190
3191   gcc_assert (lgup <= n);
3192
3193   pow = n + lgup;
3194   pow2 = n + lgup - precision;
3195
3196   /* We could handle this with some effort, but this case is much
3197      better handled directly with a scc insn, so rely on caller using
3198      that.  */
3199   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3200
3201   /* mlow = 2^(N + lgup)/d */
3202  if (pow >= HOST_BITS_PER_WIDE_INT)
3203     {
3204       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3205       nl = 0;
3206     }
3207   else
3208     {
3209       nh = 0;
3210       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3211     }
3212   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3213                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3214
3215   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3216   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3217     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3218   else
3219     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3220   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3221                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3222
3223   gcc_assert (!mhigh_hi || nh - d < d);
3224   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3225   /* Assert that mlow < mhigh.  */
3226   gcc_assert (mlow_hi < mhigh_hi
3227               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3228
3229   /* If precision == N, then mlow, mhigh exceed 2^N
3230      (but they do not exceed 2^(N+1)).  */
3231
3232   /* Reduce to lowest terms.  */
3233   for (post_shift = lgup; post_shift > 0; post_shift--)
3234     {
3235       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3236       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3237       if (ml_lo >= mh_lo)
3238         break;
3239
3240       mlow_hi = 0;
3241       mlow_lo = ml_lo;
3242       mhigh_hi = 0;
3243       mhigh_lo = mh_lo;
3244     }
3245
3246   *post_shift_ptr = post_shift;
3247   *lgup_ptr = lgup;
3248   if (n < HOST_BITS_PER_WIDE_INT)
3249     {
3250       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3251       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3252       return mhigh_lo >= mask;
3253     }
3254   else
3255     {
3256       *multiplier_ptr = GEN_INT (mhigh_lo);
3257       return mhigh_hi;
3258     }
3259 }
3260
3261 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3262    congruent to 1 (mod 2**N).  */
3263
3264 static unsigned HOST_WIDE_INT
3265 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3266 {
3267   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3268
3269   /* The algorithm notes that the choice y = x satisfies
3270      x*y == 1 mod 2^3, since x is assumed odd.
3271      Each iteration doubles the number of bits of significance in y.  */
3272
3273   unsigned HOST_WIDE_INT mask;
3274   unsigned HOST_WIDE_INT y = x;
3275   int nbit = 3;
3276
3277   mask = (n == HOST_BITS_PER_WIDE_INT
3278           ? ~(unsigned HOST_WIDE_INT) 0
3279           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3280
3281   while (nbit < n)
3282     {
3283       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3284       nbit *= 2;
3285     }
3286   return y;
3287 }
3288
3289 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3290    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3291    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3292    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3293    become signed.
3294
3295    The result is put in TARGET if that is convenient.
3296
3297    MODE is the mode of operation.  */
3298
3299 rtx
3300 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3301                              rtx op1, rtx target, int unsignedp)
3302 {
3303   rtx tem;
3304   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3305
3306   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3307                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3308                       NULL_RTX, 0);
3309   tem = expand_and (mode, tem, op1, NULL_RTX);
3310   adj_operand
3311     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3312                      adj_operand);
3313
3314   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3315                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3316                       NULL_RTX, 0);
3317   tem = expand_and (mode, tem, op0, NULL_RTX);
3318   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3319                           target);
3320
3321   return target;
3322 }
3323
3324 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3325
3326 static rtx
3327 extract_high_half (enum machine_mode mode, rtx op)
3328 {
3329   enum machine_mode wider_mode;
3330
3331   if (mode == word_mode)
3332     return gen_highpart (mode, op);
3333
3334   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3335
3336   wider_mode = GET_MODE_WIDER_MODE (mode);
3337   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3338                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3339   return convert_modes (mode, wider_mode, op, 0);
3340 }
3341
3342 /* Like expand_mult_highpart, but only consider using a multiplication
3343    optab.  OP1 is an rtx for the constant operand.  */
3344
3345 static rtx
3346 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3347                             rtx target, int unsignedp, int max_cost)
3348 {
3349   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3350   enum machine_mode wider_mode;
3351   optab moptab;
3352   rtx tem;
3353   int size;
3354   bool speed = optimize_insn_for_speed_p ();
3355
3356   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3357
3358   wider_mode = GET_MODE_WIDER_MODE (mode);
3359   size = GET_MODE_BITSIZE (mode);
3360
3361   /* Firstly, try using a multiplication insn that only generates the needed
3362      high part of the product, and in the sign flavor of unsignedp.  */
3363   if (mul_highpart_cost[speed][mode] < max_cost)
3364     {
3365       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3366       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3367                           unsignedp, OPTAB_DIRECT);
3368       if (tem)
3369         return tem;
3370     }
3371
3372   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3373      Need to adjust the result after the multiplication.  */
3374   if (size - 1 < BITS_PER_WORD
3375       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3376           + 4 * add_cost[speed][mode] < max_cost))
3377     {
3378       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3379       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3380                           unsignedp, OPTAB_DIRECT);
3381       if (tem)
3382         /* We used the wrong signedness.  Adjust the result.  */
3383         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3384                                             tem, unsignedp);
3385     }
3386
3387   /* Try widening multiplication.  */
3388   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3389   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3390       && mul_widen_cost[speed][wider_mode] < max_cost)
3391     {
3392       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3393                           unsignedp, OPTAB_WIDEN);
3394       if (tem)
3395         return extract_high_half (mode, tem);
3396     }
3397
3398   /* Try widening the mode and perform a non-widening multiplication.  */
3399   if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3400       && size - 1 < BITS_PER_WORD
3401       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3402     {
3403       rtx insns, wop0, wop1;
3404
3405       /* We need to widen the operands, for example to ensure the
3406          constant multiplier is correctly sign or zero extended.
3407          Use a sequence to clean-up any instructions emitted by
3408          the conversions if things don't work out.  */
3409       start_sequence ();
3410       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3411       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3412       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3413                           unsignedp, OPTAB_WIDEN);
3414       insns = get_insns ();
3415       end_sequence ();
3416
3417       if (tem)
3418         {
3419           emit_insn (insns);
3420           return extract_high_half (mode, tem);
3421         }
3422     }
3423
3424   /* Try widening multiplication of opposite signedness, and adjust.  */
3425   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3426   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3427       && size - 1 < BITS_PER_WORD
3428       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3429           + 4 * add_cost[speed][mode] < max_cost))
3430     {
3431       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3432                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3433       if (tem != 0)
3434         {
3435           tem = extract_high_half (mode, tem);
3436           /* We used the wrong signedness.  Adjust the result.  */
3437           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3438                                               target, unsignedp);
3439         }
3440     }
3441
3442   return 0;
3443 }
3444
3445 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3446    putting the high half of the result in TARGET if that is convenient,
3447    and return where the result is.  If the operation can not be performed,
3448    0 is returned.
3449
3450    MODE is the mode of operation and result.
3451
3452    UNSIGNEDP nonzero means unsigned multiply.
3453
3454    MAX_COST is the total allowed cost for the expanded RTL.  */
3455
3456 static rtx
3457 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3458                       rtx target, int unsignedp, int max_cost)
3459 {
3460   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3461   unsigned HOST_WIDE_INT cnst1;
3462   int extra_cost;
3463   bool sign_adjust = false;
3464   enum mult_variant variant;
3465   struct algorithm alg;
3466   rtx tem;
3467   bool speed = optimize_insn_for_speed_p ();
3468
3469   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3470   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3471   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3472
3473   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3474
3475   /* We can't optimize modes wider than BITS_PER_WORD.
3476      ??? We might be able to perform double-word arithmetic if
3477      mode == word_mode, however all the cost calculations in
3478      synth_mult etc. assume single-word operations.  */
3479   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3480     return expand_mult_highpart_optab (mode, op0, op1, target,
3481                                        unsignedp, max_cost);
3482
3483   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3484
3485   /* Check whether we try to multiply by a negative constant.  */
3486   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3487     {
3488       sign_adjust = true;
3489       extra_cost += add_cost[speed][mode];
3490     }
3491
3492   /* See whether shift/add multiplication is cheap enough.  */
3493   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3494                            max_cost - extra_cost))
3495     {
3496       /* See whether the specialized multiplication optabs are
3497          cheaper than the shift/add version.  */
3498       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3499                                         alg.cost.cost + extra_cost);
3500       if (tem)
3501         return tem;
3502
3503       tem = convert_to_mode (wider_mode, op0, unsignedp);
3504       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3505       tem = extract_high_half (mode, tem);
3506
3507       /* Adjust result for signedness.  */
3508       if (sign_adjust)
3509         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3510
3511       return tem;
3512     }
3513   return expand_mult_highpart_optab (mode, op0, op1, target,
3514                                      unsignedp, max_cost);
3515 }
3516
3517
3518 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3519
3520 static rtx
3521 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3522 {
3523   unsigned HOST_WIDE_INT masklow, maskhigh;
3524   rtx result, temp, shift, label;
3525   int logd;
3526
3527   logd = floor_log2 (d);
3528   result = gen_reg_rtx (mode);
3529
3530   /* Avoid conditional branches when they're expensive.  */
3531   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3532       && optimize_insn_for_speed_p ())
3533     {
3534       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3535                                       mode, 0, -1);
3536       if (signmask)
3537         {
3538           signmask = force_reg (mode, signmask);
3539           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3540           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3541
3542           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3543              which instruction sequence to use.  If logical right shifts
3544              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3545              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3546
3547           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3548           if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3549               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3550             {
3551               temp = expand_binop (mode, xor_optab, op0, signmask,
3552                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3553               temp = expand_binop (mode, sub_optab, temp, signmask,
3554                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3555               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3556                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3557               temp = expand_binop (mode, xor_optab, temp, signmask,
3558                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3559               temp = expand_binop (mode, sub_optab, temp, signmask,
3560                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3561             }
3562           else
3563             {
3564               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3565                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3566               signmask = force_reg (mode, signmask);
3567
3568               temp = expand_binop (mode, add_optab, op0, signmask,
3569                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3570               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3571                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3572               temp = expand_binop (mode, sub_optab, temp, signmask,
3573                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3574             }
3575           return temp;
3576         }
3577     }
3578
3579   /* Mask contains the mode's signbit and the significant bits of the
3580      modulus.  By including the signbit in the operation, many targets
3581      can avoid an explicit compare operation in the following comparison
3582      against zero.  */
3583
3584   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3585   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3586     {
3587       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3588       maskhigh = -1;
3589     }
3590   else
3591     maskhigh = (HOST_WIDE_INT) -1
3592                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3593
3594   temp = expand_binop (mode, and_optab, op0,
3595                        immed_double_const (masklow, maskhigh, mode),
3596                        result, 1, OPTAB_LIB_WIDEN);
3597   if (temp != result)
3598     emit_move_insn (result, temp);
3599
3600   label = gen_label_rtx ();
3601   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3602
3603   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3604                        0, OPTAB_LIB_WIDEN);
3605   masklow = (HOST_WIDE_INT) -1 << logd;
3606   maskhigh = -1;
3607   temp = expand_binop (mode, ior_optab, temp,
3608                        immed_double_const (masklow, maskhigh, mode),
3609                        result, 1, OPTAB_LIB_WIDEN);
3610   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3611                        0, OPTAB_LIB_WIDEN);
3612   if (temp != result)
3613     emit_move_insn (result, temp);
3614   emit_label (label);
3615   return result;
3616 }
3617
3618 /* Expand signed division of OP0 by a power of two D in mode MODE.
3619    This routine is only called for positive values of D.  */
3620
3621 static rtx
3622 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3623 {
3624   rtx temp, label;
3625   tree shift;
3626   int logd;
3627
3628   logd = floor_log2 (d);
3629   shift = build_int_cst (NULL_TREE, logd);
3630
3631   if (d == 2
3632       && BRANCH_COST (optimize_insn_for_speed_p (),
3633                       false) >= 1)
3634     {
3635       temp = gen_reg_rtx (mode);
3636       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3637       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3638                            0, OPTAB_LIB_WIDEN);
3639       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3640     }
3641
3642 #ifdef HAVE_conditional_move
3643   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3644       >= 2)
3645     {
3646       rtx temp2;
3647
3648       /* ??? emit_conditional_move forces a stack adjustment via
3649          compare_from_rtx so, if the sequence is discarded, it will
3650          be lost.  Do it now instead.  */
3651       do_pending_stack_adjust ();
3652
3653       start_sequence ();
3654       temp2 = copy_to_mode_reg (mode, op0);
3655       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3656                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3657       temp = force_reg (mode, temp);
3658
3659       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3660       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3661                                      mode, temp, temp2, mode, 0);
3662       if (temp2)
3663         {
3664           rtx seq = get_insns ();
3665           end_sequence ();
3666           emit_insn (seq);
3667           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3668         }
3669       end_sequence ();
3670     }
3671 #endif
3672
3673   if (BRANCH_COST (optimize_insn_for_speed_p (),
3674                    false) >= 2)
3675     {
3676       int ushift = GET_MODE_BITSIZE (mode) - logd;
3677
3678       temp = gen_reg_rtx (mode);
3679       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3680       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3681         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3682                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3683       else
3684         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3685                              build_int_cst (NULL_TREE, ushift),
3686                              NULL_RTX, 1);
3687       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3688                            0, OPTAB_LIB_WIDEN);
3689       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3690     }
3691
3692   label = gen_label_rtx ();
3693   temp = copy_to_mode_reg (mode, op0);
3694   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3695   expand_inc (temp, GEN_INT (d - 1));
3696   emit_label (label);
3697   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3698 }
3699 \f
3700 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3701    if that is convenient, and returning where the result is.
3702    You may request either the quotient or the remainder as the result;
3703    specify REM_FLAG nonzero to get the remainder.
3704
3705    CODE is the expression code for which kind of division this is;
3706    it controls how rounding is done.  MODE is the machine mode to use.
3707    UNSIGNEDP nonzero means do unsigned division.  */
3708
3709 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3710    and then correct it by or'ing in missing high bits
3711    if result of ANDI is nonzero.
3712    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3713    This could optimize to a bfexts instruction.
3714    But C doesn't use these operations, so their optimizations are
3715    left for later.  */
3716 /* ??? For modulo, we don't actually need the highpart of the first product,
3717    the low part will do nicely.  And for small divisors, the second multiply
3718    can also be a low-part only multiply or even be completely left out.
3719    E.g. to calculate the remainder of a division by 3 with a 32 bit
3720    multiply, multiply with 0x55555556 and extract the upper two bits;
3721    the result is exact for inputs up to 0x1fffffff.
3722    The input range can be reduced by using cross-sum rules.
3723    For odd divisors >= 3, the following table gives right shift counts
3724    so that if a number is shifted by an integer multiple of the given
3725    amount, the remainder stays the same:
3726    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3727    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3728    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3729    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3730    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3731
3732    Cross-sum rules for even numbers can be derived by leaving as many bits
3733    to the right alone as the divisor has zeros to the right.
3734    E.g. if x is an unsigned 32 bit number:
3735    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3736    */
3737
3738 rtx
3739 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3740                rtx op0, rtx op1, rtx target, int unsignedp)
3741 {
3742   enum machine_mode compute_mode;
3743   rtx tquotient;
3744   rtx quotient = 0, remainder = 0;
3745   rtx last;
3746   int size;
3747   rtx insn, set;
3748   optab optab1, optab2;
3749   int op1_is_constant, op1_is_pow2 = 0;
3750   int max_cost, extra_cost;
3751   static HOST_WIDE_INT last_div_const = 0;
3752   static HOST_WIDE_INT ext_op1;
3753   bool speed = optimize_insn_for_speed_p ();
3754
3755   op1_is_constant = GET_CODE (op1) == CONST_INT;
3756   if (op1_is_constant)
3757     {
3758       ext_op1 = INTVAL (op1);
3759       if (unsignedp)
3760         ext_op1 &= GET_MODE_MASK (mode);
3761       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3762                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3763     }
3764
3765   /*
3766      This is the structure of expand_divmod:
3767
3768      First comes code to fix up the operands so we can perform the operations
3769      correctly and efficiently.
3770
3771      Second comes a switch statement with code specific for each rounding mode.
3772      For some special operands this code emits all RTL for the desired
3773      operation, for other cases, it generates only a quotient and stores it in
3774      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3775      to indicate that it has not done anything.
3776
3777      Last comes code that finishes the operation.  If QUOTIENT is set and
3778      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3779      QUOTIENT is not set, it is computed using trunc rounding.
3780
3781      We try to generate special code for division and remainder when OP1 is a
3782      constant.  If |OP1| = 2**n we can use shifts and some other fast
3783      operations.  For other values of OP1, we compute a carefully selected
3784      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3785      by m.
3786
3787      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3788      half of the product.  Different strategies for generating the product are
3789      implemented in expand_mult_highpart.
3790
3791      If what we actually want is the remainder, we generate that by another
3792      by-constant multiplication and a subtraction.  */
3793
3794   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3795      code below will malfunction if we are, so check here and handle
3796      the special case if so.  */
3797   if (op1 == const1_rtx)
3798     return rem_flag ? const0_rtx : op0;
3799
3800     /* When dividing by -1, we could get an overflow.
3801      negv_optab can handle overflows.  */
3802   if (! unsignedp && op1 == constm1_rtx)
3803     {
3804       if (rem_flag)
3805         return const0_rtx;
3806       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3807                           ? negv_optab : neg_optab, op0, target, 0);
3808     }
3809
3810   if (target
3811       /* Don't use the function value register as a target
3812          since we have to read it as well as write it,
3813          and function-inlining gets confused by this.  */
3814       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3815           /* Don't clobber an operand while doing a multi-step calculation.  */
3816           || ((rem_flag || op1_is_constant)
3817               && (reg_mentioned_p (target, op0)
3818                   || (MEM_P (op0) && MEM_P (target))))
3819           || reg_mentioned_p (target, op1)
3820           || (MEM_P (op1) && MEM_P (target))))
3821     target = 0;
3822
3823   /* Get the mode in which to perform this computation.  Normally it will
3824      be MODE, but sometimes we can't do the desired operation in MODE.
3825      If so, pick a wider mode in which we can do the operation.  Convert
3826      to that mode at the start to avoid repeated conversions.
3827
3828      First see what operations we need.  These depend on the expression
3829      we are evaluating.  (We assume that divxx3 insns exist under the
3830      same conditions that modxx3 insns and that these insns don't normally
3831      fail.  If these assumptions are not correct, we may generate less
3832      efficient code in some cases.)
3833
3834      Then see if we find a mode in which we can open-code that operation
3835      (either a division, modulus, or shift).  Finally, check for the smallest
3836      mode for which we can do the operation with a library call.  */
3837
3838   /* We might want to refine this now that we have division-by-constant
3839      optimization.  Since expand_mult_highpart tries so many variants, it is
3840      not straightforward to generalize this.  Maybe we should make an array
3841      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3842
3843   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3844             ? (unsignedp ? lshr_optab : ashr_optab)
3845             : (unsignedp ? udiv_optab : sdiv_optab));
3846   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3847             ? optab1
3848             : (unsignedp ? udivmod_optab : sdivmod_optab));
3849
3850   for (compute_mode = mode; compute_mode != VOIDmode;
3851        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3852     if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3853         || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3854       break;
3855
3856   if (compute_mode == VOIDmode)
3857     for (compute_mode = mode; compute_mode != VOIDmode;
3858          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3859       if (optab_libfunc (optab1, compute_mode)
3860           || optab_libfunc (optab2, compute_mode))
3861         break;
3862
3863   /* If we still couldn't find a mode, use MODE, but expand_binop will
3864      probably die.  */
3865   if (compute_mode == VOIDmode)
3866     compute_mode = mode;
3867
3868   if (target && GET_MODE (target) == compute_mode)
3869     tquotient = target;
3870   else
3871     tquotient = gen_reg_rtx (compute_mode);
3872
3873   size = GET_MODE_BITSIZE (compute_mode);
3874 #if 0
3875   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3876      (mode), and thereby get better code when OP1 is a constant.  Do that
3877      later.  It will require going over all usages of SIZE below.  */
3878   size = GET_MODE_BITSIZE (mode);
3879 #endif
3880
3881   /* Only deduct something for a REM if the last divide done was
3882      for a different constant.   Then set the constant of the last
3883      divide.  */
3884   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3885   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3886                      && INTVAL (op1) == last_div_const))
3887     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3888
3889   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3890
3891   /* Now convert to the best mode to use.  */
3892   if (compute_mode != mode)
3893     {
3894       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3895       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3896
3897       /* convert_modes may have placed op1 into a register, so we
3898          must recompute the following.  */
3899       op1_is_constant = GET_CODE (op1) == CONST_INT;
3900       op1_is_pow2 = (op1_is_constant
3901                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3902                           || (! unsignedp
3903                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3904     }
3905
3906   /* If one of the operands is a volatile MEM, copy it into a register.  */
3907
3908   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3909     op0 = force_reg (compute_mode, op0);
3910   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3911     op1 = force_reg (compute_mode, op1);
3912
3913   /* If we need the remainder or if OP1 is constant, we need to
3914      put OP0 in a register in case it has any queued subexpressions.  */
3915   if (rem_flag || op1_is_constant)
3916     op0 = force_reg (compute_mode, op0);
3917
3918   last = get_last_insn ();
3919
3920   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3921   if (unsignedp)
3922     {
3923       if (code == FLOOR_DIV_EXPR)
3924         code = TRUNC_DIV_EXPR;
3925       if (code == FLOOR_MOD_EXPR)
3926         code = TRUNC_MOD_EXPR;
3927       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3928         code = TRUNC_DIV_EXPR;
3929     }
3930
3931   if (op1 != const0_rtx)
3932     switch (code)
3933       {
3934       case TRUNC_MOD_EXPR:
3935       case TRUNC_DIV_EXPR:
3936         if (op1_is_constant)
3937           {
3938             if (unsignedp)
3939               {
3940                 unsigned HOST_WIDE_INT mh;
3941                 int pre_shift, post_shift;
3942                 int dummy;
3943                 rtx ml;
3944                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3945                                             & GET_MODE_MASK (compute_mode));
3946
3947                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3948                   {
3949                     pre_shift = floor_log2 (d);
3950                     if (rem_flag)
3951                       {
3952                         remainder
3953                           = expand_binop (compute_mode, and_optab, op0,
3954                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3955                                           remainder, 1,
3956                                           OPTAB_LIB_WIDEN);
3957                         if (remainder)
3958                           return gen_lowpart (mode, remainder);
3959                       }
3960                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3961                                              build_int_cst (NULL_TREE,
3962                                                             pre_shift),
3963                                              tquotient, 1);
3964                   }
3965                 else if (size <= HOST_BITS_PER_WIDE_INT)
3966                   {
3967                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3968                       {
3969                         /* Most significant bit of divisor is set; emit an scc
3970                            insn.  */
3971                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3972                                                     compute_mode, 1, 1);
3973                         if (quotient == 0)
3974                           goto fail1;
3975                       }
3976                     else
3977                       {
3978                         /* Find a suitable multiplier and right shift count
3979                            instead of multiplying with D.  */
3980
3981                         mh = choose_multiplier (d, size, size,
3982                                                 &ml, &post_shift, &dummy);
3983
3984                         /* If the suggested multiplier is more than SIZE bits,
3985                            we can do better for even divisors, using an
3986                            initial right shift.  */
3987                         if (mh != 0 && (d & 1) == 0)
3988                           {
3989                             pre_shift = floor_log2 (d & -d);
3990                             mh = choose_multiplier (d >> pre_shift, size,
3991                                                     size - pre_shift,
3992                                                     &ml, &post_shift, &dummy);
3993                             gcc_assert (!mh);
3994                           }
3995                         else
3996                           pre_shift = 0;
3997
3998                         if (mh != 0)
3999                           {
4000                             rtx t1, t2, t3, t4;
4001
4002                             if (post_shift - 1 >= BITS_PER_WORD)
4003                               goto fail1;
4004
4005                             extra_cost
4006                               = (shift_cost[speed][compute_mode][post_shift - 1]
4007                                  + shift_cost[speed][compute_mode][1]
4008                                  + 2 * add_cost[speed][compute_mode]);
4009                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4010                                                        NULL_RTX, 1,
4011                                                        max_cost - extra_cost);
4012                             if (t1 == 0)
4013                               goto fail1;
4014                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4015                                                                op0, t1),
4016                                                 NULL_RTX);
4017                             t3 = expand_shift
4018                               (RSHIFT_EXPR, compute_mode, t2,
4019                                build_int_cst (NULL_TREE, 1),
4020                                NULL_RTX,1);
4021                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4022                                                               t1, t3),
4023                                                 NULL_RTX);
4024                             quotient = expand_shift
4025                               (RSHIFT_EXPR, compute_mode, t4,
4026                                build_int_cst (NULL_TREE, post_shift - 1),
4027                                tquotient, 1);
4028                           }
4029                         else
4030                           {
4031                             rtx t1, t2;
4032
4033                             if (pre_shift >= BITS_PER_WORD
4034                                 || post_shift >= BITS_PER_WORD)
4035                               goto fail1;
4036
4037                             t1 = expand_shift
4038                               (RSHIFT_EXPR, compute_mode, op0,
4039                                build_int_cst (NULL_TREE, pre_shift),
4040                                NULL_RTX, 1);
4041                             extra_cost
4042                               = (shift_cost[speed][compute_mode][pre_shift]
4043                                  + shift_cost[speed][compute_mode][post_shift]);
4044                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4045                                                        NULL_RTX, 1,
4046                                                        max_cost - extra_cost);
4047                             if (t2 == 0)
4048                               goto fail1;
4049                             quotient = expand_shift
4050                               (RSHIFT_EXPR, compute_mode, t2,
4051                                build_int_cst (NULL_TREE, post_shift),
4052                                tquotient, 1);
4053                           }
4054                       }
4055                   }
4056                 else            /* Too wide mode to use tricky code */
4057                   break;
4058
4059                 insn = get_last_insn ();
4060                 if (insn != last
4061                     && (set = single_set (insn)) != 0
4062                     && SET_DEST (set) == quotient)
4063                   set_unique_reg_note (insn,
4064                                        REG_EQUAL,
4065                                        gen_rtx_UDIV (compute_mode, op0, op1));
4066               }
4067             else                /* TRUNC_DIV, signed */
4068               {
4069                 unsigned HOST_WIDE_INT ml;
4070                 int lgup, post_shift;
4071                 rtx mlr;
4072                 HOST_WIDE_INT d = INTVAL (op1);
4073                 unsigned HOST_WIDE_INT abs_d;
4074
4075                 /* Since d might be INT_MIN, we have to cast to
4076                    unsigned HOST_WIDE_INT before negating to avoid
4077                    undefined signed overflow.  */
4078                 abs_d = (d >= 0
4079                          ? (unsigned HOST_WIDE_INT) d
4080                          : - (unsigned HOST_WIDE_INT) d);
4081
4082                 /* n rem d = n rem -d */
4083                 if (rem_flag && d < 0)
4084                   {
4085                     d = abs_d;
4086                     op1 = gen_int_mode (abs_d, compute_mode);
4087                   }
4088
4089                 if (d == 1)
4090                   quotient = op0;
4091                 else if (d == -1)
4092                   quotient = expand_unop (compute_mode, neg_optab, op0,
4093                                           tquotient, 0);
4094                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4095                   {
4096                     /* This case is not handled correctly below.  */
4097                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4098                                                 compute_mode, 1, 1);
4099                     if (quotient == 0)
4100                       goto fail1;
4101                   }
4102                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4103                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4104                                       : sdiv_pow2_cheap[speed][compute_mode])
4105                          /* We assume that cheap metric is true if the
4106                             optab has an expander for this mode.  */
4107                          && ((optab_handler ((rem_flag ? smod_optab
4108                                               : sdiv_optab),
4109                                               compute_mode)->insn_code
4110                               != CODE_FOR_nothing)
4111                              || (optab_handler(sdivmod_optab,
4112                                                compute_mode)
4113                                  ->insn_code != CODE_FOR_nothing)))
4114                   ;
4115                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4116                   {
4117                     if (rem_flag)
4118                       {
4119                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4120                         if (remainder)
4121                           return gen_lowpart (mode, remainder);
4122                       }
4123
4124                     if (sdiv_pow2_cheap[speed][compute_mode]
4125                         && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4126                              != CODE_FOR_nothing)
4127                             || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4128                                 != CODE_FOR_nothing)))
4129                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4130                                                 compute_mode, op0,
4131                                                 gen_int_mode (abs_d,
4132                                                               compute_mode),
4133                                                 NULL_RTX, 0);
4134                     else
4135                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4136
4137                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4138                        negate the quotient.  */
4139                     if (d < 0)
4140                       {
4141                         insn = get_last_insn ();
4142                         if (insn != last
4143                             && (set = single_set (insn)) != 0
4144                             && SET_DEST (set) == quotient
4145                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4146                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4147                           set_unique_reg_note (insn,
4148                                                REG_EQUAL,
4149                                                gen_rtx_DIV (compute_mode,
4150                                                             op0,
4151                                                             GEN_INT
4152                                                             (trunc_int_for_mode
4153                                                              (abs_d,
4154                                                               compute_mode))));
4155
4156                         quotient = expand_unop (compute_mode, neg_optab,
4157                                                 quotient, quotient, 0);
4158                       }
4159                   }
4160                 else if (size <= HOST_BITS_PER_WIDE_INT)
4161                   {
4162                     choose_multiplier (abs_d, size, size - 1,
4163                                        &mlr, &post_shift, &lgup);
4164                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4165                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4166                       {
4167                         rtx t1, t2, t3;
4168
4169                         if (post_shift >= BITS_PER_WORD
4170                             || size - 1 >= BITS_PER_WORD)
4171                           goto fail1;
4172
4173                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4174                                       + shift_cost[speed][compute_mode][size - 1]
4175                                       + add_cost[speed][compute_mode]);
4176                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4177                                                    NULL_RTX, 0,
4178                                                    max_cost - extra_cost);
4179                         if (t1 == 0)
4180                           goto fail1;
4181                         t2 = expand_shift
4182                           (RSHIFT_EXPR, compute_mode, t1,
4183                            build_int_cst (NULL_TREE, post_shift),
4184                            NULL_RTX, 0);
4185                         t3 = expand_shift
4186                           (RSHIFT_EXPR, compute_mode, op0,
4187                            build_int_cst (NULL_TREE, size - 1),
4188                            NULL_RTX, 0);
4189                         if (d < 0)
4190                           quotient
4191                             = force_operand (gen_rtx_MINUS (compute_mode,
4192                                                             t3, t2),
4193                                              tquotient);
4194                         else
4195                           quotient
4196                             = force_operand (gen_rtx_MINUS (compute_mode,
4197                                                             t2, t3),
4198                                              tquotient);
4199                       }
4200                     else
4201                       {
4202                         rtx t1, t2, t3, t4;
4203
4204                         if (post_shift >= BITS_PER_WORD
4205                             || size - 1 >= BITS_PER_WORD)
4206                           goto fail1;
4207
4208                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4209                         mlr = gen_int_mode (ml, compute_mode);
4210                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4211                                       + shift_cost[speed][compute_mode][size - 1]
4212                                       + 2 * add_cost[speed][compute_mode]);
4213                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4214                                                    NULL_RTX, 0,
4215                                                    max_cost - extra_cost);
4216                         if (t1 == 0)
4217                           goto fail1;
4218                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4219                                                           t1, op0),
4220                                             NULL_RTX);
4221                         t3 = expand_shift
4222                           (RSHIFT_EXPR, compute_mode, t2,
4223                            build_int_cst (NULL_TREE, post_shift),
4224                            NULL_RTX, 0);
4225                         t4 = expand_shift
4226                           (RSHIFT_EXPR, compute_mode, op0,
4227                            build_int_cst (NULL_TREE, size - 1),
4228                            NULL_RTX, 0);
4229                         if (d < 0)
4230                           quotient
4231                             = force_operand (gen_rtx_MINUS (compute_mode,
4232                                                             t4, t3),
4233                                              tquotient);
4234                         else
4235                           quotient
4236                             = force_operand (gen_rtx_MINUS (compute_mode,
4237                                                             t3, t4),
4238                                              tquotient);
4239                       }
4240                   }
4241                 else            /* Too wide mode to use tricky code */
4242                   break;
4243
4244                 insn = get_last_insn ();
4245                 if (insn != last
4246                     && (set = single_set (insn)) != 0
4247                     && SET_DEST (set) == quotient)
4248                   set_unique_reg_note (insn,
4249                                        REG_EQUAL,
4250                                        gen_rtx_DIV (compute_mode, op0, op1));
4251               }
4252             break;
4253           }
4254       fail1:
4255         delete_insns_since (last);
4256         break;
4257
4258       case FLOOR_DIV_EXPR:
4259       case FLOOR_MOD_EXPR:
4260       /* We will come here only for signed operations.  */
4261         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4262           {
4263             unsigned HOST_WIDE_INT mh;
4264             int pre_shift, lgup, post_shift;
4265             HOST_WIDE_INT d = INTVAL (op1);
4266             rtx ml;
4267
4268             if (d > 0)
4269               {
4270                 /* We could just as easily deal with negative constants here,
4271                    but it does not seem worth the trouble for GCC 2.6.  */
4272                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4273                   {
4274                     pre_shift = floor_log2 (d);
4275                     if (rem_flag)
4276                       {
4277                         remainder = expand_binop (compute_mode, and_optab, op0,
4278                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4279                                                   remainder, 0, OPTAB_LIB_WIDEN);
4280                         if (remainder)
4281                           return gen_lowpart (mode, remainder);
4282                       }
4283                     quotient = expand_shift
4284                       (RSHIFT_EXPR, compute_mode, op0,
4285                        build_int_cst (NULL_TREE, pre_shift),
4286                        tquotient, 0);
4287                   }
4288                 else
4289                   {
4290                     rtx t1, t2, t3, t4;
4291
4292                     mh = choose_multiplier (d, size, size - 1,
4293                                             &ml, &post_shift, &lgup);
4294                     gcc_assert (!mh);
4295
4296                     if (post_shift < BITS_PER_WORD
4297                         && size - 1 < BITS_PER_WORD)
4298                       {
4299                         t1 = expand_shift
4300                           (RSHIFT_EXPR, compute_mode, op0,
4301                            build_int_cst (NULL_TREE, size - 1),
4302                            NULL_RTX, 0);
4303                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4304                                            NULL_RTX, 0, OPTAB_WIDEN);
4305                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4306                                       + shift_cost[speed][compute_mode][size - 1]
4307                                       + 2 * add_cost[speed][compute_mode]);
4308                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4309                                                    NULL_RTX, 1,
4310                                                    max_cost - extra_cost);
4311                         if (t3 != 0)
4312                           {
4313                             t4 = expand_shift
4314                               (RSHIFT_EXPR, compute_mode, t3,
4315                                build_int_cst (NULL_TREE, post_shift),
4316                                NULL_RTX, 1);
4317                             quotient = expand_binop (compute_mode, xor_optab,
4318                                                      t4, t1, tquotient, 0,
4319                                                      OPTAB_WIDEN);
4320                           }
4321                       }
4322                   }
4323               }
4324             else
4325               {
4326                 rtx nsign, t1, t2, t3, t4;
4327                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4328                                                   op0, constm1_rtx), NULL_RTX);
4329                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4330                                    0, OPTAB_WIDEN);
4331                 nsign = expand_shift
4332                   (RSHIFT_EXPR, compute_mode, t2,
4333                    build_int_cst (NULL_TREE, size - 1),
4334                    NULL_RTX, 0);
4335                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4336                                     NULL_RTX);
4337                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4338                                     NULL_RTX, 0);
4339                 if (t4)
4340                   {
4341                     rtx t5;
4342                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4343                                       NULL_RTX, 0);
4344                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4345                                                             t4, t5),
4346                                               tquotient);
4347                   }
4348               }
4349           }
4350
4351         if (quotient != 0)
4352           break;
4353         delete_insns_since (last);
4354
4355         /* Try using an instruction that produces both the quotient and
4356            remainder, using truncation.  We can easily compensate the quotient
4357            or remainder to get floor rounding, once we have the remainder.
4358            Notice that we compute also the final remainder value here,
4359            and return the result right away.  */
4360         if (target == 0 || GET_MODE (target) != compute_mode)
4361           target = gen_reg_rtx (compute_mode);
4362
4363         if (rem_flag)
4364           {
4365             remainder
4366               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4367             quotient = gen_reg_rtx (compute_mode);
4368           }
4369         else
4370           {
4371             quotient
4372               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4373             remainder = gen_reg_rtx (compute_mode);
4374           }
4375
4376         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4377                                  quotient, remainder, 0))
4378           {
4379             /* This could be computed with a branch-less sequence.
4380                Save that for later.  */
4381             rtx tem;
4382             rtx label = gen_label_rtx ();
4383             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4384             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4385                                 NULL_RTX, 0, OPTAB_WIDEN);
4386             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4387             expand_dec (quotient, const1_rtx);
4388             expand_inc (remainder, op1);
4389             emit_label (label);
4390             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4391           }
4392
4393         /* No luck with division elimination or divmod.  Have to do it
4394            by conditionally adjusting op0 *and* the result.  */
4395         {
4396           rtx label1, label2, label3, label4, label5;
4397           rtx adjusted_op0;
4398           rtx tem;
4399
4400           quotient = gen_reg_rtx (compute_mode);
4401           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4402           label1 = gen_label_rtx ();
4403           label2 = gen_label_rtx ();
4404           label3 = gen_label_rtx ();
4405           label4 = gen_label_rtx ();
4406           label5 = gen_label_rtx ();
4407           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4408           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4409           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4410                               quotient, 0, OPTAB_LIB_WIDEN);
4411           if (tem != quotient)
4412             emit_move_insn (quotient, tem);
4413           emit_jump_insn (gen_jump (label5));
4414           emit_barrier ();
4415           emit_label (label1);
4416           expand_inc (adjusted_op0, const1_rtx);
4417           emit_jump_insn (gen_jump (label4));
4418           emit_barrier ();
4419           emit_label (label2);
4420           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4421           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4422                               quotient, 0, OPTAB_LIB_WIDEN);
4423           if (tem != quotient)
4424             emit_move_insn (quotient, tem);
4425           emit_jump_insn (gen_jump (label5));
4426           emit_barrier ();
4427           emit_label (label3);
4428           expand_dec (adjusted_op0, const1_rtx);
4429           emit_label (label4);
4430           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4431                               quotient, 0, OPTAB_LIB_WIDEN);
4432           if (tem != quotient)
4433             emit_move_insn (quotient, tem);
4434           expand_dec (quotient, const1_rtx);
4435           emit_label (label5);
4436         }
4437         break;
4438
4439       case CEIL_DIV_EXPR:
4440       case CEIL_MOD_EXPR:
4441         if (unsignedp)
4442           {
4443             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4444               {
4445                 rtx t1, t2, t3;
4446                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4447                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4448                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4449                                    tquotient, 1);
4450                 t2 = expand_binop (compute_mode, and_optab, op0,
4451                                    GEN_INT (d - 1),
4452                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4453                 t3 = gen_reg_rtx (compute_mode);
4454                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4455                                       compute_mode, 1, 1);
4456                 if (t3 == 0)
4457                   {
4458                     rtx lab;
4459                     lab = gen_label_rtx ();
4460                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4461                     expand_inc (t1, const1_rtx);
4462                     emit_label (lab);
4463                     quotient = t1;
4464                   }
4465                 else
4466                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4467                                                           t1, t3),
4468                                             tquotient);
4469                 break;
4470               }
4471
4472             /* Try using an instruction that produces both the quotient and
4473                remainder, using truncation.  We can easily compensate the
4474                quotient or remainder to get ceiling rounding, once we have the
4475                remainder.  Notice that we compute also the final remainder
4476                value here, and return the result right away.  */
4477             if (target == 0 || GET_MODE (target) != compute_mode)
4478               target = gen_reg_rtx (compute_mode);
4479
4480             if (rem_flag)
4481               {
4482                 remainder = (REG_P (target)
4483                              ? target : gen_reg_rtx (compute_mode));
4484                 quotient = gen_reg_rtx (compute_mode);
4485               }
4486             else
4487               {
4488                 quotient = (REG_P (target)
4489                             ? target : gen_reg_rtx (compute_mode));
4490                 remainder = gen_reg_rtx (compute_mode);
4491               }
4492
4493             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4494                                      remainder, 1))
4495               {
4496                 /* This could be computed with a branch-less sequence.
4497                    Save that for later.  */
4498                 rtx label = gen_label_rtx ();
4499                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4500                                  compute_mode, label);
4501                 expand_inc (quotient, const1_rtx);
4502                 expand_dec (remainder, op1);
4503                 emit_label (label);
4504                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4505               }
4506
4507             /* No luck with division elimination or divmod.  Have to do it
4508                by conditionally adjusting op0 *and* the result.  */
4509             {
4510               rtx label1, label2;
4511               rtx adjusted_op0, tem;
4512
4513               quotient = gen_reg_rtx (compute_mode);
4514               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4515               label1 = gen_label_rtx ();
4516               label2 = gen_label_rtx ();
4517               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4518                                compute_mode, label1);
4519               emit_move_insn  (quotient, const0_rtx);
4520               emit_jump_insn (gen_jump (label2));
4521               emit_barrier ();
4522               emit_label (label1);
4523               expand_dec (adjusted_op0, const1_rtx);
4524               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4525                                   quotient, 1, OPTAB_LIB_WIDEN);
4526               if (tem != quotient)
4527                 emit_move_insn (quotient, tem);
4528               expand_inc (quotient, const1_rtx);
4529               emit_label (label2);
4530             }
4531           }
4532         else /* signed */
4533           {
4534             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4535                 && INTVAL (op1) >= 0)
4536               {
4537                 /* This is extremely similar to the code for the unsigned case
4538                    above.  For 2.7 we should merge these variants, but for
4539                    2.6.1 I don't want to touch the code for unsigned since that
4540                    get used in C.  The signed case will only be used by other
4541                    languages (Ada).  */
4542
4543                 rtx t1, t2, t3;
4544                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4545                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4546                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4547                                    tquotient, 0);
4548                 t2 = expand_binop (compute_mode, and_optab, op0,
4549                                    GEN_INT (d - 1),
4550                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4551                 t3 = gen_reg_rtx (compute_mode);
4552                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4553                                       compute_mode, 1, 1);
4554                 if (t3 == 0)
4555                   {
4556                     rtx lab;
4557                     lab = gen_label_rtx ();
4558                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4559                     expand_inc (t1, const1_rtx);
4560                     emit_label (lab);
4561                     quotient = t1;
4562                   }
4563                 else
4564                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4565                                                           t1, t3),
4566                                             tquotient);
4567                 break;
4568               }
4569
4570             /* Try using an instruction that produces both the quotient and
4571                remainder, using truncation.  We can easily compensate the
4572                quotient or remainder to get ceiling rounding, once we have the
4573                remainder.  Notice that we compute also the final remainder
4574                value here, and return the result right away.  */
4575             if (target == 0 || GET_MODE (target) != compute_mode)
4576               target = gen_reg_rtx (compute_mode);
4577             if (rem_flag)
4578               {
4579                 remainder= (REG_P (target)
4580                             ? target : gen_reg_rtx (compute_mode));
4581                 quotient = gen_reg_rtx (compute_mode);
4582               }
4583             else
4584               {
4585                 quotient = (REG_P (target)
4586                             ? target : gen_reg_rtx (compute_mode));
4587                 remainder = gen_reg_rtx (compute_mode);
4588               }
4589
4590             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4591                                      remainder, 0))
4592               {
4593                 /* This could be computed with a branch-less sequence.
4594                    Save that for later.  */
4595                 rtx tem;
4596                 rtx label = gen_label_rtx ();
4597                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4598                                  compute_mode, label);
4599                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4600                                     NULL_RTX, 0, OPTAB_WIDEN);
4601                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4602                 expand_inc (quotient, const1_rtx);
4603                 expand_dec (remainder, op1);
4604                 emit_label (label);
4605                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4606               }
4607
4608             /* No luck with division elimination or divmod.  Have to do it
4609                by conditionally adjusting op0 *and* the result.  */
4610             {
4611               rtx label1, label2, label3, label4, label5;
4612               rtx adjusted_op0;
4613               rtx tem;
4614
4615               quotient = gen_reg_rtx (compute_mode);
4616               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4617               label1 = gen_label_rtx ();
4618               label2 = gen_label_rtx ();
4619               label3 = gen_label_rtx ();
4620               label4 = gen_label_rtx ();
4621               label5 = gen_label_rtx ();
4622               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4623               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4624                                compute_mode, label1);
4625               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4626                                   quotient, 0, OPTAB_LIB_WIDEN);
4627               if (tem != quotient)
4628                 emit_move_insn (quotient, tem);
4629               emit_jump_insn (gen_jump (label5));
4630               emit_barrier ();
4631               emit_label (label1);
4632               expand_dec (adjusted_op0, const1_rtx);
4633               emit_jump_insn (gen_jump (label4));
4634               emit_barrier ();
4635               emit_label (label2);
4636               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4637                                compute_mode, label3);
4638               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4639                                   quotient, 0, OPTAB_LIB_WIDEN);
4640               if (tem != quotient)
4641                 emit_move_insn (quotient, tem);
4642               emit_jump_insn (gen_jump (label5));
4643               emit_barrier ();
4644               emit_label (label3);
4645               expand_inc (adjusted_op0, const1_rtx);
4646               emit_label (label4);
4647               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4648                                   quotient, 0, OPTAB_LIB_WIDEN);
4649               if (tem != quotient)
4650                 emit_move_insn (quotient, tem);
4651               expand_inc (quotient, const1_rtx);
4652               emit_label (label5);
4653             }
4654           }
4655         break;
4656
4657       case EXACT_DIV_EXPR:
4658         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4659           {
4660             HOST_WIDE_INT d = INTVAL (op1);
4661             unsigned HOST_WIDE_INT ml;
4662             int pre_shift;
4663             rtx t1;
4664
4665             pre_shift = floor_log2 (d & -d);
4666             ml = invert_mod2n (d >> pre_shift, size);
4667             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4668                                build_int_cst (NULL_TREE, pre_shift),
4669                                NULL_RTX, unsignedp);
4670             quotient = expand_mult (compute_mode, t1,
4671                                     gen_int_mode (ml, compute_mode),
4672                                     NULL_RTX, 1);
4673
4674             insn = get_last_insn ();
4675             set_unique_reg_note (insn,
4676                                  REG_EQUAL,
4677                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4678                                                  compute_mode,
4679                                                  op0, op1));
4680           }
4681         break;
4682
4683       case ROUND_DIV_EXPR:
4684       case ROUND_MOD_EXPR:
4685         if (unsignedp)
4686           {
4687             rtx tem;
4688             rtx label;
4689             label = gen_label_rtx ();
4690             quotient = gen_reg_rtx (compute_mode);
4691             remainder = gen_reg_rtx (compute_mode);
4692             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4693               {
4694                 rtx tem;
4695                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4696                                          quotient, 1, OPTAB_LIB_WIDEN);
4697                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4698                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4699                                           remainder, 1, OPTAB_LIB_WIDEN);
4700               }
4701             tem = plus_constant (op1, -1);
4702             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4703                                 build_int_cst (NULL_TREE, 1),
4704                                 NULL_RTX, 1);
4705             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4706             expand_inc (quotient, const1_rtx);
4707             expand_dec (remainder, op1);
4708             emit_label (label);
4709           }
4710         else
4711           {
4712             rtx abs_rem, abs_op1, tem, mask;
4713             rtx label;
4714             label = gen_label_rtx ();
4715             quotient = gen_reg_rtx (compute_mode);
4716             remainder = gen_reg_rtx (compute_mode);
4717             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4718               {
4719                 rtx tem;
4720                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4721                                          quotient, 0, OPTAB_LIB_WIDEN);
4722                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4723                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4724                                           remainder, 0, OPTAB_LIB_WIDEN);
4725               }
4726             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4727             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4728             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4729                                 build_int_cst (NULL_TREE, 1),
4730                                 NULL_RTX, 1);
4731             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4732             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4733                                 NULL_RTX, 0, OPTAB_WIDEN);
4734             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4735                                  build_int_cst (NULL_TREE, size - 1),
4736                                  NULL_RTX, 0);
4737             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4738                                 NULL_RTX, 0, OPTAB_WIDEN);
4739             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4740                                 NULL_RTX, 0, OPTAB_WIDEN);
4741             expand_inc (quotient, tem);
4742             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4743                                 NULL_RTX, 0, OPTAB_WIDEN);
4744             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4745                                 NULL_RTX, 0, OPTAB_WIDEN);
4746             expand_dec (remainder, tem);
4747             emit_label (label);
4748           }
4749         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4750
4751       default:
4752         gcc_unreachable ();
4753       }
4754
4755   if (quotient == 0)
4756     {
4757       if (target && GET_MODE (target) != compute_mode)
4758         target = 0;
4759
4760       if (rem_flag)
4761         {
4762           /* Try to produce the remainder without producing the quotient.
4763              If we seem to have a divmod pattern that does not require widening,
4764              don't try widening here.  We should really have a WIDEN argument
4765              to expand_twoval_binop, since what we'd really like to do here is
4766              1) try a mod insn in compute_mode
4767              2) try a divmod insn in compute_mode
4768              3) try a div insn in compute_mode and multiply-subtract to get
4769                 remainder
4770              4) try the same things with widening allowed.  */
4771           remainder
4772             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4773                                  op0, op1, target,
4774                                  unsignedp,
4775                                  ((optab_handler (optab2, compute_mode)->insn_code
4776                                    != CODE_FOR_nothing)
4777                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4778           if (remainder == 0)
4779             {
4780               /* No luck there.  Can we do remainder and divide at once
4781                  without a library call?  */
4782               remainder = gen_reg_rtx (compute_mode);
4783               if (! expand_twoval_binop ((unsignedp
4784                                           ? udivmod_optab
4785                                           : sdivmod_optab),
4786                                          op0, op1,
4787                                          NULL_RTX, remainder, unsignedp))
4788                 remainder = 0;
4789             }
4790
4791           if (remainder)
4792             return gen_lowpart (mode, remainder);
4793         }
4794
4795       /* Produce the quotient.  Try a quotient insn, but not a library call.
4796          If we have a divmod in this mode, use it in preference to widening
4797          the div (for this test we assume it will not fail). Note that optab2
4798          is set to the one of the two optabs that the call below will use.  */
4799       quotient
4800         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4801                              op0, op1, rem_flag ? NULL_RTX : target,
4802                              unsignedp,
4803                              ((optab_handler (optab2, compute_mode)->insn_code
4804                                != CODE_FOR_nothing)
4805                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4806
4807       if (quotient == 0)
4808         {
4809           /* No luck there.  Try a quotient-and-remainder insn,
4810              keeping the quotient alone.  */
4811           quotient = gen_reg_rtx (compute_mode);
4812           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4813                                      op0, op1,
4814                                      quotient, NULL_RTX, unsignedp))
4815             {
4816               quotient = 0;
4817               if (! rem_flag)
4818                 /* Still no luck.  If we are not computing the remainder,
4819                    use a library call for the quotient.  */
4820                 quotient = sign_expand_binop (compute_mode,
4821                                               udiv_optab, sdiv_optab,
4822                                               op0, op1, target,
4823                                               unsignedp, OPTAB_LIB_WIDEN);
4824             }
4825         }
4826     }
4827
4828   if (rem_flag)
4829     {
4830       if (target && GET_MODE (target) != compute_mode)
4831         target = 0;
4832
4833       if (quotient == 0)
4834         {
4835           /* No divide instruction either.  Use library for remainder.  */
4836           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4837                                          op0, op1, target,
4838                                          unsignedp, OPTAB_LIB_WIDEN);
4839           /* No remainder function.  Try a quotient-and-remainder
4840              function, keeping the remainder.  */
4841           if (!remainder)
4842             {
4843               remainder = gen_reg_rtx (compute_mode);
4844               if (!expand_twoval_binop_libfunc
4845                   (unsignedp ? udivmod_optab : sdivmod_optab,
4846                    op0, op1,
4847                    NULL_RTX, remainder,
4848                    unsignedp ? UMOD : MOD))
4849                 remainder = NULL_RTX;
4850             }
4851         }
4852       else
4853         {
4854           /* We divided.  Now finish doing X - Y * (X / Y).  */
4855           remainder = expand_mult (compute_mode, quotient, op1,
4856                                    NULL_RTX, unsignedp);
4857           remainder = expand_binop (compute_mode, sub_optab, op0,
4858                                     remainder, target, unsignedp,
4859                                     OPTAB_LIB_WIDEN);
4860         }
4861     }
4862
4863   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4864 }
4865 \f
4866 /* Return a tree node with data type TYPE, describing the value of X.
4867    Usually this is an VAR_DECL, if there is no obvious better choice.
4868    X may be an expression, however we only support those expressions
4869    generated by loop.c.  */
4870
4871 tree
4872 make_tree (tree type, rtx x)
4873 {
4874   tree t;
4875
4876   switch (GET_CODE (x))
4877     {
4878     case CONST_INT:
4879       {
4880         HOST_WIDE_INT hi = 0;
4881
4882         if (INTVAL (x) < 0
4883             && !(TYPE_UNSIGNED (type)
4884                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4885                      < HOST_BITS_PER_WIDE_INT)))
4886           hi = -1;
4887
4888         t = build_int_cst_wide (type, INTVAL (x), hi);
4889
4890         return t;
4891       }
4892
4893     case CONST_DOUBLE:
4894       if (GET_MODE (x) == VOIDmode)
4895         t = build_int_cst_wide (type,
4896                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4897       else
4898         {
4899           REAL_VALUE_TYPE d;
4900
4901           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4902           t = build_real (type, d);
4903         }
4904
4905       return t;
4906
4907     case CONST_VECTOR:
4908       {
4909         int units = CONST_VECTOR_NUNITS (x);
4910         tree itype = TREE_TYPE (type);
4911         tree t = NULL_TREE;
4912         int i;
4913
4914
4915         /* Build a tree with vector elements.  */
4916         for (i = units - 1; i >= 0; --i)
4917           {
4918             rtx elt = CONST_VECTOR_ELT (x, i);
4919             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4920           }
4921
4922         return build_vector (type, t);
4923       }
4924
4925     case PLUS:
4926       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4927                           make_tree (type, XEXP (x, 1)));
4928
4929     case MINUS:
4930       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4931                           make_tree (type, XEXP (x, 1)));
4932
4933     case NEG:
4934       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4935
4936     case MULT:
4937       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4938                           make_tree (type, XEXP (x, 1)));
4939
4940     case ASHIFT:
4941       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4942                           make_tree (type, XEXP (x, 1)));
4943
4944     case LSHIFTRT:
4945       t = unsigned_type_for (type);
4946       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4947                                          make_tree (t, XEXP (x, 0)),
4948                                          make_tree (type, XEXP (x, 1))));
4949
4950     case ASHIFTRT:
4951       t = signed_type_for (type);
4952       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4953                                          make_tree (t, XEXP (x, 0)),
4954                                          make_tree (type, XEXP (x, 1))));
4955
4956     case DIV:
4957       if (TREE_CODE (type) != REAL_TYPE)
4958         t = signed_type_for (type);
4959       else
4960         t = type;
4961
4962       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4963                                          make_tree (t, XEXP (x, 0)),
4964                                          make_tree (t, XEXP (x, 1))));
4965     case UDIV:
4966       t = unsigned_type_for (type);
4967       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4968                                          make_tree (t, XEXP (x, 0)),
4969                                          make_tree (t, XEXP (x, 1))));
4970
4971     case SIGN_EXTEND:
4972     case ZERO_EXTEND:
4973       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4974                                           GET_CODE (x) == ZERO_EXTEND);
4975       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4976
4977     case CONST:
4978       return make_tree (type, XEXP (x, 0));
4979
4980     case SYMBOL_REF:
4981       t = SYMBOL_REF_DECL (x);
4982       if (t)
4983         return fold_convert (type, build_fold_addr_expr (t));
4984       /* else fall through.  */
4985
4986     default:
4987       t = build_decl (VAR_DECL, NULL_TREE, type);
4988
4989       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4990          ptr_mode.  So convert.  */
4991       if (POINTER_TYPE_P (type))
4992         x = convert_memory_address (TYPE_MODE (type), x);
4993
4994       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4995          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4996       t->decl_with_rtl.rtl = x;
4997
4998       return t;
4999     }
5000 }
5001 \f
5002 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5003    and returning TARGET.
5004
5005    If TARGET is 0, a pseudo-register or constant is returned.  */
5006
5007 rtx
5008 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5009 {
5010   rtx tem = 0;
5011
5012   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5013     tem = simplify_binary_operation (AND, mode, op0, op1);
5014   if (tem == 0)
5015     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5016
5017   if (target == 0)
5018     target = tem;
5019   else if (tem != target)
5020     emit_move_insn (target, tem);
5021   return target;
5022 }
5023 \f
5024 /* Helper function for emit_store_flag.  */
5025 static rtx
5026 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
5027                    int normalizep)
5028 {
5029   rtx op0;
5030   enum machine_mode target_mode = GET_MODE (target);
5031
5032   /* If we are converting to a wider mode, first convert to
5033      TARGET_MODE, then normalize.  This produces better combining
5034      opportunities on machines that have a SIGN_EXTRACT when we are
5035      testing a single bit.  This mostly benefits the 68k.
5036
5037      If STORE_FLAG_VALUE does not have the sign bit set when
5038      interpreted in MODE, we can do this conversion as unsigned, which
5039      is usually more efficient.  */
5040   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5041     {
5042       convert_move (target, subtarget,
5043                     (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5044                     && 0 == (STORE_FLAG_VALUE
5045                              & ((HOST_WIDE_INT) 1
5046                                 << (GET_MODE_BITSIZE (mode) -1))));
5047       op0 = target;
5048       mode = target_mode;
5049     }
5050   else
5051     op0 = subtarget;
5052
5053   /* If we want to keep subexpressions around, don't reuse our last
5054      target.  */
5055   if (optimize)
5056     subtarget = 0;
5057
5058   /* Now normalize to the proper value in MODE.  Sometimes we don't
5059      have to do anything.  */
5060   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5061     ;
5062   /* STORE_FLAG_VALUE might be the most negative number, so write
5063      the comparison this way to avoid a compiler-time warning.  */
5064   else if (- normalizep == STORE_FLAG_VALUE)
5065     op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5066
5067   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5068      it hard to use a value of just the sign bit due to ANSI integer
5069      constant typing rules.  */
5070   else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5071            && (STORE_FLAG_VALUE
5072                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
5073     op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5074                         size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
5075                         normalizep == 1);
5076   else
5077     {
5078       gcc_assert (STORE_FLAG_VALUE & 1);
5079
5080       op0 = expand_and (mode, op0, const1_rtx, subtarget);
5081       if (normalizep == -1)
5082         op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5083     }
5084
5085   /* If we were converting to a smaller mode, do the conversion now.  */
5086   if (target_mode != mode)
5087     {
5088       convert_move (target, op0, 0);
5089       return target;
5090     }
5091   else
5092     return op0;
5093 }
5094
5095 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5096    and storing in TARGET.  Normally return TARGET.
5097    Return 0 if that cannot be done.
5098
5099    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5100    it is VOIDmode, they cannot both be CONST_INT.
5101
5102    UNSIGNEDP is for the case where we have to widen the operands
5103    to perform the operation.  It says to use zero-extension.
5104
5105    NORMALIZEP is 1 if we should convert the result to be either zero
5106    or one.  Normalize is -1 if we should convert the result to be
5107    either zero or -1.  If NORMALIZEP is zero, the result will be left
5108    "raw" out of the scc insn.  */
5109
5110 rtx
5111 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5112                  enum machine_mode mode, int unsignedp, int normalizep)
5113 {
5114   rtx subtarget;
5115   enum insn_code icode;
5116   enum machine_mode compare_mode;
5117   enum machine_mode target_mode = GET_MODE (target);
5118   rtx tem;
5119   rtx last = get_last_insn ();
5120   rtx pattern, comparison;
5121
5122   if (unsignedp)
5123     code = unsigned_condition (code);
5124
5125   /* If one operand is constant, make it the second one.  Only do this
5126      if the other operand is not constant as well.  */
5127
5128   if (swap_commutative_operands_p (op0, op1))
5129     {
5130       tem = op0;
5131       op0 = op1;
5132       op1 = tem;
5133       code = swap_condition (code);
5134     }
5135
5136   if (mode == VOIDmode)
5137     mode = GET_MODE (op0);
5138
5139   /* For some comparisons with 1 and -1, we can convert this to
5140      comparisons with zero.  This will often produce more opportunities for
5141      store-flag insns.  */
5142
5143   switch (code)
5144     {
5145     case LT:
5146       if (op1 == const1_rtx)
5147         op1 = const0_rtx, code = LE;
5148       break;
5149     case LE:
5150       if (op1 == constm1_rtx)
5151         op1 = const0_rtx, code = LT;
5152       break;
5153     case GE:
5154       if (op1 == const1_rtx)
5155         op1 = const0_rtx, code = GT;
5156       break;
5157     case GT:
5158       if (op1 == constm1_rtx)
5159         op1 = const0_rtx, code = GE;
5160       break;
5161     case GEU:
5162       if (op1 == const1_rtx)
5163         op1 = const0_rtx, code = NE;
5164       break;
5165     case LTU:
5166       if (op1 == const1_rtx)
5167         op1 = const0_rtx, code = EQ;
5168       break;
5169     default:
5170       break;
5171     }
5172
5173   /* If we are comparing a double-word integer with zero or -1, we can
5174      convert the comparison into one involving a single word.  */
5175   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5176       && GET_MODE_CLASS (mode) == MODE_INT
5177       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5178     {
5179       if ((code == EQ || code == NE)
5180           && (op1 == const0_rtx || op1 == constm1_rtx))
5181         {
5182           rtx op00, op01, op0both;
5183
5184           /* Do a logical OR or AND of the two words and compare the
5185              result.  */
5186           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5187           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5188           op0both = expand_binop (word_mode,
5189                                   op1 == const0_rtx ? ior_optab : and_optab,
5190                                   op00, op01, NULL_RTX, unsignedp,
5191                                   OPTAB_DIRECT);
5192
5193           if (op0both != 0)
5194             return emit_store_flag (target, code, op0both, op1, word_mode,
5195                                     unsignedp, normalizep);
5196         }
5197       else if ((code == LT || code == GE) && op1 == const0_rtx)
5198         {
5199           rtx op0h;
5200
5201           /* If testing the sign bit, can just test on high word.  */
5202           op0h = simplify_gen_subreg (word_mode, op0, mode,
5203                                       subreg_highpart_offset (word_mode,
5204                                                               mode));
5205           return emit_store_flag (target, code, op0h, op1, word_mode,
5206                                   unsignedp, normalizep);
5207         }
5208     }
5209
5210   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5211      complement of A (for GE) and shifting the sign bit to the low bit.  */
5212   if (op1 == const0_rtx && (code == LT || code == GE)
5213       && GET_MODE_CLASS (mode) == MODE_INT
5214       && (normalizep || STORE_FLAG_VALUE == 1
5215           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5216               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5217                   == ((unsigned HOST_WIDE_INT) 1
5218                       << (GET_MODE_BITSIZE (mode) - 1))))))
5219     {
5220       subtarget = target;
5221
5222       /* If the result is to be wider than OP0, it is best to convert it
5223          first.  If it is to be narrower, it is *incorrect* to convert it
5224          first.  */
5225       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5226         {
5227           op0 = convert_modes (target_mode, mode, op0, 0);
5228           mode = target_mode;
5229         }
5230
5231       if (target_mode != mode)
5232         subtarget = 0;
5233
5234       if (code == GE)
5235         op0 = expand_unop (mode, one_cmpl_optab, op0,
5236                            ((STORE_FLAG_VALUE == 1 || normalizep)
5237                             ? 0 : subtarget), 0);
5238
5239       if (STORE_FLAG_VALUE == 1 || normalizep)
5240         /* If we are supposed to produce a 0/1 value, we want to do
5241            a logical shift from the sign bit to the low-order bit; for
5242            a -1/0 value, we do an arithmetic shift.  */
5243         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5244                             size_int (GET_MODE_BITSIZE (mode) - 1),
5245                             subtarget, normalizep != -1);
5246
5247       if (mode != target_mode)
5248         op0 = convert_modes (target_mode, mode, op0, 0);
5249
5250       return op0;
5251     }
5252
5253   icode = setcc_gen_code[(int) code];
5254
5255   if (icode != CODE_FOR_nothing)
5256     {
5257       insn_operand_predicate_fn pred;
5258
5259       /* We think we may be able to do this with a scc insn.  Emit the
5260          comparison and then the scc insn.  */
5261
5262       do_pending_stack_adjust ();
5263       last = get_last_insn ();
5264
5265       comparison
5266         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5267       if (CONSTANT_P (comparison))
5268         {
5269           switch (GET_CODE (comparison))
5270             {
5271             case CONST_INT:
5272               if (comparison == const0_rtx)
5273                 return const0_rtx;
5274               break;
5275
5276 #ifdef FLOAT_STORE_FLAG_VALUE
5277             case CONST_DOUBLE:
5278               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5279                 return const0_rtx;
5280               break;
5281 #endif
5282             default:
5283               gcc_unreachable ();
5284             }
5285
5286           if (normalizep == 1)
5287             return const1_rtx;
5288           if (normalizep == -1)
5289             return constm1_rtx;
5290           return const_true_rtx;
5291         }
5292
5293       /* The code of COMPARISON may not match CODE if compare_from_rtx
5294          decided to swap its operands and reverse the original code.
5295
5296          We know that compare_from_rtx returns either a CONST_INT or
5297          a new comparison code, so it is safe to just extract the
5298          code from COMPARISON.  */
5299       code = GET_CODE (comparison);
5300
5301       /* Get a reference to the target in the proper mode for this insn.  */
5302       compare_mode = insn_data[(int) icode].operand[0].mode;
5303       subtarget = target;
5304       pred = insn_data[(int) icode].operand[0].predicate;
5305       if (optimize || ! (*pred) (subtarget, compare_mode))
5306         subtarget = gen_reg_rtx (compare_mode);
5307
5308       pattern = GEN_FCN (icode) (subtarget);
5309       if (pattern)
5310         {
5311           emit_insn (pattern);
5312           return emit_store_flag_1 (target, subtarget, compare_mode,
5313                                     normalizep);
5314         }
5315     }
5316   else
5317     {
5318       /* We don't have an scc insn, so try a cstore insn.  */
5319
5320       for (compare_mode = mode; compare_mode != VOIDmode;
5321            compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5322         {
5323           icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5324           if (icode != CODE_FOR_nothing)
5325             break;
5326         }
5327
5328       if (icode != CODE_FOR_nothing)
5329         {
5330           enum machine_mode result_mode
5331             = insn_data[(int) icode].operand[0].mode;
5332           rtx cstore_op0 = op0;
5333           rtx cstore_op1 = op1;
5334
5335           do_pending_stack_adjust ();
5336           last = get_last_insn ();
5337
5338           if (compare_mode != mode)
5339             {
5340               cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5341                                           unsignedp);
5342               cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
5343                                           unsignedp);
5344             }
5345
5346           if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5347                                                             compare_mode))
5348             cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5349
5350           if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5351                                                             compare_mode))
5352             cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5353
5354           comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5355                                        cstore_op1);
5356           subtarget = target;
5357
5358           if (optimize || !(insn_data[(int) icode].operand[0].predicate
5359                             (subtarget, result_mode)))
5360             subtarget = gen_reg_rtx (result_mode);
5361
5362           pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5363                                      cstore_op1);
5364
5365           if (pattern)
5366             {
5367               emit_insn (pattern);
5368               return emit_store_flag_1 (target, subtarget, result_mode,
5369                                         normalizep);
5370             }
5371         }
5372     }
5373
5374   delete_insns_since (last);
5375
5376   /* If optimizing, use different pseudo registers for each insn, instead
5377      of reusing the same pseudo.  This leads to better CSE, but slows
5378      down the compiler, since there are more pseudos */
5379   subtarget = (!optimize
5380                && (target_mode == mode)) ? target : NULL_RTX;
5381
5382   /* If we reached here, we can't do this with a scc insn.  However, there
5383      are some comparisons that can be done directly.  For example, if
5384      this is an equality comparison of integers, we can try to exclusive-or
5385      (or subtract) the two operands and use a recursive call to try the
5386      comparison with zero.  Don't do any of these cases if branches are
5387      very cheap.  */
5388
5389   if (BRANCH_COST (optimize_insn_for_speed_p (),
5390                    false) > 0
5391       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5392       && op1 != const0_rtx)
5393     {
5394       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5395                           OPTAB_WIDEN);
5396
5397       if (tem == 0)
5398         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5399                             OPTAB_WIDEN);
5400       if (tem != 0)
5401         tem = emit_store_flag (target, code, tem, const0_rtx,
5402                                mode, unsignedp, normalizep);
5403       if (tem == 0)
5404         delete_insns_since (last);
5405       return tem;
5406     }
5407
5408   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5409      the constant zero.  Reject all other comparisons at this point.  Only
5410      do LE and GT if branches are expensive since they are expensive on
5411      2-operand machines.  */
5412
5413   if (BRANCH_COST (optimize_insn_for_speed_p (),
5414                    false) == 0
5415       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5416       || (code != EQ && code != NE
5417           && (BRANCH_COST (optimize_insn_for_speed_p (),
5418                            false) <= 1 || (code != LE && code != GT))))
5419     return 0;
5420
5421   /* See what we need to return.  We can only return a 1, -1, or the
5422      sign bit.  */
5423
5424   if (normalizep == 0)
5425     {
5426       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5427         normalizep = STORE_FLAG_VALUE;
5428
5429       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5430                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5431                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5432         ;
5433       else
5434         return 0;
5435     }
5436
5437   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5438      do the necessary operation below.  */
5439
5440   tem = 0;
5441
5442   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5443      the sign bit set.  */
5444
5445   if (code == LE)
5446     {
5447       /* This is destructive, so SUBTARGET can't be OP0.  */
5448       if (rtx_equal_p (subtarget, op0))
5449         subtarget = 0;
5450
5451       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5452                           OPTAB_WIDEN);
5453       if (tem)
5454         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5455                             OPTAB_WIDEN);
5456     }
5457
5458   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5459      number of bits in the mode of OP0, minus one.  */
5460
5461   if (code == GT)
5462     {
5463       if (rtx_equal_p (subtarget, op0))
5464         subtarget = 0;
5465
5466       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5467                           size_int (GET_MODE_BITSIZE (mode) - 1),
5468                           subtarget, 0);
5469       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5470                           OPTAB_WIDEN);
5471     }
5472
5473   if (code == EQ || code == NE)
5474     {
5475       /* For EQ or NE, one way to do the comparison is to apply an operation
5476          that converts the operand into a positive number if it is nonzero
5477          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5478          for NE we negate.  This puts the result in the sign bit.  Then we
5479          normalize with a shift, if needed.
5480
5481          Two operations that can do the above actions are ABS and FFS, so try
5482          them.  If that doesn't work, and MODE is smaller than a full word,
5483          we can use zero-extension to the wider mode (an unsigned conversion)
5484          as the operation.  */
5485
5486       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5487          that is compensated by the subsequent overflow when subtracting
5488          one / negating.  */
5489
5490       if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5491         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5492       else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5493         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5494       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5495         {
5496           tem = convert_modes (word_mode, mode, op0, 1);
5497           mode = word_mode;
5498         }
5499
5500       if (tem != 0)
5501         {
5502           if (code == EQ)
5503             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5504                                 0, OPTAB_WIDEN);
5505           else
5506             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5507         }
5508
5509       /* If we couldn't do it that way, for NE we can "or" the two's complement
5510          of the value with itself.  For EQ, we take the one's complement of
5511          that "or", which is an extra insn, so we only handle EQ if branches
5512          are expensive.  */
5513
5514       if (tem == 0
5515           && (code == NE
5516               || BRANCH_COST (optimize_insn_for_speed_p (),
5517                               false) > 1))
5518         {
5519           if (rtx_equal_p (subtarget, op0))
5520             subtarget = 0;
5521
5522           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5523           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5524                               OPTAB_WIDEN);
5525
5526           if (tem && code == EQ)
5527             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5528         }
5529     }
5530
5531   if (tem && normalizep)
5532     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5533                         size_int (GET_MODE_BITSIZE (mode) - 1),
5534                         subtarget, normalizep == 1);
5535
5536   if (tem)
5537     {
5538       if (GET_MODE (tem) != target_mode)
5539         {
5540           convert_move (target, tem, 0);
5541           tem = target;
5542         }
5543       else if (!subtarget)
5544         {
5545           emit_move_insn (target, tem);
5546           tem = target;
5547         }
5548     }
5549   else
5550     delete_insns_since (last);
5551
5552   return tem;
5553 }
5554
5555 /* Like emit_store_flag, but always succeeds.  */
5556
5557 rtx
5558 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5559                        enum machine_mode mode, int unsignedp, int normalizep)
5560 {
5561   rtx tem, label;
5562
5563   /* First see if emit_store_flag can do the job.  */
5564   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5565   if (tem != 0)
5566     return tem;
5567
5568   if (normalizep == 0)
5569     normalizep = 1;
5570
5571   /* If this failed, we have to do this with set/compare/jump/set code.  */
5572
5573   if (!REG_P (target)
5574       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5575     target = gen_reg_rtx (GET_MODE (target));
5576
5577   emit_move_insn (target, const1_rtx);
5578   label = gen_label_rtx ();
5579   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5580                            NULL_RTX, label);
5581
5582   emit_move_insn (target, const0_rtx);
5583   emit_label (label);
5584
5585   return target;
5586 }
5587 \f
5588 /* Perform possibly multi-word comparison and conditional jump to LABEL
5589    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5590    now a thin wrapper around do_compare_rtx_and_jump.  */
5591
5592 static void
5593 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5594                  rtx label)
5595 {
5596   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5597   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5598                            NULL_RTX, NULL_RTX, label);
5599 }