gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
  57 /* Nonzero means divides or modulus operations are relatively cheap for
  58    powers of two, so don't use branches; emit the operation instead.
  59    Usually, this will mean that the MD file will emit non-branch
  60    sequences.  */
  61
  62 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  63 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  64
  65 #ifndef SLOW_UNALIGNED_ACCESS
  66 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  67 #endif
  68
  69 /* For compilers that support multiple targets with different word sizes,
  70    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  71    is the H8/300(H) compiler.  */
  72
  73 #ifndef MAX_BITS_PER_WORD
  74 #define MAX_BITS_PER_WORD BITS_PER_WORD
  75 #endif
  76
  77 /* Reduce conditional compilation elsewhere.  */
  78 #ifndef HAVE_insv
  79 #define HAVE_insv       0
  80 #define CODE_FOR_insv   CODE_FOR_nothing
  81 #define gen_insv(a,b,c,d) NULL_RTX
  82 #endif
  83 #ifndef HAVE_extv
  84 #define HAVE_extv       0
  85 #define CODE_FOR_extv   CODE_FOR_nothing
  86 #define gen_extv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extzv
  89 #define HAVE_extzv      0
  90 #define CODE_FOR_extzv  CODE_FOR_nothing
  91 #define gen_extzv(a,b,c,d) NULL_RTX
  92 #endif
  93
  94 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  95    shift count and some by mode.  */
  96 static int zero_cost;
  97 static int add_cost[NUM_MACHINE_MODES];
  98 static int neg_cost[NUM_MACHINE_MODES];
  99 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 100 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 101 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 102 static int mul_cost[NUM_MACHINE_MODES];
 103 static int div_cost[NUM_MACHINE_MODES];
 104 static int mul_widen_cost[NUM_MACHINE_MODES];
 105 static int mul_highpart_cost[NUM_MACHINE_MODES];
 106
 107 void
 108 init_expmed (void)
 109 {
 110   struct
 111   {
 112     struct rtx_def reg;         rtunion reg_fld[2];
 113     struct rtx_def plus;        rtunion plus_fld1;
 114     struct rtx_def neg;
 115     struct rtx_def udiv;        rtunion udiv_fld1;
 116     struct rtx_def mult;        rtunion mult_fld1;
 117     struct rtx_def div;         rtunion div_fld1;
 118     struct rtx_def mod;         rtunion mod_fld1;
 119     struct rtx_def zext;
 120     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 121     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 122     struct rtx_def wide_trunc;
 123     struct rtx_def shift;       rtunion shift_fld1;
 124     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 125     struct rtx_def shift_add;   rtunion shift_add_fld1;
 126     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 127   } all;
 128
 129   rtx pow2[MAX_BITS_PER_WORD];
 130   rtx cint[MAX_BITS_PER_WORD];
 131   int m, n;
 132   enum machine_mode mode, wider_mode;
 133
 134   zero_cost = rtx_cost (const0_rtx, 0);
 135
 136   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 137     {
 138       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 139       cint[m] = GEN_INT (m);
 140     }
 141
 142   memset (&all, 0, sizeof all);
 143
 144   PUT_CODE (&all.reg, REG);
 145   REGNO (&all.reg) = 10000;
 146
 147   PUT_CODE (&all.plus, PLUS);
 148   XEXP (&all.plus, 0) = &all.reg;
 149   XEXP (&all.plus, 1) = &all.reg;
 150
 151   PUT_CODE (&all.neg, NEG);
 152   XEXP (&all.neg, 0) = &all.reg;
 153
 154   PUT_CODE (&all.udiv, UDIV);
 155   XEXP (&all.udiv, 0) = &all.reg;
 156   XEXP (&all.udiv, 1) = &all.reg;
 157
 158   PUT_CODE (&all.mult, MULT);
 159   XEXP (&all.mult, 0) = &all.reg;
 160   XEXP (&all.mult, 1) = &all.reg;
 161
 162   PUT_CODE (&all.div, DIV);
 163   XEXP (&all.div, 0) = &all.reg;
 164   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 165
 166   PUT_CODE (&all.mod, MOD);
 167   XEXP (&all.mod, 0) = &all.reg;
 168   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 169
 170   PUT_CODE (&all.zext, ZERO_EXTEND);
 171   XEXP (&all.zext, 0) = &all.reg;
 172
 173   PUT_CODE (&all.wide_mult, MULT);
 174   XEXP (&all.wide_mult, 0) = &all.zext;
 175   XEXP (&all.wide_mult, 1) = &all.zext;
 176
 177   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 178   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 179
 180   PUT_CODE (&all.wide_trunc, TRUNCATE);
 181   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 182
 183   PUT_CODE (&all.shift, ASHIFT);
 184   XEXP (&all.shift, 0) = &all.reg;
 185
 186   PUT_CODE (&all.shift_mult, MULT);
 187   XEXP (&all.shift_mult, 0) = &all.reg;
 188
 189   PUT_CODE (&all.shift_add, PLUS);
 190   XEXP (&all.shift_add, 0) = &all.shift_mult;
 191   XEXP (&all.shift_add, 1) = &all.reg;
 192
 193   PUT_CODE (&all.shift_sub, MINUS);
 194   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 195   XEXP (&all.shift_sub, 1) = &all.reg;
 196
 197   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 198        mode != VOIDmode;
 199        mode = GET_MODE_WIDER_MODE (mode))
 200     {
 201       PUT_MODE (&all.reg, mode);
 202       PUT_MODE (&all.plus, mode);
 203       PUT_MODE (&all.neg, mode);
 204       PUT_MODE (&all.udiv, mode);
 205       PUT_MODE (&all.mult, mode);
 206       PUT_MODE (&all.div, mode);
 207       PUT_MODE (&all.mod, mode);
 208       PUT_MODE (&all.wide_trunc, mode);
 209       PUT_MODE (&all.shift, mode);
 210       PUT_MODE (&all.shift_mult, mode);
 211       PUT_MODE (&all.shift_add, mode);
 212       PUT_MODE (&all.shift_sub, mode);
 213
 214       add_cost[mode] = rtx_cost (&all.plus, SET);
 215       neg_cost[mode] = rtx_cost (&all.neg, SET);
 216       div_cost[mode] = rtx_cost (&all.udiv, SET);
 217       mul_cost[mode] = rtx_cost (&all.mult, SET);
 218
 219       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 220       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 221
 222       wider_mode = GET_MODE_WIDER_MODE (mode);
 223       if (wider_mode != VOIDmode)
 224         {
 225           PUT_MODE (&all.zext, wider_mode);
 226           PUT_MODE (&all.wide_mult, wider_mode);
 227           PUT_MODE (&all.wide_lshr, wider_mode);
 228           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 229
 230           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 231           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 232         }
 233
 234       shift_cost[mode][0] = 0;
 235       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 236
 237       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 238       for (m = 1; m < n; m++)
 239         {
 240           XEXP (&all.shift, 1) = cint[m];
 241           XEXP (&all.shift_mult, 1) = pow2[m];
 242
 243           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 244           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 245           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 246         }
 247     }
 248 }
 249
 250 /* Return an rtx representing minus the value of X.
 251    MODE is the intended mode of the result,
 252    useful if X is a CONST_INT.  */
 253
 254 rtx
 255 negate_rtx (enum machine_mode mode, rtx x)
 256 {
 257   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 258
 259   if (result == 0)
 260     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 261
 262   return result;
 263 }
 264
 265 /* Report on the availability of insv/extv/extzv and the desired mode
 266    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 267    is false; else the mode of the specified operand.  If OPNO is -1,
 268    all the caller cares about is whether the insn is available.  */
 269 enum machine_mode
 270 mode_for_extraction (enum extraction_pattern pattern, int opno)
 271 {
 272   const struct insn_data *data;
 273
 274   switch (pattern)
 275     {
 276     case EP_insv:
 277       if (HAVE_insv)
 278         {
 279           data = &insn_data[CODE_FOR_insv];
 280           break;
 281         }
 282       return MAX_MACHINE_MODE;
 283
 284     case EP_extv:
 285       if (HAVE_extv)
 286         {
 287           data = &insn_data[CODE_FOR_extv];
 288           break;
 289         }
 290       return MAX_MACHINE_MODE;
 291
 292     case EP_extzv:
 293       if (HAVE_extzv)
 294         {
 295           data = &insn_data[CODE_FOR_extzv];
 296           break;
 297         }
 298       return MAX_MACHINE_MODE;
 299
 300     default:
 301       gcc_unreachable ();
 302     }
 303
 304   if (opno == -1)
 305     return VOIDmode;
 306
 307   /* Everyone who uses this function used to follow it with
 308      if (result == VOIDmode) result = word_mode; */
 309   if (data->operand[opno].mode == VOIDmode)
 310     return word_mode;
 311   return data->operand[opno].mode;
 312 }
 313
 314 \f
 315 /* Generate code to store value from rtx VALUE
 316    into a bit-field within structure STR_RTX
 317    containing BITSIZE bits starting at bit BITNUM.
 318    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 319    ALIGN is the alignment that STR_RTX is known to have.
 320    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 321
 322 /* ??? Note that there are two different ideas here for how
 323    to determine the size to count bits within, for a register.
 324    One is BITS_PER_WORD, and the other is the size of operand 3
 325    of the insv pattern.
 326
 327    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 328    else, we use the mode of operand 3.  */
 329
 330 rtx
 331 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 332                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 333                  rtx value)
 334 {
 335   unsigned int unit
 336     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 337   unsigned HOST_WIDE_INT offset = bitnum / unit;
 338   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 339   rtx op0 = str_rtx;
 340   int byte_offset;
 341   rtx orig_value;
 342
 343   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 344
 345   while (GET_CODE (op0) == SUBREG)
 346     {
 347       /* The following line once was done only if WORDS_BIG_ENDIAN,
 348          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 349          meaningful at a much higher level; when structures are copied
 350          between memory and regs, the higher-numbered regs
 351          always get higher addresses.  */
 352       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 353       /* We used to adjust BITPOS here, but now we do the whole adjustment
 354          right after the loop.  */
 355       op0 = SUBREG_REG (op0);
 356     }
 357
 358   /* Use vec_set patterns for inserting parts of vectors whenever
 359      available.  */
 360   if (VECTOR_MODE_P (GET_MODE (op0))
 361       && !MEM_P (op0)
 362       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 363           != CODE_FOR_nothing)
 364       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 365       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 366       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 367     {
 368       enum machine_mode outermode = GET_MODE (op0);
 369       enum machine_mode innermode = GET_MODE_INNER (outermode);
 370       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 371       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 372       rtx rtxpos = GEN_INT (pos);
 373       rtx src = value;
 374       rtx dest = op0;
 375       rtx pat, seq;
 376       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 377       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 378       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 379
 380       start_sequence ();
 381
 382       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 383         src = copy_to_mode_reg (mode1, src);
 384
 385       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 386         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 387
 388       /* We could handle this, but we should always be called with a pseudo
 389          for our targets and all insns should take them as outputs.  */
 390       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 391                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 392                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 393       pat = GEN_FCN (icode) (dest, src, rtxpos);
 394       seq = get_insns ();
 395       end_sequence ();
 396       if (pat)
 397         {
 398           emit_insn (seq);
 399           emit_insn (pat);
 400           return dest;
 401         }
 402     }
 403
 404   if (flag_force_mem)
 405     {
 406       int old_generating_concat_p = generating_concat_p;
 407       generating_concat_p = 0;
 408       value = force_not_mem (value);
 409       generating_concat_p = old_generating_concat_p;
 410     }
 411
 412   /* If the target is a register, overwriting the entire object, or storing
 413      a full-word or multi-word field can be done with just a SUBREG.
 414
 415      If the target is memory, storing any naturally aligned field can be
 416      done with a simple store.  For targets that support fast unaligned
 417      memory, any naturally sized, unit aligned field can be done directly.  */
 418
 419   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 420                 + (offset * UNITS_PER_WORD);
 421
 422   if (bitpos == 0
 423       && bitsize == GET_MODE_BITSIZE (fieldmode)
 424       && (!MEM_P (op0)
 425           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 426              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 427              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 428           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 429              || (offset * BITS_PER_UNIT % bitsize == 0
 430                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 431     {
 432       if (GET_MODE (op0) != fieldmode)
 433         {
 434           if (GET_CODE (op0) == SUBREG)
 435             {
 436               /* Else we've got some float mode source being extracted
 437                  into a different float mode destination -- this
 438                  combination of subregs results in Severe Tire
 439                  Damage.  */
 440               gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 441                           || GET_MODE_CLASS (fieldmode) == MODE_INT
 442                           || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 443               op0 = SUBREG_REG (op0);
 444             }
 445           if (REG_P (op0))
 446             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 447           else
 448             op0 = adjust_address (op0, fieldmode, offset);
 449         }
 450       emit_move_insn (op0, value);
 451       return value;
 452     }
 453
 454   /* Make sure we are playing with integral modes.  Pun with subregs
 455      if we aren't.  This must come after the entire register case above,
 456      since that case is valid for any mode.  The following cases are only
 457      valid for integral modes.  */
 458   {
 459     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 460     if (imode != GET_MODE (op0))
 461       {
 462         if (MEM_P (op0))
 463           op0 = adjust_address (op0, imode, 0);
 464         else
 465           {
 466             gcc_assert (imode != BLKmode);
 467             op0 = gen_lowpart (imode, op0);
 468           }
 469       }
 470   }
 471
 472   /* We may be accessing data outside the field, which means
 473      we can alias adjacent data.  */
 474   if (MEM_P (op0))
 475     {
 476       op0 = shallow_copy_rtx (op0);
 477       set_mem_alias_set (op0, 0);
 478       set_mem_expr (op0, 0);
 479     }
 480
 481   /* If OP0 is a register, BITPOS must count within a word.
 482      But as we have it, it counts within whatever size OP0 now has.
 483      On a bigendian machine, these are not the same, so convert.  */
 484   if (BYTES_BIG_ENDIAN
 485       && !MEM_P (op0)
 486       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 487     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 488
 489   /* Storing an lsb-aligned field in a register
 490      can be done with a movestrict instruction.  */
 491
 492   if (!MEM_P (op0)
 493       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 494       && bitsize == GET_MODE_BITSIZE (fieldmode)
 495       && (movstrict_optab->handlers[fieldmode].insn_code
 496           != CODE_FOR_nothing))
 497     {
 498       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 499
 500       /* Get appropriate low part of the value being stored.  */
 501       if (GET_CODE (value) == CONST_INT || REG_P (value))
 502         value = gen_lowpart (fieldmode, value);
 503       else if (!(GET_CODE (value) == SYMBOL_REF
 504                  || GET_CODE (value) == LABEL_REF
 505                  || GET_CODE (value) == CONST))
 506         value = convert_to_mode (fieldmode, value, 0);
 507
 508       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 509         value = copy_to_mode_reg (fieldmode, value);
 510
 511       if (GET_CODE (op0) == SUBREG)
 512         {
 513           /* Else we've got some float mode source being extracted into
 514              a different float mode destination -- this combination of
 515              subregs results in Severe Tire Damage.  */
 516           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 517                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 518                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 519           op0 = SUBREG_REG (op0);
 520         }
 521
 522       emit_insn (GEN_FCN (icode)
 523                  (gen_rtx_SUBREG (fieldmode, op0,
 524                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 525                                   + (offset * UNITS_PER_WORD)),
 526                                   value));
 527
 528       return value;
 529     }
 530
 531   /* Handle fields bigger than a word.  */
 532
 533   if (bitsize > BITS_PER_WORD)
 534     {
 535       /* Here we transfer the words of the field
 536          in the order least significant first.
 537          This is because the most significant word is the one which may
 538          be less than full.
 539          However, only do that if the value is not BLKmode.  */
 540
 541       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 542       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 543       unsigned int i;
 544
 545       /* This is the mode we must force value to, so that there will be enough
 546          subwords to extract.  Note that fieldmode will often (always?) be
 547          VOIDmode, because that is what store_field uses to indicate that this
 548          is a bit field, but passing VOIDmode to operand_subword_force will
 549          result in an abort.  */
 550       fieldmode = GET_MODE (value);
 551       if (fieldmode == VOIDmode)
 552         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 553
 554       for (i = 0; i < nwords; i++)
 555         {
 556           /* If I is 0, use the low-order word in both field and target;
 557              if I is 1, use the next to lowest word; and so on.  */
 558           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 559           unsigned int bit_offset = (backwards
 560                                      ? MAX ((int) bitsize - ((int) i + 1)
 561                                             * BITS_PER_WORD,
 562                                             0)
 563                                      : (int) i * BITS_PER_WORD);
 564
 565           store_bit_field (op0, MIN (BITS_PER_WORD,
 566                                      bitsize - i * BITS_PER_WORD),
 567                            bitnum + bit_offset, word_mode,
 568                            operand_subword_force (value, wordnum, fieldmode));
 569         }
 570       return value;
 571     }
 572
 573   /* From here on we can assume that the field to be stored in is
 574      a full-word (whatever type that is), since it is shorter than a word.  */
 575
 576   /* OFFSET is the number of words or bytes (UNIT says which)
 577      from STR_RTX to the first word or byte containing part of the field.  */
 578
 579   if (!MEM_P (op0))
 580     {
 581       if (offset != 0
 582           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 583         {
 584           if (!REG_P (op0))
 585             {
 586               /* Since this is a destination (lvalue), we can't copy it to a
 587                  pseudo.  We can trivially remove a SUBREG that does not
 588                  change the size of the operand.  Such a SUBREG may have been
 589                  added above.  Otherwise, abort.  */
 590               gcc_assert (GET_CODE (op0) == SUBREG
 591                           && (GET_MODE_SIZE (GET_MODE (op0))
 592                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 593               op0 = SUBREG_REG (op0);
 594             }
 595           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 596                                 op0, (offset * UNITS_PER_WORD));
 597         }
 598       offset = 0;
 599     }
 600
 601   /* If VALUE is a floating-point mode, access it as an integer of the
 602      corresponding size.  This can occur on a machine with 64 bit registers
 603      that uses SFmode for float.  This can also occur for unaligned float
 604      structure fields.  */
 605   orig_value = value;
 606   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 607       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 608     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 609                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 610                          value);
 611
 612   /* Now OFFSET is nonzero only if OP0 is memory
 613      and is therefore always measured in bytes.  */
 614
 615   if (HAVE_insv
 616       && GET_MODE (value) != BLKmode
 617       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 618       /* Ensure insv's size is wide enough for this field.  */
 619       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 620       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 621             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 622     {
 623       int xbitpos = bitpos;
 624       rtx value1;
 625       rtx xop0 = op0;
 626       rtx last = get_last_insn ();
 627       rtx pat;
 628       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 629       int save_volatile_ok = volatile_ok;
 630
 631       volatile_ok = 1;
 632
 633       /* If this machine's insv can only insert into a register, copy OP0
 634          into a register and save it back later.  */
 635       /* This used to check flag_force_mem, but that was a serious
 636          de-optimization now that flag_force_mem is enabled by -O2.  */
 637       if (MEM_P (op0)
 638           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 639                 (op0, VOIDmode)))
 640         {
 641           rtx tempreg;
 642           enum machine_mode bestmode;
 643
 644           /* Get the mode to use for inserting into this field.  If OP0 is
 645              BLKmode, get the smallest mode consistent with the alignment. If
 646              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 647              mode. Otherwise, use the smallest mode containing the field.  */
 648
 649           if (GET_MODE (op0) == BLKmode
 650               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 651             bestmode
 652               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 653                                MEM_VOLATILE_P (op0));
 654           else
 655             bestmode = GET_MODE (op0);
 656
 657           if (bestmode == VOIDmode
 658               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 659                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 660             goto insv_loses;
 661
 662           /* Adjust address to point to the containing unit of that mode.
 663              Compute offset as multiple of this unit, counting in bytes.  */
 664           unit = GET_MODE_BITSIZE (bestmode);
 665           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 666           bitpos = bitnum % unit;
 667           op0 = adjust_address (op0, bestmode,  offset);
 668
 669           /* Fetch that unit, store the bitfield in it, then store
 670              the unit.  */
 671           tempreg = copy_to_reg (op0);
 672           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 673           emit_move_insn (op0, tempreg);
 674           return value;
 675         }
 676       volatile_ok = save_volatile_ok;
 677
 678       /* Add OFFSET into OP0's address.  */
 679       if (MEM_P (xop0))
 680         xop0 = adjust_address (xop0, byte_mode, offset);
 681
 682       /* If xop0 is a register, we need it in MAXMODE
 683          to make it acceptable to the format of insv.  */
 684       if (GET_CODE (xop0) == SUBREG)
 685         /* We can't just change the mode, because this might clobber op0,
 686            and we will need the original value of op0 if insv fails.  */
 687         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 688       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 689         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 690
 691       /* On big-endian machines, we count bits from the most significant.
 692          If the bit field insn does not, we must invert.  */
 693
 694       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 695         xbitpos = unit - bitsize - xbitpos;
 696
 697       /* We have been counting XBITPOS within UNIT.
 698          Count instead within the size of the register.  */
 699       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 700         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 701
 702       unit = GET_MODE_BITSIZE (maxmode);
 703
 704       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 705       value1 = value;
 706       if (GET_MODE (value) != maxmode)
 707         {
 708           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 709             {
 710               /* Optimization: Don't bother really extending VALUE
 711                  if it has all the bits we will actually use.  However,
 712                  if we must narrow it, be sure we do it correctly.  */
 713
 714               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 715                 {
 716                   rtx tmp;
 717
 718                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 719                   if (! tmp)
 720                     tmp = simplify_gen_subreg (maxmode,
 721                                                force_reg (GET_MODE (value),
 722                                                           value1),
 723                                                GET_MODE (value), 0);
 724                   value1 = tmp;
 725                 }
 726               else
 727                 value1 = gen_lowpart (maxmode, value1);
 728             }
 729           else if (GET_CODE (value) == CONST_INT)
 730             value1 = gen_int_mode (INTVAL (value), maxmode);
 731           else
 732             /* Parse phase is supposed to make VALUE's data type
 733                match that of the component reference, which is a type
 734                at least as wide as the field; so VALUE should have
 735                a mode that corresponds to that type.  */
 736             gcc_assert (CONSTANT_P (value));
 737         }
 738
 739       /* If this machine's insv insists on a register,
 740          get VALUE1 into a register.  */
 741       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 742              (value1, maxmode)))
 743         value1 = force_reg (maxmode, value1);
 744
 745       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 746       if (pat)
 747         emit_insn (pat);
 748       else
 749         {
 750           delete_insns_since (last);
 751           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 752         }
 753     }
 754   else
 755     insv_loses:
 756     /* Insv is not available; store using shifts and boolean ops.  */
 757     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 758   return value;
 759 }
 760 \f
 761 /* Use shifts and boolean operations to store VALUE
 762    into a bit field of width BITSIZE
 763    in a memory location specified by OP0 except offset by OFFSET bytes.
 764      (OFFSET must be 0 if OP0 is a register.)
 765    The field starts at position BITPOS within the byte.
 766     (If OP0 is a register, it may be a full word or a narrower mode,
 767      but BITPOS still counts within a full word,
 768      which is significant on bigendian machines.)  */
 769
 770 static void
 771 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 772                        unsigned HOST_WIDE_INT bitsize,
 773                        unsigned HOST_WIDE_INT bitpos, rtx value)
 774 {
 775   enum machine_mode mode;
 776   unsigned int total_bits = BITS_PER_WORD;
 777   rtx subtarget, temp;
 778   int all_zero = 0;
 779   int all_one = 0;
 780
 781   /* There is a case not handled here:
 782      a structure with a known alignment of just a halfword
 783      and a field split across two aligned halfwords within the structure.
 784      Or likewise a structure with a known alignment of just a byte
 785      and a field split across two bytes.
 786      Such cases are not supposed to be able to occur.  */
 787
 788   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 789     {
 790       gcc_assert (!offset);
 791       /* Special treatment for a bit field split across two registers.  */
 792       if (bitsize + bitpos > BITS_PER_WORD)
 793         {
 794           store_split_bit_field (op0, bitsize, bitpos, value);
 795           return;
 796         }
 797     }
 798   else
 799     {
 800       /* Get the proper mode to use for this field.  We want a mode that
 801          includes the entire field.  If such a mode would be larger than
 802          a word, we won't be doing the extraction the normal way.
 803          We don't want a mode bigger than the destination.  */
 804
 805       mode = GET_MODE (op0);
 806       if (GET_MODE_BITSIZE (mode) == 0
 807           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 808         mode = word_mode;
 809       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 810                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 811
 812       if (mode == VOIDmode)
 813         {
 814           /* The only way this should occur is if the field spans word
 815              boundaries.  */
 816           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 817                                  value);
 818           return;
 819         }
 820
 821       total_bits = GET_MODE_BITSIZE (mode);
 822
 823       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 824          be in the range 0 to total_bits-1, and put any excess bytes in
 825          OFFSET.  */
 826       if (bitpos >= total_bits)
 827         {
 828           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 829           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 830                      * BITS_PER_UNIT);
 831         }
 832
 833       /* Get ref to an aligned byte, halfword, or word containing the field.
 834          Adjust BITPOS to be position within a word,
 835          and OFFSET to be the offset of that word.
 836          Then alter OP0 to refer to that word.  */
 837       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 838       offset -= (offset % (total_bits / BITS_PER_UNIT));
 839       op0 = adjust_address (op0, mode, offset);
 840     }
 841
 842   mode = GET_MODE (op0);
 843
 844   /* Now MODE is either some integral mode for a MEM as OP0,
 845      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 846      The bit field is contained entirely within OP0.
 847      BITPOS is the starting bit number within OP0.
 848      (OP0's mode may actually be narrower than MODE.)  */
 849
 850   if (BYTES_BIG_ENDIAN)
 851       /* BITPOS is the distance between our msb
 852          and that of the containing datum.
 853          Convert it to the distance from the lsb.  */
 854       bitpos = total_bits - bitsize - bitpos;
 855
 856   /* Now BITPOS is always the distance between our lsb
 857      and that of OP0.  */
 858
 859   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 860      we must first convert its mode to MODE.  */
 861
 862   if (GET_CODE (value) == CONST_INT)
 863     {
 864       HOST_WIDE_INT v = INTVAL (value);
 865
 866       if (bitsize < HOST_BITS_PER_WIDE_INT)
 867         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 868
 869       if (v == 0)
 870         all_zero = 1;
 871       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 872                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 873                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 874         all_one = 1;
 875
 876       value = lshift_value (mode, value, bitpos, bitsize);
 877     }
 878   else
 879     {
 880       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 881                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 882
 883       if (GET_MODE (value) != mode)
 884         {
 885           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 886               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 887             value = gen_lowpart (mode, value);
 888           else
 889             value = convert_to_mode (mode, value, 1);
 890         }
 891
 892       if (must_and)
 893         value = expand_binop (mode, and_optab, value,
 894                               mask_rtx (mode, 0, bitsize, 0),
 895                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 896       if (bitpos > 0)
 897         value = expand_shift (LSHIFT_EXPR, mode, value,
 898                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 899     }
 900
 901   /* Now clear the chosen bits in OP0,
 902      except that if VALUE is -1 we need not bother.  */
 903
 904   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 905
 906   if (! all_one)
 907     {
 908       temp = expand_binop (mode, and_optab, op0,
 909                            mask_rtx (mode, bitpos, bitsize, 1),
 910                            subtarget, 1, OPTAB_LIB_WIDEN);
 911       subtarget = temp;
 912     }
 913   else
 914     temp = op0;
 915
 916   /* Now logical-or VALUE into OP0, unless it is zero.  */
 917
 918   if (! all_zero)
 919     temp = expand_binop (mode, ior_optab, temp, value,
 920                          subtarget, 1, OPTAB_LIB_WIDEN);
 921   if (op0 != temp)
 922     emit_move_insn (op0, temp);
 923 }
 924 \f
 925 /* Store a bit field that is split across multiple accessible memory objects.
 926
 927    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 928    BITSIZE is the field width; BITPOS the position of its first bit
 929    (within the word).
 930    VALUE is the value to store.
 931
 932    This does not yet handle fields wider than BITS_PER_WORD.  */
 933
 934 static void
 935 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 936                        unsigned HOST_WIDE_INT bitpos, rtx value)
 937 {
 938   unsigned int unit;
 939   unsigned int bitsdone = 0;
 940
 941   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 942      much at a time.  */
 943   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 944     unit = BITS_PER_WORD;
 945   else
 946     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 947
 948   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 949      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 950      that VALUE might be a floating-point constant.  */
 951   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 952     {
 953       rtx word = gen_lowpart_common (word_mode, value);
 954
 955       if (word && (value != word))
 956         value = word;
 957       else
 958         value = gen_lowpart_common (word_mode,
 959                                     force_reg (GET_MODE (value) != VOIDmode
 960                                                ? GET_MODE (value)
 961                                                : word_mode, value));
 962     }
 963
 964   while (bitsdone < bitsize)
 965     {
 966       unsigned HOST_WIDE_INT thissize;
 967       rtx part, word;
 968       unsigned HOST_WIDE_INT thispos;
 969       unsigned HOST_WIDE_INT offset;
 970
 971       offset = (bitpos + bitsdone) / unit;
 972       thispos = (bitpos + bitsdone) % unit;
 973
 974       /* THISSIZE must not overrun a word boundary.  Otherwise,
 975          store_fixed_bit_field will call us again, and we will mutually
 976          recurse forever.  */
 977       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 978       thissize = MIN (thissize, unit - thispos);
 979
 980       if (BYTES_BIG_ENDIAN)
 981         {
 982           int total_bits;
 983
 984           /* We must do an endian conversion exactly the same way as it is
 985              done in extract_bit_field, so that the two calls to
 986              extract_fixed_bit_field will have comparable arguments.  */
 987           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 988             total_bits = BITS_PER_WORD;
 989           else
 990             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 991
 992           /* Fetch successively less significant portions.  */
 993           if (GET_CODE (value) == CONST_INT)
 994             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 995                              >> (bitsize - bitsdone - thissize))
 996                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 997           else
 998             /* The args are chosen so that the last part includes the
 999                lsb.  Give extract_bit_field the value it needs (with
1000                endianness compensation) to fetch the piece we want.  */
1001             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1002                                             total_bits - bitsize + bitsdone,
1003                                             NULL_RTX, 1);
1004         }
1005       else
1006         {
1007           /* Fetch successively more significant portions.  */
1008           if (GET_CODE (value) == CONST_INT)
1009             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1010                              >> bitsdone)
1011                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1012           else
1013             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1014                                             bitsdone, NULL_RTX, 1);
1015         }
1016
1017       /* If OP0 is a register, then handle OFFSET here.
1018
1019          When handling multiword bitfields, extract_bit_field may pass
1020          down a word_mode SUBREG of a larger REG for a bitfield that actually
1021          crosses a word boundary.  Thus, for a SUBREG, we must find
1022          the current word starting from the base register.  */
1023       if (GET_CODE (op0) == SUBREG)
1024         {
1025           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1026           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1027                                         GET_MODE (SUBREG_REG (op0)));
1028           offset = 0;
1029         }
1030       else if (REG_P (op0))
1031         {
1032           word = operand_subword_force (op0, offset, GET_MODE (op0));
1033           offset = 0;
1034         }
1035       else
1036         word = op0;
1037
1038       /* OFFSET is in UNITs, and UNIT is in bits.
1039          store_fixed_bit_field wants offset in bytes.  */
1040       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1041                              thispos, part);
1042       bitsdone += thissize;
1043     }
1044 }
1045 \f
1046 /* Generate code to extract a byte-field from STR_RTX
1047    containing BITSIZE bits, starting at BITNUM,
1048    and put it in TARGET if possible (if TARGET is nonzero).
1049    Regardless of TARGET, we return the rtx for where the value is placed.
1050
1051    STR_RTX is the structure containing the byte (a REG or MEM).
1052    UNSIGNEDP is nonzero if this is an unsigned bit field.
1053    MODE is the natural mode of the field value once extracted.
1054    TMODE is the mode the caller would like the value to have;
1055    but the value may be returned with type MODE instead.
1056
1057    TOTAL_SIZE is the size in bytes of the containing structure,
1058    or -1 if varying.
1059
1060    If a TARGET is specified and we can store in it at no extra cost,
1061    we do so, and return TARGET.
1062    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1063    if they are equally easy.  */
1064
1065 rtx
1066 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1067                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1068                    enum machine_mode mode, enum machine_mode tmode)
1069 {
1070   unsigned int unit
1071     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1072   unsigned HOST_WIDE_INT offset = bitnum / unit;
1073   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1074   rtx op0 = str_rtx;
1075   rtx spec_target = target;
1076   rtx spec_target_subreg = 0;
1077   enum machine_mode int_mode;
1078   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1079   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1080   enum machine_mode mode1;
1081   int byte_offset;
1082
1083   if (tmode == VOIDmode)
1084     tmode = mode;
1085
1086   while (GET_CODE (op0) == SUBREG)
1087     {
1088       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1089       if (bitpos > unit)
1090         {
1091           offset += (bitpos / unit);
1092           bitpos %= unit;
1093         }
1094       op0 = SUBREG_REG (op0);
1095     }
1096
1097   if (REG_P (op0)
1098       && mode == GET_MODE (op0)
1099       && bitnum == 0
1100       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1101     {
1102       /* We're trying to extract a full register from itself.  */
1103       return op0;
1104     }
1105
1106   /* Use vec_extract patterns for extracting parts of vectors whenever
1107      available.  */
1108   if (VECTOR_MODE_P (GET_MODE (op0))
1109       && !MEM_P (op0)
1110       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1111           != CODE_FOR_nothing)
1112       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1113           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1114     {
1115       enum machine_mode outermode = GET_MODE (op0);
1116       enum machine_mode innermode = GET_MODE_INNER (outermode);
1117       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1118       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1119       rtx rtxpos = GEN_INT (pos);
1120       rtx src = op0;
1121       rtx dest = NULL, pat, seq;
1122       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1123       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1124       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1125
1126       if (innermode == tmode || innermode == mode)
1127         dest = target;
1128
1129       if (!dest)
1130         dest = gen_reg_rtx (innermode);
1131
1132       start_sequence ();
1133
1134       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1135         dest = copy_to_mode_reg (mode0, dest);
1136
1137       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1138         src = copy_to_mode_reg (mode1, src);
1139
1140       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1141         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1142
1143       /* We could handle this, but we should always be called with a pseudo
1144          for our targets and all insns should take them as outputs.  */
1145       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1146                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1147                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1148
1149       pat = GEN_FCN (icode) (dest, src, rtxpos);
1150       seq = get_insns ();
1151       end_sequence ();
1152       if (pat)
1153         {
1154           emit_insn (seq);
1155           emit_insn (pat);
1156           return dest;
1157         }
1158     }
1159
1160   /* Make sure we are playing with integral modes.  Pun with subregs
1161      if we aren't.  */
1162   {
1163     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1164     if (imode != GET_MODE (op0))
1165       {
1166         if (MEM_P (op0))
1167           op0 = adjust_address (op0, imode, 0);
1168         else
1169           {
1170             gcc_assert (imode != BLKmode);
1171             op0 = gen_lowpart (imode, op0);
1172           }
1173       }
1174   }
1175
1176   /* We may be accessing data outside the field, which means
1177      we can alias adjacent data.  */
1178   if (MEM_P (op0))
1179     {
1180       op0 = shallow_copy_rtx (op0);
1181       set_mem_alias_set (op0, 0);
1182       set_mem_expr (op0, 0);
1183     }
1184
1185   /* Extraction of a full-word or multi-word value from a structure
1186      in a register or aligned memory can be done with just a SUBREG.
1187      A subword value in the least significant part of a register
1188      can also be extracted with a SUBREG.  For this, we need the
1189      byte offset of the value in op0.  */
1190
1191   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1192
1193   /* If OP0 is a register, BITPOS must count within a word.
1194      But as we have it, it counts within whatever size OP0 now has.
1195      On a bigendian machine, these are not the same, so convert.  */
1196   if (BYTES_BIG_ENDIAN
1197       && !MEM_P (op0)
1198       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1199     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1200
1201   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1202      If that's wrong, the solution is to test for it and set TARGET to 0
1203      if needed.  */
1204
1205   /* Only scalar integer modes can be converted via subregs.  There is an
1206      additional problem for FP modes here in that they can have a precision
1207      which is different from the size.  mode_for_size uses precision, but
1208      we want a mode based on the size, so we must avoid calling it for FP
1209      modes.  */
1210   mode1  = (SCALAR_INT_MODE_P (tmode)
1211             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1212             : mode);
1213
1214   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1215         && bitpos % BITS_PER_WORD == 0)
1216        || (mode1 != BLKmode
1217            /* ??? The big endian test here is wrong.  This is correct
1218               if the value is in a register, and if mode_for_size is not
1219               the same mode as op0.  This causes us to get unnecessarily
1220               inefficient code from the Thumb port when -mbig-endian.  */
1221            && (BYTES_BIG_ENDIAN
1222                ? bitpos + bitsize == BITS_PER_WORD
1223                : bitpos == 0)))
1224       && ((!MEM_P (op0)
1225            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1226                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1227            && GET_MODE_SIZE (mode1) != 0
1228            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1229           || (MEM_P (op0)
1230               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1231                   || (offset * BITS_PER_UNIT % bitsize == 0
1232                       && MEM_ALIGN (op0) % bitsize == 0)))))
1233     {
1234       if (mode1 != GET_MODE (op0))
1235         {
1236           if (GET_CODE (op0) == SUBREG)
1237             {
1238               if (GET_MODE (SUBREG_REG (op0)) == mode1
1239                   || GET_MODE_CLASS (mode1) == MODE_INT
1240                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1241                 op0 = SUBREG_REG (op0);
1242               else
1243                 /* Else we've got some float mode source being extracted into
1244                    a different float mode destination -- this combination of
1245                    subregs results in Severe Tire Damage.  */
1246                 goto no_subreg_mode_swap;
1247             }
1248           if (REG_P (op0))
1249             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1250           else
1251             op0 = adjust_address (op0, mode1, offset);
1252         }
1253       if (mode1 != mode)
1254         return convert_to_mode (tmode, op0, unsignedp);
1255       return op0;
1256     }
1257  no_subreg_mode_swap:
1258
1259   /* Handle fields bigger than a word.  */
1260
1261   if (bitsize > BITS_PER_WORD)
1262     {
1263       /* Here we transfer the words of the field
1264          in the order least significant first.
1265          This is because the most significant word is the one which may
1266          be less than full.  */
1267
1268       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1269       unsigned int i;
1270
1271       if (target == 0 || !REG_P (target))
1272         target = gen_reg_rtx (mode);
1273
1274       /* Indicate for flow that the entire target reg is being set.  */
1275       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1276
1277       for (i = 0; i < nwords; i++)
1278         {
1279           /* If I is 0, use the low-order word in both field and target;
1280              if I is 1, use the next to lowest word; and so on.  */
1281           /* Word number in TARGET to use.  */
1282           unsigned int wordnum
1283             = (WORDS_BIG_ENDIAN
1284                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1285                : i);
1286           /* Offset from start of field in OP0.  */
1287           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1288                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1289                                                 * (int) BITS_PER_WORD))
1290                                      : (int) i * BITS_PER_WORD);
1291           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1292           rtx result_part
1293             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1294                                            bitsize - i * BITS_PER_WORD),
1295                                  bitnum + bit_offset, 1, target_part, mode,
1296                                  word_mode);
1297
1298           gcc_assert (target_part);
1299
1300           if (result_part != target_part)
1301             emit_move_insn (target_part, result_part);
1302         }
1303
1304       if (unsignedp)
1305         {
1306           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1307              need to be zero'd out.  */
1308           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1309             {
1310               unsigned int i, total_words;
1311
1312               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1313               for (i = nwords; i < total_words; i++)
1314                 emit_move_insn
1315                   (operand_subword (target,
1316                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1317                                     1, VOIDmode),
1318                    const0_rtx);
1319             }
1320           return target;
1321         }
1322
1323       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1324       target = expand_shift (LSHIFT_EXPR, mode, target,
1325                              build_int_cst (NULL_TREE,
1326                                             GET_MODE_BITSIZE (mode) - bitsize),
1327                              NULL_RTX, 0);
1328       return expand_shift (RSHIFT_EXPR, mode, target,
1329                            build_int_cst (NULL_TREE,
1330                                           GET_MODE_BITSIZE (mode) - bitsize),
1331                            NULL_RTX, 0);
1332     }
1333
1334   /* From here on we know the desired field is smaller than a word.  */
1335
1336   /* Check if there is a correspondingly-sized integer field, so we can
1337      safely extract it as one size of integer, if necessary; then
1338      truncate or extend to the size that is wanted; then use SUBREGs or
1339      convert_to_mode to get one of the modes we really wanted.  */
1340
1341   int_mode = int_mode_for_mode (tmode);
1342   if (int_mode == BLKmode)
1343     int_mode = int_mode_for_mode (mode);
1344   /* Should probably push op0 out to memory and then do a load.  */
1345   gcc_assert (int_mode != BLKmode);
1346
1347   /* OFFSET is the number of words or bytes (UNIT says which)
1348      from STR_RTX to the first word or byte containing part of the field.  */
1349   if (!MEM_P (op0))
1350     {
1351       if (offset != 0
1352           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1353         {
1354           if (!REG_P (op0))
1355             op0 = copy_to_reg (op0);
1356           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1357                                 op0, (offset * UNITS_PER_WORD));
1358         }
1359       offset = 0;
1360     }
1361
1362   /* Now OFFSET is nonzero only for memory operands.  */
1363
1364   if (unsignedp)
1365     {
1366       if (HAVE_extzv
1367           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1368           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1369                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1370         {
1371           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1372           rtx bitsize_rtx, bitpos_rtx;
1373           rtx last = get_last_insn ();
1374           rtx xop0 = op0;
1375           rtx xtarget = target;
1376           rtx xspec_target = spec_target;
1377           rtx xspec_target_subreg = spec_target_subreg;
1378           rtx pat;
1379           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1380
1381           if (MEM_P (xop0))
1382             {
1383               int save_volatile_ok = volatile_ok;
1384               volatile_ok = 1;
1385
1386               /* Is the memory operand acceptable?  */
1387               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1388                      (xop0, GET_MODE (xop0))))
1389                 {
1390                   /* No, load into a reg and extract from there.  */
1391                   enum machine_mode bestmode;
1392
1393                   /* Get the mode to use for inserting into this field.  If
1394                      OP0 is BLKmode, get the smallest mode consistent with the
1395                      alignment. If OP0 is a non-BLKmode object that is no
1396                      wider than MAXMODE, use its mode. Otherwise, use the
1397                      smallest mode containing the field.  */
1398
1399                   if (GET_MODE (xop0) == BLKmode
1400                       || (GET_MODE_SIZE (GET_MODE (op0))
1401                           > GET_MODE_SIZE (maxmode)))
1402                     bestmode = get_best_mode (bitsize, bitnum,
1403                                               MEM_ALIGN (xop0), maxmode,
1404                                               MEM_VOLATILE_P (xop0));
1405                   else
1406                     bestmode = GET_MODE (xop0);
1407
1408                   if (bestmode == VOIDmode
1409                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1410                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1411                     goto extzv_loses;
1412
1413                   /* Compute offset as multiple of this unit,
1414                      counting in bytes.  */
1415                   unit = GET_MODE_BITSIZE (bestmode);
1416                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1417                   xbitpos = bitnum % unit;
1418                   xop0 = adjust_address (xop0, bestmode, xoffset);
1419
1420                   /* Fetch it to a register in that size.  */
1421                   xop0 = force_reg (bestmode, xop0);
1422
1423                   /* XBITPOS counts within UNIT, which is what is expected.  */
1424                 }
1425               else
1426                 /* Get ref to first byte containing part of the field.  */
1427                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1428
1429               volatile_ok = save_volatile_ok;
1430             }
1431
1432           /* If op0 is a register, we need it in MAXMODE (which is usually
1433              SImode). to make it acceptable to the format of extzv.  */
1434           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1435             goto extzv_loses;
1436           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1437             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1438
1439           /* On big-endian machines, we count bits from the most significant.
1440              If the bit field insn does not, we must invert.  */
1441           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1442             xbitpos = unit - bitsize - xbitpos;
1443
1444           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1445           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1446             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1447
1448           unit = GET_MODE_BITSIZE (maxmode);
1449
1450           if (xtarget == 0
1451               || (flag_force_mem && MEM_P (xtarget)))
1452             xtarget = xspec_target = gen_reg_rtx (tmode);
1453
1454           if (GET_MODE (xtarget) != maxmode)
1455             {
1456               if (REG_P (xtarget))
1457                 {
1458                   int wider = (GET_MODE_SIZE (maxmode)
1459                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1460                   xtarget = gen_lowpart (maxmode, xtarget);
1461                   if (wider)
1462                     xspec_target_subreg = xtarget;
1463                 }
1464               else
1465                 xtarget = gen_reg_rtx (maxmode);
1466             }
1467
1468           /* If this machine's extzv insists on a register target,
1469              make sure we have one.  */
1470           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1471                  (xtarget, maxmode)))
1472             xtarget = gen_reg_rtx (maxmode);
1473
1474           bitsize_rtx = GEN_INT (bitsize);
1475           bitpos_rtx = GEN_INT (xbitpos);
1476
1477           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1478           if (pat)
1479             {
1480               emit_insn (pat);
1481               target = xtarget;
1482               spec_target = xspec_target;
1483               spec_target_subreg = xspec_target_subreg;
1484             }
1485           else
1486             {
1487               delete_insns_since (last);
1488               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1489                                                 bitpos, target, 1);
1490             }
1491         }
1492       else
1493       extzv_loses:
1494         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1495                                           bitpos, target, 1);
1496     }
1497   else
1498     {
1499       if (HAVE_extv
1500           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1501           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1502                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1503         {
1504           int xbitpos = bitpos, xoffset = offset;
1505           rtx bitsize_rtx, bitpos_rtx;
1506           rtx last = get_last_insn ();
1507           rtx xop0 = op0, xtarget = target;
1508           rtx xspec_target = spec_target;
1509           rtx xspec_target_subreg = spec_target_subreg;
1510           rtx pat;
1511           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1512
1513           if (MEM_P (xop0))
1514             {
1515               /* Is the memory operand acceptable?  */
1516               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1517                      (xop0, GET_MODE (xop0))))
1518                 {
1519                   /* No, load into a reg and extract from there.  */
1520                   enum machine_mode bestmode;
1521
1522                   /* Get the mode to use for inserting into this field.  If
1523                      OP0 is BLKmode, get the smallest mode consistent with the
1524                      alignment. If OP0 is a non-BLKmode object that is no
1525                      wider than MAXMODE, use its mode. Otherwise, use the
1526                      smallest mode containing the field.  */
1527
1528                   if (GET_MODE (xop0) == BLKmode
1529                       || (GET_MODE_SIZE (GET_MODE (op0))
1530                           > GET_MODE_SIZE (maxmode)))
1531                     bestmode = get_best_mode (bitsize, bitnum,
1532                                               MEM_ALIGN (xop0), maxmode,
1533                                               MEM_VOLATILE_P (xop0));
1534                   else
1535                     bestmode = GET_MODE (xop0);
1536
1537                   if (bestmode == VOIDmode
1538                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1539                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1540                     goto extv_loses;
1541
1542                   /* Compute offset as multiple of this unit,
1543                      counting in bytes.  */
1544                   unit = GET_MODE_BITSIZE (bestmode);
1545                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1546                   xbitpos = bitnum % unit;
1547                   xop0 = adjust_address (xop0, bestmode, xoffset);
1548
1549                   /* Fetch it to a register in that size.  */
1550                   xop0 = force_reg (bestmode, xop0);
1551
1552                   /* XBITPOS counts within UNIT, which is what is expected.  */
1553                 }
1554               else
1555                 /* Get ref to first byte containing part of the field.  */
1556                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1557             }
1558
1559           /* If op0 is a register, we need it in MAXMODE (which is usually
1560              SImode) to make it acceptable to the format of extv.  */
1561           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1562             goto extv_loses;
1563           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1564             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1565
1566           /* On big-endian machines, we count bits from the most significant.
1567              If the bit field insn does not, we must invert.  */
1568           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1569             xbitpos = unit - bitsize - xbitpos;
1570
1571           /* XBITPOS counts within a size of UNIT.
1572              Adjust to count within a size of MAXMODE.  */
1573           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1574             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1575
1576           unit = GET_MODE_BITSIZE (maxmode);
1577
1578           if (xtarget == 0
1579               || (flag_force_mem && MEM_P (xtarget)))
1580             xtarget = xspec_target = gen_reg_rtx (tmode);
1581
1582           if (GET_MODE (xtarget) != maxmode)
1583             {
1584               if (REG_P (xtarget))
1585                 {
1586                   int wider = (GET_MODE_SIZE (maxmode)
1587                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1588                   xtarget = gen_lowpart (maxmode, xtarget);
1589                   if (wider)
1590                     xspec_target_subreg = xtarget;
1591                 }
1592               else
1593                 xtarget = gen_reg_rtx (maxmode);
1594             }
1595
1596           /* If this machine's extv insists on a register target,
1597              make sure we have one.  */
1598           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1599                  (xtarget, maxmode)))
1600             xtarget = gen_reg_rtx (maxmode);
1601
1602           bitsize_rtx = GEN_INT (bitsize);
1603           bitpos_rtx = GEN_INT (xbitpos);
1604
1605           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1606           if (pat)
1607             {
1608               emit_insn (pat);
1609               target = xtarget;
1610               spec_target = xspec_target;
1611               spec_target_subreg = xspec_target_subreg;
1612             }
1613           else
1614             {
1615               delete_insns_since (last);
1616               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1617                                                 bitpos, target, 0);
1618             }
1619         }
1620       else
1621       extv_loses:
1622         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1623                                           bitpos, target, 0);
1624     }
1625   if (target == spec_target)
1626     return target;
1627   if (target == spec_target_subreg)
1628     return spec_target;
1629   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1630     {
1631       /* If the target mode is floating-point, first convert to the
1632          integer mode of that size and then access it as a floating-point
1633          value via a SUBREG.  */
1634       if (GET_MODE_CLASS (tmode) != MODE_INT
1635           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1636         {
1637           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1638                                                    MODE_INT, 0),
1639                                     target, unsignedp);
1640           return gen_lowpart (tmode, target);
1641         }
1642       else
1643         return convert_to_mode (tmode, target, unsignedp);
1644     }
1645   return target;
1646 }
1647 \f
1648 /* Extract a bit field using shifts and boolean operations
1649    Returns an rtx to represent the value.
1650    OP0 addresses a register (word) or memory (byte).
1651    BITPOS says which bit within the word or byte the bit field starts in.
1652    OFFSET says how many bytes farther the bit field starts;
1653     it is 0 if OP0 is a register.
1654    BITSIZE says how many bits long the bit field is.
1655     (If OP0 is a register, it may be narrower than a full word,
1656      but BITPOS still counts within a full word,
1657      which is significant on bigendian machines.)
1658
1659    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1660    If TARGET is nonzero, attempts to store the value there
1661    and return TARGET, but this is not guaranteed.
1662    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1663
1664 static rtx
1665 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1666                          unsigned HOST_WIDE_INT offset,
1667                          unsigned HOST_WIDE_INT bitsize,
1668                          unsigned HOST_WIDE_INT bitpos, rtx target,
1669                          int unsignedp)
1670 {
1671   unsigned int total_bits = BITS_PER_WORD;
1672   enum machine_mode mode;
1673
1674   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1675     {
1676       /* Special treatment for a bit field split across two registers.  */
1677       if (bitsize + bitpos > BITS_PER_WORD)
1678         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1679     }
1680   else
1681     {
1682       /* Get the proper mode to use for this field.  We want a mode that
1683          includes the entire field.  If such a mode would be larger than
1684          a word, we won't be doing the extraction the normal way.  */
1685
1686       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1687                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1688
1689       if (mode == VOIDmode)
1690         /* The only way this should occur is if the field spans word
1691            boundaries.  */
1692         return extract_split_bit_field (op0, bitsize,
1693                                         bitpos + offset * BITS_PER_UNIT,
1694                                         unsignedp);
1695
1696       total_bits = GET_MODE_BITSIZE (mode);
1697
1698       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1699          be in the range 0 to total_bits-1, and put any excess bytes in
1700          OFFSET.  */
1701       if (bitpos >= total_bits)
1702         {
1703           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1704           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1705                      * BITS_PER_UNIT);
1706         }
1707
1708       /* Get ref to an aligned byte, halfword, or word containing the field.
1709          Adjust BITPOS to be position within a word,
1710          and OFFSET to be the offset of that word.
1711          Then alter OP0 to refer to that word.  */
1712       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1713       offset -= (offset % (total_bits / BITS_PER_UNIT));
1714       op0 = adjust_address (op0, mode, offset);
1715     }
1716
1717   mode = GET_MODE (op0);
1718
1719   if (BYTES_BIG_ENDIAN)
1720     /* BITPOS is the distance between our msb and that of OP0.
1721        Convert it to the distance from the lsb.  */
1722     bitpos = total_bits - bitsize - bitpos;
1723
1724   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1725      We have reduced the big-endian case to the little-endian case.  */
1726
1727   if (unsignedp)
1728     {
1729       if (bitpos)
1730         {
1731           /* If the field does not already start at the lsb,
1732              shift it so it does.  */
1733           tree amount = build_int_cst (NULL_TREE, bitpos);
1734           /* Maybe propagate the target for the shift.  */
1735           /* But not if we will return it--could confuse integrate.c.  */
1736           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1737           if (tmode != mode) subtarget = 0;
1738           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1739         }
1740       /* Convert the value to the desired mode.  */
1741       if (mode != tmode)
1742         op0 = convert_to_mode (tmode, op0, 1);
1743
1744       /* Unless the msb of the field used to be the msb when we shifted,
1745          mask out the upper bits.  */
1746
1747       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1748         return expand_binop (GET_MODE (op0), and_optab, op0,
1749                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1750                              target, 1, OPTAB_LIB_WIDEN);
1751       return op0;
1752     }
1753
1754   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1755      then arithmetic-shift its lsb to the lsb of the word.  */
1756   op0 = force_reg (mode, op0);
1757   if (mode != tmode)
1758     target = 0;
1759
1760   /* Find the narrowest integer mode that contains the field.  */
1761
1762   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1763        mode = GET_MODE_WIDER_MODE (mode))
1764     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1765       {
1766         op0 = convert_to_mode (mode, op0, 0);
1767         break;
1768       }
1769
1770   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1771     {
1772       tree amount
1773         = build_int_cst (NULL_TREE,
1774                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1775       /* Maybe propagate the target for the shift.  */
1776       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1777       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1778     }
1779
1780   return expand_shift (RSHIFT_EXPR, mode, op0,
1781                        build_int_cst (NULL_TREE,
1782                                       GET_MODE_BITSIZE (mode) - bitsize),
1783                        target, 0);
1784 }
1785 \f
1786 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1787    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1788    complement of that if COMPLEMENT.  The mask is truncated if
1789    necessary to the width of mode MODE.  The mask is zero-extended if
1790    BITSIZE+BITPOS is too small for MODE.  */
1791
1792 static rtx
1793 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1794 {
1795   HOST_WIDE_INT masklow, maskhigh;
1796
1797   if (bitsize == 0)
1798     masklow = 0;
1799   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1800     masklow = (HOST_WIDE_INT) -1 << bitpos;
1801   else
1802     masklow = 0;
1803
1804   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1805     masklow &= ((unsigned HOST_WIDE_INT) -1
1806                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1807
1808   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1809     maskhigh = -1;
1810   else
1811     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1812
1813   if (bitsize == 0)
1814     maskhigh = 0;
1815   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1816     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1817                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1818   else
1819     maskhigh = 0;
1820
1821   if (complement)
1822     {
1823       maskhigh = ~maskhigh;
1824       masklow = ~masklow;
1825     }
1826
1827   return immed_double_const (masklow, maskhigh, mode);
1828 }
1829
1830 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1831    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1832
1833 static rtx
1834 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1835 {
1836   unsigned HOST_WIDE_INT v = INTVAL (value);
1837   HOST_WIDE_INT low, high;
1838
1839   if (bitsize < HOST_BITS_PER_WIDE_INT)
1840     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1841
1842   if (bitpos < HOST_BITS_PER_WIDE_INT)
1843     {
1844       low = v << bitpos;
1845       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1846     }
1847   else
1848     {
1849       low = 0;
1850       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1851     }
1852
1853   return immed_double_const (low, high, mode);
1854 }
1855 \f
1856 /* Extract a bit field that is split across two words
1857    and return an RTX for the result.
1858
1859    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1860    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1861    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1862
1863 static rtx
1864 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1865                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1866 {
1867   unsigned int unit;
1868   unsigned int bitsdone = 0;
1869   rtx result = NULL_RTX;
1870   int first = 1;
1871
1872   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1873      much at a time.  */
1874   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1875     unit = BITS_PER_WORD;
1876   else
1877     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1878
1879   while (bitsdone < bitsize)
1880     {
1881       unsigned HOST_WIDE_INT thissize;
1882       rtx part, word;
1883       unsigned HOST_WIDE_INT thispos;
1884       unsigned HOST_WIDE_INT offset;
1885
1886       offset = (bitpos + bitsdone) / unit;
1887       thispos = (bitpos + bitsdone) % unit;
1888
1889       /* THISSIZE must not overrun a word boundary.  Otherwise,
1890          extract_fixed_bit_field will call us again, and we will mutually
1891          recurse forever.  */
1892       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1893       thissize = MIN (thissize, unit - thispos);
1894
1895       /* If OP0 is a register, then handle OFFSET here.
1896
1897          When handling multiword bitfields, extract_bit_field may pass
1898          down a word_mode SUBREG of a larger REG for a bitfield that actually
1899          crosses a word boundary.  Thus, for a SUBREG, we must find
1900          the current word starting from the base register.  */
1901       if (GET_CODE (op0) == SUBREG)
1902         {
1903           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1904           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1905                                         GET_MODE (SUBREG_REG (op0)));
1906           offset = 0;
1907         }
1908       else if (REG_P (op0))
1909         {
1910           word = operand_subword_force (op0, offset, GET_MODE (op0));
1911           offset = 0;
1912         }
1913       else
1914         word = op0;
1915
1916       /* Extract the parts in bit-counting order,
1917          whose meaning is determined by BYTES_PER_UNIT.
1918          OFFSET is in UNITs, and UNIT is in bits.
1919          extract_fixed_bit_field wants offset in bytes.  */
1920       part = extract_fixed_bit_field (word_mode, word,
1921                                       offset * unit / BITS_PER_UNIT,
1922                                       thissize, thispos, 0, 1);
1923       bitsdone += thissize;
1924
1925       /* Shift this part into place for the result.  */
1926       if (BYTES_BIG_ENDIAN)
1927         {
1928           if (bitsize != bitsdone)
1929             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1930                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1931                                  0, 1);
1932         }
1933       else
1934         {
1935           if (bitsdone != thissize)
1936             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1937                                  build_int_cst (NULL_TREE,
1938                                                 bitsdone - thissize), 0, 1);
1939         }
1940
1941       if (first)
1942         result = part;
1943       else
1944         /* Combine the parts with bitwise or.  This works
1945            because we extracted each part as an unsigned bit field.  */
1946         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1947                                OPTAB_LIB_WIDEN);
1948
1949       first = 0;
1950     }
1951
1952   /* Unsigned bit field: we are done.  */
1953   if (unsignedp)
1954     return result;
1955   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1956   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1957                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1958                          NULL_RTX, 0);
1959   return expand_shift (RSHIFT_EXPR, word_mode, result,
1960                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1961                        NULL_RTX, 0);
1962 }
1963 \f
1964 /* Add INC into TARGET.  */
1965
1966 void
1967 expand_inc (rtx target, rtx inc)
1968 {
1969   rtx value = expand_binop (GET_MODE (target), add_optab,
1970                             target, inc,
1971                             target, 0, OPTAB_LIB_WIDEN);
1972   if (value != target)
1973     emit_move_insn (target, value);
1974 }
1975
1976 /* Subtract DEC from TARGET.  */
1977
1978 void
1979 expand_dec (rtx target, rtx dec)
1980 {
1981   rtx value = expand_binop (GET_MODE (target), sub_optab,
1982                             target, dec,
1983                             target, 0, OPTAB_LIB_WIDEN);
1984   if (value != target)
1985     emit_move_insn (target, value);
1986 }
1987 \f
1988 /* Output a shift instruction for expression code CODE,
1989    with SHIFTED being the rtx for the value to shift,
1990    and AMOUNT the tree for the amount to shift by.
1991    Store the result in the rtx TARGET, if that is convenient.
1992    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1993    Return the rtx for where the value is.  */
1994
1995 rtx
1996 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1997               tree amount, rtx target, int unsignedp)
1998 {
1999   rtx op1, temp = 0;
2000   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2001   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2002   int try;
2003
2004   /* Previously detected shift-counts computed by NEGATE_EXPR
2005      and shifted in the other direction; but that does not work
2006      on all machines.  */
2007
2008   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2009
2010   if (SHIFT_COUNT_TRUNCATED)
2011     {
2012       if (GET_CODE (op1) == CONST_INT
2013           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2014               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2015         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2016                        % GET_MODE_BITSIZE (mode));
2017       else if (GET_CODE (op1) == SUBREG
2018                && subreg_lowpart_p (op1))
2019         op1 = SUBREG_REG (op1);
2020     }
2021
2022   if (op1 == const0_rtx)
2023     return shifted;
2024
2025   /* Check whether its cheaper to implement a left shift by a constant
2026      bit count by a sequence of additions.  */
2027   if (code == LSHIFT_EXPR
2028       && GET_CODE (op1) == CONST_INT
2029       && INTVAL (op1) > 0
2030       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2031       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2032     {
2033       int i;
2034       for (i = 0; i < INTVAL (op1); i++)
2035         {
2036           temp = force_reg (mode, shifted);
2037           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2038                                   unsignedp, OPTAB_LIB_WIDEN);
2039         }
2040       return shifted;
2041     }
2042
2043   for (try = 0; temp == 0 && try < 3; try++)
2044     {
2045       enum optab_methods methods;
2046
2047       if (try == 0)
2048         methods = OPTAB_DIRECT;
2049       else if (try == 1)
2050         methods = OPTAB_WIDEN;
2051       else
2052         methods = OPTAB_LIB_WIDEN;
2053
2054       if (rotate)
2055         {
2056           /* Widening does not work for rotation.  */
2057           if (methods == OPTAB_WIDEN)
2058             continue;
2059           else if (methods == OPTAB_LIB_WIDEN)
2060             {
2061               /* If we have been unable to open-code this by a rotation,
2062                  do it as the IOR of two shifts.  I.e., to rotate A
2063                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2064                  where C is the bitsize of A.
2065
2066                  It is theoretically possible that the target machine might
2067                  not be able to perform either shift and hence we would
2068                  be making two libcalls rather than just the one for the
2069                  shift (similarly if IOR could not be done).  We will allow
2070                  this extremely unlikely lossage to avoid complicating the
2071                  code below.  */
2072
2073               rtx subtarget = target == shifted ? 0 : target;
2074               rtx temp1;
2075               tree type = TREE_TYPE (amount);
2076               tree new_amount = make_tree (type, op1);
2077               tree other_amount
2078                 = fold (build2 (MINUS_EXPR, type, convert
2079                                 (type, build_int_cst
2080                                  (NULL_TREE, GET_MODE_BITSIZE (mode))),
2081                                 amount));
2082
2083               shifted = force_reg (mode, shifted);
2084
2085               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2086                                    mode, shifted, new_amount, subtarget, 1);
2087               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2088                                     mode, shifted, other_amount, 0, 1);
2089               return expand_binop (mode, ior_optab, temp, temp1, target,
2090                                    unsignedp, methods);
2091             }
2092
2093           temp = expand_binop (mode,
2094                                left ? rotl_optab : rotr_optab,
2095                                shifted, op1, target, unsignedp, methods);
2096
2097           /* If we don't have the rotate, but we are rotating by a constant
2098              that is in range, try a rotate in the opposite direction.  */
2099
2100           if (temp == 0 && GET_CODE (op1) == CONST_INT
2101               && INTVAL (op1) > 0
2102               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2103             temp = expand_binop (mode,
2104                                  left ? rotr_optab : rotl_optab,
2105                                  shifted,
2106                                  GEN_INT (GET_MODE_BITSIZE (mode)
2107                                           - INTVAL (op1)),
2108                                  target, unsignedp, methods);
2109         }
2110       else if (unsignedp)
2111         temp = expand_binop (mode,
2112                              left ? ashl_optab : lshr_optab,
2113                              shifted, op1, target, unsignedp, methods);
2114
2115       /* Do arithmetic shifts.
2116          Also, if we are going to widen the operand, we can just as well
2117          use an arithmetic right-shift instead of a logical one.  */
2118       if (temp == 0 && ! rotate
2119           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2120         {
2121           enum optab_methods methods1 = methods;
2122
2123           /* If trying to widen a log shift to an arithmetic shift,
2124              don't accept an arithmetic shift of the same size.  */
2125           if (unsignedp)
2126             methods1 = OPTAB_MUST_WIDEN;
2127
2128           /* Arithmetic shift */
2129
2130           temp = expand_binop (mode,
2131                                left ? ashl_optab : ashr_optab,
2132                                shifted, op1, target, unsignedp, methods1);
2133         }
2134
2135       /* We used to try extzv here for logical right shifts, but that was
2136          only useful for one machine, the VAX, and caused poor code
2137          generation there for lshrdi3, so the code was deleted and a
2138          define_expand for lshrsi3 was added to vax.md.  */
2139     }
2140
2141   gcc_assert (temp);
2142   return temp;
2143 }
2144 \f
2145 enum alg_code { alg_zero, alg_m, alg_shift,
2146                   alg_add_t_m2, alg_sub_t_m2,
2147                   alg_add_factor, alg_sub_factor,
2148                   alg_add_t2_m, alg_sub_t2_m };
2149
2150 /* This structure holds the "cost" of a multiply sequence.  The
2151    "cost" field holds the total rtx_cost of every operator in the
2152    synthetic multiplication sequence, hence cost(a op b) is defined
2153    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2154    The "latency" field holds the minimum possible latency of the
2155    synthetic multiply, on a hypothetical infinitely parallel CPU.
2156    This is the critical path, or the maximum height, of the expression
2157    tree which is the sum of rtx_costs on the most expensive path from
2158    any leaf to the root.  Hence latency(a op b) is defined as zero for
2159    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2160
2161 struct mult_cost {
2162   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2163   short latency;  /* The latency of the multiplication sequence.  */
2164 };
2165
2166 /* This macro is used to compare a pointer to a mult_cost against an
2167    single integer "rtx_cost" value.  This is equivalent to the macro
2168    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2169 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2170                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2171
2172 /* This macro is used to compare two pointers to mult_costs against
2173    each other.  The macro returns true if X is cheaper than Y.
2174    Currently, the cheaper of two mult_costs is the one with the
2175    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2176 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2177                                  || ((X)->cost == (Y)->cost     \
2178                                      && (X)->latency < (Y)->latency))
2179
2180 /* This structure records a sequence of operations.
2181    `ops' is the number of operations recorded.
2182    `cost' is their total cost.
2183    The operations are stored in `op' and the corresponding
2184    logarithms of the integer coefficients in `log'.
2185
2186    These are the operations:
2187    alg_zero             total := 0;
2188    alg_m                total := multiplicand;
2189    alg_shift            total := total * coeff
2190    alg_add_t_m2         total := total + multiplicand * coeff;
2191    alg_sub_t_m2         total := total - multiplicand * coeff;
2192    alg_add_factor       total := total * coeff + total;
2193    alg_sub_factor       total := total * coeff - total;
2194    alg_add_t2_m         total := total * coeff + multiplicand;
2195    alg_sub_t2_m         total := total * coeff - multiplicand;
2196
2197    The first operand must be either alg_zero or alg_m.  */
2198
2199 struct algorithm
2200 {
2201   struct mult_cost cost;
2202   short ops;
2203   /* The size of the OP and LOG fields are not directly related to the
2204      word size, but the worst-case algorithms will be if we have few
2205      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2206      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2207      in total wordsize operations.  */
2208   enum alg_code op[MAX_BITS_PER_WORD];
2209   char log[MAX_BITS_PER_WORD];
2210 };
2211
2212 /* Indicates the type of fixup needed after a constant multiplication.
2213    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2214    the result should be negated, and ADD_VARIANT means that the
2215    multiplicand should be added to the result.  */
2216 enum mult_variant {basic_variant, negate_variant, add_variant};
2217
2218 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2219                         const struct mult_cost *, enum machine_mode mode);
2220 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2221                                  struct algorithm *, enum mult_variant *, int);
2222 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2223                               const struct algorithm *, enum mult_variant);
2224 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2225                                                  int, unsigned HOST_WIDE_INT *,
2226                                                  int *, int *);
2227 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2228 static rtx extract_high_half (enum machine_mode, rtx);
2229 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2230                                        int, int);
2231 /* Compute and return the best algorithm for multiplying by T.
2232    The algorithm must cost less than cost_limit
2233    If retval.cost >= COST_LIMIT, no algorithm was found and all
2234    other field of the returned struct are undefined.
2235    MODE is the machine mode of the multiplication.  */
2236
2237 static void
2238 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2239             const struct mult_cost *cost_limit, enum machine_mode mode)
2240 {
2241   int m;
2242   struct algorithm *alg_in, *best_alg;
2243   struct mult_cost best_cost;
2244   struct mult_cost new_limit;
2245   int op_cost, op_latency;
2246   unsigned HOST_WIDE_INT q;
2247   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2248
2249   /* Indicate that no algorithm is yet found.  If no algorithm
2250      is found, this value will be returned and indicate failure.  */
2251   alg_out->cost.cost = cost_limit->cost + 1;
2252   alg_out->cost.latency = cost_limit->latency + 1;
2253
2254   if (cost_limit->cost < 0
2255       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2256     return;
2257
2258   /* Restrict the bits of "t" to the multiplication's mode.  */
2259   t &= GET_MODE_MASK (mode);
2260
2261   /* t == 1 can be done in zero cost.  */
2262   if (t == 1)
2263     {
2264       alg_out->ops = 1;
2265       alg_out->cost.cost = 0;
2266       alg_out->cost.latency = 0;
2267       alg_out->op[0] = alg_m;
2268       return;
2269     }
2270
2271   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2272      fail now.  */
2273   if (t == 0)
2274     {
2275       if (MULT_COST_LESS (cost_limit, zero_cost))
2276         return;
2277       else
2278         {
2279           alg_out->ops = 1;
2280           alg_out->cost.cost = zero_cost;
2281           alg_out->cost.latency = zero_cost;
2282           alg_out->op[0] = alg_zero;
2283           return;
2284         }
2285     }
2286
2287   /* We'll be needing a couple extra algorithm structures now.  */
2288
2289   alg_in = alloca (sizeof (struct algorithm));
2290   best_alg = alloca (sizeof (struct algorithm));
2291   best_cost = *cost_limit;
2292
2293   /* If we have a group of zero bits at the low-order part of T, try
2294      multiplying by the remaining bits and then doing a shift.  */
2295
2296   if ((t & 1) == 0)
2297     {
2298       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2299       if (m < maxm)
2300         {
2301           q = t >> m;
2302           /* The function expand_shift will choose between a shift and
2303              a sequence of additions, so the observed cost is given as
2304              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2305           op_cost = m * add_cost[mode];
2306           if (shift_cost[mode][m] < op_cost)
2307             op_cost = shift_cost[mode][m];
2308           new_limit.cost = best_cost.cost - op_cost;
2309           new_limit.latency = best_cost.latency - op_cost;
2310           synth_mult (alg_in, q, &new_limit, mode);
2311
2312           alg_in->cost.cost += op_cost;
2313           alg_in->cost.latency += op_cost;
2314           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2315             {
2316               struct algorithm *x;
2317               best_cost = alg_in->cost;
2318               x = alg_in, alg_in = best_alg, best_alg = x;
2319               best_alg->log[best_alg->ops] = m;
2320               best_alg->op[best_alg->ops] = alg_shift;
2321             }
2322         }
2323     }
2324
2325   /* If we have an odd number, add or subtract one.  */
2326   if ((t & 1) != 0)
2327     {
2328       unsigned HOST_WIDE_INT w;
2329
2330       for (w = 1; (w & t) != 0; w <<= 1)
2331         ;
2332       /* If T was -1, then W will be zero after the loop.  This is another
2333          case where T ends with ...111.  Handling this with (T + 1) and
2334          subtract 1 produces slightly better code and results in algorithm
2335          selection much faster than treating it like the ...0111 case
2336          below.  */
2337       if (w == 0
2338           || (w > 2
2339               /* Reject the case where t is 3.
2340                  Thus we prefer addition in that case.  */
2341               && t != 3))
2342         {
2343           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2344
2345           op_cost = add_cost[mode];
2346           new_limit.cost = best_cost.cost - op_cost;
2347           new_limit.latency = best_cost.latency - op_cost;
2348           synth_mult (alg_in, t + 1, &new_limit, mode);
2349
2350           alg_in->cost.cost += op_cost;
2351           alg_in->cost.latency += op_cost;
2352           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2353             {
2354               struct algorithm *x;
2355               best_cost = alg_in->cost;
2356               x = alg_in, alg_in = best_alg, best_alg = x;
2357               best_alg->log[best_alg->ops] = 0;
2358               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2359             }
2360         }
2361       else
2362         {
2363           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2364
2365           op_cost = add_cost[mode];
2366           new_limit.cost = best_cost.cost - op_cost;
2367           new_limit.latency = best_cost.latency - op_cost;
2368           synth_mult (alg_in, t - 1, &new_limit, mode);
2369
2370           alg_in->cost.cost += op_cost;
2371           alg_in->cost.latency += op_cost;
2372           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2373             {
2374               struct algorithm *x;
2375               best_cost = alg_in->cost;
2376               x = alg_in, alg_in = best_alg, best_alg = x;
2377               best_alg->log[best_alg->ops] = 0;
2378               best_alg->op[best_alg->ops] = alg_add_t_m2;
2379             }
2380         }
2381     }
2382
2383   /* Look for factors of t of the form
2384      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2385      If we find such a factor, we can multiply by t using an algorithm that
2386      multiplies by q, shift the result by m and add/subtract it to itself.
2387
2388      We search for large factors first and loop down, even if large factors
2389      are less probable than small; if we find a large factor we will find a
2390      good sequence quickly, and therefore be able to prune (by decreasing
2391      COST_LIMIT) the search.  */
2392
2393   for (m = floor_log2 (t - 1); m >= 2; m--)
2394     {
2395       unsigned HOST_WIDE_INT d;
2396
2397       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2398       if (t % d == 0 && t > d && m < maxm)
2399         {
2400           /* If the target has a cheap shift-and-add instruction use
2401              that in preference to a shift insn followed by an add insn.
2402              Assume that the shift-and-add is "atomic" with a latency
2403              equal to it's cost, otherwise assume that on superscalar
2404              hardware the shift may be executed concurrently with the
2405              earlier steps in the algorithm.  */
2406           op_cost = add_cost[mode] + shift_cost[mode][m];
2407           if (shiftadd_cost[mode][m] < op_cost)
2408             {
2409               op_cost = shiftadd_cost[mode][m];
2410               op_latency = op_cost;
2411             }
2412           else
2413             op_latency = add_cost[mode];
2414
2415           new_limit.cost = best_cost.cost - op_cost;
2416           new_limit.latency = best_cost.latency - op_latency;
2417           synth_mult (alg_in, t / d, &new_limit, mode);
2418
2419           alg_in->cost.cost += op_cost;
2420           alg_in->cost.latency += op_latency;
2421           if (alg_in->cost.latency < op_cost)
2422             alg_in->cost.latency = op_cost;
2423           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2424             {
2425               struct algorithm *x;
2426               best_cost = alg_in->cost;
2427               x = alg_in, alg_in = best_alg, best_alg = x;
2428               best_alg->log[best_alg->ops] = m;
2429               best_alg->op[best_alg->ops] = alg_add_factor;
2430             }
2431           /* Other factors will have been taken care of in the recursion.  */
2432           break;
2433         }
2434
2435       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2436       if (t % d == 0 && t > d && m < maxm)
2437         {
2438           /* If the target has a cheap shift-and-subtract insn use
2439              that in preference to a shift insn followed by a sub insn.
2440              Assume that the shift-and-sub is "atomic" with a latency
2441              equal to it's cost, otherwise assume that on superscalar
2442              hardware the shift may be executed concurrently with the
2443              earlier steps in the algorithm.  */
2444           op_cost = add_cost[mode] + shift_cost[mode][m];
2445           if (shiftsub_cost[mode][m] < op_cost)
2446             {
2447               op_cost = shiftsub_cost[mode][m];
2448               op_latency = op_cost;
2449             }
2450           else
2451             op_latency = add_cost[mode];
2452
2453           new_limit.cost = best_cost.cost - op_cost;
2454           new_limit.cost = best_cost.cost - op_latency;
2455           synth_mult (alg_in, t / d, &new_limit, mode);
2456
2457           alg_in->cost.cost += op_cost;
2458           alg_in->cost.latency += op_latency;
2459           if (alg_in->cost.latency < op_cost)
2460             alg_in->cost.latency = op_cost;
2461           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2462             {
2463               struct algorithm *x;
2464               best_cost = alg_in->cost;
2465               x = alg_in, alg_in = best_alg, best_alg = x;
2466               best_alg->log[best_alg->ops] = m;
2467               best_alg->op[best_alg->ops] = alg_sub_factor;
2468             }
2469           break;
2470         }
2471     }
2472
2473   /* Try shift-and-add (load effective address) instructions,
2474      i.e. do a*3, a*5, a*9.  */
2475   if ((t & 1) != 0)
2476     {
2477       q = t - 1;
2478       q = q & -q;
2479       m = exact_log2 (q);
2480       if (m >= 0 && m < maxm)
2481         {
2482           op_cost = shiftadd_cost[mode][m];
2483           new_limit.cost = best_cost.cost - op_cost;
2484           new_limit.latency = best_cost.latency - op_cost;
2485           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2486
2487           alg_in->cost.cost += op_cost;
2488           alg_in->cost.latency += op_cost;
2489           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2490             {
2491               struct algorithm *x;
2492               best_cost = alg_in->cost;
2493               x = alg_in, alg_in = best_alg, best_alg = x;
2494               best_alg->log[best_alg->ops] = m;
2495               best_alg->op[best_alg->ops] = alg_add_t2_m;
2496             }
2497         }
2498
2499       q = t + 1;
2500       q = q & -q;
2501       m = exact_log2 (q);
2502       if (m >= 0 && m < maxm)
2503         {
2504           op_cost = shiftsub_cost[mode][m];
2505           new_limit.cost = best_cost.cost - op_cost;
2506           new_limit.latency = best_cost.latency - op_cost;
2507           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2508
2509           alg_in->cost.cost += op_cost;
2510           alg_in->cost.latency += op_cost;
2511           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2512             {
2513               struct algorithm *x;
2514               best_cost = alg_in->cost;
2515               x = alg_in, alg_in = best_alg, best_alg = x;
2516               best_alg->log[best_alg->ops] = m;
2517               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2518             }
2519         }
2520     }
2521
2522   /* If best_cost has not decreased, we have not found any algorithm.  */
2523   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2524     return;
2525
2526   /* If we are getting a too long sequence for `struct algorithm'
2527      to record, make this search fail.  */
2528   if (best_alg->ops == MAX_BITS_PER_WORD)
2529     return;
2530
2531   /* Copy the algorithm from temporary space to the space at alg_out.
2532      We avoid using structure assignment because the majority of
2533      best_alg is normally undefined, and this is a critical function.  */
2534   alg_out->ops = best_alg->ops + 1;
2535   alg_out->cost = best_cost;
2536   memcpy (alg_out->op, best_alg->op,
2537           alg_out->ops * sizeof *alg_out->op);
2538   memcpy (alg_out->log, best_alg->log,
2539           alg_out->ops * sizeof *alg_out->log);
2540 }
2541 \f
2542 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2543    Try three variations:
2544
2545        - a shift/add sequence based on VAL itself
2546        - a shift/add sequence based on -VAL, followed by a negation
2547        - a shift/add sequence based on VAL - 1, followed by an addition.
2548
2549    Return true if the cheapest of these cost less than MULT_COST,
2550    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2551
2552 static bool
2553 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2554                      struct algorithm *alg, enum mult_variant *variant,
2555                      int mult_cost)
2556 {
2557   struct algorithm alg2;
2558   struct mult_cost limit;
2559   int op_cost;
2560
2561   *variant = basic_variant;
2562   limit.cost = mult_cost;
2563   limit.latency = mult_cost;
2564   synth_mult (alg, val, &limit, mode);
2565
2566   /* This works only if the inverted value actually fits in an
2567      `unsigned int' */
2568   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2569     {
2570       op_cost = neg_cost[mode];
2571       if (MULT_COST_LESS (&alg->cost, mult_cost))
2572         {
2573           limit.cost = alg->cost.cost - op_cost;
2574           limit.latency = alg->cost.latency - op_cost;
2575         }
2576       else
2577         {
2578           limit.cost = mult_cost - op_cost;
2579           limit.latency = mult_cost - op_cost;
2580         }
2581
2582       synth_mult (&alg2, -val, &limit, mode);
2583       alg2.cost.cost += op_cost;
2584       alg2.cost.latency += op_cost;
2585       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2586         *alg = alg2, *variant = negate_variant;
2587     }
2588
2589   /* This proves very useful for division-by-constant.  */
2590   op_cost = add_cost[mode];
2591   if (MULT_COST_LESS (&alg->cost, mult_cost))
2592     {
2593       limit.cost = alg->cost.cost - op_cost;
2594       limit.latency = alg->cost.latency - op_cost;
2595     }
2596   else
2597     {
2598       limit.cost = mult_cost - op_cost;
2599       limit.latency = mult_cost - op_cost;
2600     }
2601
2602   synth_mult (&alg2, val - 1, &limit, mode);
2603   alg2.cost.cost += op_cost;
2604   alg2.cost.latency += op_cost;
2605   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2606     *alg = alg2, *variant = add_variant;
2607
2608   return MULT_COST_LESS (&alg->cost, mult_cost);
2609 }
2610
2611 /* A subroutine of expand_mult, used for constant multiplications.
2612    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2613    convenient.  Use the shift/add sequence described by ALG and apply
2614    the final fixup specified by VARIANT.  */
2615
2616 static rtx
2617 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2618                    rtx target, const struct algorithm *alg,
2619                    enum mult_variant variant)
2620 {
2621   HOST_WIDE_INT val_so_far;
2622   rtx insn, accum, tem;
2623   int opno;
2624   enum machine_mode nmode;
2625
2626   /* Avoid referencing memory over and over.
2627      For speed, but also for correctness when mem is volatile.  */
2628   if (MEM_P (op0))
2629     op0 = force_reg (mode, op0);
2630
2631   /* ACCUM starts out either as OP0 or as a zero, depending on
2632      the first operation.  */
2633
2634   if (alg->op[0] == alg_zero)
2635     {
2636       accum = copy_to_mode_reg (mode, const0_rtx);
2637       val_so_far = 0;
2638     }
2639   else if (alg->op[0] == alg_m)
2640     {
2641       accum = copy_to_mode_reg (mode, op0);
2642       val_so_far = 1;
2643     }
2644   else
2645     gcc_unreachable ();
2646
2647   for (opno = 1; opno < alg->ops; opno++)
2648     {
2649       int log = alg->log[opno];
2650       rtx shift_subtarget = optimize ? 0 : accum;
2651       rtx add_target
2652         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2653            && !optimize)
2654           ? target : 0;
2655       rtx accum_target = optimize ? 0 : accum;
2656
2657       switch (alg->op[opno])
2658         {
2659         case alg_shift:
2660           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2661                                 build_int_cst (NULL_TREE, log),
2662                                 NULL_RTX, 0);
2663           val_so_far <<= log;
2664           break;
2665
2666         case alg_add_t_m2:
2667           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2668                               build_int_cst (NULL_TREE, log),
2669                               NULL_RTX, 0);
2670           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2671                                  add_target ? add_target : accum_target);
2672           val_so_far += (HOST_WIDE_INT) 1 << log;
2673           break;
2674
2675         case alg_sub_t_m2:
2676           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2677                               build_int_cst (NULL_TREE, log),
2678                               NULL_RTX, 0);
2679           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2680                                  add_target ? add_target : accum_target);
2681           val_so_far -= (HOST_WIDE_INT) 1 << log;
2682           break;
2683
2684         case alg_add_t2_m:
2685           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2686                                 build_int_cst (NULL_TREE, log),
2687                                 shift_subtarget,
2688                                 0);
2689           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2690                                  add_target ? add_target : accum_target);
2691           val_so_far = (val_so_far << log) + 1;
2692           break;
2693
2694         case alg_sub_t2_m:
2695           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2696                                 build_int_cst (NULL_TREE, log),
2697                                 shift_subtarget, 0);
2698           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2699                                  add_target ? add_target : accum_target);
2700           val_so_far = (val_so_far << log) - 1;
2701           break;
2702
2703         case alg_add_factor:
2704           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2705                               build_int_cst (NULL_TREE, log),
2706                               NULL_RTX, 0);
2707           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2708                                  add_target ? add_target : accum_target);
2709           val_so_far += val_so_far << log;
2710           break;
2711
2712         case alg_sub_factor:
2713           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2714                               build_int_cst (NULL_TREE, log),
2715                               NULL_RTX, 0);
2716           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2717                                  (add_target
2718                                   ? add_target : (optimize ? 0 : tem)));
2719           val_so_far = (val_so_far << log) - val_so_far;
2720           break;
2721
2722         default:
2723           gcc_unreachable ();
2724         }
2725
2726       /* Write a REG_EQUAL note on the last insn so that we can cse
2727          multiplication sequences.  Note that if ACCUM is a SUBREG,
2728          we've set the inner register and must properly indicate
2729          that.  */
2730
2731       tem = op0, nmode = mode;
2732       if (GET_CODE (accum) == SUBREG)
2733         {
2734           nmode = GET_MODE (SUBREG_REG (accum));
2735           tem = gen_lowpart (nmode, op0);
2736         }
2737
2738       insn = get_last_insn ();
2739       set_unique_reg_note (insn, REG_EQUAL,
2740                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2741     }
2742
2743   if (variant == negate_variant)
2744     {
2745       val_so_far = -val_so_far;
2746       accum = expand_unop (mode, neg_optab, accum, target, 0);
2747     }
2748   else if (variant == add_variant)
2749     {
2750       val_so_far = val_so_far + 1;
2751       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2752     }
2753
2754   /* Compare only the bits of val and val_so_far that are significant
2755      in the result mode, to avoid sign-/zero-extension confusion.  */
2756   val &= GET_MODE_MASK (mode);
2757   val_so_far &= GET_MODE_MASK (mode);
2758   gcc_assert (val == val_so_far);
2759
2760   return accum;
2761 }
2762
2763 /* Perform a multiplication and return an rtx for the result.
2764    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2765    TARGET is a suggestion for where to store the result (an rtx).
2766
2767    We check specially for a constant integer as OP1.
2768    If you want this check for OP0 as well, then before calling
2769    you should swap the two operands if OP0 would be constant.  */
2770
2771 rtx
2772 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2773              int unsignedp)
2774 {
2775   rtx const_op1 = op1;
2776   enum mult_variant variant;
2777   struct algorithm algorithm;
2778
2779   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2780      less than or equal in size to `unsigned int' this doesn't matter.
2781      If the mode is larger than `unsigned int', then synth_mult works only
2782      if the constant value exactly fits in an `unsigned int' without any
2783      truncation.  This means that multiplying by negative values does
2784      not work; results are off by 2^32 on a 32 bit machine.  */
2785
2786   /* If we are multiplying in DImode, it may still be a win
2787      to try to work with shifts and adds.  */
2788   if (GET_CODE (op1) == CONST_DOUBLE
2789       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2790       && HOST_BITS_PER_INT >= BITS_PER_WORD
2791       && CONST_DOUBLE_HIGH (op1) == 0)
2792     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2793   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2794            && GET_CODE (op1) == CONST_INT
2795            && INTVAL (op1) < 0)
2796     const_op1 = 0;
2797
2798   /* We used to test optimize here, on the grounds that it's better to
2799      produce a smaller program when -O is not used.
2800      But this causes such a terrible slowdown sometimes
2801      that it seems better to use synth_mult always.  */
2802
2803   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2804       && (unsignedp || !flag_trapv))
2805     {
2806       int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2807
2808       if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2809                                mult_cost))
2810         return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2811                                   &algorithm, variant);
2812     }
2813
2814   if (GET_CODE (op0) == CONST_DOUBLE)
2815     {
2816       rtx temp = op0;
2817       op0 = op1;
2818       op1 = temp;
2819     }
2820
2821   /* Expand x*2.0 as x+x.  */
2822   if (GET_CODE (op1) == CONST_DOUBLE
2823       && GET_MODE_CLASS (mode) == MODE_FLOAT)
2824     {
2825       REAL_VALUE_TYPE d;
2826       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2827
2828       if (REAL_VALUES_EQUAL (d, dconst2))
2829         {
2830           op0 = force_reg (GET_MODE (op0), op0);
2831           return expand_binop (mode, add_optab, op0, op0,
2832                                target, unsignedp, OPTAB_LIB_WIDEN);
2833         }
2834     }
2835
2836   /* This used to use umul_optab if unsigned, but for non-widening multiply
2837      there is no difference between signed and unsigned.  */
2838   op0 = expand_binop (mode,
2839                       ! unsignedp
2840                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2841                       ? smulv_optab : smul_optab,
2842                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2843   gcc_assert (op0);
2844   return op0;
2845 }
2846 \f
2847 /* Return the smallest n such that 2**n >= X.  */
2848
2849 int
2850 ceil_log2 (unsigned HOST_WIDE_INT x)
2851 {
2852   return floor_log2 (x - 1) + 1;
2853 }
2854
2855 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2856    replace division by D, and put the least significant N bits of the result
2857    in *MULTIPLIER_PTR and return the most significant bit.
2858
2859    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2860    needed precision is in PRECISION (should be <= N).
2861
2862    PRECISION should be as small as possible so this function can choose
2863    multiplier more freely.
2864
2865    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2866    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2867
2868    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2869    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2870
2871 static
2872 unsigned HOST_WIDE_INT
2873 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2874                    unsigned HOST_WIDE_INT *multiplier_ptr,
2875                    int *post_shift_ptr, int *lgup_ptr)
2876 {
2877   HOST_WIDE_INT mhigh_hi, mlow_hi;
2878   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2879   int lgup, post_shift;
2880   int pow, pow2;
2881   unsigned HOST_WIDE_INT nl, dummy1;
2882   HOST_WIDE_INT nh, dummy2;
2883
2884   /* lgup = ceil(log2(divisor)); */
2885   lgup = ceil_log2 (d);
2886
2887   gcc_assert (lgup <= n);
2888
2889   pow = n + lgup;
2890   pow2 = n + lgup - precision;
2891
2892   /* We could handle this with some effort, but this case is much
2893      better handled directly with a scc insn, so rely on caller using
2894      that.  */
2895   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
2896
2897   /* mlow = 2^(N + lgup)/d */
2898  if (pow >= HOST_BITS_PER_WIDE_INT)
2899     {
2900       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2901       nl = 0;
2902     }
2903   else
2904     {
2905       nh = 0;
2906       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2907     }
2908   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2909                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2910
2911   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2912   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2913     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2914   else
2915     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2916   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2917                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2918
2919   gcc_assert (!mhigh_hi || nh - d < d);
2920   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
2921   /* Assert that mlow < mhigh.  */
2922   gcc_assert (mlow_hi < mhigh_hi
2923               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
2924
2925   /* If precision == N, then mlow, mhigh exceed 2^N
2926      (but they do not exceed 2^(N+1)).  */
2927
2928   /* Reduce to lowest terms.  */
2929   for (post_shift = lgup; post_shift > 0; post_shift--)
2930     {
2931       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2932       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2933       if (ml_lo >= mh_lo)
2934         break;
2935
2936       mlow_hi = 0;
2937       mlow_lo = ml_lo;
2938       mhigh_hi = 0;
2939       mhigh_lo = mh_lo;
2940     }
2941
2942   *post_shift_ptr = post_shift;
2943   *lgup_ptr = lgup;
2944   if (n < HOST_BITS_PER_WIDE_INT)
2945     {
2946       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2947       *multiplier_ptr = mhigh_lo & mask;
2948       return mhigh_lo >= mask;
2949     }
2950   else
2951     {
2952       *multiplier_ptr = mhigh_lo;
2953       return mhigh_hi;
2954     }
2955 }
2956
2957 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2958    congruent to 1 (mod 2**N).  */
2959
2960 static unsigned HOST_WIDE_INT
2961 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2962 {
2963   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2964
2965   /* The algorithm notes that the choice y = x satisfies
2966      x*y == 1 mod 2^3, since x is assumed odd.
2967      Each iteration doubles the number of bits of significance in y.  */
2968
2969   unsigned HOST_WIDE_INT mask;
2970   unsigned HOST_WIDE_INT y = x;
2971   int nbit = 3;
2972
2973   mask = (n == HOST_BITS_PER_WIDE_INT
2974           ? ~(unsigned HOST_WIDE_INT) 0
2975           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2976
2977   while (nbit < n)
2978     {
2979       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2980       nbit *= 2;
2981     }
2982   return y;
2983 }
2984
2985 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2986    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2987    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2988    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2989    become signed.
2990
2991    The result is put in TARGET if that is convenient.
2992
2993    MODE is the mode of operation.  */
2994
2995 rtx
2996 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2997                              rtx op1, rtx target, int unsignedp)
2998 {
2999   rtx tem;
3000   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3001
3002   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3003                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3004                       NULL_RTX, 0);
3005   tem = expand_and (mode, tem, op1, NULL_RTX);
3006   adj_operand
3007     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3008                      adj_operand);
3009
3010   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3011                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3012                       NULL_RTX, 0);
3013   tem = expand_and (mode, tem, op0, NULL_RTX);
3014   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3015                           target);
3016
3017   return target;
3018 }
3019
3020 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3021
3022 static rtx
3023 extract_high_half (enum machine_mode mode, rtx op)
3024 {
3025   enum machine_mode wider_mode;
3026
3027   if (mode == word_mode)
3028     return gen_highpart (mode, op);
3029
3030   wider_mode = GET_MODE_WIDER_MODE (mode);
3031   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3032                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3033   return convert_modes (mode, wider_mode, op, 0);
3034 }
3035
3036 /* Like expand_mult_highpart, but only consider using a multiplication
3037    optab.  OP1 is an rtx for the constant operand.  */
3038
3039 static rtx
3040 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3041                             rtx target, int unsignedp, int max_cost)
3042 {
3043   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3044   enum machine_mode wider_mode;
3045   optab moptab;
3046   rtx tem;
3047   int size;
3048
3049   wider_mode = GET_MODE_WIDER_MODE (mode);
3050   size = GET_MODE_BITSIZE (mode);
3051
3052   /* Firstly, try using a multiplication insn that only generates the needed
3053      high part of the product, and in the sign flavor of unsignedp.  */
3054   if (mul_highpart_cost[mode] < max_cost)
3055     {
3056       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3057       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3058                           unsignedp, OPTAB_DIRECT);
3059       if (tem)
3060         return tem;
3061     }
3062
3063   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3064      Need to adjust the result after the multiplication.  */
3065   if (size - 1 < BITS_PER_WORD
3066       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3067           + 4 * add_cost[mode] < max_cost))
3068     {
3069       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3070       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3071                           unsignedp, OPTAB_DIRECT);
3072       if (tem)
3073         /* We used the wrong signedness.  Adjust the result.  */
3074         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3075                                             tem, unsignedp);
3076     }
3077
3078   /* Try widening multiplication.  */
3079   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3080   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3081       && mul_widen_cost[wider_mode] < max_cost)
3082     {
3083       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3084                           unsignedp, OPTAB_WIDEN);
3085       if (tem)
3086         return extract_high_half (mode, tem);
3087     }
3088
3089   /* Try widening the mode and perform a non-widening multiplication.  */
3090   moptab = smul_optab;
3091   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3092       && size - 1 < BITS_PER_WORD
3093       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3094     {
3095       tem = expand_binop (wider_mode, moptab, op0, op1, 0,
3096                           unsignedp, OPTAB_WIDEN);
3097       if (tem)
3098         return extract_high_half (mode, tem);
3099     }
3100
3101   /* Try widening multiplication of opposite signedness, and adjust.  */
3102   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3103   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3104       && size - 1 < BITS_PER_WORD
3105       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3106           + 4 * add_cost[mode] < max_cost))
3107     {
3108       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3109                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3110       if (tem != 0)
3111         {
3112           tem = extract_high_half (mode, tem);
3113           /* We used the wrong signedness.  Adjust the result.  */
3114           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3115                                               target, unsignedp);
3116         }
3117     }
3118
3119   return 0;
3120 }
3121
3122 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3123    in TARGET if that is convenient, and return where the result is.  If the
3124    operation can not be performed, 0 is returned.
3125
3126    MODE is the mode of operation and result.
3127
3128    UNSIGNEDP nonzero means unsigned multiply.
3129
3130    MAX_COST is the total allowed cost for the expanded RTL.  */
3131
3132 rtx
3133 expand_mult_highpart (enum machine_mode mode, rtx op0,
3134                       unsigned HOST_WIDE_INT cnst1, rtx target,
3135                       int unsignedp, int max_cost)
3136 {
3137   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3138   int extra_cost;
3139   bool sign_adjust = false;
3140   enum mult_variant variant;
3141   struct algorithm alg;
3142   rtx op1, tem;
3143
3144   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3145   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3146
3147   op1 = gen_int_mode (cnst1, wider_mode);
3148   cnst1 &= GET_MODE_MASK (mode);
3149
3150   /* We can't optimize modes wider than BITS_PER_WORD.
3151      ??? We might be able to perform double-word arithmetic if
3152      mode == word_mode, however all the cost calculations in
3153      synth_mult etc. assume single-word operations.  */
3154   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3155     return expand_mult_highpart_optab (mode, op0, op1, target,
3156                                        unsignedp, max_cost);
3157
3158   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3159
3160   /* Check whether we try to multiply by a negative constant.  */
3161   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3162     {
3163       sign_adjust = true;
3164       extra_cost += add_cost[mode];
3165     }
3166
3167   /* See whether shift/add multiplication is cheap enough.  */
3168   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3169                            max_cost - extra_cost))
3170     {
3171       /* See whether the specialized multiplication optabs are
3172          cheaper than the shift/add version.  */
3173       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3174                                         alg.cost.cost + extra_cost);
3175       if (tem)
3176         return tem;
3177
3178       tem = convert_to_mode (wider_mode, op0, unsignedp);
3179       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3180       tem = extract_high_half (mode, tem);
3181
3182       /* Adjust result for signedness.  */
3183       if (sign_adjust)
3184         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3185
3186       return tem;
3187     }
3188   return expand_mult_highpart_optab (mode, op0, op1, target,
3189                                      unsignedp, max_cost);
3190 }
3191
3192
3193 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3194
3195 static rtx
3196 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3197 {
3198   unsigned HOST_WIDE_INT masklow, maskhigh;
3199   rtx result, temp, shift, label;
3200   int logd;
3201
3202   logd = floor_log2 (d);
3203   result = gen_reg_rtx (mode);
3204
3205   /* Avoid conditional branches when they're expensive.  */
3206   if (BRANCH_COST >= 2
3207       && !optimize_size)
3208     {
3209       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3210                                       mode, 0, -1);
3211       if (signmask)
3212         {
3213           signmask = force_reg (mode, signmask);
3214           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3215           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3216
3217           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3218              which instruction sequence to use.  If logical right shifts
3219              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3220              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3221
3222           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3223           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3224               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3225             {
3226               temp = expand_binop (mode, xor_optab, op0, signmask,
3227                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3228               temp = expand_binop (mode, sub_optab, temp, signmask,
3229                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3230               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3231                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3232               temp = expand_binop (mode, xor_optab, temp, signmask,
3233                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3234               temp = expand_binop (mode, sub_optab, temp, signmask,
3235                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3236             }
3237           else
3238             {
3239               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3240                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3241               signmask = force_reg (mode, signmask);
3242
3243               temp = expand_binop (mode, add_optab, op0, signmask,
3244                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3245               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3246                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3247               temp = expand_binop (mode, sub_optab, temp, signmask,
3248                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3249             }
3250           return temp;
3251         }
3252     }
3253
3254   /* Mask contains the mode's signbit and the significant bits of the
3255      modulus.  By including the signbit in the operation, many targets
3256      can avoid an explicit compare operation in the following comparison
3257      against zero.  */
3258
3259   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3260   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3261     {
3262       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3263       maskhigh = -1;
3264     }
3265   else
3266     maskhigh = (HOST_WIDE_INT) -1
3267                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3268
3269   temp = expand_binop (mode, and_optab, op0,
3270                        immed_double_const (masklow, maskhigh, mode),
3271                        result, 1, OPTAB_LIB_WIDEN);
3272   if (temp != result)
3273     emit_move_insn (result, temp);
3274
3275   label = gen_label_rtx ();
3276   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3277
3278   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3279                        0, OPTAB_LIB_WIDEN);
3280   masklow = (HOST_WIDE_INT) -1 << logd;
3281   maskhigh = -1;
3282   temp = expand_binop (mode, ior_optab, temp,
3283                        immed_double_const (masklow, maskhigh, mode),
3284                        result, 1, OPTAB_LIB_WIDEN);
3285   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3286                        0, OPTAB_LIB_WIDEN);
3287   if (temp != result)
3288     emit_move_insn (result, temp);
3289   emit_label (label);
3290   return result;
3291 }
3292
3293 /* Expand signed division of OP0 by a power of two D in mode MODE.
3294    This routine is only called for positive values of D.  */
3295
3296 static rtx
3297 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3298 {
3299   rtx temp, label;
3300   tree shift;
3301   int logd;
3302
3303   logd = floor_log2 (d);
3304   shift = build_int_cst (NULL_TREE, logd);
3305
3306   if (d == 2 && BRANCH_COST >= 1)
3307     {
3308       temp = gen_reg_rtx (mode);
3309       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3310       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3311                            0, OPTAB_LIB_WIDEN);
3312       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3313     }
3314
3315 #ifdef HAVE_conditional_move
3316   if (BRANCH_COST >= 2)
3317     {
3318       rtx temp2;
3319
3320       /* ??? emit_conditional_move forces a stack adjustment via
3321          compare_from_rtx so, if the sequence is discarded, it will
3322          be lost.  Do it now instead.  */
3323       do_pending_stack_adjust ();
3324
3325       start_sequence ();
3326       temp2 = copy_to_mode_reg (mode, op0);
3327       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3328                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3329       temp = force_reg (mode, temp);
3330
3331       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3332       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3333                                      mode, temp, temp2, mode, 0);
3334       if (temp2)
3335         {
3336           rtx seq = get_insns ();
3337           end_sequence ();
3338           emit_insn (seq);
3339           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3340         }
3341       end_sequence ();
3342     }
3343 #endif
3344
3345   if (BRANCH_COST >= 2)
3346     {
3347       int ushift = GET_MODE_BITSIZE (mode) - logd;
3348
3349       temp = gen_reg_rtx (mode);
3350       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3351       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3352         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3353                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3354       else
3355         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3356                              build_int_cst (NULL_TREE, ushift),
3357                              NULL_RTX, 1);
3358       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3359                            0, OPTAB_LIB_WIDEN);
3360       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3361     }
3362
3363   label = gen_label_rtx ();
3364   temp = copy_to_mode_reg (mode, op0);
3365   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3366   expand_inc (temp, GEN_INT (d - 1));
3367   emit_label (label);
3368   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3369 }
3370 \f
3371 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3372    if that is convenient, and returning where the result is.
3373    You may request either the quotient or the remainder as the result;
3374    specify REM_FLAG nonzero to get the remainder.
3375
3376    CODE is the expression code for which kind of division this is;
3377    it controls how rounding is done.  MODE is the machine mode to use.
3378    UNSIGNEDP nonzero means do unsigned division.  */
3379
3380 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3381    and then correct it by or'ing in missing high bits
3382    if result of ANDI is nonzero.
3383    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3384    This could optimize to a bfexts instruction.
3385    But C doesn't use these operations, so their optimizations are
3386    left for later.  */
3387 /* ??? For modulo, we don't actually need the highpart of the first product,
3388    the low part will do nicely.  And for small divisors, the second multiply
3389    can also be a low-part only multiply or even be completely left out.
3390    E.g. to calculate the remainder of a division by 3 with a 32 bit
3391    multiply, multiply with 0x55555556 and extract the upper two bits;
3392    the result is exact for inputs up to 0x1fffffff.
3393    The input range can be reduced by using cross-sum rules.
3394    For odd divisors >= 3, the following table gives right shift counts
3395    so that if a number is shifted by an integer multiple of the given
3396    amount, the remainder stays the same:
3397    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3398    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3399    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3400    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3401    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3402
3403    Cross-sum rules for even numbers can be derived by leaving as many bits
3404    to the right alone as the divisor has zeros to the right.
3405    E.g. if x is an unsigned 32 bit number:
3406    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3407    */
3408
3409 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3410
3411 rtx
3412 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3413                rtx op0, rtx op1, rtx target, int unsignedp)
3414 {
3415   enum machine_mode compute_mode;
3416   rtx tquotient;
3417   rtx quotient = 0, remainder = 0;
3418   rtx last;
3419   int size;
3420   rtx insn, set;
3421   optab optab1, optab2;
3422   int op1_is_constant, op1_is_pow2 = 0;
3423   int max_cost, extra_cost;
3424   static HOST_WIDE_INT last_div_const = 0;
3425   static HOST_WIDE_INT ext_op1;
3426
3427   op1_is_constant = GET_CODE (op1) == CONST_INT;
3428   if (op1_is_constant)
3429     {
3430       ext_op1 = INTVAL (op1);
3431       if (unsignedp)
3432         ext_op1 &= GET_MODE_MASK (mode);
3433       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3434                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3435     }
3436
3437   /*
3438      This is the structure of expand_divmod:
3439
3440      First comes code to fix up the operands so we can perform the operations
3441      correctly and efficiently.
3442
3443      Second comes a switch statement with code specific for each rounding mode.
3444      For some special operands this code emits all RTL for the desired
3445      operation, for other cases, it generates only a quotient and stores it in
3446      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3447      to indicate that it has not done anything.
3448
3449      Last comes code that finishes the operation.  If QUOTIENT is set and
3450      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3451      QUOTIENT is not set, it is computed using trunc rounding.
3452
3453      We try to generate special code for division and remainder when OP1 is a
3454      constant.  If |OP1| = 2**n we can use shifts and some other fast
3455      operations.  For other values of OP1, we compute a carefully selected
3456      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3457      by m.
3458
3459      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3460      half of the product.  Different strategies for generating the product are
3461      implemented in expand_mult_highpart.
3462
3463      If what we actually want is the remainder, we generate that by another
3464      by-constant multiplication and a subtraction.  */
3465
3466   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3467      code below will malfunction if we are, so check here and handle
3468      the special case if so.  */
3469   if (op1 == const1_rtx)
3470     return rem_flag ? const0_rtx : op0;
3471
3472     /* When dividing by -1, we could get an overflow.
3473      negv_optab can handle overflows.  */
3474   if (! unsignedp && op1 == constm1_rtx)
3475     {
3476       if (rem_flag)
3477         return const0_rtx;
3478       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3479                           ? negv_optab : neg_optab, op0, target, 0);
3480     }
3481
3482   if (target
3483       /* Don't use the function value register as a target
3484          since we have to read it as well as write it,
3485          and function-inlining gets confused by this.  */
3486       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3487           /* Don't clobber an operand while doing a multi-step calculation.  */
3488           || ((rem_flag || op1_is_constant)
3489               && (reg_mentioned_p (target, op0)
3490                   || (MEM_P (op0) && MEM_P (target))))
3491           || reg_mentioned_p (target, op1)
3492           || (MEM_P (op1) && MEM_P (target))))
3493     target = 0;
3494
3495   /* Get the mode in which to perform this computation.  Normally it will
3496      be MODE, but sometimes we can't do the desired operation in MODE.
3497      If so, pick a wider mode in which we can do the operation.  Convert
3498      to that mode at the start to avoid repeated conversions.
3499
3500      First see what operations we need.  These depend on the expression
3501      we are evaluating.  (We assume that divxx3 insns exist under the
3502      same conditions that modxx3 insns and that these insns don't normally
3503      fail.  If these assumptions are not correct, we may generate less
3504      efficient code in some cases.)
3505
3506      Then see if we find a mode in which we can open-code that operation
3507      (either a division, modulus, or shift).  Finally, check for the smallest
3508      mode for which we can do the operation with a library call.  */
3509
3510   /* We might want to refine this now that we have division-by-constant
3511      optimization.  Since expand_mult_highpart tries so many variants, it is
3512      not straightforward to generalize this.  Maybe we should make an array
3513      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3514
3515   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3516             ? (unsignedp ? lshr_optab : ashr_optab)
3517             : (unsignedp ? udiv_optab : sdiv_optab));
3518   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3519             ? optab1
3520             : (unsignedp ? udivmod_optab : sdivmod_optab));
3521
3522   for (compute_mode = mode; compute_mode != VOIDmode;
3523        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3524     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3525         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3526       break;
3527
3528   if (compute_mode == VOIDmode)
3529     for (compute_mode = mode; compute_mode != VOIDmode;
3530          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3531       if (optab1->handlers[compute_mode].libfunc
3532           || optab2->handlers[compute_mode].libfunc)
3533         break;
3534
3535   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3536      in expand_binop.  */
3537   if (compute_mode == VOIDmode)
3538     compute_mode = mode;
3539
3540   if (target && GET_MODE (target) == compute_mode)
3541     tquotient = target;
3542   else
3543     tquotient = gen_reg_rtx (compute_mode);
3544
3545   size = GET_MODE_BITSIZE (compute_mode);
3546 #if 0
3547   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3548      (mode), and thereby get better code when OP1 is a constant.  Do that
3549      later.  It will require going over all usages of SIZE below.  */
3550   size = GET_MODE_BITSIZE (mode);
3551 #endif
3552
3553   /* Only deduct something for a REM if the last divide done was
3554      for a different constant.   Then set the constant of the last
3555      divide.  */
3556   max_cost = div_cost[compute_mode]
3557     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3558                       && INTVAL (op1) == last_div_const)
3559        ? mul_cost[compute_mode] + add_cost[compute_mode]
3560        : 0);
3561
3562   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3563
3564   /* Now convert to the best mode to use.  */
3565   if (compute_mode != mode)
3566     {
3567       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3568       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3569
3570       /* convert_modes may have placed op1 into a register, so we
3571          must recompute the following.  */
3572       op1_is_constant = GET_CODE (op1) == CONST_INT;
3573       op1_is_pow2 = (op1_is_constant
3574                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3575                           || (! unsignedp
3576                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3577     }
3578
3579   /* If one of the operands is a volatile MEM, copy it into a register.  */
3580
3581   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3582     op0 = force_reg (compute_mode, op0);
3583   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3584     op1 = force_reg (compute_mode, op1);
3585
3586   /* If we need the remainder or if OP1 is constant, we need to
3587      put OP0 in a register in case it has any queued subexpressions.  */
3588   if (rem_flag || op1_is_constant)
3589     op0 = force_reg (compute_mode, op0);
3590
3591   last = get_last_insn ();
3592
3593   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3594   if (unsignedp)
3595     {
3596       if (code == FLOOR_DIV_EXPR)
3597         code = TRUNC_DIV_EXPR;
3598       if (code == FLOOR_MOD_EXPR)
3599         code = TRUNC_MOD_EXPR;
3600       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3601         code = TRUNC_DIV_EXPR;
3602     }
3603
3604   if (op1 != const0_rtx)
3605     switch (code)
3606       {
3607       case TRUNC_MOD_EXPR:
3608       case TRUNC_DIV_EXPR:
3609         if (op1_is_constant)
3610           {
3611             if (unsignedp)
3612               {
3613                 unsigned HOST_WIDE_INT mh, ml;
3614                 int pre_shift, post_shift;
3615                 int dummy;
3616                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3617                                             & GET_MODE_MASK (compute_mode));
3618
3619                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3620                   {
3621                     pre_shift = floor_log2 (d);
3622                     if (rem_flag)
3623                       {
3624                         remainder
3625                           = expand_binop (compute_mode, and_optab, op0,
3626                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3627                                           remainder, 1,
3628                                           OPTAB_LIB_WIDEN);
3629                         if (remainder)
3630                           return gen_lowpart (mode, remainder);
3631                       }
3632                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3633                                              build_int_cst (NULL_TREE,
3634                                                             pre_shift),
3635                                              tquotient, 1);
3636                   }
3637                 else if (size <= HOST_BITS_PER_WIDE_INT)
3638                   {
3639                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3640                       {
3641                         /* Most significant bit of divisor is set; emit an scc
3642                            insn.  */
3643                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3644                                                     compute_mode, 1, 1);
3645                         if (quotient == 0)
3646                           goto fail1;
3647                       }
3648                     else
3649                       {
3650                         /* Find a suitable multiplier and right shift count
3651                            instead of multiplying with D.  */
3652
3653                         mh = choose_multiplier (d, size, size,
3654                                                 &ml, &post_shift, &dummy);
3655
3656                         /* If the suggested multiplier is more than SIZE bits,
3657                            we can do better for even divisors, using an
3658                            initial right shift.  */
3659                         if (mh != 0 && (d & 1) == 0)
3660                           {
3661                             pre_shift = floor_log2 (d & -d);
3662                             mh = choose_multiplier (d >> pre_shift, size,
3663                                                     size - pre_shift,
3664                                                     &ml, &post_shift, &dummy);
3665                             gcc_assert (!mh);
3666                           }
3667                         else
3668                           pre_shift = 0;
3669
3670                         if (mh != 0)
3671                           {
3672                             rtx t1, t2, t3, t4;
3673
3674                             if (post_shift - 1 >= BITS_PER_WORD)
3675                               goto fail1;
3676
3677                             extra_cost
3678                               = (shift_cost[compute_mode][post_shift - 1]
3679                                  + shift_cost[compute_mode][1]
3680                                  + 2 * add_cost[compute_mode]);
3681                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3682                                                        NULL_RTX, 1,
3683                                                        max_cost - extra_cost);
3684                             if (t1 == 0)
3685                               goto fail1;
3686                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3687                                                                op0, t1),
3688                                                 NULL_RTX);
3689                             t3 = expand_shift
3690                               (RSHIFT_EXPR, compute_mode, t2,
3691                                build_int_cst (NULL_TREE, 1),
3692                                NULL_RTX,1);
3693                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3694                                                               t1, t3),
3695                                                 NULL_RTX);
3696                             quotient = expand_shift
3697                               (RSHIFT_EXPR, compute_mode, t4,
3698                                build_int_cst (NULL_TREE, post_shift - 1),
3699                                tquotient, 1);
3700                           }
3701                         else
3702                           {
3703                             rtx t1, t2;
3704
3705                             if (pre_shift >= BITS_PER_WORD
3706                                 || post_shift >= BITS_PER_WORD)
3707                               goto fail1;
3708
3709                             t1 = expand_shift
3710                               (RSHIFT_EXPR, compute_mode, op0,
3711                                build_int_cst (NULL_TREE, pre_shift),
3712                                NULL_RTX, 1);
3713                             extra_cost
3714                               = (shift_cost[compute_mode][pre_shift]
3715                                  + shift_cost[compute_mode][post_shift]);
3716                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3717                                                        NULL_RTX, 1,
3718                                                        max_cost - extra_cost);
3719                             if (t2 == 0)
3720                               goto fail1;
3721                             quotient = expand_shift
3722                               (RSHIFT_EXPR, compute_mode, t2,
3723                                build_int_cst (NULL_TREE, post_shift),
3724                                tquotient, 1);
3725                           }
3726                       }
3727                   }
3728                 else            /* Too wide mode to use tricky code */
3729                   break;
3730
3731                 insn = get_last_insn ();
3732                 if (insn != last
3733                     && (set = single_set (insn)) != 0
3734                     && SET_DEST (set) == quotient)
3735                   set_unique_reg_note (insn,
3736                                        REG_EQUAL,
3737                                        gen_rtx_UDIV (compute_mode, op0, op1));
3738               }
3739             else                /* TRUNC_DIV, signed */
3740               {
3741                 unsigned HOST_WIDE_INT ml;
3742                 int lgup, post_shift;
3743                 HOST_WIDE_INT d = INTVAL (op1);
3744                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3745
3746                 /* n rem d = n rem -d */
3747                 if (rem_flag && d < 0)
3748                   {
3749                     d = abs_d;
3750                     op1 = gen_int_mode (abs_d, compute_mode);
3751                   }
3752
3753                 if (d == 1)
3754                   quotient = op0;
3755                 else if (d == -1)
3756                   quotient = expand_unop (compute_mode, neg_optab, op0,
3757                                           tquotient, 0);
3758                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3759                   {
3760                     /* This case is not handled correctly below.  */
3761                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3762                                                 compute_mode, 1, 1);
3763                     if (quotient == 0)
3764                       goto fail1;
3765                   }
3766                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3767                          && (rem_flag ? smod_pow2_cheap[compute_mode]
3768                                       : sdiv_pow2_cheap[compute_mode])
3769                          /* We assume that cheap metric is true if the
3770                             optab has an expander for this mode.  */
3771                          && (((rem_flag ? smod_optab : sdiv_optab)
3772                               ->handlers[compute_mode].insn_code
3773                               != CODE_FOR_nothing)
3774                              || (sdivmod_optab->handlers[compute_mode]
3775                                  .insn_code != CODE_FOR_nothing)))
3776                   ;
3777                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3778                   {
3779                     if (rem_flag)
3780                       {
3781                         remainder = expand_smod_pow2 (compute_mode, op0, d);
3782                         if (remainder)
3783                           return gen_lowpart (mode, remainder);
3784                       }
3785
3786                     if (sdiv_pow2_cheap[compute_mode]
3787                         && ((sdiv_optab->handlers[compute_mode].insn_code
3788                              != CODE_FOR_nothing)
3789                             || (sdivmod_optab->handlers[compute_mode].insn_code
3790                                 != CODE_FOR_nothing)))
3791                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
3792                                                 compute_mode, op0,
3793                                                 gen_int_mode (abs_d,
3794                                                               compute_mode),
3795                                                 NULL_RTX, 0);
3796                     else
3797                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
3798
3799                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
3800                        negate the quotient.  */
3801                     if (d < 0)
3802                       {
3803                         insn = get_last_insn ();
3804                         if (insn != last
3805                             && (set = single_set (insn)) != 0
3806                             && SET_DEST (set) == quotient
3807                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3808                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3809                           set_unique_reg_note (insn,
3810                                                REG_EQUAL,
3811                                                gen_rtx_DIV (compute_mode,
3812                                                             op0,
3813                                                             GEN_INT
3814                                                             (trunc_int_for_mode
3815                                                              (abs_d,
3816                                                               compute_mode))));
3817
3818                         quotient = expand_unop (compute_mode, neg_optab,
3819                                                 quotient, quotient, 0);
3820                       }
3821                   }
3822                 else if (size <= HOST_BITS_PER_WIDE_INT)
3823                   {
3824                     choose_multiplier (abs_d, size, size - 1,
3825                                        &ml, &post_shift, &lgup);
3826                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3827                       {
3828                         rtx t1, t2, t3;
3829
3830                         if (post_shift >= BITS_PER_WORD
3831                             || size - 1 >= BITS_PER_WORD)
3832                           goto fail1;
3833
3834                         extra_cost = (shift_cost[compute_mode][post_shift]
3835                                       + shift_cost[compute_mode][size - 1]
3836                                       + add_cost[compute_mode]);
3837                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3838                                                    NULL_RTX, 0,
3839                                                    max_cost - extra_cost);
3840                         if (t1 == 0)
3841                           goto fail1;
3842                         t2 = expand_shift
3843                           (RSHIFT_EXPR, compute_mode, t1,
3844                            build_int_cst (NULL_TREE, post_shift),
3845                            NULL_RTX, 0);
3846                         t3 = expand_shift
3847                           (RSHIFT_EXPR, compute_mode, op0,
3848                            build_int_cst (NULL_TREE, size - 1),
3849                            NULL_RTX, 0);
3850                         if (d < 0)
3851                           quotient
3852                             = force_operand (gen_rtx_MINUS (compute_mode,
3853                                                             t3, t2),
3854                                              tquotient);
3855                         else
3856                           quotient
3857                             = force_operand (gen_rtx_MINUS (compute_mode,
3858                                                             t2, t3),
3859                                              tquotient);
3860                       }
3861                     else
3862                       {
3863                         rtx t1, t2, t3, t4;
3864
3865                         if (post_shift >= BITS_PER_WORD
3866                             || size - 1 >= BITS_PER_WORD)
3867                           goto fail1;
3868
3869                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3870                         extra_cost = (shift_cost[compute_mode][post_shift]
3871                                       + shift_cost[compute_mode][size - 1]
3872                                       + 2 * add_cost[compute_mode]);
3873                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3874                                                    NULL_RTX, 0,
3875                                                    max_cost - extra_cost);
3876                         if (t1 == 0)
3877                           goto fail1;
3878                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3879                                                           t1, op0),
3880                                             NULL_RTX);
3881                         t3 = expand_shift
3882                           (RSHIFT_EXPR, compute_mode, t2,
3883                            build_int_cst (NULL_TREE, post_shift),
3884                            NULL_RTX, 0);
3885                         t4 = expand_shift
3886                           (RSHIFT_EXPR, compute_mode, op0,
3887                            build_int_cst (NULL_TREE, size - 1),
3888                            NULL_RTX, 0);
3889                         if (d < 0)
3890                           quotient
3891                             = force_operand (gen_rtx_MINUS (compute_mode,
3892                                                             t4, t3),
3893                                              tquotient);
3894                         else
3895                           quotient
3896                             = force_operand (gen_rtx_MINUS (compute_mode,
3897                                                             t3, t4),
3898                                              tquotient);
3899                       }
3900                   }
3901                 else            /* Too wide mode to use tricky code */
3902                   break;
3903
3904                 insn = get_last_insn ();
3905                 if (insn != last
3906                     && (set = single_set (insn)) != 0
3907                     && SET_DEST (set) == quotient)
3908                   set_unique_reg_note (insn,
3909                                        REG_EQUAL,
3910                                        gen_rtx_DIV (compute_mode, op0, op1));
3911               }
3912             break;
3913           }
3914       fail1:
3915         delete_insns_since (last);
3916         break;
3917
3918       case FLOOR_DIV_EXPR:
3919       case FLOOR_MOD_EXPR:
3920       /* We will come here only for signed operations.  */
3921         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3922           {
3923             unsigned HOST_WIDE_INT mh, ml;
3924             int pre_shift, lgup, post_shift;
3925             HOST_WIDE_INT d = INTVAL (op1);
3926
3927             if (d > 0)
3928               {
3929                 /* We could just as easily deal with negative constants here,
3930                    but it does not seem worth the trouble for GCC 2.6.  */
3931                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3932                   {
3933                     pre_shift = floor_log2 (d);
3934                     if (rem_flag)
3935                       {
3936                         remainder = expand_binop (compute_mode, and_optab, op0,
3937                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3938                                                   remainder, 0, OPTAB_LIB_WIDEN);
3939                         if (remainder)
3940                           return gen_lowpart (mode, remainder);
3941                       }
3942                     quotient = expand_shift
3943                       (RSHIFT_EXPR, compute_mode, op0,
3944                        build_int_cst (NULL_TREE, pre_shift),
3945                        tquotient, 0);
3946                   }
3947                 else
3948                   {
3949                     rtx t1, t2, t3, t4;
3950
3951                     mh = choose_multiplier (d, size, size - 1,
3952                                             &ml, &post_shift, &lgup);
3953                     gcc_assert (!mh);
3954
3955                     if (post_shift < BITS_PER_WORD
3956                         && size - 1 < BITS_PER_WORD)
3957                       {
3958                         t1 = expand_shift
3959                           (RSHIFT_EXPR, compute_mode, op0,
3960                            build_int_cst (NULL_TREE, size - 1),
3961                            NULL_RTX, 0);
3962                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3963                                            NULL_RTX, 0, OPTAB_WIDEN);
3964                         extra_cost = (shift_cost[compute_mode][post_shift]
3965                                       + shift_cost[compute_mode][size - 1]
3966                                       + 2 * add_cost[compute_mode]);
3967                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3968                                                    NULL_RTX, 1,
3969                                                    max_cost - extra_cost);
3970                         if (t3 != 0)
3971                           {
3972                             t4 = expand_shift
3973                               (RSHIFT_EXPR, compute_mode, t3,
3974                                build_int_cst (NULL_TREE, post_shift),
3975                                NULL_RTX, 1);
3976                             quotient = expand_binop (compute_mode, xor_optab,
3977                                                      t4, t1, tquotient, 0,
3978                                                      OPTAB_WIDEN);
3979                           }
3980                       }
3981                   }
3982               }
3983             else
3984               {
3985                 rtx nsign, t1, t2, t3, t4;
3986                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3987                                                   op0, constm1_rtx), NULL_RTX);
3988                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3989                                    0, OPTAB_WIDEN);
3990                 nsign = expand_shift
3991                   (RSHIFT_EXPR, compute_mode, t2,
3992                    build_int_cst (NULL_TREE, size - 1),
3993                    NULL_RTX, 0);
3994                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3995                                     NULL_RTX);
3996                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3997                                     NULL_RTX, 0);
3998                 if (t4)
3999                   {
4000                     rtx t5;
4001                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4002                                       NULL_RTX, 0);
4003                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4004                                                             t4, t5),
4005                                               tquotient);
4006                   }
4007               }
4008           }
4009
4010         if (quotient != 0)
4011           break;
4012         delete_insns_since (last);
4013
4014         /* Try using an instruction that produces both the quotient and
4015            remainder, using truncation.  We can easily compensate the quotient
4016            or remainder to get floor rounding, once we have the remainder.
4017            Notice that we compute also the final remainder value here,
4018            and return the result right away.  */
4019         if (target == 0 || GET_MODE (target) != compute_mode)
4020           target = gen_reg_rtx (compute_mode);
4021
4022         if (rem_flag)
4023           {
4024             remainder
4025               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4026             quotient = gen_reg_rtx (compute_mode);
4027           }
4028         else
4029           {
4030             quotient
4031               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4032             remainder = gen_reg_rtx (compute_mode);
4033           }
4034
4035         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4036                                  quotient, remainder, 0))
4037           {
4038             /* This could be computed with a branch-less sequence.
4039                Save that for later.  */
4040             rtx tem;
4041             rtx label = gen_label_rtx ();
4042             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4043             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4044                                 NULL_RTX, 0, OPTAB_WIDEN);
4045             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4046             expand_dec (quotient, const1_rtx);
4047             expand_inc (remainder, op1);
4048             emit_label (label);
4049             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4050           }
4051
4052         /* No luck with division elimination or divmod.  Have to do it
4053            by conditionally adjusting op0 *and* the result.  */
4054         {
4055           rtx label1, label2, label3, label4, label5;
4056           rtx adjusted_op0;
4057           rtx tem;
4058
4059           quotient = gen_reg_rtx (compute_mode);
4060           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4061           label1 = gen_label_rtx ();
4062           label2 = gen_label_rtx ();
4063           label3 = gen_label_rtx ();
4064           label4 = gen_label_rtx ();
4065           label5 = gen_label_rtx ();
4066           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4067           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4068           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4069                               quotient, 0, OPTAB_LIB_WIDEN);
4070           if (tem != quotient)
4071             emit_move_insn (quotient, tem);
4072           emit_jump_insn (gen_jump (label5));
4073           emit_barrier ();
4074           emit_label (label1);
4075           expand_inc (adjusted_op0, const1_rtx);
4076           emit_jump_insn (gen_jump (label4));
4077           emit_barrier ();
4078           emit_label (label2);
4079           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4080           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4081                               quotient, 0, OPTAB_LIB_WIDEN);
4082           if (tem != quotient)
4083             emit_move_insn (quotient, tem);
4084           emit_jump_insn (gen_jump (label5));
4085           emit_barrier ();
4086           emit_label (label3);
4087           expand_dec (adjusted_op0, const1_rtx);
4088           emit_label (label4);
4089           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4090                               quotient, 0, OPTAB_LIB_WIDEN);
4091           if (tem != quotient)
4092             emit_move_insn (quotient, tem);
4093           expand_dec (quotient, const1_rtx);
4094           emit_label (label5);
4095         }
4096         break;
4097
4098       case CEIL_DIV_EXPR:
4099       case CEIL_MOD_EXPR:
4100         if (unsignedp)
4101           {
4102             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4103               {
4104                 rtx t1, t2, t3;
4105                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4106                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4107                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4108                                    tquotient, 1);
4109                 t2 = expand_binop (compute_mode, and_optab, op0,
4110                                    GEN_INT (d - 1),
4111                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4112                 t3 = gen_reg_rtx (compute_mode);
4113                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4114                                       compute_mode, 1, 1);
4115                 if (t3 == 0)
4116                   {
4117                     rtx lab;
4118                     lab = gen_label_rtx ();
4119                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4120                     expand_inc (t1, const1_rtx);
4121                     emit_label (lab);
4122                     quotient = t1;
4123                   }
4124                 else
4125                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4126                                                           t1, t3),
4127                                             tquotient);
4128                 break;
4129               }
4130
4131             /* Try using an instruction that produces both the quotient and
4132                remainder, using truncation.  We can easily compensate the
4133                quotient or remainder to get ceiling rounding, once we have the
4134                remainder.  Notice that we compute also the final remainder
4135                value here, and return the result right away.  */
4136             if (target == 0 || GET_MODE (target) != compute_mode)
4137               target = gen_reg_rtx (compute_mode);
4138
4139             if (rem_flag)
4140               {
4141                 remainder = (REG_P (target)
4142                              ? target : gen_reg_rtx (compute_mode));
4143                 quotient = gen_reg_rtx (compute_mode);
4144               }
4145             else
4146               {
4147                 quotient = (REG_P (target)
4148                             ? target : gen_reg_rtx (compute_mode));
4149                 remainder = gen_reg_rtx (compute_mode);
4150               }
4151
4152             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4153                                      remainder, 1))
4154               {
4155                 /* This could be computed with a branch-less sequence.
4156                    Save that for later.  */
4157                 rtx label = gen_label_rtx ();
4158                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4159                                  compute_mode, label);
4160                 expand_inc (quotient, const1_rtx);
4161                 expand_dec (remainder, op1);
4162                 emit_label (label);
4163                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4164               }
4165
4166             /* No luck with division elimination or divmod.  Have to do it
4167                by conditionally adjusting op0 *and* the result.  */
4168             {
4169               rtx label1, label2;
4170               rtx adjusted_op0, tem;
4171
4172               quotient = gen_reg_rtx (compute_mode);
4173               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4174               label1 = gen_label_rtx ();
4175               label2 = gen_label_rtx ();
4176               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4177                                compute_mode, label1);
4178               emit_move_insn  (quotient, const0_rtx);
4179               emit_jump_insn (gen_jump (label2));
4180               emit_barrier ();
4181               emit_label (label1);
4182               expand_dec (adjusted_op0, const1_rtx);
4183               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4184                                   quotient, 1, OPTAB_LIB_WIDEN);
4185               if (tem != quotient)
4186                 emit_move_insn (quotient, tem);
4187               expand_inc (quotient, const1_rtx);
4188               emit_label (label2);
4189             }
4190           }
4191         else /* signed */
4192           {
4193             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4194                 && INTVAL (op1) >= 0)
4195               {
4196                 /* This is extremely similar to the code for the unsigned case
4197                    above.  For 2.7 we should merge these variants, but for
4198                    2.6.1 I don't want to touch the code for unsigned since that
4199                    get used in C.  The signed case will only be used by other
4200                    languages (Ada).  */
4201
4202                 rtx t1, t2, t3;
4203                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4204                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4205                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4206                                    tquotient, 0);
4207                 t2 = expand_binop (compute_mode, and_optab, op0,
4208                                    GEN_INT (d - 1),
4209                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4210                 t3 = gen_reg_rtx (compute_mode);
4211                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4212                                       compute_mode, 1, 1);
4213                 if (t3 == 0)
4214                   {
4215                     rtx lab;
4216                     lab = gen_label_rtx ();
4217                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4218                     expand_inc (t1, const1_rtx);
4219                     emit_label (lab);
4220                     quotient = t1;
4221                   }
4222                 else
4223                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4224                                                           t1, t3),
4225                                             tquotient);
4226                 break;
4227               }
4228
4229             /* Try using an instruction that produces both the quotient and
4230                remainder, using truncation.  We can easily compensate the
4231                quotient or remainder to get ceiling rounding, once we have the
4232                remainder.  Notice that we compute also the final remainder
4233                value here, and return the result right away.  */
4234             if (target == 0 || GET_MODE (target) != compute_mode)
4235               target = gen_reg_rtx (compute_mode);
4236             if (rem_flag)
4237               {
4238                 remainder= (REG_P (target)
4239                             ? target : gen_reg_rtx (compute_mode));
4240                 quotient = gen_reg_rtx (compute_mode);
4241               }
4242             else
4243               {
4244                 quotient = (REG_P (target)
4245                             ? target : gen_reg_rtx (compute_mode));
4246                 remainder = gen_reg_rtx (compute_mode);
4247               }
4248
4249             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4250                                      remainder, 0))
4251               {
4252                 /* This could be computed with a branch-less sequence.
4253                    Save that for later.  */
4254                 rtx tem;
4255                 rtx label = gen_label_rtx ();
4256                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4257                                  compute_mode, label);
4258                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4259                                     NULL_RTX, 0, OPTAB_WIDEN);
4260                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4261                 expand_inc (quotient, const1_rtx);
4262                 expand_dec (remainder, op1);
4263                 emit_label (label);
4264                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4265               }
4266
4267             /* No luck with division elimination or divmod.  Have to do it
4268                by conditionally adjusting op0 *and* the result.  */
4269             {
4270               rtx label1, label2, label3, label4, label5;
4271               rtx adjusted_op0;
4272               rtx tem;
4273
4274               quotient = gen_reg_rtx (compute_mode);
4275               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4276               label1 = gen_label_rtx ();
4277               label2 = gen_label_rtx ();
4278               label3 = gen_label_rtx ();
4279               label4 = gen_label_rtx ();
4280               label5 = gen_label_rtx ();
4281               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4282               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4283                                compute_mode, label1);
4284               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4285                                   quotient, 0, OPTAB_LIB_WIDEN);
4286               if (tem != quotient)
4287                 emit_move_insn (quotient, tem);
4288               emit_jump_insn (gen_jump (label5));
4289               emit_barrier ();
4290               emit_label (label1);
4291               expand_dec (adjusted_op0, const1_rtx);
4292               emit_jump_insn (gen_jump (label4));
4293               emit_barrier ();
4294               emit_label (label2);
4295               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4296                                compute_mode, label3);
4297               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4298                                   quotient, 0, OPTAB_LIB_WIDEN);
4299               if (tem != quotient)
4300                 emit_move_insn (quotient, tem);
4301               emit_jump_insn (gen_jump (label5));
4302               emit_barrier ();
4303               emit_label (label3);
4304               expand_inc (adjusted_op0, const1_rtx);
4305               emit_label (label4);
4306               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4307                                   quotient, 0, OPTAB_LIB_WIDEN);
4308               if (tem != quotient)
4309                 emit_move_insn (quotient, tem);
4310               expand_inc (quotient, const1_rtx);
4311               emit_label (label5);
4312             }
4313           }
4314         break;
4315
4316       case EXACT_DIV_EXPR:
4317         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4318           {
4319             HOST_WIDE_INT d = INTVAL (op1);
4320             unsigned HOST_WIDE_INT ml;
4321             int pre_shift;
4322             rtx t1;
4323
4324             pre_shift = floor_log2 (d & -d);
4325             ml = invert_mod2n (d >> pre_shift, size);
4326             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4327                                build_int_cst (NULL_TREE, pre_shift),
4328                                NULL_RTX, unsignedp);
4329             quotient = expand_mult (compute_mode, t1,
4330                                     gen_int_mode (ml, compute_mode),
4331                                     NULL_RTX, 1);
4332
4333             insn = get_last_insn ();
4334             set_unique_reg_note (insn,
4335                                  REG_EQUAL,
4336                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4337                                                  compute_mode,
4338                                                  op0, op1));
4339           }
4340         break;
4341
4342       case ROUND_DIV_EXPR:
4343       case ROUND_MOD_EXPR:
4344         if (unsignedp)
4345           {
4346             rtx tem;
4347             rtx label;
4348             label = gen_label_rtx ();
4349             quotient = gen_reg_rtx (compute_mode);
4350             remainder = gen_reg_rtx (compute_mode);
4351             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4352               {
4353                 rtx tem;
4354                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4355                                          quotient, 1, OPTAB_LIB_WIDEN);
4356                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4357                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4358                                           remainder, 1, OPTAB_LIB_WIDEN);
4359               }
4360             tem = plus_constant (op1, -1);
4361             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4362                                 build_int_cst (NULL_TREE, 1),
4363                                 NULL_RTX, 1);
4364             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4365             expand_inc (quotient, const1_rtx);
4366             expand_dec (remainder, op1);
4367             emit_label (label);
4368           }
4369         else
4370           {
4371             rtx abs_rem, abs_op1, tem, mask;
4372             rtx label;
4373             label = gen_label_rtx ();
4374             quotient = gen_reg_rtx (compute_mode);
4375             remainder = gen_reg_rtx (compute_mode);
4376             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4377               {
4378                 rtx tem;
4379                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4380                                          quotient, 0, OPTAB_LIB_WIDEN);
4381                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4382                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4383                                           remainder, 0, OPTAB_LIB_WIDEN);
4384               }
4385             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4386             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4387             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4388                                 build_int_cst (NULL_TREE, 1),
4389                                 NULL_RTX, 1);
4390             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4391             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4392                                 NULL_RTX, 0, OPTAB_WIDEN);
4393             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4394                                  build_int_cst (NULL_TREE, size - 1),
4395                                  NULL_RTX, 0);
4396             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4397                                 NULL_RTX, 0, OPTAB_WIDEN);
4398             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4399                                 NULL_RTX, 0, OPTAB_WIDEN);
4400             expand_inc (quotient, tem);
4401             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4402                                 NULL_RTX, 0, OPTAB_WIDEN);
4403             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4404                                 NULL_RTX, 0, OPTAB_WIDEN);
4405             expand_dec (remainder, tem);
4406             emit_label (label);
4407           }
4408         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4409
4410       default:
4411         gcc_unreachable ();
4412       }
4413
4414   if (quotient == 0)
4415     {
4416       if (target && GET_MODE (target) != compute_mode)
4417         target = 0;
4418
4419       if (rem_flag)
4420         {
4421           /* Try to produce the remainder without producing the quotient.
4422              If we seem to have a divmod pattern that does not require widening,
4423              don't try widening here.  We should really have a WIDEN argument
4424              to expand_twoval_binop, since what we'd really like to do here is
4425              1) try a mod insn in compute_mode
4426              2) try a divmod insn in compute_mode
4427              3) try a div insn in compute_mode and multiply-subtract to get
4428                 remainder
4429              4) try the same things with widening allowed.  */
4430           remainder
4431             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4432                                  op0, op1, target,
4433                                  unsignedp,
4434                                  ((optab2->handlers[compute_mode].insn_code
4435                                    != CODE_FOR_nothing)
4436                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4437           if (remainder == 0)
4438             {
4439               /* No luck there.  Can we do remainder and divide at once
4440                  without a library call?  */
4441               remainder = gen_reg_rtx (compute_mode);
4442               if (! expand_twoval_binop ((unsignedp
4443                                           ? udivmod_optab
4444                                           : sdivmod_optab),
4445                                          op0, op1,
4446                                          NULL_RTX, remainder, unsignedp))
4447                 remainder = 0;
4448             }
4449
4450           if (remainder)
4451             return gen_lowpart (mode, remainder);
4452         }
4453
4454       /* Produce the quotient.  Try a quotient insn, but not a library call.
4455          If we have a divmod in this mode, use it in preference to widening
4456          the div (for this test we assume it will not fail). Note that optab2
4457          is set to the one of the two optabs that the call below will use.  */
4458       quotient
4459         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4460                              op0, op1, rem_flag ? NULL_RTX : target,
4461                              unsignedp,
4462                              ((optab2->handlers[compute_mode].insn_code
4463                                != CODE_FOR_nothing)
4464                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4465
4466       if (quotient == 0)
4467         {
4468           /* No luck there.  Try a quotient-and-remainder insn,
4469              keeping the quotient alone.  */
4470           quotient = gen_reg_rtx (compute_mode);
4471           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4472                                      op0, op1,
4473                                      quotient, NULL_RTX, unsignedp))
4474             {
4475               quotient = 0;
4476               if (! rem_flag)
4477                 /* Still no luck.  If we are not computing the remainder,
4478                    use a library call for the quotient.  */
4479                 quotient = sign_expand_binop (compute_mode,
4480                                               udiv_optab, sdiv_optab,
4481                                               op0, op1, target,
4482                                               unsignedp, OPTAB_LIB_WIDEN);
4483             }
4484         }
4485     }
4486
4487   if (rem_flag)
4488     {
4489       if (target && GET_MODE (target) != compute_mode)
4490         target = 0;
4491
4492       if (quotient == 0)
4493         {
4494           /* No divide instruction either.  Use library for remainder.  */
4495           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4496                                          op0, op1, target,
4497                                          unsignedp, OPTAB_LIB_WIDEN);
4498           /* No remainder function.  Try a quotient-and-remainder
4499              function, keeping the remainder.  */
4500           if (!remainder)
4501             {
4502               remainder = gen_reg_rtx (compute_mode);
4503               if (!expand_twoval_binop_libfunc
4504                   (unsignedp ? udivmod_optab : sdivmod_optab,
4505                    op0, op1,
4506                    NULL_RTX, remainder,
4507                    unsignedp ? UMOD : MOD))
4508                 remainder = NULL_RTX;
4509             }
4510         }
4511       else
4512         {
4513           /* We divided.  Now finish doing X - Y * (X / Y).  */
4514           remainder = expand_mult (compute_mode, quotient, op1,
4515                                    NULL_RTX, unsignedp);
4516           remainder = expand_binop (compute_mode, sub_optab, op0,
4517                                     remainder, target, unsignedp,
4518                                     OPTAB_LIB_WIDEN);
4519         }
4520     }
4521
4522   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4523 }
4524 \f
4525 /* Return a tree node with data type TYPE, describing the value of X.
4526    Usually this is an VAR_DECL, if there is no obvious better choice.
4527    X may be an expression, however we only support those expressions
4528    generated by loop.c.  */
4529
4530 tree
4531 make_tree (tree type, rtx x)
4532 {
4533   tree t;
4534
4535   switch (GET_CODE (x))
4536     {
4537     case CONST_INT:
4538       {
4539         HOST_WIDE_INT hi = 0;
4540
4541         if (INTVAL (x) < 0
4542             && !(TYPE_UNSIGNED (type)
4543                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4544                      < HOST_BITS_PER_WIDE_INT)))
4545           hi = -1;
4546
4547         t = build_int_cst_wide (type, INTVAL (x), hi);
4548
4549         return t;
4550       }
4551
4552     case CONST_DOUBLE:
4553       if (GET_MODE (x) == VOIDmode)
4554         t = build_int_cst_wide (type,
4555                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4556       else
4557         {
4558           REAL_VALUE_TYPE d;
4559
4560           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4561           t = build_real (type, d);
4562         }
4563
4564       return t;
4565
4566     case CONST_VECTOR:
4567       {
4568         int i, units;
4569         rtx elt;
4570         tree t = NULL_TREE;
4571
4572         units = CONST_VECTOR_NUNITS (x);
4573
4574         /* Build a tree with vector elements.  */
4575         for (i = units - 1; i >= 0; --i)
4576           {
4577             elt = CONST_VECTOR_ELT (x, i);
4578             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4579           }
4580
4581         return build_vector (type, t);
4582       }
4583
4584     case PLUS:
4585       return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4586                            make_tree (type, XEXP (x, 1))));
4587
4588     case MINUS:
4589       return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4590                            make_tree (type, XEXP (x, 1))));
4591
4592     case NEG:
4593       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4594
4595     case MULT:
4596       return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4597                            make_tree (type, XEXP (x, 1))));
4598
4599     case ASHIFT:
4600       return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4601                            make_tree (type, XEXP (x, 1))));
4602
4603     case LSHIFTRT:
4604       t = lang_hooks.types.unsigned_type (type);
4605       return fold (convert (type,
4606                             build2 (RSHIFT_EXPR, t,
4607                                     make_tree (t, XEXP (x, 0)),
4608                                     make_tree (type, XEXP (x, 1)))));
4609
4610     case ASHIFTRT:
4611       t = lang_hooks.types.signed_type (type);
4612       return fold (convert (type,
4613                             build2 (RSHIFT_EXPR, t,
4614                                     make_tree (t, XEXP (x, 0)),
4615                                     make_tree (type, XEXP (x, 1)))));
4616
4617     case DIV:
4618       if (TREE_CODE (type) != REAL_TYPE)
4619         t = lang_hooks.types.signed_type (type);
4620       else
4621         t = type;
4622
4623       return fold (convert (type,
4624                             build2 (TRUNC_DIV_EXPR, t,
4625                                     make_tree (t, XEXP (x, 0)),
4626                                     make_tree (t, XEXP (x, 1)))));
4627     case UDIV:
4628       t = lang_hooks.types.unsigned_type (type);
4629       return fold (convert (type,
4630                             build2 (TRUNC_DIV_EXPR, t,
4631                                     make_tree (t, XEXP (x, 0)),
4632                                     make_tree (t, XEXP (x, 1)))));
4633
4634     case SIGN_EXTEND:
4635     case ZERO_EXTEND:
4636       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4637                                           GET_CODE (x) == ZERO_EXTEND);
4638       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4639
4640     default:
4641       t = build_decl (VAR_DECL, NULL_TREE, type);
4642
4643       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4644          ptr_mode.  So convert.  */
4645       if (POINTER_TYPE_P (type))
4646         x = convert_memory_address (TYPE_MODE (type), x);
4647
4648       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4649          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4650       t->decl.rtl = x;
4651
4652       return t;
4653     }
4654 }
4655
4656 /* Check whether the multiplication X * MULT + ADD overflows.
4657    X, MULT and ADD must be CONST_*.
4658    MODE is the machine mode for the computation.
4659    X and MULT must have mode MODE.  ADD may have a different mode.
4660    So can X (defaults to same as MODE).
4661    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4662
4663 bool
4664 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4665                            enum machine_mode mode, int unsignedp)
4666 {
4667   tree type, mult_type, add_type, result;
4668
4669   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4670
4671   /* In order to get a proper overflow indication from an unsigned
4672      type, we have to pretend that it's a sizetype.  */
4673   mult_type = type;
4674   if (unsignedp)
4675     {
4676       /* FIXME:It would be nice if we could step directly from this
4677          type to its sizetype equivalent.  */
4678       mult_type = build_distinct_type_copy (type);
4679       TYPE_IS_SIZETYPE (mult_type) = 1;
4680     }
4681
4682   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4683               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4684
4685   result = fold (build2 (PLUS_EXPR, mult_type,
4686                          fold (build2 (MULT_EXPR, mult_type,
4687                                        make_tree (mult_type, x),
4688                                        make_tree (mult_type, mult))),
4689                          make_tree (add_type, add)));
4690
4691   return TREE_CONSTANT_OVERFLOW (result);
4692 }
4693
4694 /* Return an rtx representing the value of X * MULT + ADD.
4695    TARGET is a suggestion for where to store the result (an rtx).
4696    MODE is the machine mode for the computation.
4697    X and MULT must have mode MODE.  ADD may have a different mode.
4698    So can X (defaults to same as MODE).
4699    UNSIGNEDP is nonzero to do unsigned multiplication.
4700    This may emit insns.  */
4701
4702 rtx
4703 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4704                  int unsignedp)
4705 {
4706   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4707   tree add_type = (GET_MODE (add) == VOIDmode
4708                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4709                                                            unsignedp));
4710   tree result =  fold (build2 (PLUS_EXPR, type,
4711                                fold (build2 (MULT_EXPR, type,
4712                                              make_tree (type, x),
4713                                              make_tree (type, mult))),
4714                                make_tree (add_type, add)));
4715
4716   return expand_expr (result, target, VOIDmode, 0);
4717 }
4718 \f
4719 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4720    and returning TARGET.
4721
4722    If TARGET is 0, a pseudo-register or constant is returned.  */
4723
4724 rtx
4725 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4726 {
4727   rtx tem = 0;
4728
4729   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4730     tem = simplify_binary_operation (AND, mode, op0, op1);
4731   if (tem == 0)
4732     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4733
4734   if (target == 0)
4735     target = tem;
4736   else if (tem != target)
4737     emit_move_insn (target, tem);
4738   return target;
4739 }
4740 \f
4741 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4742    and storing in TARGET.  Normally return TARGET.
4743    Return 0 if that cannot be done.
4744
4745    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4746    it is VOIDmode, they cannot both be CONST_INT.
4747
4748    UNSIGNEDP is for the case where we have to widen the operands
4749    to perform the operation.  It says to use zero-extension.
4750
4751    NORMALIZEP is 1 if we should convert the result to be either zero
4752    or one.  Normalize is -1 if we should convert the result to be
4753    either zero or -1.  If NORMALIZEP is zero, the result will be left
4754    "raw" out of the scc insn.  */
4755
4756 rtx
4757 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4758                  enum machine_mode mode, int unsignedp, int normalizep)
4759 {
4760   rtx subtarget;
4761   enum insn_code icode;
4762   enum machine_mode compare_mode;
4763   enum machine_mode target_mode = GET_MODE (target);
4764   rtx tem;
4765   rtx last = get_last_insn ();
4766   rtx pattern, comparison;
4767
4768   if (unsignedp)
4769     code = unsigned_condition (code);
4770
4771   /* If one operand is constant, make it the second one.  Only do this
4772      if the other operand is not constant as well.  */
4773
4774   if (swap_commutative_operands_p (op0, op1))
4775     {
4776       tem = op0;
4777       op0 = op1;
4778       op1 = tem;
4779       code = swap_condition (code);
4780     }
4781
4782   if (mode == VOIDmode)
4783     mode = GET_MODE (op0);
4784
4785   /* For some comparisons with 1 and -1, we can convert this to
4786      comparisons with zero.  This will often produce more opportunities for
4787      store-flag insns.  */
4788
4789   switch (code)
4790     {
4791     case LT:
4792       if (op1 == const1_rtx)
4793         op1 = const0_rtx, code = LE;
4794       break;
4795     case LE:
4796       if (op1 == constm1_rtx)
4797         op1 = const0_rtx, code = LT;
4798       break;
4799     case GE:
4800       if (op1 == const1_rtx)
4801         op1 = const0_rtx, code = GT;
4802       break;
4803     case GT:
4804       if (op1 == constm1_rtx)
4805         op1 = const0_rtx, code = GE;
4806       break;
4807     case GEU:
4808       if (op1 == const1_rtx)
4809         op1 = const0_rtx, code = NE;
4810       break;
4811     case LTU:
4812       if (op1 == const1_rtx)
4813         op1 = const0_rtx, code = EQ;
4814       break;
4815     default:
4816       break;
4817     }
4818
4819   /* If we are comparing a double-word integer with zero or -1, we can
4820      convert the comparison into one involving a single word.  */
4821   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4822       && GET_MODE_CLASS (mode) == MODE_INT
4823       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4824     {
4825       if ((code == EQ || code == NE)
4826           && (op1 == const0_rtx || op1 == constm1_rtx))
4827         {
4828           rtx op00, op01, op0both;
4829
4830           /* Do a logical OR or AND of the two words and compare the result.  */
4831           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4832           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4833           op0both = expand_binop (word_mode,
4834                                   op1 == const0_rtx ? ior_optab : and_optab,
4835                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4836
4837           if (op0both != 0)
4838             return emit_store_flag (target, code, op0both, op1, word_mode,
4839                                     unsignedp, normalizep);
4840         }
4841       else if ((code == LT || code == GE) && op1 == const0_rtx)
4842         {
4843           rtx op0h;
4844
4845           /* If testing the sign bit, can just test on high word.  */
4846           op0h = simplify_gen_subreg (word_mode, op0, mode,
4847                                       subreg_highpart_offset (word_mode, mode));
4848           return emit_store_flag (target, code, op0h, op1, word_mode,
4849                                   unsignedp, normalizep);
4850         }
4851     }
4852
4853   /* From now on, we won't change CODE, so set ICODE now.  */
4854   icode = setcc_gen_code[(int) code];
4855
4856   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4857      complement of A (for GE) and shifting the sign bit to the low bit.  */
4858   if (op1 == const0_rtx && (code == LT || code == GE)
4859       && GET_MODE_CLASS (mode) == MODE_INT
4860       && (normalizep || STORE_FLAG_VALUE == 1
4861           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4862               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4863                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4864     {
4865       subtarget = target;
4866
4867       /* If the result is to be wider than OP0, it is best to convert it
4868          first.  If it is to be narrower, it is *incorrect* to convert it
4869          first.  */
4870       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4871         {
4872           op0 = convert_modes (target_mode, mode, op0, 0);
4873           mode = target_mode;
4874         }
4875
4876       if (target_mode != mode)
4877         subtarget = 0;
4878
4879       if (code == GE)
4880         op0 = expand_unop (mode, one_cmpl_optab, op0,
4881                            ((STORE_FLAG_VALUE == 1 || normalizep)
4882                             ? 0 : subtarget), 0);
4883
4884       if (STORE_FLAG_VALUE == 1 || normalizep)
4885         /* If we are supposed to produce a 0/1 value, we want to do
4886            a logical shift from the sign bit to the low-order bit; for
4887            a -1/0 value, we do an arithmetic shift.  */
4888         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4889                             size_int (GET_MODE_BITSIZE (mode) - 1),
4890                             subtarget, normalizep != -1);
4891
4892       if (mode != target_mode)
4893         op0 = convert_modes (target_mode, mode, op0, 0);
4894
4895       return op0;
4896     }
4897
4898   if (icode != CODE_FOR_nothing)
4899     {
4900       insn_operand_predicate_fn pred;
4901
4902       /* We think we may be able to do this with a scc insn.  Emit the
4903          comparison and then the scc insn.  */
4904
4905       do_pending_stack_adjust ();
4906       last = get_last_insn ();
4907
4908       comparison
4909         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4910       if (CONSTANT_P (comparison))
4911         {
4912           switch (GET_CODE (comparison))
4913             {
4914             case CONST_INT:
4915               if (comparison == const0_rtx)
4916                 return const0_rtx;
4917               break;
4918
4919 #ifdef FLOAT_STORE_FLAG_VALUE
4920             case CONST_DOUBLE:
4921               if (comparison == CONST0_RTX (GET_MODE (comparison)))
4922                 return const0_rtx;
4923               break;
4924 #endif
4925             default:
4926               gcc_unreachable ();
4927             }
4928
4929           if (normalizep == 1)
4930             return const1_rtx;
4931           if (normalizep == -1)
4932             return constm1_rtx;
4933           return const_true_rtx;
4934         }
4935
4936       /* The code of COMPARISON may not match CODE if compare_from_rtx
4937          decided to swap its operands and reverse the original code.
4938
4939          We know that compare_from_rtx returns either a CONST_INT or
4940          a new comparison code, so it is safe to just extract the
4941          code from COMPARISON.  */
4942       code = GET_CODE (comparison);
4943
4944       /* Get a reference to the target in the proper mode for this insn.  */
4945       compare_mode = insn_data[(int) icode].operand[0].mode;
4946       subtarget = target;
4947       pred = insn_data[(int) icode].operand[0].predicate;
4948       if (optimize || ! (*pred) (subtarget, compare_mode))
4949         subtarget = gen_reg_rtx (compare_mode);
4950
4951       pattern = GEN_FCN (icode) (subtarget);
4952       if (pattern)
4953         {
4954           emit_insn (pattern);
4955
4956           /* If we are converting to a wider mode, first convert to
4957              TARGET_MODE, then normalize.  This produces better combining
4958              opportunities on machines that have a SIGN_EXTRACT when we are
4959              testing a single bit.  This mostly benefits the 68k.
4960
4961              If STORE_FLAG_VALUE does not have the sign bit set when
4962              interpreted in COMPARE_MODE, we can do this conversion as
4963              unsigned, which is usually more efficient.  */
4964           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4965             {
4966               convert_move (target, subtarget,
4967                             (GET_MODE_BITSIZE (compare_mode)
4968                              <= HOST_BITS_PER_WIDE_INT)
4969                             && 0 == (STORE_FLAG_VALUE
4970                                      & ((HOST_WIDE_INT) 1
4971                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4972               op0 = target;
4973               compare_mode = target_mode;
4974             }
4975           else
4976             op0 = subtarget;
4977
4978           /* If we want to keep subexpressions around, don't reuse our
4979              last target.  */
4980
4981           if (optimize)
4982             subtarget = 0;
4983
4984           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4985              we don't have to do anything.  */
4986           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4987             ;
4988           /* STORE_FLAG_VALUE might be the most negative number, so write
4989              the comparison this way to avoid a compiler-time warning.  */
4990           else if (- normalizep == STORE_FLAG_VALUE)
4991             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4992
4993           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4994              makes it hard to use a value of just the sign bit due to
4995              ANSI integer constant typing rules.  */
4996           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4997                    && (STORE_FLAG_VALUE
4998                        & ((HOST_WIDE_INT) 1
4999                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5000             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5001                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5002                                 subtarget, normalizep == 1);
5003           else
5004             {
5005               gcc_assert (STORE_FLAG_VALUE & 1);
5006
5007               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5008               if (normalizep == -1)
5009                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5010             }
5011
5012           /* If we were converting to a smaller mode, do the
5013              conversion now.  */
5014           if (target_mode != compare_mode)
5015             {
5016               convert_move (target, op0, 0);
5017               return target;
5018             }
5019           else
5020             return op0;
5021         }
5022     }
5023
5024   delete_insns_since (last);
5025
5026   /* If optimizing, use different pseudo registers for each insn, instead
5027      of reusing the same pseudo.  This leads to better CSE, but slows
5028      down the compiler, since there are more pseudos */
5029   subtarget = (!optimize
5030                && (target_mode == mode)) ? target : NULL_RTX;
5031
5032   /* If we reached here, we can't do this with a scc insn.  However, there
5033      are some comparisons that can be done directly.  For example, if
5034      this is an equality comparison of integers, we can try to exclusive-or
5035      (or subtract) the two operands and use a recursive call to try the
5036      comparison with zero.  Don't do any of these cases if branches are
5037      very cheap.  */
5038
5039   if (BRANCH_COST > 0
5040       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5041       && op1 != const0_rtx)
5042     {
5043       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5044                           OPTAB_WIDEN);
5045
5046       if (tem == 0)
5047         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5048                             OPTAB_WIDEN);
5049       if (tem != 0)
5050         tem = emit_store_flag (target, code, tem, const0_rtx,
5051                                mode, unsignedp, normalizep);
5052       if (tem == 0)
5053         delete_insns_since (last);
5054       return tem;
5055     }
5056
5057   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5058      the constant zero.  Reject all other comparisons at this point.  Only
5059      do LE and GT if branches are expensive since they are expensive on
5060      2-operand machines.  */
5061
5062   if (BRANCH_COST == 0
5063       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5064       || (code != EQ && code != NE
5065           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5066     return 0;
5067
5068   /* See what we need to return.  We can only return a 1, -1, or the
5069      sign bit.  */
5070
5071   if (normalizep == 0)
5072     {
5073       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5074         normalizep = STORE_FLAG_VALUE;
5075
5076       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5077                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5078                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5079         ;
5080       else
5081         return 0;
5082     }
5083
5084   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5085      do the necessary operation below.  */
5086
5087   tem = 0;
5088
5089   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5090      the sign bit set.  */
5091
5092   if (code == LE)
5093     {
5094       /* This is destructive, so SUBTARGET can't be OP0.  */
5095       if (rtx_equal_p (subtarget, op0))
5096         subtarget = 0;
5097
5098       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5099                           OPTAB_WIDEN);
5100       if (tem)
5101         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5102                             OPTAB_WIDEN);
5103     }
5104
5105   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5106      number of bits in the mode of OP0, minus one.  */
5107
5108   if (code == GT)
5109     {
5110       if (rtx_equal_p (subtarget, op0))
5111         subtarget = 0;
5112
5113       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5114                           size_int (GET_MODE_BITSIZE (mode) - 1),
5115                           subtarget, 0);
5116       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5117                           OPTAB_WIDEN);
5118     }
5119
5120   if (code == EQ || code == NE)
5121     {
5122       /* For EQ or NE, one way to do the comparison is to apply an operation
5123          that converts the operand into a positive number if it is nonzero
5124          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5125          for NE we negate.  This puts the result in the sign bit.  Then we
5126          normalize with a shift, if needed.
5127
5128          Two operations that can do the above actions are ABS and FFS, so try
5129          them.  If that doesn't work, and MODE is smaller than a full word,
5130          we can use zero-extension to the wider mode (an unsigned conversion)
5131          as the operation.  */
5132
5133       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5134          that is compensated by the subsequent overflow when subtracting
5135          one / negating.  */
5136
5137       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5138         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5139       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5140         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5141       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5142         {
5143           tem = convert_modes (word_mode, mode, op0, 1);
5144           mode = word_mode;
5145         }
5146
5147       if (tem != 0)
5148         {
5149           if (code == EQ)
5150             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5151                                 0, OPTAB_WIDEN);
5152           else
5153             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5154         }
5155
5156       /* If we couldn't do it that way, for NE we can "or" the two's complement
5157          of the value with itself.  For EQ, we take the one's complement of
5158          that "or", which is an extra insn, so we only handle EQ if branches
5159          are expensive.  */
5160
5161       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5162         {
5163           if (rtx_equal_p (subtarget, op0))
5164             subtarget = 0;
5165
5166           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5167           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5168                               OPTAB_WIDEN);
5169
5170           if (tem && code == EQ)
5171             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5172         }
5173     }
5174
5175   if (tem && normalizep)
5176     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5177                         size_int (GET_MODE_BITSIZE (mode) - 1),
5178                         subtarget, normalizep == 1);
5179
5180   if (tem)
5181     {
5182       if (GET_MODE (tem) != target_mode)
5183         {
5184           convert_move (target, tem, 0);
5185           tem = target;
5186         }
5187       else if (!subtarget)
5188         {
5189           emit_move_insn (target, tem);
5190           tem = target;
5191         }
5192     }
5193   else
5194     delete_insns_since (last);
5195
5196   return tem;
5197 }
5198
5199 /* Like emit_store_flag, but always succeeds.  */
5200
5201 rtx
5202 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5203                        enum machine_mode mode, int unsignedp, int normalizep)
5204 {
5205   rtx tem, label;
5206
5207   /* First see if emit_store_flag can do the job.  */
5208   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5209   if (tem != 0)
5210     return tem;
5211
5212   if (normalizep == 0)
5213     normalizep = 1;
5214
5215   /* If this failed, we have to do this with set/compare/jump/set code.  */
5216
5217   if (!REG_P (target)
5218       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5219     target = gen_reg_rtx (GET_MODE (target));
5220
5221   emit_move_insn (target, const1_rtx);
5222   label = gen_label_rtx ();
5223   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5224                            NULL_RTX, label);
5225
5226   emit_move_insn (target, const0_rtx);
5227   emit_label (label);
5228
5229   return target;
5230 }
5231 \f
5232 /* Perform possibly multi-word comparison and conditional jump to LABEL
5233    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5234
5235    The algorithm is based on the code in expr.c:do_jump.
5236
5237    Note that this does not perform a general comparison.  Only variants
5238    generated within expmed.c are correctly handled, others abort (but could
5239    be handled if needed).  */
5240
5241 static void
5242 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5243                  rtx label)
5244 {
5245   /* If this mode is an integer too wide to compare properly,
5246      compare word by word.  Rely on cse to optimize constant cases.  */
5247
5248   if (GET_MODE_CLASS (mode) == MODE_INT
5249       && ! can_compare_p (op, mode, ccp_jump))
5250     {
5251       rtx label2 = gen_label_rtx ();
5252
5253       switch (op)
5254         {
5255         case LTU:
5256           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5257           break;
5258
5259         case LEU:
5260           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5261           break;
5262
5263         case LT:
5264           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5265           break;
5266
5267         case GT:
5268           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5269           break;
5270
5271         case GE:
5272           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5273           break;
5274
5275           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5276              that's the only equality operations we do */
5277         case EQ:
5278           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5279           do_jump_by_parts_equality_rtx (arg1, label2, label);
5280           break;
5281
5282         case NE:
5283           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5284           do_jump_by_parts_equality_rtx (arg1, label, label2);
5285           break;
5286
5287         default:
5288           gcc_unreachable ();
5289         }
5290
5291       emit_label (label2);
5292     }
5293   else
5294     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5295 }