gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
  57 /* Test whether a value is zero of a power of two.  */
  58 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  59
  60 /* Nonzero means divides or modulus operations are relatively cheap for
  61    powers of two, so don't use branches; emit the operation instead.
  62    Usually, this will mean that the MD file will emit non-branch
  63    sequences.  */
  64
  65 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  66 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  67
  68 #ifndef SLOW_UNALIGNED_ACCESS
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72 /* For compilers that support multiple targets with different word sizes,
  73    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  74    is the H8/300(H) compiler.  */
  75
  76 #ifndef MAX_BITS_PER_WORD
  77 #define MAX_BITS_PER_WORD BITS_PER_WORD
  78 #endif
  79
  80 /* Reduce conditional compilation elsewhere.  */
  81 #ifndef HAVE_insv
  82 #define HAVE_insv       0
  83 #define CODE_FOR_insv   CODE_FOR_nothing
  84 #define gen_insv(a,b,c,d) NULL_RTX
  85 #endif
  86 #ifndef HAVE_extv
  87 #define HAVE_extv       0
  88 #define CODE_FOR_extv   CODE_FOR_nothing
  89 #define gen_extv(a,b,c,d) NULL_RTX
  90 #endif
  91 #ifndef HAVE_extzv
  92 #define HAVE_extzv      0
  93 #define CODE_FOR_extzv  CODE_FOR_nothing
  94 #define gen_extzv(a,b,c,d) NULL_RTX
  95 #endif
  96
  97 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  98    shift count and some by mode.  */
  99 static int zero_cost;
 100 static int add_cost[NUM_MACHINE_MODES];
 101 static int neg_cost[NUM_MACHINE_MODES];
 102 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 103 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int mul_cost[NUM_MACHINE_MODES];
 106 static int div_cost[NUM_MACHINE_MODES];
 107 static int mul_widen_cost[NUM_MACHINE_MODES];
 108 static int mul_highpart_cost[NUM_MACHINE_MODES];
 109
 110 void
 111 init_expmed (void)
 112 {
 113   struct
 114   {
 115     struct rtx_def reg;         rtunion reg_fld[2];
 116     struct rtx_def plus;        rtunion plus_fld1;
 117     struct rtx_def neg;
 118     struct rtx_def udiv;        rtunion udiv_fld1;
 119     struct rtx_def mult;        rtunion mult_fld1;
 120     struct rtx_def div;         rtunion div_fld1;
 121     struct rtx_def mod;         rtunion mod_fld1;
 122     struct rtx_def zext;
 123     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 124     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 125     struct rtx_def wide_trunc;
 126     struct rtx_def shift;       rtunion shift_fld1;
 127     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 128     struct rtx_def shift_add;   rtunion shift_add_fld1;
 129     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 130   } all;
 131
 132   rtx pow2[MAX_BITS_PER_WORD];
 133   rtx cint[MAX_BITS_PER_WORD];
 134   int m, n;
 135   enum machine_mode mode, wider_mode;
 136
 137   zero_cost = rtx_cost (const0_rtx, 0);
 138
 139   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 140     {
 141       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 142       cint[m] = GEN_INT (m);
 143     }
 144
 145   memset (&all, 0, sizeof all);
 146
 147   PUT_CODE (&all.reg, REG);
 148   /* Avoid using hard regs in ways which may be unsupported.  */
 149   REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
 150
 151   PUT_CODE (&all.plus, PLUS);
 152   XEXP (&all.plus, 0) = &all.reg;
 153   XEXP (&all.plus, 1) = &all.reg;
 154
 155   PUT_CODE (&all.neg, NEG);
 156   XEXP (&all.neg, 0) = &all.reg;
 157
 158   PUT_CODE (&all.udiv, UDIV);
 159   XEXP (&all.udiv, 0) = &all.reg;
 160   XEXP (&all.udiv, 1) = &all.reg;
 161
 162   PUT_CODE (&all.mult, MULT);
 163   XEXP (&all.mult, 0) = &all.reg;
 164   XEXP (&all.mult, 1) = &all.reg;
 165
 166   PUT_CODE (&all.div, DIV);
 167   XEXP (&all.div, 0) = &all.reg;
 168   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 169
 170   PUT_CODE (&all.mod, MOD);
 171   XEXP (&all.mod, 0) = &all.reg;
 172   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 173
 174   PUT_CODE (&all.zext, ZERO_EXTEND);
 175   XEXP (&all.zext, 0) = &all.reg;
 176
 177   PUT_CODE (&all.wide_mult, MULT);
 178   XEXP (&all.wide_mult, 0) = &all.zext;
 179   XEXP (&all.wide_mult, 1) = &all.zext;
 180
 181   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 182   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 183
 184   PUT_CODE (&all.wide_trunc, TRUNCATE);
 185   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 186
 187   PUT_CODE (&all.shift, ASHIFT);
 188   XEXP (&all.shift, 0) = &all.reg;
 189
 190   PUT_CODE (&all.shift_mult, MULT);
 191   XEXP (&all.shift_mult, 0) = &all.reg;
 192
 193   PUT_CODE (&all.shift_add, PLUS);
 194   XEXP (&all.shift_add, 0) = &all.shift_mult;
 195   XEXP (&all.shift_add, 1) = &all.reg;
 196
 197   PUT_CODE (&all.shift_sub, MINUS);
 198   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 199   XEXP (&all.shift_sub, 1) = &all.reg;
 200
 201   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 202        mode != VOIDmode;
 203        mode = GET_MODE_WIDER_MODE (mode))
 204     {
 205       PUT_MODE (&all.reg, mode);
 206       PUT_MODE (&all.plus, mode);
 207       PUT_MODE (&all.neg, mode);
 208       PUT_MODE (&all.udiv, mode);
 209       PUT_MODE (&all.mult, mode);
 210       PUT_MODE (&all.div, mode);
 211       PUT_MODE (&all.mod, mode);
 212       PUT_MODE (&all.wide_trunc, mode);
 213       PUT_MODE (&all.shift, mode);
 214       PUT_MODE (&all.shift_mult, mode);
 215       PUT_MODE (&all.shift_add, mode);
 216       PUT_MODE (&all.shift_sub, mode);
 217
 218       add_cost[mode] = rtx_cost (&all.plus, SET);
 219       neg_cost[mode] = rtx_cost (&all.neg, SET);
 220       div_cost[mode] = rtx_cost (&all.udiv, SET);
 221       mul_cost[mode] = rtx_cost (&all.mult, SET);
 222
 223       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 224       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 225
 226       wider_mode = GET_MODE_WIDER_MODE (mode);
 227       if (wider_mode != VOIDmode)
 228         {
 229           PUT_MODE (&all.zext, wider_mode);
 230           PUT_MODE (&all.wide_mult, wider_mode);
 231           PUT_MODE (&all.wide_lshr, wider_mode);
 232           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 233
 234           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 235           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 236         }
 237
 238       shift_cost[mode][0] = 0;
 239       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 240
 241       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 242       for (m = 1; m < n; m++)
 243         {
 244           XEXP (&all.shift, 1) = cint[m];
 245           XEXP (&all.shift_mult, 1) = pow2[m];
 246
 247           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 248           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 249           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 250         }
 251     }
 252 }
 253
 254 /* Return an rtx representing minus the value of X.
 255    MODE is the intended mode of the result,
 256    useful if X is a CONST_INT.  */
 257
 258 rtx
 259 negate_rtx (enum machine_mode mode, rtx x)
 260 {
 261   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 262
 263   if (result == 0)
 264     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 265
 266   return result;
 267 }
 268
 269 /* Report on the availability of insv/extv/extzv and the desired mode
 270    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 271    is false; else the mode of the specified operand.  If OPNO is -1,
 272    all the caller cares about is whether the insn is available.  */
 273 enum machine_mode
 274 mode_for_extraction (enum extraction_pattern pattern, int opno)
 275 {
 276   const struct insn_data *data;
 277
 278   switch (pattern)
 279     {
 280     case EP_insv:
 281       if (HAVE_insv)
 282         {
 283           data = &insn_data[CODE_FOR_insv];
 284           break;
 285         }
 286       return MAX_MACHINE_MODE;
 287
 288     case EP_extv:
 289       if (HAVE_extv)
 290         {
 291           data = &insn_data[CODE_FOR_extv];
 292           break;
 293         }
 294       return MAX_MACHINE_MODE;
 295
 296     case EP_extzv:
 297       if (HAVE_extzv)
 298         {
 299           data = &insn_data[CODE_FOR_extzv];
 300           break;
 301         }
 302       return MAX_MACHINE_MODE;
 303
 304     default:
 305       gcc_unreachable ();
 306     }
 307
 308   if (opno == -1)
 309     return VOIDmode;
 310
 311   /* Everyone who uses this function used to follow it with
 312      if (result == VOIDmode) result = word_mode; */
 313   if (data->operand[opno].mode == VOIDmode)
 314     return word_mode;
 315   return data->operand[opno].mode;
 316 }
 317
 318 \f
 319 /* Generate code to store value from rtx VALUE
 320    into a bit-field within structure STR_RTX
 321    containing BITSIZE bits starting at bit BITNUM.
 322    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 323    ALIGN is the alignment that STR_RTX is known to have.
 324    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 325
 326 /* ??? Note that there are two different ideas here for how
 327    to determine the size to count bits within, for a register.
 328    One is BITS_PER_WORD, and the other is the size of operand 3
 329    of the insv pattern.
 330
 331    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 332    else, we use the mode of operand 3.  */
 333
 334 rtx
 335 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 336                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 337                  rtx value)
 338 {
 339   unsigned int unit
 340     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 341   unsigned HOST_WIDE_INT offset, bitpos;
 342   rtx op0 = str_rtx;
 343   int byte_offset;
 344   rtx orig_value;
 345
 346   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 347
 348   while (GET_CODE (op0) == SUBREG)
 349     {
 350       /* The following line once was done only if WORDS_BIG_ENDIAN,
 351          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 352          meaningful at a much higher level; when structures are copied
 353          between memory and regs, the higher-numbered regs
 354          always get higher addresses.  */
 355       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
 356       op0 = SUBREG_REG (op0);
 357     }
 358
 359   /* No action is needed if the target is a register and if the field
 360      lies completely outside that register.  This can occur if the source
 361      code contains an out-of-bounds access to a small array.  */
 362   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 363     return value;
 364
 365   /* Use vec_set patterns for inserting parts of vectors whenever
 366      available.  */
 367   if (VECTOR_MODE_P (GET_MODE (op0))
 368       && !MEM_P (op0)
 369       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 370           != CODE_FOR_nothing)
 371       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 372       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 373       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 374     {
 375       enum machine_mode outermode = GET_MODE (op0);
 376       enum machine_mode innermode = GET_MODE_INNER (outermode);
 377       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 378       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 379       rtx rtxpos = GEN_INT (pos);
 380       rtx src = value;
 381       rtx dest = op0;
 382       rtx pat, seq;
 383       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 384       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 385       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 386
 387       start_sequence ();
 388
 389       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 390         src = copy_to_mode_reg (mode1, src);
 391
 392       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 393         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 394
 395       /* We could handle this, but we should always be called with a pseudo
 396          for our targets and all insns should take them as outputs.  */
 397       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 398                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 399                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 400       pat = GEN_FCN (icode) (dest, src, rtxpos);
 401       seq = get_insns ();
 402       end_sequence ();
 403       if (pat)
 404         {
 405           emit_insn (seq);
 406           emit_insn (pat);
 407           return dest;
 408         }
 409     }
 410
 411   if (flag_force_mem)
 412     {
 413       int old_generating_concat_p = generating_concat_p;
 414       generating_concat_p = 0;
 415       value = force_not_mem (value);
 416       generating_concat_p = old_generating_concat_p;
 417     }
 418
 419   /* If the target is a register, overwriting the entire object, or storing
 420      a full-word or multi-word field can be done with just a SUBREG.
 421
 422      If the target is memory, storing any naturally aligned field can be
 423      done with a simple store.  For targets that support fast unaligned
 424      memory, any naturally sized, unit aligned field can be done directly.  */
 425
 426   offset = bitnum / unit;
 427   bitpos = bitnum % unit;
 428   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 429                 + (offset * UNITS_PER_WORD);
 430
 431   if (bitpos == 0
 432       && bitsize == GET_MODE_BITSIZE (fieldmode)
 433       && (!MEM_P (op0)
 434           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 435              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 436              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 437           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 438              || (offset * BITS_PER_UNIT % bitsize == 0
 439                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 440     {
 441       if (GET_MODE (op0) != fieldmode)
 442         {
 443           if (MEM_P (op0))
 444             op0 = adjust_address (op0, fieldmode, offset);
 445           else
 446             op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 447                                        byte_offset);
 448         }
 449       emit_move_insn (op0, value);
 450       return value;
 451     }
 452
 453   /* Make sure we are playing with integral modes.  Pun with subregs
 454      if we aren't.  This must come after the entire register case above,
 455      since that case is valid for any mode.  The following cases are only
 456      valid for integral modes.  */
 457   {
 458     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 459     if (imode != GET_MODE (op0))
 460       {
 461         if (MEM_P (op0))
 462           op0 = adjust_address (op0, imode, 0);
 463         else
 464           {
 465             gcc_assert (imode != BLKmode);
 466             op0 = gen_lowpart (imode, op0);
 467           }
 468       }
 469   }
 470
 471   /* We may be accessing data outside the field, which means
 472      we can alias adjacent data.  */
 473   if (MEM_P (op0))
 474     {
 475       op0 = shallow_copy_rtx (op0);
 476       set_mem_alias_set (op0, 0);
 477       set_mem_expr (op0, 0);
 478     }
 479
 480   /* If OP0 is a register, BITPOS must count within a word.
 481      But as we have it, it counts within whatever size OP0 now has.
 482      On a bigendian machine, these are not the same, so convert.  */
 483   if (BYTES_BIG_ENDIAN
 484       && !MEM_P (op0)
 485       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 486     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 487
 488   /* Storing an lsb-aligned field in a register
 489      can be done with a movestrict instruction.  */
 490
 491   if (!MEM_P (op0)
 492       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 493       && bitsize == GET_MODE_BITSIZE (fieldmode)
 494       && (movstrict_optab->handlers[fieldmode].insn_code
 495           != CODE_FOR_nothing))
 496     {
 497       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 498
 499       /* Get appropriate low part of the value being stored.  */
 500       if (GET_CODE (value) == CONST_INT || REG_P (value))
 501         value = gen_lowpart (fieldmode, value);
 502       else if (!(GET_CODE (value) == SYMBOL_REF
 503                  || GET_CODE (value) == LABEL_REF
 504                  || GET_CODE (value) == CONST))
 505         value = convert_to_mode (fieldmode, value, 0);
 506
 507       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 508         value = copy_to_mode_reg (fieldmode, value);
 509
 510       if (GET_CODE (op0) == SUBREG)
 511         {
 512           /* Else we've got some float mode source being extracted into
 513              a different float mode destination -- this combination of
 514              subregs results in Severe Tire Damage.  */
 515           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 516                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 517                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 518           op0 = SUBREG_REG (op0);
 519         }
 520
 521       emit_insn (GEN_FCN (icode)
 522                  (gen_rtx_SUBREG (fieldmode, op0,
 523                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 524                                   + (offset * UNITS_PER_WORD)),
 525                                   value));
 526
 527       return value;
 528     }
 529
 530   /* Handle fields bigger than a word.  */
 531
 532   if (bitsize > BITS_PER_WORD)
 533     {
 534       /* Here we transfer the words of the field
 535          in the order least significant first.
 536          This is because the most significant word is the one which may
 537          be less than full.
 538          However, only do that if the value is not BLKmode.  */
 539
 540       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 541       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 542       unsigned int i;
 543
 544       /* This is the mode we must force value to, so that there will be enough
 545          subwords to extract.  Note that fieldmode will often (always?) be
 546          VOIDmode, because that is what store_field uses to indicate that this
 547          is a bit field, but passing VOIDmode to operand_subword_force will
 548          result in an abort.  */
 549       fieldmode = GET_MODE (value);
 550       if (fieldmode == VOIDmode)
 551         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 552
 553       for (i = 0; i < nwords; i++)
 554         {
 555           /* If I is 0, use the low-order word in both field and target;
 556              if I is 1, use the next to lowest word; and so on.  */
 557           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 558           unsigned int bit_offset = (backwards
 559                                      ? MAX ((int) bitsize - ((int) i + 1)
 560                                             * BITS_PER_WORD,
 561                                             0)
 562                                      : (int) i * BITS_PER_WORD);
 563
 564           store_bit_field (op0, MIN (BITS_PER_WORD,
 565                                      bitsize - i * BITS_PER_WORD),
 566                            bitnum + bit_offset, word_mode,
 567                            operand_subword_force (value, wordnum, fieldmode));
 568         }
 569       return value;
 570     }
 571
 572   /* From here on we can assume that the field to be stored in is
 573      a full-word (whatever type that is), since it is shorter than a word.  */
 574
 575   /* OFFSET is the number of words or bytes (UNIT says which)
 576      from STR_RTX to the first word or byte containing part of the field.  */
 577
 578   if (!MEM_P (op0))
 579     {
 580       if (offset != 0
 581           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 582         {
 583           if (!REG_P (op0))
 584             {
 585               /* Since this is a destination (lvalue), we can't copy it to a
 586                  pseudo.  We can trivially remove a SUBREG that does not
 587                  change the size of the operand.  Such a SUBREG may have been
 588                  added above.  Otherwise, abort.  */
 589               gcc_assert (GET_CODE (op0) == SUBREG
 590                           && (GET_MODE_SIZE (GET_MODE (op0))
 591                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 592               op0 = SUBREG_REG (op0);
 593             }
 594           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 595                                 op0, (offset * UNITS_PER_WORD));
 596         }
 597       offset = 0;
 598     }
 599
 600   /* If VALUE has a floating-point or complex mode, access it as an
 601      integer of the corresponding size.  This can occur on a machine
 602      with 64 bit registers that uses SFmode for float.  It can also
 603      occur for unaligned float or complex fields.  */
 604   orig_value = value;
 605   if (GET_MODE (value) != VOIDmode
 606       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 607       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 608     {
 609       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 610       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 611     }
 612
 613   /* Now OFFSET is nonzero only if OP0 is memory
 614      and is therefore always measured in bytes.  */
 615
 616   if (HAVE_insv
 617       && GET_MODE (value) != BLKmode
 618       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 619       /* Ensure insv's size is wide enough for this field.  */
 620       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 621       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 622             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 623     {
 624       int xbitpos = bitpos;
 625       rtx value1;
 626       rtx xop0 = op0;
 627       rtx last = get_last_insn ();
 628       rtx pat;
 629       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 630       int save_volatile_ok = volatile_ok;
 631
 632       volatile_ok = 1;
 633
 634       /* If this machine's insv can only insert into a register, copy OP0
 635          into a register and save it back later.  */
 636       /* This used to check flag_force_mem, but that was a serious
 637          de-optimization now that flag_force_mem is enabled by -O2.  */
 638       if (MEM_P (op0)
 639           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 640                 (op0, VOIDmode)))
 641         {
 642           rtx tempreg;
 643           enum machine_mode bestmode;
 644
 645           /* Get the mode to use for inserting into this field.  If OP0 is
 646              BLKmode, get the smallest mode consistent with the alignment. If
 647              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 648              mode. Otherwise, use the smallest mode containing the field.  */
 649
 650           if (GET_MODE (op0) == BLKmode
 651               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 652             bestmode
 653               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 654                                MEM_VOLATILE_P (op0));
 655           else
 656             bestmode = GET_MODE (op0);
 657
 658           if (bestmode == VOIDmode
 659               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 660                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 661             goto insv_loses;
 662
 663           /* Adjust address to point to the containing unit of that mode.
 664              Compute offset as multiple of this unit, counting in bytes.  */
 665           unit = GET_MODE_BITSIZE (bestmode);
 666           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 667           bitpos = bitnum % unit;
 668           op0 = adjust_address (op0, bestmode,  offset);
 669
 670           /* Fetch that unit, store the bitfield in it, then store
 671              the unit.  */
 672           tempreg = copy_to_reg (op0);
 673           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 674           emit_move_insn (op0, tempreg);
 675           return value;
 676         }
 677       volatile_ok = save_volatile_ok;
 678
 679       /* Add OFFSET into OP0's address.  */
 680       if (MEM_P (xop0))
 681         xop0 = adjust_address (xop0, byte_mode, offset);
 682
 683       /* If xop0 is a register, we need it in MAXMODE
 684          to make it acceptable to the format of insv.  */
 685       if (GET_CODE (xop0) == SUBREG)
 686         /* We can't just change the mode, because this might clobber op0,
 687            and we will need the original value of op0 if insv fails.  */
 688         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 689       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 690         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 691
 692       /* On big-endian machines, we count bits from the most significant.
 693          If the bit field insn does not, we must invert.  */
 694
 695       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 696         xbitpos = unit - bitsize - xbitpos;
 697
 698       /* We have been counting XBITPOS within UNIT.
 699          Count instead within the size of the register.  */
 700       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 701         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 702
 703       unit = GET_MODE_BITSIZE (maxmode);
 704
 705       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 706       value1 = value;
 707       if (GET_MODE (value) != maxmode)
 708         {
 709           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 710             {
 711               /* Optimization: Don't bother really extending VALUE
 712                  if it has all the bits we will actually use.  However,
 713                  if we must narrow it, be sure we do it correctly.  */
 714
 715               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 716                 {
 717                   rtx tmp;
 718
 719                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 720                   if (! tmp)
 721                     tmp = simplify_gen_subreg (maxmode,
 722                                                force_reg (GET_MODE (value),
 723                                                           value1),
 724                                                GET_MODE (value), 0);
 725                   value1 = tmp;
 726                 }
 727               else
 728                 value1 = gen_lowpart (maxmode, value1);
 729             }
 730           else if (GET_CODE (value) == CONST_INT)
 731             value1 = gen_int_mode (INTVAL (value), maxmode);
 732           else
 733             /* Parse phase is supposed to make VALUE's data type
 734                match that of the component reference, which is a type
 735                at least as wide as the field; so VALUE should have
 736                a mode that corresponds to that type.  */
 737             gcc_assert (CONSTANT_P (value));
 738         }
 739
 740       /* If this machine's insv insists on a register,
 741          get VALUE1 into a register.  */
 742       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 743              (value1, maxmode)))
 744         value1 = force_reg (maxmode, value1);
 745
 746       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 747       if (pat)
 748         emit_insn (pat);
 749       else
 750         {
 751           delete_insns_since (last);
 752           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 753         }
 754     }
 755   else
 756     insv_loses:
 757     /* Insv is not available; store using shifts and boolean ops.  */
 758     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 759   return value;
 760 }
 761 \f
 762 /* Use shifts and boolean operations to store VALUE
 763    into a bit field of width BITSIZE
 764    in a memory location specified by OP0 except offset by OFFSET bytes.
 765      (OFFSET must be 0 if OP0 is a register.)
 766    The field starts at position BITPOS within the byte.
 767     (If OP0 is a register, it may be a full word or a narrower mode,
 768      but BITPOS still counts within a full word,
 769      which is significant on bigendian machines.)  */
 770
 771 static void
 772 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 773                        unsigned HOST_WIDE_INT bitsize,
 774                        unsigned HOST_WIDE_INT bitpos, rtx value)
 775 {
 776   enum machine_mode mode;
 777   unsigned int total_bits = BITS_PER_WORD;
 778   rtx subtarget, temp;
 779   int all_zero = 0;
 780   int all_one = 0;
 781
 782   /* There is a case not handled here:
 783      a structure with a known alignment of just a halfword
 784      and a field split across two aligned halfwords within the structure.
 785      Or likewise a structure with a known alignment of just a byte
 786      and a field split across two bytes.
 787      Such cases are not supposed to be able to occur.  */
 788
 789   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 790     {
 791       gcc_assert (!offset);
 792       /* Special treatment for a bit field split across two registers.  */
 793       if (bitsize + bitpos > BITS_PER_WORD)
 794         {
 795           store_split_bit_field (op0, bitsize, bitpos, value);
 796           return;
 797         }
 798     }
 799   else
 800     {
 801       /* Get the proper mode to use for this field.  We want a mode that
 802          includes the entire field.  If such a mode would be larger than
 803          a word, we won't be doing the extraction the normal way.
 804          We don't want a mode bigger than the destination.  */
 805
 806       mode = GET_MODE (op0);
 807       if (GET_MODE_BITSIZE (mode) == 0
 808           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 809         mode = word_mode;
 810       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 811                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 812
 813       if (mode == VOIDmode)
 814         {
 815           /* The only way this should occur is if the field spans word
 816              boundaries.  */
 817           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 818                                  value);
 819           return;
 820         }
 821
 822       total_bits = GET_MODE_BITSIZE (mode);
 823
 824       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 825          be in the range 0 to total_bits-1, and put any excess bytes in
 826          OFFSET.  */
 827       if (bitpos >= total_bits)
 828         {
 829           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 830           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 831                      * BITS_PER_UNIT);
 832         }
 833
 834       /* Get ref to an aligned byte, halfword, or word containing the field.
 835          Adjust BITPOS to be position within a word,
 836          and OFFSET to be the offset of that word.
 837          Then alter OP0 to refer to that word.  */
 838       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 839       offset -= (offset % (total_bits / BITS_PER_UNIT));
 840       op0 = adjust_address (op0, mode, offset);
 841     }
 842
 843   mode = GET_MODE (op0);
 844
 845   /* Now MODE is either some integral mode for a MEM as OP0,
 846      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 847      The bit field is contained entirely within OP0.
 848      BITPOS is the starting bit number within OP0.
 849      (OP0's mode may actually be narrower than MODE.)  */
 850
 851   if (BYTES_BIG_ENDIAN)
 852       /* BITPOS is the distance between our msb
 853          and that of the containing datum.
 854          Convert it to the distance from the lsb.  */
 855       bitpos = total_bits - bitsize - bitpos;
 856
 857   /* Now BITPOS is always the distance between our lsb
 858      and that of OP0.  */
 859
 860   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 861      we must first convert its mode to MODE.  */
 862
 863   if (GET_CODE (value) == CONST_INT)
 864     {
 865       HOST_WIDE_INT v = INTVAL (value);
 866
 867       if (bitsize < HOST_BITS_PER_WIDE_INT)
 868         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 869
 870       if (v == 0)
 871         all_zero = 1;
 872       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 873                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 874                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 875         all_one = 1;
 876
 877       value = lshift_value (mode, value, bitpos, bitsize);
 878     }
 879   else
 880     {
 881       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 882                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 883
 884       if (GET_MODE (value) != mode)
 885         {
 886           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 887               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 888             value = gen_lowpart (mode, value);
 889           else
 890             value = convert_to_mode (mode, value, 1);
 891         }
 892
 893       if (must_and)
 894         value = expand_binop (mode, and_optab, value,
 895                               mask_rtx (mode, 0, bitsize, 0),
 896                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 897       if (bitpos > 0)
 898         value = expand_shift (LSHIFT_EXPR, mode, value,
 899                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 900     }
 901
 902   /* Now clear the chosen bits in OP0,
 903      except that if VALUE is -1 we need not bother.  */
 904
 905   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 906
 907   if (! all_one)
 908     {
 909       temp = expand_binop (mode, and_optab, op0,
 910                            mask_rtx (mode, bitpos, bitsize, 1),
 911                            subtarget, 1, OPTAB_LIB_WIDEN);
 912       subtarget = temp;
 913     }
 914   else
 915     temp = op0;
 916
 917   /* Now logical-or VALUE into OP0, unless it is zero.  */
 918
 919   if (! all_zero)
 920     temp = expand_binop (mode, ior_optab, temp, value,
 921                          subtarget, 1, OPTAB_LIB_WIDEN);
 922   if (op0 != temp)
 923     emit_move_insn (op0, temp);
 924 }
 925 \f
 926 /* Store a bit field that is split across multiple accessible memory objects.
 927
 928    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 929    BITSIZE is the field width; BITPOS the position of its first bit
 930    (within the word).
 931    VALUE is the value to store.
 932
 933    This does not yet handle fields wider than BITS_PER_WORD.  */
 934
 935 static void
 936 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 937                        unsigned HOST_WIDE_INT bitpos, rtx value)
 938 {
 939   unsigned int unit;
 940   unsigned int bitsdone = 0;
 941
 942   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 943      much at a time.  */
 944   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 945     unit = BITS_PER_WORD;
 946   else
 947     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 948
 949   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 950      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 951      that VALUE might be a floating-point constant.  */
 952   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 953     {
 954       rtx word = gen_lowpart_common (word_mode, value);
 955
 956       if (word && (value != word))
 957         value = word;
 958       else
 959         value = gen_lowpart_common (word_mode,
 960                                     force_reg (GET_MODE (value) != VOIDmode
 961                                                ? GET_MODE (value)
 962                                                : word_mode, value));
 963     }
 964
 965   while (bitsdone < bitsize)
 966     {
 967       unsigned HOST_WIDE_INT thissize;
 968       rtx part, word;
 969       unsigned HOST_WIDE_INT thispos;
 970       unsigned HOST_WIDE_INT offset;
 971
 972       offset = (bitpos + bitsdone) / unit;
 973       thispos = (bitpos + bitsdone) % unit;
 974
 975       /* THISSIZE must not overrun a word boundary.  Otherwise,
 976          store_fixed_bit_field will call us again, and we will mutually
 977          recurse forever.  */
 978       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 979       thissize = MIN (thissize, unit - thispos);
 980
 981       if (BYTES_BIG_ENDIAN)
 982         {
 983           int total_bits;
 984
 985           /* We must do an endian conversion exactly the same way as it is
 986              done in extract_bit_field, so that the two calls to
 987              extract_fixed_bit_field will have comparable arguments.  */
 988           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 989             total_bits = BITS_PER_WORD;
 990           else
 991             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 992
 993           /* Fetch successively less significant portions.  */
 994           if (GET_CODE (value) == CONST_INT)
 995             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 996                              >> (bitsize - bitsdone - thissize))
 997                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 998           else
 999             /* The args are chosen so that the last part includes the
1000                lsb.  Give extract_bit_field the value it needs (with
1001                endianness compensation) to fetch the piece we want.  */
1002             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1003                                             total_bits - bitsize + bitsdone,
1004                                             NULL_RTX, 1);
1005         }
1006       else
1007         {
1008           /* Fetch successively more significant portions.  */
1009           if (GET_CODE (value) == CONST_INT)
1010             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1011                              >> bitsdone)
1012                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1013           else
1014             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1015                                             bitsdone, NULL_RTX, 1);
1016         }
1017
1018       /* If OP0 is a register, then handle OFFSET here.
1019
1020          When handling multiword bitfields, extract_bit_field may pass
1021          down a word_mode SUBREG of a larger REG for a bitfield that actually
1022          crosses a word boundary.  Thus, for a SUBREG, we must find
1023          the current word starting from the base register.  */
1024       if (GET_CODE (op0) == SUBREG)
1025         {
1026           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1027           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1028                                         GET_MODE (SUBREG_REG (op0)));
1029           offset = 0;
1030         }
1031       else if (REG_P (op0))
1032         {
1033           word = operand_subword_force (op0, offset, GET_MODE (op0));
1034           offset = 0;
1035         }
1036       else
1037         word = op0;
1038
1039       /* OFFSET is in UNITs, and UNIT is in bits.
1040          store_fixed_bit_field wants offset in bytes.  */
1041       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1042                              thispos, part);
1043       bitsdone += thissize;
1044     }
1045 }
1046 \f
1047 /* Generate code to extract a byte-field from STR_RTX
1048    containing BITSIZE bits, starting at BITNUM,
1049    and put it in TARGET if possible (if TARGET is nonzero).
1050    Regardless of TARGET, we return the rtx for where the value is placed.
1051
1052    STR_RTX is the structure containing the byte (a REG or MEM).
1053    UNSIGNEDP is nonzero if this is an unsigned bit field.
1054    MODE is the natural mode of the field value once extracted.
1055    TMODE is the mode the caller would like the value to have;
1056    but the value may be returned with type MODE instead.
1057
1058    TOTAL_SIZE is the size in bytes of the containing structure,
1059    or -1 if varying.
1060
1061    If a TARGET is specified and we can store in it at no extra cost,
1062    we do so, and return TARGET.
1063    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1064    if they are equally easy.  */
1065
1066 rtx
1067 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1068                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1069                    enum machine_mode mode, enum machine_mode tmode)
1070 {
1071   unsigned int unit
1072     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1073   unsigned HOST_WIDE_INT offset, bitpos;
1074   rtx op0 = str_rtx;
1075   rtx spec_target = target;
1076   rtx spec_target_subreg = 0;
1077   enum machine_mode int_mode;
1078   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1079   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1080   enum machine_mode mode1;
1081   int byte_offset;
1082
1083   if (tmode == VOIDmode)
1084     tmode = mode;
1085
1086   while (GET_CODE (op0) == SUBREG)
1087     {
1088       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1089       op0 = SUBREG_REG (op0);
1090     }
1091
1092   /* If we have an out-of-bounds access to a register, just return an
1093      uninitialized register of the required mode.  This can occur if the
1094      source code contains an out-of-bounds access to a small array.  */
1095   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1096     return gen_reg_rtx (tmode);
1097
1098   if (REG_P (op0)
1099       && mode == GET_MODE (op0)
1100       && bitnum == 0
1101       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1102     {
1103       /* We're trying to extract a full register from itself.  */
1104       return op0;
1105     }
1106
1107   /* Use vec_extract patterns for extracting parts of vectors whenever
1108      available.  */
1109   if (VECTOR_MODE_P (GET_MODE (op0))
1110       && !MEM_P (op0)
1111       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1112           != CODE_FOR_nothing)
1113       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1114           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1115     {
1116       enum machine_mode outermode = GET_MODE (op0);
1117       enum machine_mode innermode = GET_MODE_INNER (outermode);
1118       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1119       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1120       rtx rtxpos = GEN_INT (pos);
1121       rtx src = op0;
1122       rtx dest = NULL, pat, seq;
1123       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1124       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1125       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1126
1127       if (innermode == tmode || innermode == mode)
1128         dest = target;
1129
1130       if (!dest)
1131         dest = gen_reg_rtx (innermode);
1132
1133       start_sequence ();
1134
1135       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1136         dest = copy_to_mode_reg (mode0, dest);
1137
1138       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1139         src = copy_to_mode_reg (mode1, src);
1140
1141       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1142         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1143
1144       /* We could handle this, but we should always be called with a pseudo
1145          for our targets and all insns should take them as outputs.  */
1146       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1147                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1148                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1149
1150       pat = GEN_FCN (icode) (dest, src, rtxpos);
1151       seq = get_insns ();
1152       end_sequence ();
1153       if (pat)
1154         {
1155           emit_insn (seq);
1156           emit_insn (pat);
1157           return dest;
1158         }
1159     }
1160
1161   /* Make sure we are playing with integral modes.  Pun with subregs
1162      if we aren't.  */
1163   {
1164     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1165     if (imode != GET_MODE (op0))
1166       {
1167         if (MEM_P (op0))
1168           op0 = adjust_address (op0, imode, 0);
1169         else
1170           {
1171             gcc_assert (imode != BLKmode);
1172             op0 = gen_lowpart (imode, op0);
1173
1174             /* If we got a SUBREG, force it into a register since we
1175                aren't going to be able to do another SUBREG on it.  */
1176             if (GET_CODE (op0) == SUBREG)
1177               op0 = force_reg (imode, op0);
1178           }
1179       }
1180   }
1181
1182   /* We may be accessing data outside the field, which means
1183      we can alias adjacent data.  */
1184   if (MEM_P (op0))
1185     {
1186       op0 = shallow_copy_rtx (op0);
1187       set_mem_alias_set (op0, 0);
1188       set_mem_expr (op0, 0);
1189     }
1190
1191   /* Extraction of a full-word or multi-word value from a structure
1192      in a register or aligned memory can be done with just a SUBREG.
1193      A subword value in the least significant part of a register
1194      can also be extracted with a SUBREG.  For this, we need the
1195      byte offset of the value in op0.  */
1196
1197   bitpos = bitnum % unit;
1198   offset = bitnum / unit;
1199   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1200
1201   /* If OP0 is a register, BITPOS must count within a word.
1202      But as we have it, it counts within whatever size OP0 now has.
1203      On a bigendian machine, these are not the same, so convert.  */
1204   if (BYTES_BIG_ENDIAN
1205       && !MEM_P (op0)
1206       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1207     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1208
1209   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1210      If that's wrong, the solution is to test for it and set TARGET to 0
1211      if needed.  */
1212
1213   /* Only scalar integer modes can be converted via subregs.  There is an
1214      additional problem for FP modes here in that they can have a precision
1215      which is different from the size.  mode_for_size uses precision, but
1216      we want a mode based on the size, so we must avoid calling it for FP
1217      modes.  */
1218   mode1  = (SCALAR_INT_MODE_P (tmode)
1219             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1220             : mode);
1221
1222   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1223         && bitpos % BITS_PER_WORD == 0)
1224        || (mode1 != BLKmode
1225            /* ??? The big endian test here is wrong.  This is correct
1226               if the value is in a register, and if mode_for_size is not
1227               the same mode as op0.  This causes us to get unnecessarily
1228               inefficient code from the Thumb port when -mbig-endian.  */
1229            && (BYTES_BIG_ENDIAN
1230                ? bitpos + bitsize == BITS_PER_WORD
1231                : bitpos == 0)))
1232       && ((!MEM_P (op0)
1233            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1234                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1235            && GET_MODE_SIZE (mode1) != 0
1236            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1237           || (MEM_P (op0)
1238               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1239                   || (offset * BITS_PER_UNIT % bitsize == 0
1240                       && MEM_ALIGN (op0) % bitsize == 0)))))
1241     {
1242       if (mode1 != GET_MODE (op0))
1243         {
1244           if (MEM_P (op0))
1245             op0 = adjust_address (op0, mode1, offset);
1246           else
1247             {
1248               rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1249                                              byte_offset);
1250               if (sub == NULL)
1251                 goto no_subreg_mode_swap;
1252               op0 = sub;
1253             }
1254         }
1255       if (mode1 != mode)
1256         return convert_to_mode (tmode, op0, unsignedp);
1257       return op0;
1258     }
1259  no_subreg_mode_swap:
1260
1261   /* Handle fields bigger than a word.  */
1262
1263   if (bitsize > BITS_PER_WORD)
1264     {
1265       /* Here we transfer the words of the field
1266          in the order least significant first.
1267          This is because the most significant word is the one which may
1268          be less than full.  */
1269
1270       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1271       unsigned int i;
1272
1273       if (target == 0 || !REG_P (target))
1274         target = gen_reg_rtx (mode);
1275
1276       /* Indicate for flow that the entire target reg is being set.  */
1277       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1278
1279       for (i = 0; i < nwords; i++)
1280         {
1281           /* If I is 0, use the low-order word in both field and target;
1282              if I is 1, use the next to lowest word; and so on.  */
1283           /* Word number in TARGET to use.  */
1284           unsigned int wordnum
1285             = (WORDS_BIG_ENDIAN
1286                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1287                : i);
1288           /* Offset from start of field in OP0.  */
1289           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1290                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1291                                                 * (int) BITS_PER_WORD))
1292                                      : (int) i * BITS_PER_WORD);
1293           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1294           rtx result_part
1295             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1296                                            bitsize - i * BITS_PER_WORD),
1297                                  bitnum + bit_offset, 1, target_part, mode,
1298                                  word_mode);
1299
1300           gcc_assert (target_part);
1301
1302           if (result_part != target_part)
1303             emit_move_insn (target_part, result_part);
1304         }
1305
1306       if (unsignedp)
1307         {
1308           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1309              need to be zero'd out.  */
1310           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1311             {
1312               unsigned int i, total_words;
1313
1314               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1315               for (i = nwords; i < total_words; i++)
1316                 emit_move_insn
1317                   (operand_subword (target,
1318                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1319                                     1, VOIDmode),
1320                    const0_rtx);
1321             }
1322           return target;
1323         }
1324
1325       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1326       target = expand_shift (LSHIFT_EXPR, mode, target,
1327                              build_int_cst (NULL_TREE,
1328                                             GET_MODE_BITSIZE (mode) - bitsize),
1329                              NULL_RTX, 0);
1330       return expand_shift (RSHIFT_EXPR, mode, target,
1331                            build_int_cst (NULL_TREE,
1332                                           GET_MODE_BITSIZE (mode) - bitsize),
1333                            NULL_RTX, 0);
1334     }
1335
1336   /* From here on we know the desired field is smaller than a word.  */
1337
1338   /* Check if there is a correspondingly-sized integer field, so we can
1339      safely extract it as one size of integer, if necessary; then
1340      truncate or extend to the size that is wanted; then use SUBREGs or
1341      convert_to_mode to get one of the modes we really wanted.  */
1342
1343   int_mode = int_mode_for_mode (tmode);
1344   if (int_mode == BLKmode)
1345     int_mode = int_mode_for_mode (mode);
1346   /* Should probably push op0 out to memory and then do a load.  */
1347   gcc_assert (int_mode != BLKmode);
1348
1349   /* OFFSET is the number of words or bytes (UNIT says which)
1350      from STR_RTX to the first word or byte containing part of the field.  */
1351   if (!MEM_P (op0))
1352     {
1353       if (offset != 0
1354           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1355         {
1356           if (!REG_P (op0))
1357             op0 = copy_to_reg (op0);
1358           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1359                                 op0, (offset * UNITS_PER_WORD));
1360         }
1361       offset = 0;
1362     }
1363
1364   /* Now OFFSET is nonzero only for memory operands.  */
1365
1366   if (unsignedp)
1367     {
1368       if (HAVE_extzv
1369           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1370           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1371                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1372         {
1373           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1374           rtx bitsize_rtx, bitpos_rtx;
1375           rtx last = get_last_insn ();
1376           rtx xop0 = op0;
1377           rtx xtarget = target;
1378           rtx xspec_target = spec_target;
1379           rtx xspec_target_subreg = spec_target_subreg;
1380           rtx pat;
1381           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1382
1383           if (MEM_P (xop0))
1384             {
1385               int save_volatile_ok = volatile_ok;
1386               volatile_ok = 1;
1387
1388               /* Is the memory operand acceptable?  */
1389               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1390                      (xop0, GET_MODE (xop0))))
1391                 {
1392                   /* No, load into a reg and extract from there.  */
1393                   enum machine_mode bestmode;
1394
1395                   /* Get the mode to use for inserting into this field.  If
1396                      OP0 is BLKmode, get the smallest mode consistent with the
1397                      alignment. If OP0 is a non-BLKmode object that is no
1398                      wider than MAXMODE, use its mode. Otherwise, use the
1399                      smallest mode containing the field.  */
1400
1401                   if (GET_MODE (xop0) == BLKmode
1402                       || (GET_MODE_SIZE (GET_MODE (op0))
1403                           > GET_MODE_SIZE (maxmode)))
1404                     bestmode = get_best_mode (bitsize, bitnum,
1405                                               MEM_ALIGN (xop0), maxmode,
1406                                               MEM_VOLATILE_P (xop0));
1407                   else
1408                     bestmode = GET_MODE (xop0);
1409
1410                   if (bestmode == VOIDmode
1411                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1412                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1413                     goto extzv_loses;
1414
1415                   /* Compute offset as multiple of this unit,
1416                      counting in bytes.  */
1417                   unit = GET_MODE_BITSIZE (bestmode);
1418                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1419                   xbitpos = bitnum % unit;
1420                   xop0 = adjust_address (xop0, bestmode, xoffset);
1421
1422                   /* Fetch it to a register in that size.  */
1423                   xop0 = force_reg (bestmode, xop0);
1424
1425                   /* XBITPOS counts within UNIT, which is what is expected.  */
1426                 }
1427               else
1428                 /* Get ref to first byte containing part of the field.  */
1429                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1430
1431               volatile_ok = save_volatile_ok;
1432             }
1433
1434           /* If op0 is a register, we need it in MAXMODE (which is usually
1435              SImode). to make it acceptable to the format of extzv.  */
1436           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1437             goto extzv_loses;
1438           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1439             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1440
1441           /* On big-endian machines, we count bits from the most significant.
1442              If the bit field insn does not, we must invert.  */
1443           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1444             xbitpos = unit - bitsize - xbitpos;
1445
1446           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1447           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1448             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1449
1450           unit = GET_MODE_BITSIZE (maxmode);
1451
1452           if (xtarget == 0
1453               || (flag_force_mem && MEM_P (xtarget)))
1454             xtarget = xspec_target = gen_reg_rtx (tmode);
1455
1456           if (GET_MODE (xtarget) != maxmode)
1457             {
1458               if (REG_P (xtarget))
1459                 {
1460                   int wider = (GET_MODE_SIZE (maxmode)
1461                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1462                   xtarget = gen_lowpart (maxmode, xtarget);
1463                   if (wider)
1464                     xspec_target_subreg = xtarget;
1465                 }
1466               else
1467                 xtarget = gen_reg_rtx (maxmode);
1468             }
1469
1470           /* If this machine's extzv insists on a register target,
1471              make sure we have one.  */
1472           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1473                  (xtarget, maxmode)))
1474             xtarget = gen_reg_rtx (maxmode);
1475
1476           bitsize_rtx = GEN_INT (bitsize);
1477           bitpos_rtx = GEN_INT (xbitpos);
1478
1479           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1480           if (pat)
1481             {
1482               emit_insn (pat);
1483               target = xtarget;
1484               spec_target = xspec_target;
1485               spec_target_subreg = xspec_target_subreg;
1486             }
1487           else
1488             {
1489               delete_insns_since (last);
1490               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1491                                                 bitpos, target, 1);
1492             }
1493         }
1494       else
1495       extzv_loses:
1496         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1497                                           bitpos, target, 1);
1498     }
1499   else
1500     {
1501       if (HAVE_extv
1502           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1503           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1504                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1505         {
1506           int xbitpos = bitpos, xoffset = offset;
1507           rtx bitsize_rtx, bitpos_rtx;
1508           rtx last = get_last_insn ();
1509           rtx xop0 = op0, xtarget = target;
1510           rtx xspec_target = spec_target;
1511           rtx xspec_target_subreg = spec_target_subreg;
1512           rtx pat;
1513           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1514
1515           if (MEM_P (xop0))
1516             {
1517               /* Is the memory operand acceptable?  */
1518               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1519                      (xop0, GET_MODE (xop0))))
1520                 {
1521                   /* No, load into a reg and extract from there.  */
1522                   enum machine_mode bestmode;
1523
1524                   /* Get the mode to use for inserting into this field.  If
1525                      OP0 is BLKmode, get the smallest mode consistent with the
1526                      alignment. If OP0 is a non-BLKmode object that is no
1527                      wider than MAXMODE, use its mode. Otherwise, use the
1528                      smallest mode containing the field.  */
1529
1530                   if (GET_MODE (xop0) == BLKmode
1531                       || (GET_MODE_SIZE (GET_MODE (op0))
1532                           > GET_MODE_SIZE (maxmode)))
1533                     bestmode = get_best_mode (bitsize, bitnum,
1534                                               MEM_ALIGN (xop0), maxmode,
1535                                               MEM_VOLATILE_P (xop0));
1536                   else
1537                     bestmode = GET_MODE (xop0);
1538
1539                   if (bestmode == VOIDmode
1540                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1541                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1542                     goto extv_loses;
1543
1544                   /* Compute offset as multiple of this unit,
1545                      counting in bytes.  */
1546                   unit = GET_MODE_BITSIZE (bestmode);
1547                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1548                   xbitpos = bitnum % unit;
1549                   xop0 = adjust_address (xop0, bestmode, xoffset);
1550
1551                   /* Fetch it to a register in that size.  */
1552                   xop0 = force_reg (bestmode, xop0);
1553
1554                   /* XBITPOS counts within UNIT, which is what is expected.  */
1555                 }
1556               else
1557                 /* Get ref to first byte containing part of the field.  */
1558                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1559             }
1560
1561           /* If op0 is a register, we need it in MAXMODE (which is usually
1562              SImode) to make it acceptable to the format of extv.  */
1563           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1564             goto extv_loses;
1565           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1566             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1567
1568           /* On big-endian machines, we count bits from the most significant.
1569              If the bit field insn does not, we must invert.  */
1570           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1571             xbitpos = unit - bitsize - xbitpos;
1572
1573           /* XBITPOS counts within a size of UNIT.
1574              Adjust to count within a size of MAXMODE.  */
1575           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1576             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1577
1578           unit = GET_MODE_BITSIZE (maxmode);
1579
1580           if (xtarget == 0
1581               || (flag_force_mem && MEM_P (xtarget)))
1582             xtarget = xspec_target = gen_reg_rtx (tmode);
1583
1584           if (GET_MODE (xtarget) != maxmode)
1585             {
1586               if (REG_P (xtarget))
1587                 {
1588                   int wider = (GET_MODE_SIZE (maxmode)
1589                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1590                   xtarget = gen_lowpart (maxmode, xtarget);
1591                   if (wider)
1592                     xspec_target_subreg = xtarget;
1593                 }
1594               else
1595                 xtarget = gen_reg_rtx (maxmode);
1596             }
1597
1598           /* If this machine's extv insists on a register target,
1599              make sure we have one.  */
1600           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1601                  (xtarget, maxmode)))
1602             xtarget = gen_reg_rtx (maxmode);
1603
1604           bitsize_rtx = GEN_INT (bitsize);
1605           bitpos_rtx = GEN_INT (xbitpos);
1606
1607           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1608           if (pat)
1609             {
1610               emit_insn (pat);
1611               target = xtarget;
1612               spec_target = xspec_target;
1613               spec_target_subreg = xspec_target_subreg;
1614             }
1615           else
1616             {
1617               delete_insns_since (last);
1618               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1619                                                 bitpos, target, 0);
1620             }
1621         }
1622       else
1623       extv_loses:
1624         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1625                                           bitpos, target, 0);
1626     }
1627   if (target == spec_target)
1628     return target;
1629   if (target == spec_target_subreg)
1630     return spec_target;
1631   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1632     {
1633       /* If the target mode is not a scalar integral, first convert to the
1634          integer mode of that size and then access it as a floating-point
1635          value via a SUBREG.  */
1636       if (!SCALAR_INT_MODE_P (tmode))
1637         {
1638           enum machine_mode smode
1639             = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1640           target = convert_to_mode (smode, target, unsignedp);
1641           target = force_reg (smode, target);
1642           return gen_lowpart (tmode, target);
1643         }
1644
1645       return convert_to_mode (tmode, target, unsignedp);
1646     }
1647   return target;
1648 }
1649 \f
1650 /* Extract a bit field using shifts and boolean operations
1651    Returns an rtx to represent the value.
1652    OP0 addresses a register (word) or memory (byte).
1653    BITPOS says which bit within the word or byte the bit field starts in.
1654    OFFSET says how many bytes farther the bit field starts;
1655     it is 0 if OP0 is a register.
1656    BITSIZE says how many bits long the bit field is.
1657     (If OP0 is a register, it may be narrower than a full word,
1658      but BITPOS still counts within a full word,
1659      which is significant on bigendian machines.)
1660
1661    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1662    If TARGET is nonzero, attempts to store the value there
1663    and return TARGET, but this is not guaranteed.
1664    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1665
1666 static rtx
1667 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1668                          unsigned HOST_WIDE_INT offset,
1669                          unsigned HOST_WIDE_INT bitsize,
1670                          unsigned HOST_WIDE_INT bitpos, rtx target,
1671                          int unsignedp)
1672 {
1673   unsigned int total_bits = BITS_PER_WORD;
1674   enum machine_mode mode;
1675
1676   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1677     {
1678       /* Special treatment for a bit field split across two registers.  */
1679       if (bitsize + bitpos > BITS_PER_WORD)
1680         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1681     }
1682   else
1683     {
1684       /* Get the proper mode to use for this field.  We want a mode that
1685          includes the entire field.  If such a mode would be larger than
1686          a word, we won't be doing the extraction the normal way.  */
1687
1688       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1689                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1690
1691       if (mode == VOIDmode)
1692         /* The only way this should occur is if the field spans word
1693            boundaries.  */
1694         return extract_split_bit_field (op0, bitsize,
1695                                         bitpos + offset * BITS_PER_UNIT,
1696                                         unsignedp);
1697
1698       total_bits = GET_MODE_BITSIZE (mode);
1699
1700       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1701          be in the range 0 to total_bits-1, and put any excess bytes in
1702          OFFSET.  */
1703       if (bitpos >= total_bits)
1704         {
1705           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1706           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1707                      * BITS_PER_UNIT);
1708         }
1709
1710       /* Get ref to an aligned byte, halfword, or word containing the field.
1711          Adjust BITPOS to be position within a word,
1712          and OFFSET to be the offset of that word.
1713          Then alter OP0 to refer to that word.  */
1714       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1715       offset -= (offset % (total_bits / BITS_PER_UNIT));
1716       op0 = adjust_address (op0, mode, offset);
1717     }
1718
1719   mode = GET_MODE (op0);
1720
1721   if (BYTES_BIG_ENDIAN)
1722     /* BITPOS is the distance between our msb and that of OP0.
1723        Convert it to the distance from the lsb.  */
1724     bitpos = total_bits - bitsize - bitpos;
1725
1726   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1727      We have reduced the big-endian case to the little-endian case.  */
1728
1729   if (unsignedp)
1730     {
1731       if (bitpos)
1732         {
1733           /* If the field does not already start at the lsb,
1734              shift it so it does.  */
1735           tree amount = build_int_cst (NULL_TREE, bitpos);
1736           /* Maybe propagate the target for the shift.  */
1737           /* But not if we will return it--could confuse integrate.c.  */
1738           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1739           if (tmode != mode) subtarget = 0;
1740           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1741         }
1742       /* Convert the value to the desired mode.  */
1743       if (mode != tmode)
1744         op0 = convert_to_mode (tmode, op0, 1);
1745
1746       /* Unless the msb of the field used to be the msb when we shifted,
1747          mask out the upper bits.  */
1748
1749       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1750         return expand_binop (GET_MODE (op0), and_optab, op0,
1751                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1752                              target, 1, OPTAB_LIB_WIDEN);
1753       return op0;
1754     }
1755
1756   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1757      then arithmetic-shift its lsb to the lsb of the word.  */
1758   op0 = force_reg (mode, op0);
1759   if (mode != tmode)
1760     target = 0;
1761
1762   /* Find the narrowest integer mode that contains the field.  */
1763
1764   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1765        mode = GET_MODE_WIDER_MODE (mode))
1766     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1767       {
1768         op0 = convert_to_mode (mode, op0, 0);
1769         break;
1770       }
1771
1772   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1773     {
1774       tree amount
1775         = build_int_cst (NULL_TREE,
1776                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1777       /* Maybe propagate the target for the shift.  */
1778       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1779       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1780     }
1781
1782   return expand_shift (RSHIFT_EXPR, mode, op0,
1783                        build_int_cst (NULL_TREE,
1784                                       GET_MODE_BITSIZE (mode) - bitsize),
1785                        target, 0);
1786 }
1787 \f
1788 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1789    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1790    complement of that if COMPLEMENT.  The mask is truncated if
1791    necessary to the width of mode MODE.  The mask is zero-extended if
1792    BITSIZE+BITPOS is too small for MODE.  */
1793
1794 static rtx
1795 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1796 {
1797   HOST_WIDE_INT masklow, maskhigh;
1798
1799   if (bitsize == 0)
1800     masklow = 0;
1801   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1802     masklow = (HOST_WIDE_INT) -1 << bitpos;
1803   else
1804     masklow = 0;
1805
1806   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1807     masklow &= ((unsigned HOST_WIDE_INT) -1
1808                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1809
1810   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1811     maskhigh = -1;
1812   else
1813     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1814
1815   if (bitsize == 0)
1816     maskhigh = 0;
1817   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1818     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1819                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1820   else
1821     maskhigh = 0;
1822
1823   if (complement)
1824     {
1825       maskhigh = ~maskhigh;
1826       masklow = ~masklow;
1827     }
1828
1829   return immed_double_const (masklow, maskhigh, mode);
1830 }
1831
1832 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1833    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1834
1835 static rtx
1836 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1837 {
1838   unsigned HOST_WIDE_INT v = INTVAL (value);
1839   HOST_WIDE_INT low, high;
1840
1841   if (bitsize < HOST_BITS_PER_WIDE_INT)
1842     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1843
1844   if (bitpos < HOST_BITS_PER_WIDE_INT)
1845     {
1846       low = v << bitpos;
1847       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1848     }
1849   else
1850     {
1851       low = 0;
1852       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1853     }
1854
1855   return immed_double_const (low, high, mode);
1856 }
1857 \f
1858 /* Extract a bit field from a memory by forcing the alignment of the
1859    memory.  This efficient only if the field spans at least 4 boundaries.
1860
1861    OP0 is the MEM.
1862    BITSIZE is the field width; BITPOS is the position of the first bit.
1863    UNSIGNEDP is true if the result should be zero-extended.  */
1864
1865 static rtx
1866 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1867                                    unsigned HOST_WIDE_INT bitpos,
1868                                    int unsignedp)
1869 {
1870   enum machine_mode mode, dmode;
1871   unsigned int m_bitsize, m_size;
1872   unsigned int sign_shift_up, sign_shift_dn;
1873   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1874
1875   /* Choose a mode that will fit BITSIZE.  */
1876   mode = smallest_mode_for_size (bitsize, MODE_INT);
1877   m_size = GET_MODE_SIZE (mode);
1878   m_bitsize = GET_MODE_BITSIZE (mode);
1879
1880   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1881   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1882   if (dmode == BLKmode)
1883     return NULL;
1884
1885   do_pending_stack_adjust ();
1886   start = get_last_insn ();
1887
1888   /* At the end, we'll need an additional shift to deal with sign/zero
1889      extension.  By default this will be a left+right shift of the
1890      appropriate size.  But we may be able to eliminate one of them.  */
1891   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1892
1893   if (STRICT_ALIGNMENT)
1894     {
1895       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1896       bitpos %= BITS_PER_UNIT;
1897
1898       /* We load two values to be concatenate.  There's an edge condition
1899          that bears notice -- an aligned value at the end of a page can
1900          only load one value lest we segfault.  So the two values we load
1901          are at "base & -size" and "(base + size - 1) & -size".  If base
1902          is unaligned, the addresses will be aligned and sequential; if
1903          base is aligned, the addresses will both be equal to base.  */
1904
1905       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1906                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1907                                 NULL, true, OPTAB_LIB_WIDEN);
1908       mark_reg_pointer (a1, m_bitsize);
1909       v1 = gen_rtx_MEM (mode, a1);
1910       set_mem_align (v1, m_bitsize);
1911       v1 = force_reg (mode, validize_mem (v1));
1912
1913       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1914       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1915                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1916                                 NULL, true, OPTAB_LIB_WIDEN);
1917       v2 = gen_rtx_MEM (mode, a2);
1918       set_mem_align (v2, m_bitsize);
1919       v2 = force_reg (mode, validize_mem (v2));
1920
1921       /* Combine these two values into a double-word value.  */
1922       if (m_bitsize == BITS_PER_WORD)
1923         {
1924           comb = gen_reg_rtx (dmode);
1925           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1926           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1927           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1928         }
1929       else
1930         {
1931           if (BYTES_BIG_ENDIAN)
1932             comb = v1, v1 = v2, v2 = comb;
1933           v1 = convert_modes (dmode, mode, v1, true);
1934           if (v1 == NULL)
1935             goto fail;
1936           v2 = convert_modes (dmode, mode, v2, true);
1937           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1938                                     NULL, true, OPTAB_LIB_WIDEN);
1939           if (v2 == NULL)
1940             goto fail;
1941           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1942                                       true, OPTAB_LIB_WIDEN);
1943           if (comb == NULL)
1944             goto fail;
1945         }
1946
1947       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1948                                    NULL, true, OPTAB_LIB_WIDEN);
1949       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1950
1951       if (bitpos != 0)
1952         {
1953           if (sign_shift_up <= bitpos)
1954             bitpos -= sign_shift_up, sign_shift_up = 0;
1955           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1956                                        NULL, true, OPTAB_LIB_WIDEN);
1957         }
1958     }
1959   else
1960     {
1961       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1962       bitpos %= BITS_PER_UNIT;
1963
1964       /* When strict alignment is not required, we can just load directly
1965          from memory without masking.  If the remaining BITPOS offset is
1966          small enough, we may be able to do all operations in MODE as
1967          opposed to DMODE.  */
1968       if (bitpos + bitsize <= m_bitsize)
1969         dmode = mode;
1970       comb = adjust_address (op0, dmode, offset);
1971
1972       if (sign_shift_up <= bitpos)
1973         bitpos -= sign_shift_up, sign_shift_up = 0;
1974       shift = GEN_INT (bitpos);
1975     }
1976
1977   /* Shift down the double-word such that the requested value is at bit 0.  */
1978   if (shift != const0_rtx)
1979     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1980                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1981   if (comb == NULL)
1982     goto fail;
1983
1984   /* If the field exactly matches MODE, then all we need to do is return the
1985      lowpart.  Otherwise, shift to get the sign bits set properly.  */
1986   result = force_reg (mode, gen_lowpart (mode, comb));
1987
1988   if (sign_shift_up)
1989     result = expand_simple_binop (mode, ASHIFT, result,
1990                                   GEN_INT (sign_shift_up),
1991                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1992   if (sign_shift_dn)
1993     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1994                                   result, GEN_INT (sign_shift_dn),
1995                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1996
1997   return result;
1998
1999  fail:
2000   delete_insns_since (start);
2001   return NULL;
2002 }
2003
2004 /* Extract a bit field that is split across two words
2005    and return an RTX for the result.
2006
2007    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2008    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2009    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2010
2011 static rtx
2012 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2013                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2014 {
2015   unsigned int unit;
2016   unsigned int bitsdone = 0;
2017   rtx result = NULL_RTX;
2018   int first = 1;
2019
2020   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2021      much at a time.  */
2022   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2023     unit = BITS_PER_WORD;
2024   else
2025     {
2026       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2027       if (0 && bitsize / unit > 2)
2028         {
2029           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2030                                                        unsignedp);
2031           if (tmp)
2032             return tmp;
2033         }
2034     }
2035
2036   while (bitsdone < bitsize)
2037     {
2038       unsigned HOST_WIDE_INT thissize;
2039       rtx part, word;
2040       unsigned HOST_WIDE_INT thispos;
2041       unsigned HOST_WIDE_INT offset;
2042
2043       offset = (bitpos + bitsdone) / unit;
2044       thispos = (bitpos + bitsdone) % unit;
2045
2046       /* THISSIZE must not overrun a word boundary.  Otherwise,
2047          extract_fixed_bit_field will call us again, and we will mutually
2048          recurse forever.  */
2049       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2050       thissize = MIN (thissize, unit - thispos);
2051
2052       /* If OP0 is a register, then handle OFFSET here.
2053
2054          When handling multiword bitfields, extract_bit_field may pass
2055          down a word_mode SUBREG of a larger REG for a bitfield that actually
2056          crosses a word boundary.  Thus, for a SUBREG, we must find
2057          the current word starting from the base register.  */
2058       if (GET_CODE (op0) == SUBREG)
2059         {
2060           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2061           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2062                                         GET_MODE (SUBREG_REG (op0)));
2063           offset = 0;
2064         }
2065       else if (REG_P (op0))
2066         {
2067           word = operand_subword_force (op0, offset, GET_MODE (op0));
2068           offset = 0;
2069         }
2070       else
2071         word = op0;
2072
2073       /* Extract the parts in bit-counting order,
2074          whose meaning is determined by BYTES_PER_UNIT.
2075          OFFSET is in UNITs, and UNIT is in bits.
2076          extract_fixed_bit_field wants offset in bytes.  */
2077       part = extract_fixed_bit_field (word_mode, word,
2078                                       offset * unit / BITS_PER_UNIT,
2079                                       thissize, thispos, 0, 1);
2080       bitsdone += thissize;
2081
2082       /* Shift this part into place for the result.  */
2083       if (BYTES_BIG_ENDIAN)
2084         {
2085           if (bitsize != bitsdone)
2086             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2087                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2088                                  0, 1);
2089         }
2090       else
2091         {
2092           if (bitsdone != thissize)
2093             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2094                                  build_int_cst (NULL_TREE,
2095                                                 bitsdone - thissize), 0, 1);
2096         }
2097
2098       if (first)
2099         result = part;
2100       else
2101         /* Combine the parts with bitwise or.  This works
2102            because we extracted each part as an unsigned bit field.  */
2103         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2104                                OPTAB_LIB_WIDEN);
2105
2106       first = 0;
2107     }
2108
2109   /* Unsigned bit field: we are done.  */
2110   if (unsignedp)
2111     return result;
2112   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2113   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2114                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2115                          NULL_RTX, 0);
2116   return expand_shift (RSHIFT_EXPR, word_mode, result,
2117                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2118                        NULL_RTX, 0);
2119 }
2120 \f
2121 /* Add INC into TARGET.  */
2122
2123 void
2124 expand_inc (rtx target, rtx inc)
2125 {
2126   rtx value = expand_binop (GET_MODE (target), add_optab,
2127                             target, inc,
2128                             target, 0, OPTAB_LIB_WIDEN);
2129   if (value != target)
2130     emit_move_insn (target, value);
2131 }
2132
2133 /* Subtract DEC from TARGET.  */
2134
2135 void
2136 expand_dec (rtx target, rtx dec)
2137 {
2138   rtx value = expand_binop (GET_MODE (target), sub_optab,
2139                             target, dec,
2140                             target, 0, OPTAB_LIB_WIDEN);
2141   if (value != target)
2142     emit_move_insn (target, value);
2143 }
2144 \f
2145 /* Output a shift instruction for expression code CODE,
2146    with SHIFTED being the rtx for the value to shift,
2147    and AMOUNT the tree for the amount to shift by.
2148    Store the result in the rtx TARGET, if that is convenient.
2149    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2150    Return the rtx for where the value is.  */
2151
2152 rtx
2153 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2154               tree amount, rtx target, int unsignedp)
2155 {
2156   rtx op1, temp = 0;
2157   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2158   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2159   int try;
2160
2161   /* Previously detected shift-counts computed by NEGATE_EXPR
2162      and shifted in the other direction; but that does not work
2163      on all machines.  */
2164
2165   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2166
2167   if (SHIFT_COUNT_TRUNCATED)
2168     {
2169       if (GET_CODE (op1) == CONST_INT
2170           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2171               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2172         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2173                        % GET_MODE_BITSIZE (mode));
2174       else if (GET_CODE (op1) == SUBREG
2175                && subreg_lowpart_p (op1))
2176         op1 = SUBREG_REG (op1);
2177     }
2178
2179   if (op1 == const0_rtx)
2180     return shifted;
2181
2182   /* Check whether its cheaper to implement a left shift by a constant
2183      bit count by a sequence of additions.  */
2184   if (code == LSHIFT_EXPR
2185       && GET_CODE (op1) == CONST_INT
2186       && INTVAL (op1) > 0
2187       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2188       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2189     {
2190       int i;
2191       for (i = 0; i < INTVAL (op1); i++)
2192         {
2193           temp = force_reg (mode, shifted);
2194           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2195                                   unsignedp, OPTAB_LIB_WIDEN);
2196         }
2197       return shifted;
2198     }
2199
2200   for (try = 0; temp == 0 && try < 3; try++)
2201     {
2202       enum optab_methods methods;
2203
2204       if (try == 0)
2205         methods = OPTAB_DIRECT;
2206       else if (try == 1)
2207         methods = OPTAB_WIDEN;
2208       else
2209         methods = OPTAB_LIB_WIDEN;
2210
2211       if (rotate)
2212         {
2213           /* Widening does not work for rotation.  */
2214           if (methods == OPTAB_WIDEN)
2215             continue;
2216           else if (methods == OPTAB_LIB_WIDEN)
2217             {
2218               /* If we have been unable to open-code this by a rotation,
2219                  do it as the IOR of two shifts.  I.e., to rotate A
2220                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2221                  where C is the bitsize of A.
2222
2223                  It is theoretically possible that the target machine might
2224                  not be able to perform either shift and hence we would
2225                  be making two libcalls rather than just the one for the
2226                  shift (similarly if IOR could not be done).  We will allow
2227                  this extremely unlikely lossage to avoid complicating the
2228                  code below.  */
2229
2230               rtx subtarget = target == shifted ? 0 : target;
2231               rtx temp1;
2232               tree type = TREE_TYPE (amount);
2233               tree new_amount = make_tree (type, op1);
2234               tree other_amount
2235                 = fold (build2 (MINUS_EXPR, type,
2236                                 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2237                                 amount));
2238
2239               shifted = force_reg (mode, shifted);
2240
2241               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2242                                    mode, shifted, new_amount, subtarget, 1);
2243               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2244                                     mode, shifted, other_amount, 0, 1);
2245               return expand_binop (mode, ior_optab, temp, temp1, target,
2246                                    unsignedp, methods);
2247             }
2248
2249           temp = expand_binop (mode,
2250                                left ? rotl_optab : rotr_optab,
2251                                shifted, op1, target, unsignedp, methods);
2252
2253           /* If we don't have the rotate, but we are rotating by a constant
2254              that is in range, try a rotate in the opposite direction.  */
2255
2256           if (temp == 0 && GET_CODE (op1) == CONST_INT
2257               && INTVAL (op1) > 0
2258               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2259             temp = expand_binop (mode,
2260                                  left ? rotr_optab : rotl_optab,
2261                                  shifted,
2262                                  GEN_INT (GET_MODE_BITSIZE (mode)
2263                                           - INTVAL (op1)),
2264                                  target, unsignedp, methods);
2265         }
2266       else if (unsignedp)
2267         temp = expand_binop (mode,
2268                              left ? ashl_optab : lshr_optab,
2269                              shifted, op1, target, unsignedp, methods);
2270
2271       /* Do arithmetic shifts.
2272          Also, if we are going to widen the operand, we can just as well
2273          use an arithmetic right-shift instead of a logical one.  */
2274       if (temp == 0 && ! rotate
2275           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2276         {
2277           enum optab_methods methods1 = methods;
2278
2279           /* If trying to widen a log shift to an arithmetic shift,
2280              don't accept an arithmetic shift of the same size.  */
2281           if (unsignedp)
2282             methods1 = OPTAB_MUST_WIDEN;
2283
2284           /* Arithmetic shift */
2285
2286           temp = expand_binop (mode,
2287                                left ? ashl_optab : ashr_optab,
2288                                shifted, op1, target, unsignedp, methods1);
2289         }
2290
2291       /* We used to try extzv here for logical right shifts, but that was
2292          only useful for one machine, the VAX, and caused poor code
2293          generation there for lshrdi3, so the code was deleted and a
2294          define_expand for lshrsi3 was added to vax.md.  */
2295     }
2296
2297   gcc_assert (temp);
2298   return temp;
2299 }
2300 \f
2301 enum alg_code { alg_unknown, alg_zero, alg_m, alg_shift,
2302                   alg_add_t_m2, alg_sub_t_m2,
2303                   alg_add_factor, alg_sub_factor,
2304                   alg_add_t2_m, alg_sub_t2_m };
2305
2306 /* This structure holds the "cost" of a multiply sequence.  The
2307    "cost" field holds the total rtx_cost of every operator in the
2308    synthetic multiplication sequence, hence cost(a op b) is defined
2309    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2310    The "latency" field holds the minimum possible latency of the
2311    synthetic multiply, on a hypothetical infinitely parallel CPU.
2312    This is the critical path, or the maximum height, of the expression
2313    tree which is the sum of rtx_costs on the most expensive path from
2314    any leaf to the root.  Hence latency(a op b) is defined as zero for
2315    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2316
2317 struct mult_cost {
2318   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2319   short latency;  /* The latency of the multiplication sequence.  */
2320 };
2321
2322 /* This macro is used to compare a pointer to a mult_cost against an
2323    single integer "rtx_cost" value.  This is equivalent to the macro
2324    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2325 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2326                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2327
2328 /* This macro is used to compare two pointers to mult_costs against
2329    each other.  The macro returns true if X is cheaper than Y.
2330    Currently, the cheaper of two mult_costs is the one with the
2331    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2332 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2333                                  || ((X)->cost == (Y)->cost     \
2334                                      && (X)->latency < (Y)->latency))
2335
2336 /* This structure records a sequence of operations.
2337    `ops' is the number of operations recorded.
2338    `cost' is their total cost.
2339    The operations are stored in `op' and the corresponding
2340    logarithms of the integer coefficients in `log'.
2341
2342    These are the operations:
2343    alg_zero             total := 0;
2344    alg_m                total := multiplicand;
2345    alg_shift            total := total * coeff
2346    alg_add_t_m2         total := total + multiplicand * coeff;
2347    alg_sub_t_m2         total := total - multiplicand * coeff;
2348    alg_add_factor       total := total * coeff + total;
2349    alg_sub_factor       total := total * coeff - total;
2350    alg_add_t2_m         total := total * coeff + multiplicand;
2351    alg_sub_t2_m         total := total * coeff - multiplicand;
2352
2353    The first operand must be either alg_zero or alg_m.  */
2354
2355 struct algorithm
2356 {
2357   struct mult_cost cost;
2358   short ops;
2359   /* The size of the OP and LOG fields are not directly related to the
2360      word size, but the worst-case algorithms will be if we have few
2361      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2362      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2363      in total wordsize operations.  */
2364   enum alg_code op[MAX_BITS_PER_WORD];
2365   char log[MAX_BITS_PER_WORD];
2366 };
2367
2368 /* The entry for our multiplication cache/hash table.  */
2369 struct alg_hash_entry {
2370   /* The number we are multiplying by.  */
2371   unsigned int t;
2372
2373   /* The mode in which we are multiplying something by T.  */
2374   enum machine_mode mode;
2375
2376   /* The best multiplication algorithm for t.  */
2377   enum alg_code alg;
2378 };
2379
2380 /* The number of cache/hash entries.  */
2381 #define NUM_ALG_HASH_ENTRIES 307
2382
2383 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2384    actually a hash table.  If we have a collision, that the older
2385    entry is kicked out.  */
2386 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2387
2388 /* Indicates the type of fixup needed after a constant multiplication.
2389    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2390    the result should be negated, and ADD_VARIANT means that the
2391    multiplicand should be added to the result.  */
2392 enum mult_variant {basic_variant, negate_variant, add_variant};
2393
2394 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2395                         const struct mult_cost *, enum machine_mode mode);
2396 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2397                                  struct algorithm *, enum mult_variant *, int);
2398 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2399                               const struct algorithm *, enum mult_variant);
2400 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2401                                                  int, rtx *, int *, int *);
2402 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2403 static rtx extract_high_half (enum machine_mode, rtx);
2404 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2405 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2406                                        int, int);
2407 /* Compute and return the best algorithm for multiplying by T.
2408    The algorithm must cost less than cost_limit
2409    If retval.cost >= COST_LIMIT, no algorithm was found and all
2410    other field of the returned struct are undefined.
2411    MODE is the machine mode of the multiplication.  */
2412
2413 static void
2414 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2415             const struct mult_cost *cost_limit, enum machine_mode mode)
2416 {
2417   int m;
2418   struct algorithm *alg_in, *best_alg;
2419   struct mult_cost best_cost;
2420   struct mult_cost new_limit;
2421   int op_cost, op_latency;
2422   unsigned HOST_WIDE_INT q;
2423   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2424   int hash_index;
2425   bool cache_hit = false;
2426   enum alg_code cache_alg = alg_zero;
2427
2428   /* Indicate that no algorithm is yet found.  If no algorithm
2429      is found, this value will be returned and indicate failure.  */
2430   alg_out->cost.cost = cost_limit->cost + 1;
2431   alg_out->cost.latency = cost_limit->latency + 1;
2432
2433   if (cost_limit->cost < 0
2434       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2435     return;
2436
2437   /* Restrict the bits of "t" to the multiplication's mode.  */
2438   t &= GET_MODE_MASK (mode);
2439
2440   /* t == 1 can be done in zero cost.  */
2441   if (t == 1)
2442     {
2443       alg_out->ops = 1;
2444       alg_out->cost.cost = 0;
2445       alg_out->cost.latency = 0;
2446       alg_out->op[0] = alg_m;
2447       return;
2448     }
2449
2450   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2451      fail now.  */
2452   if (t == 0)
2453     {
2454       if (MULT_COST_LESS (cost_limit, zero_cost))
2455         return;
2456       else
2457         {
2458           alg_out->ops = 1;
2459           alg_out->cost.cost = zero_cost;
2460           alg_out->cost.latency = zero_cost;
2461           alg_out->op[0] = alg_zero;
2462           return;
2463         }
2464     }
2465
2466   /* We'll be needing a couple extra algorithm structures now.  */
2467
2468   alg_in = alloca (sizeof (struct algorithm));
2469   best_alg = alloca (sizeof (struct algorithm));
2470   best_cost = *cost_limit;
2471
2472   /* Compute the hash index.  */
2473   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2474
2475   /* See if we already know what to do for T.  */
2476   if (alg_hash[hash_index].t == t
2477       && alg_hash[hash_index].mode == mode
2478       && alg_hash[hash_index].alg != alg_unknown)
2479     {
2480       cache_hit = true;
2481       cache_alg = alg_hash[hash_index].alg;
2482       switch (cache_alg)
2483         {
2484         case alg_shift:
2485           goto do_alg_shift;
2486
2487         case alg_add_t_m2:
2488         case alg_sub_t_m2:
2489           goto do_alg_addsub_t_m2;
2490
2491         case alg_add_factor:
2492         case alg_sub_factor:
2493           goto do_alg_addsub_factor;
2494
2495         case alg_add_t2_m:
2496           goto do_alg_add_t2_m;
2497
2498         case alg_sub_t2_m:
2499           goto do_alg_sub_t2_m;
2500
2501         default:
2502           gcc_unreachable ();
2503         }
2504     }
2505
2506   /* If we have a group of zero bits at the low-order part of T, try
2507      multiplying by the remaining bits and then doing a shift.  */
2508
2509   if ((t & 1) == 0)
2510     {
2511     do_alg_shift:
2512       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2513       if (m < maxm)
2514         {
2515           q = t >> m;
2516           /* The function expand_shift will choose between a shift and
2517              a sequence of additions, so the observed cost is given as
2518              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2519           op_cost = m * add_cost[mode];
2520           if (shift_cost[mode][m] < op_cost)
2521             op_cost = shift_cost[mode][m];
2522           new_limit.cost = best_cost.cost - op_cost;
2523           new_limit.latency = best_cost.latency - op_cost;
2524           synth_mult (alg_in, q, &new_limit, mode);
2525
2526           alg_in->cost.cost += op_cost;
2527           alg_in->cost.latency += op_cost;
2528           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2529             {
2530               struct algorithm *x;
2531               best_cost = alg_in->cost;
2532               x = alg_in, alg_in = best_alg, best_alg = x;
2533               best_alg->log[best_alg->ops] = m;
2534               best_alg->op[best_alg->ops] = alg_shift;
2535             }
2536         }
2537       if (cache_hit)
2538         goto done;
2539     }
2540
2541   /* If we have an odd number, add or subtract one.  */
2542   if ((t & 1) != 0)
2543     {
2544       unsigned HOST_WIDE_INT w;
2545
2546     do_alg_addsub_t_m2:
2547       for (w = 1; (w & t) != 0; w <<= 1)
2548         ;
2549       /* If T was -1, then W will be zero after the loop.  This is another
2550          case where T ends with ...111.  Handling this with (T + 1) and
2551          subtract 1 produces slightly better code and results in algorithm
2552          selection much faster than treating it like the ...0111 case
2553          below.  */
2554       if (w == 0
2555           || (w > 2
2556               /* Reject the case where t is 3.
2557                  Thus we prefer addition in that case.  */
2558               && t != 3))
2559         {
2560           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2561
2562           op_cost = add_cost[mode];
2563           new_limit.cost = best_cost.cost - op_cost;
2564           new_limit.latency = best_cost.latency - op_cost;
2565           synth_mult (alg_in, t + 1, &new_limit, mode);
2566
2567           alg_in->cost.cost += op_cost;
2568           alg_in->cost.latency += op_cost;
2569           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2570             {
2571               struct algorithm *x;
2572               best_cost = alg_in->cost;
2573               x = alg_in, alg_in = best_alg, best_alg = x;
2574               best_alg->log[best_alg->ops] = 0;
2575               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2576             }
2577         }
2578       else
2579         {
2580           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2581
2582           op_cost = add_cost[mode];
2583           new_limit.cost = best_cost.cost - op_cost;
2584           new_limit.latency = best_cost.latency - op_cost;
2585           synth_mult (alg_in, t - 1, &new_limit, mode);
2586
2587           alg_in->cost.cost += op_cost;
2588           alg_in->cost.latency += op_cost;
2589           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2590             {
2591               struct algorithm *x;
2592               best_cost = alg_in->cost;
2593               x = alg_in, alg_in = best_alg, best_alg = x;
2594               best_alg->log[best_alg->ops] = 0;
2595               best_alg->op[best_alg->ops] = alg_add_t_m2;
2596             }
2597         }
2598       if (cache_hit)
2599         goto done;
2600     }
2601
2602   /* Look for factors of t of the form
2603      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2604      If we find such a factor, we can multiply by t using an algorithm that
2605      multiplies by q, shift the result by m and add/subtract it to itself.
2606
2607      We search for large factors first and loop down, even if large factors
2608      are less probable than small; if we find a large factor we will find a
2609      good sequence quickly, and therefore be able to prune (by decreasing
2610      COST_LIMIT) the search.  */
2611
2612  do_alg_addsub_factor:
2613   for (m = floor_log2 (t - 1); m >= 2; m--)
2614     {
2615       unsigned HOST_WIDE_INT d;
2616
2617       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2618       if (t % d == 0 && t > d && m < maxm
2619           && (!cache_hit || cache_alg == alg_add_factor))
2620         {
2621           /* If the target has a cheap shift-and-add instruction use
2622              that in preference to a shift insn followed by an add insn.
2623              Assume that the shift-and-add is "atomic" with a latency
2624              equal to its cost, otherwise assume that on superscalar
2625              hardware the shift may be executed concurrently with the
2626              earlier steps in the algorithm.  */
2627           op_cost = add_cost[mode] + shift_cost[mode][m];
2628           if (shiftadd_cost[mode][m] < op_cost)
2629             {
2630               op_cost = shiftadd_cost[mode][m];
2631               op_latency = op_cost;
2632             }
2633           else
2634             op_latency = add_cost[mode];
2635
2636           new_limit.cost = best_cost.cost - op_cost;
2637           new_limit.latency = best_cost.latency - op_latency;
2638           synth_mult (alg_in, t / d, &new_limit, mode);
2639
2640           alg_in->cost.cost += op_cost;
2641           alg_in->cost.latency += op_latency;
2642           if (alg_in->cost.latency < op_cost)
2643             alg_in->cost.latency = op_cost;
2644           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2645             {
2646               struct algorithm *x;
2647               best_cost = alg_in->cost;
2648               x = alg_in, alg_in = best_alg, best_alg = x;
2649               best_alg->log[best_alg->ops] = m;
2650               best_alg->op[best_alg->ops] = alg_add_factor;
2651             }
2652           /* Other factors will have been taken care of in the recursion.  */
2653           break;
2654         }
2655
2656       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2657       if (t % d == 0 && t > d && m < maxm
2658           && (!cache_hit || cache_alg == alg_sub_factor))
2659         {
2660           /* If the target has a cheap shift-and-subtract insn use
2661              that in preference to a shift insn followed by a sub insn.
2662              Assume that the shift-and-sub is "atomic" with a latency
2663              equal to it's cost, otherwise assume that on superscalar
2664              hardware the shift may be executed concurrently with the
2665              earlier steps in the algorithm.  */
2666           op_cost = add_cost[mode] + shift_cost[mode][m];
2667           if (shiftsub_cost[mode][m] < op_cost)
2668             {
2669               op_cost = shiftsub_cost[mode][m];
2670               op_latency = op_cost;
2671             }
2672           else
2673             op_latency = add_cost[mode];
2674
2675           new_limit.cost = best_cost.cost - op_cost;
2676           new_limit.latency = best_cost.latency - op_latency;
2677           synth_mult (alg_in, t / d, &new_limit, mode);
2678
2679           alg_in->cost.cost += op_cost;
2680           alg_in->cost.latency += op_latency;
2681           if (alg_in->cost.latency < op_cost)
2682             alg_in->cost.latency = op_cost;
2683           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2684             {
2685               struct algorithm *x;
2686               best_cost = alg_in->cost;
2687               x = alg_in, alg_in = best_alg, best_alg = x;
2688               best_alg->log[best_alg->ops] = m;
2689               best_alg->op[best_alg->ops] = alg_sub_factor;
2690             }
2691           break;
2692         }
2693     }
2694   if (cache_hit)
2695     goto done;
2696
2697   /* Try shift-and-add (load effective address) instructions,
2698      i.e. do a*3, a*5, a*9.  */
2699   if ((t & 1) != 0)
2700     {
2701     do_alg_add_t2_m:
2702       q = t - 1;
2703       q = q & -q;
2704       m = exact_log2 (q);
2705       if (m >= 0 && m < maxm)
2706         {
2707           op_cost = shiftadd_cost[mode][m];
2708           new_limit.cost = best_cost.cost - op_cost;
2709           new_limit.latency = best_cost.latency - op_cost;
2710           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2711
2712           alg_in->cost.cost += op_cost;
2713           alg_in->cost.latency += op_cost;
2714           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2715             {
2716               struct algorithm *x;
2717               best_cost = alg_in->cost;
2718               x = alg_in, alg_in = best_alg, best_alg = x;
2719               best_alg->log[best_alg->ops] = m;
2720               best_alg->op[best_alg->ops] = alg_add_t2_m;
2721             }
2722         }
2723       if (cache_hit)
2724         goto done;
2725
2726     do_alg_sub_t2_m:
2727       q = t + 1;
2728       q = q & -q;
2729       m = exact_log2 (q);
2730       if (m >= 0 && m < maxm)
2731         {
2732           op_cost = shiftsub_cost[mode][m];
2733           new_limit.cost = best_cost.cost - op_cost;
2734           new_limit.latency = best_cost.latency - op_cost;
2735           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2736
2737           alg_in->cost.cost += op_cost;
2738           alg_in->cost.latency += op_cost;
2739           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740             {
2741               struct algorithm *x;
2742               best_cost = alg_in->cost;
2743               x = alg_in, alg_in = best_alg, best_alg = x;
2744               best_alg->log[best_alg->ops] = m;
2745               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2746             }
2747         }
2748       if (cache_hit)
2749         goto done;
2750     }
2751
2752  done:
2753   /* If best_cost has not decreased, we have not found any algorithm.  */
2754   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2755     return;
2756
2757   /* Cache the result.  */
2758   if (!cache_hit)
2759     {
2760       alg_hash[hash_index].t = t;
2761       alg_hash[hash_index].mode = mode;
2762       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2763     }
2764
2765   /* If we are getting a too long sequence for `struct algorithm'
2766      to record, make this search fail.  */
2767   if (best_alg->ops == MAX_BITS_PER_WORD)
2768     return;
2769
2770   /* Copy the algorithm from temporary space to the space at alg_out.
2771      We avoid using structure assignment because the majority of
2772      best_alg is normally undefined, and this is a critical function.  */
2773   alg_out->ops = best_alg->ops + 1;
2774   alg_out->cost = best_cost;
2775   memcpy (alg_out->op, best_alg->op,
2776           alg_out->ops * sizeof *alg_out->op);
2777   memcpy (alg_out->log, best_alg->log,
2778           alg_out->ops * sizeof *alg_out->log);
2779 }
2780 \f
2781 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2782    Try three variations:
2783
2784        - a shift/add sequence based on VAL itself
2785        - a shift/add sequence based on -VAL, followed by a negation
2786        - a shift/add sequence based on VAL - 1, followed by an addition.
2787
2788    Return true if the cheapest of these cost less than MULT_COST,
2789    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2790
2791 static bool
2792 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2793                      struct algorithm *alg, enum mult_variant *variant,
2794                      int mult_cost)
2795 {
2796   struct algorithm alg2;
2797   struct mult_cost limit;
2798   int op_cost;
2799
2800   *variant = basic_variant;
2801   limit.cost = mult_cost;
2802   limit.latency = mult_cost;
2803   synth_mult (alg, val, &limit, mode);
2804
2805   /* This works only if the inverted value actually fits in an
2806      `unsigned int' */
2807   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2808     {
2809       op_cost = neg_cost[mode];
2810       if (MULT_COST_LESS (&alg->cost, mult_cost))
2811         {
2812           limit.cost = alg->cost.cost - op_cost;
2813           limit.latency = alg->cost.latency - op_cost;
2814         }
2815       else
2816         {
2817           limit.cost = mult_cost - op_cost;
2818           limit.latency = mult_cost - op_cost;
2819         }
2820
2821       synth_mult (&alg2, -val, &limit, mode);
2822       alg2.cost.cost += op_cost;
2823       alg2.cost.latency += op_cost;
2824       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2825         *alg = alg2, *variant = negate_variant;
2826     }
2827
2828   /* This proves very useful for division-by-constant.  */
2829   op_cost = add_cost[mode];
2830   if (MULT_COST_LESS (&alg->cost, mult_cost))
2831     {
2832       limit.cost = alg->cost.cost - op_cost;
2833       limit.latency = alg->cost.latency - op_cost;
2834     }
2835   else
2836     {
2837       limit.cost = mult_cost - op_cost;
2838       limit.latency = mult_cost - op_cost;
2839     }
2840
2841   synth_mult (&alg2, val - 1, &limit, mode);
2842   alg2.cost.cost += op_cost;
2843   alg2.cost.latency += op_cost;
2844   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2845     *alg = alg2, *variant = add_variant;
2846
2847   return MULT_COST_LESS (&alg->cost, mult_cost);
2848 }
2849
2850 /* A subroutine of expand_mult, used for constant multiplications.
2851    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2852    convenient.  Use the shift/add sequence described by ALG and apply
2853    the final fixup specified by VARIANT.  */
2854
2855 static rtx
2856 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2857                    rtx target, const struct algorithm *alg,
2858                    enum mult_variant variant)
2859 {
2860   HOST_WIDE_INT val_so_far;
2861   rtx insn, accum, tem;
2862   int opno;
2863   enum machine_mode nmode;
2864
2865   /* Avoid referencing memory over and over.
2866      For speed, but also for correctness when mem is volatile.  */
2867   if (MEM_P (op0))
2868     op0 = force_reg (mode, op0);
2869
2870   /* ACCUM starts out either as OP0 or as a zero, depending on
2871      the first operation.  */
2872
2873   if (alg->op[0] == alg_zero)
2874     {
2875       accum = copy_to_mode_reg (mode, const0_rtx);
2876       val_so_far = 0;
2877     }
2878   else if (alg->op[0] == alg_m)
2879     {
2880       accum = copy_to_mode_reg (mode, op0);
2881       val_so_far = 1;
2882     }
2883   else
2884     gcc_unreachable ();
2885
2886   for (opno = 1; opno < alg->ops; opno++)
2887     {
2888       int log = alg->log[opno];
2889       rtx shift_subtarget = optimize ? 0 : accum;
2890       rtx add_target
2891         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2892            && !optimize)
2893           ? target : 0;
2894       rtx accum_target = optimize ? 0 : accum;
2895
2896       switch (alg->op[opno])
2897         {
2898         case alg_shift:
2899           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2900                                 build_int_cst (NULL_TREE, log),
2901                                 NULL_RTX, 0);
2902           val_so_far <<= log;
2903           break;
2904
2905         case alg_add_t_m2:
2906           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2907                               build_int_cst (NULL_TREE, log),
2908                               NULL_RTX, 0);
2909           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2910                                  add_target ? add_target : accum_target);
2911           val_so_far += (HOST_WIDE_INT) 1 << log;
2912           break;
2913
2914         case alg_sub_t_m2:
2915           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2916                               build_int_cst (NULL_TREE, log),
2917                               NULL_RTX, 0);
2918           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2919                                  add_target ? add_target : accum_target);
2920           val_so_far -= (HOST_WIDE_INT) 1 << log;
2921           break;
2922
2923         case alg_add_t2_m:
2924           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2925                                 build_int_cst (NULL_TREE, log),
2926                                 shift_subtarget,
2927                                 0);
2928           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2929                                  add_target ? add_target : accum_target);
2930           val_so_far = (val_so_far << log) + 1;
2931           break;
2932
2933         case alg_sub_t2_m:
2934           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2935                                 build_int_cst (NULL_TREE, log),
2936                                 shift_subtarget, 0);
2937           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2938                                  add_target ? add_target : accum_target);
2939           val_so_far = (val_so_far << log) - 1;
2940           break;
2941
2942         case alg_add_factor:
2943           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2944                               build_int_cst (NULL_TREE, log),
2945                               NULL_RTX, 0);
2946           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2947                                  add_target ? add_target : accum_target);
2948           val_so_far += val_so_far << log;
2949           break;
2950
2951         case alg_sub_factor:
2952           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2953                               build_int_cst (NULL_TREE, log),
2954                               NULL_RTX, 0);
2955           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2956                                  (add_target
2957                                   ? add_target : (optimize ? 0 : tem)));
2958           val_so_far = (val_so_far << log) - val_so_far;
2959           break;
2960
2961         default:
2962           gcc_unreachable ();
2963         }
2964
2965       /* Write a REG_EQUAL note on the last insn so that we can cse
2966          multiplication sequences.  Note that if ACCUM is a SUBREG,
2967          we've set the inner register and must properly indicate
2968          that.  */
2969
2970       tem = op0, nmode = mode;
2971       if (GET_CODE (accum) == SUBREG)
2972         {
2973           nmode = GET_MODE (SUBREG_REG (accum));
2974           tem = gen_lowpart (nmode, op0);
2975         }
2976
2977       insn = get_last_insn ();
2978       set_unique_reg_note (insn, REG_EQUAL,
2979                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2980     }
2981
2982   if (variant == negate_variant)
2983     {
2984       val_so_far = -val_so_far;
2985       accum = expand_unop (mode, neg_optab, accum, target, 0);
2986     }
2987   else if (variant == add_variant)
2988     {
2989       val_so_far = val_so_far + 1;
2990       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2991     }
2992
2993   /* Compare only the bits of val and val_so_far that are significant
2994      in the result mode, to avoid sign-/zero-extension confusion.  */
2995   val &= GET_MODE_MASK (mode);
2996   val_so_far &= GET_MODE_MASK (mode);
2997   gcc_assert (val == val_so_far);
2998
2999   return accum;
3000 }
3001
3002 /* Perform a multiplication and return an rtx for the result.
3003    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3004    TARGET is a suggestion for where to store the result (an rtx).
3005
3006    We check specially for a constant integer as OP1.
3007    If you want this check for OP0 as well, then before calling
3008    you should swap the two operands if OP0 would be constant.  */
3009
3010 rtx
3011 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3012              int unsignedp)
3013 {
3014   rtx const_op1 = op1;
3015   enum mult_variant variant;
3016   struct algorithm algorithm;
3017
3018   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3019      less than or equal in size to `unsigned int' this doesn't matter.
3020      If the mode is larger than `unsigned int', then synth_mult works only
3021      if the constant value exactly fits in an `unsigned int' without any
3022      truncation.  This means that multiplying by negative values does
3023      not work; results are off by 2^32 on a 32 bit machine.  */
3024
3025   /* If we are multiplying in DImode, it may still be a win
3026      to try to work with shifts and adds.  */
3027   if (GET_CODE (op1) == CONST_DOUBLE
3028       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
3029       && HOST_BITS_PER_INT >= BITS_PER_WORD
3030       && CONST_DOUBLE_HIGH (op1) == 0)
3031     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
3032   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
3033            && GET_CODE (op1) == CONST_INT
3034            && INTVAL (op1) < 0)
3035     const_op1 = 0;
3036
3037   /* We used to test optimize here, on the grounds that it's better to
3038      produce a smaller program when -O is not used.
3039      But this causes such a terrible slowdown sometimes
3040      that it seems better to use synth_mult always.  */
3041
3042   if (const_op1 && GET_CODE (const_op1) == CONST_INT
3043       && (unsignedp || !flag_trapv))
3044     {
3045       HOST_WIDE_INT coeff = INTVAL (const_op1);
3046       int mult_cost;
3047
3048       /* Special case powers of two.  */
3049       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3050         {
3051           if (coeff == 0)
3052             return const0_rtx;
3053           if (coeff == 1)
3054             return op0;
3055           return expand_shift (LSHIFT_EXPR, mode, op0,
3056                                build_int_cst (NULL_TREE, floor_log2 (coeff)),
3057                                target, unsignedp);
3058         }
3059
3060       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
3061       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3062                                mult_cost))
3063         return expand_mult_const (mode, op0, coeff, target,
3064                                   &algorithm, variant);
3065     }
3066
3067   if (GET_CODE (op0) == CONST_DOUBLE)
3068     {
3069       rtx temp = op0;
3070       op0 = op1;
3071       op1 = temp;
3072     }
3073
3074   /* Expand x*2.0 as x+x.  */
3075   if (GET_CODE (op1) == CONST_DOUBLE
3076       && GET_MODE_CLASS (mode) == MODE_FLOAT)
3077     {
3078       REAL_VALUE_TYPE d;
3079       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3080
3081       if (REAL_VALUES_EQUAL (d, dconst2))
3082         {
3083           op0 = force_reg (GET_MODE (op0), op0);
3084           return expand_binop (mode, add_optab, op0, op0,
3085                                target, unsignedp, OPTAB_LIB_WIDEN);
3086         }
3087     }
3088
3089   /* This used to use umul_optab if unsigned, but for non-widening multiply
3090      there is no difference between signed and unsigned.  */
3091   op0 = expand_binop (mode,
3092                       ! unsignedp
3093                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3094                       ? smulv_optab : smul_optab,
3095                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3096   gcc_assert (op0);
3097   return op0;
3098 }
3099 \f
3100 /* Return the smallest n such that 2**n >= X.  */
3101
3102 int
3103 ceil_log2 (unsigned HOST_WIDE_INT x)
3104 {
3105   return floor_log2 (x - 1) + 1;
3106 }
3107
3108 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3109    replace division by D, and put the least significant N bits of the result
3110    in *MULTIPLIER_PTR and return the most significant bit.
3111
3112    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3113    needed precision is in PRECISION (should be <= N).
3114
3115    PRECISION should be as small as possible so this function can choose
3116    multiplier more freely.
3117
3118    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3119    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3120
3121    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3122    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3123
3124 static
3125 unsigned HOST_WIDE_INT
3126 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3127                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3128 {
3129   HOST_WIDE_INT mhigh_hi, mlow_hi;
3130   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3131   int lgup, post_shift;
3132   int pow, pow2;
3133   unsigned HOST_WIDE_INT nl, dummy1;
3134   HOST_WIDE_INT nh, dummy2;
3135
3136   /* lgup = ceil(log2(divisor)); */
3137   lgup = ceil_log2 (d);
3138
3139   gcc_assert (lgup <= n);
3140
3141   pow = n + lgup;
3142   pow2 = n + lgup - precision;
3143
3144   /* We could handle this with some effort, but this case is much
3145      better handled directly with a scc insn, so rely on caller using
3146      that.  */
3147   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3148
3149   /* mlow = 2^(N + lgup)/d */
3150  if (pow >= HOST_BITS_PER_WIDE_INT)
3151     {
3152       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3153       nl = 0;
3154     }
3155   else
3156     {
3157       nh = 0;
3158       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3159     }
3160   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3161                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3162
3163   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3164   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3165     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3166   else
3167     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3168   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3169                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3170
3171   gcc_assert (!mhigh_hi || nh - d < d);
3172   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3173   /* Assert that mlow < mhigh.  */
3174   gcc_assert (mlow_hi < mhigh_hi
3175               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3176
3177   /* If precision == N, then mlow, mhigh exceed 2^N
3178      (but they do not exceed 2^(N+1)).  */
3179
3180   /* Reduce to lowest terms.  */
3181   for (post_shift = lgup; post_shift > 0; post_shift--)
3182     {
3183       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3184       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3185       if (ml_lo >= mh_lo)
3186         break;
3187
3188       mlow_hi = 0;
3189       mlow_lo = ml_lo;
3190       mhigh_hi = 0;
3191       mhigh_lo = mh_lo;
3192     }
3193
3194   *post_shift_ptr = post_shift;
3195   *lgup_ptr = lgup;
3196   if (n < HOST_BITS_PER_WIDE_INT)
3197     {
3198       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3199       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3200       return mhigh_lo >= mask;
3201     }
3202   else
3203     {
3204       *multiplier_ptr = GEN_INT (mhigh_lo);
3205       return mhigh_hi;
3206     }
3207 }
3208
3209 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3210    congruent to 1 (mod 2**N).  */
3211
3212 static unsigned HOST_WIDE_INT
3213 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3214 {
3215   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3216
3217   /* The algorithm notes that the choice y = x satisfies
3218      x*y == 1 mod 2^3, since x is assumed odd.
3219      Each iteration doubles the number of bits of significance in y.  */
3220
3221   unsigned HOST_WIDE_INT mask;
3222   unsigned HOST_WIDE_INT y = x;
3223   int nbit = 3;
3224
3225   mask = (n == HOST_BITS_PER_WIDE_INT
3226           ? ~(unsigned HOST_WIDE_INT) 0
3227           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3228
3229   while (nbit < n)
3230     {
3231       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3232       nbit *= 2;
3233     }
3234   return y;
3235 }
3236
3237 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3238    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3239    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3240    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3241    become signed.
3242
3243    The result is put in TARGET if that is convenient.
3244
3245    MODE is the mode of operation.  */
3246
3247 rtx
3248 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3249                              rtx op1, rtx target, int unsignedp)
3250 {
3251   rtx tem;
3252   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3253
3254   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3255                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3256                       NULL_RTX, 0);
3257   tem = expand_and (mode, tem, op1, NULL_RTX);
3258   adj_operand
3259     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3260                      adj_operand);
3261
3262   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3263                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3264                       NULL_RTX, 0);
3265   tem = expand_and (mode, tem, op0, NULL_RTX);
3266   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3267                           target);
3268
3269   return target;
3270 }
3271
3272 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3273
3274 static rtx
3275 extract_high_half (enum machine_mode mode, rtx op)
3276 {
3277   enum machine_mode wider_mode;
3278
3279   if (mode == word_mode)
3280     return gen_highpart (mode, op);
3281
3282   wider_mode = GET_MODE_WIDER_MODE (mode);
3283   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3284                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3285   return convert_modes (mode, wider_mode, op, 0);
3286 }
3287
3288 /* Like expand_mult_highpart, but only consider using a multiplication
3289    optab.  OP1 is an rtx for the constant operand.  */
3290
3291 static rtx
3292 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3293                             rtx target, int unsignedp, int max_cost)
3294 {
3295   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3296   enum machine_mode wider_mode;
3297   optab moptab;
3298   rtx tem;
3299   int size;
3300
3301   wider_mode = GET_MODE_WIDER_MODE (mode);
3302   size = GET_MODE_BITSIZE (mode);
3303
3304   /* Firstly, try using a multiplication insn that only generates the needed
3305      high part of the product, and in the sign flavor of unsignedp.  */
3306   if (mul_highpart_cost[mode] < max_cost)
3307     {
3308       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3309       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3310                           unsignedp, OPTAB_DIRECT);
3311       if (tem)
3312         return tem;
3313     }
3314
3315   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3316      Need to adjust the result after the multiplication.  */
3317   if (size - 1 < BITS_PER_WORD
3318       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3319           + 4 * add_cost[mode] < max_cost))
3320     {
3321       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3322       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3323                           unsignedp, OPTAB_DIRECT);
3324       if (tem)
3325         /* We used the wrong signedness.  Adjust the result.  */
3326         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3327                                             tem, unsignedp);
3328     }
3329
3330   /* Try widening multiplication.  */
3331   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3332   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3333       && mul_widen_cost[wider_mode] < max_cost)
3334     {
3335       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3336                           unsignedp, OPTAB_WIDEN);
3337       if (tem)
3338         return extract_high_half (mode, tem);
3339     }
3340
3341   /* Try widening the mode and perform a non-widening multiplication.  */
3342   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3343       && size - 1 < BITS_PER_WORD
3344       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3345     {
3346       rtx insns, wop0, wop1;
3347
3348       /* We need to widen the operands, for example to ensure the
3349          constant multiplier is correctly sign or zero extended.
3350          Use a sequence to clean-up any instructions emitted by
3351          the conversions if things don't work out.  */
3352       start_sequence ();
3353       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3354       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3355       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3356                           unsignedp, OPTAB_WIDEN);
3357       insns = get_insns ();
3358       end_sequence ();
3359
3360       if (tem)
3361         {
3362           emit_insn (insns);
3363           return extract_high_half (mode, tem);
3364         }
3365     }
3366
3367   /* Try widening multiplication of opposite signedness, and adjust.  */
3368   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3369   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3370       && size - 1 < BITS_PER_WORD
3371       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3372           + 4 * add_cost[mode] < max_cost))
3373     {
3374       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3375                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3376       if (tem != 0)
3377         {
3378           tem = extract_high_half (mode, tem);
3379           /* We used the wrong signedness.  Adjust the result.  */
3380           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3381                                               target, unsignedp);
3382         }
3383     }
3384
3385   return 0;
3386 }
3387
3388 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3389    putting the high half of the result in TARGET if that is convenient,
3390    and return where the result is.  If the operation can not be performed,
3391    0 is returned.
3392
3393    MODE is the mode of operation and result.
3394
3395    UNSIGNEDP nonzero means unsigned multiply.
3396
3397    MAX_COST is the total allowed cost for the expanded RTL.  */
3398
3399 static rtx
3400 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3401                       rtx target, int unsignedp, int max_cost)
3402 {
3403   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3404   unsigned HOST_WIDE_INT cnst1;
3405   int extra_cost;
3406   bool sign_adjust = false;
3407   enum mult_variant variant;
3408   struct algorithm alg;
3409   rtx tem;
3410
3411   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3412   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3413
3414   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3415
3416   /* We can't optimize modes wider than BITS_PER_WORD.
3417      ??? We might be able to perform double-word arithmetic if
3418      mode == word_mode, however all the cost calculations in
3419      synth_mult etc. assume single-word operations.  */
3420   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3421     return expand_mult_highpart_optab (mode, op0, op1, target,
3422                                        unsignedp, max_cost);
3423
3424   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3425
3426   /* Check whether we try to multiply by a negative constant.  */
3427   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3428     {
3429       sign_adjust = true;
3430       extra_cost += add_cost[mode];
3431     }
3432
3433   /* See whether shift/add multiplication is cheap enough.  */
3434   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3435                            max_cost - extra_cost))
3436     {
3437       /* See whether the specialized multiplication optabs are
3438          cheaper than the shift/add version.  */
3439       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3440                                         alg.cost.cost + extra_cost);
3441       if (tem)
3442         return tem;
3443
3444       tem = convert_to_mode (wider_mode, op0, unsignedp);
3445       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3446       tem = extract_high_half (mode, tem);
3447
3448       /* Adjust result for signedness.  */
3449       if (sign_adjust)
3450         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3451
3452       return tem;
3453     }
3454   return expand_mult_highpart_optab (mode, op0, op1, target,
3455                                      unsignedp, max_cost);
3456 }
3457
3458
3459 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3460
3461 static rtx
3462 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3463 {
3464   unsigned HOST_WIDE_INT masklow, maskhigh;
3465   rtx result, temp, shift, label;
3466   int logd;
3467
3468   logd = floor_log2 (d);
3469   result = gen_reg_rtx (mode);
3470
3471   /* Avoid conditional branches when they're expensive.  */
3472   if (BRANCH_COST >= 2
3473       && !optimize_size)
3474     {
3475       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3476                                       mode, 0, -1);
3477       if (signmask)
3478         {
3479           signmask = force_reg (mode, signmask);
3480           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3481           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3482
3483           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3484              which instruction sequence to use.  If logical right shifts
3485              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3486              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3487
3488           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3489           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3490               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3491             {
3492               temp = expand_binop (mode, xor_optab, op0, signmask,
3493                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3494               temp = expand_binop (mode, sub_optab, temp, signmask,
3495                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3496               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3497                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3498               temp = expand_binop (mode, xor_optab, temp, signmask,
3499                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3500               temp = expand_binop (mode, sub_optab, temp, signmask,
3501                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3502             }
3503           else
3504             {
3505               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3506                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3507               signmask = force_reg (mode, signmask);
3508
3509               temp = expand_binop (mode, add_optab, op0, signmask,
3510                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3511               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3512                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3513               temp = expand_binop (mode, sub_optab, temp, signmask,
3514                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3515             }
3516           return temp;
3517         }
3518     }
3519
3520   /* Mask contains the mode's signbit and the significant bits of the
3521      modulus.  By including the signbit in the operation, many targets
3522      can avoid an explicit compare operation in the following comparison
3523      against zero.  */
3524
3525   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3526   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3527     {
3528       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3529       maskhigh = -1;
3530     }
3531   else
3532     maskhigh = (HOST_WIDE_INT) -1
3533                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3534
3535   temp = expand_binop (mode, and_optab, op0,
3536                        immed_double_const (masklow, maskhigh, mode),
3537                        result, 1, OPTAB_LIB_WIDEN);
3538   if (temp != result)
3539     emit_move_insn (result, temp);
3540
3541   label = gen_label_rtx ();
3542   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3543
3544   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3545                        0, OPTAB_LIB_WIDEN);
3546   masklow = (HOST_WIDE_INT) -1 << logd;
3547   maskhigh = -1;
3548   temp = expand_binop (mode, ior_optab, temp,
3549                        immed_double_const (masklow, maskhigh, mode),
3550                        result, 1, OPTAB_LIB_WIDEN);
3551   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3552                        0, OPTAB_LIB_WIDEN);
3553   if (temp != result)
3554     emit_move_insn (result, temp);
3555   emit_label (label);
3556   return result;
3557 }
3558
3559 /* Expand signed division of OP0 by a power of two D in mode MODE.
3560    This routine is only called for positive values of D.  */
3561
3562 static rtx
3563 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3564 {
3565   rtx temp, label;
3566   tree shift;
3567   int logd;
3568
3569   logd = floor_log2 (d);
3570   shift = build_int_cst (NULL_TREE, logd);
3571
3572   if (d == 2 && BRANCH_COST >= 1)
3573     {
3574       temp = gen_reg_rtx (mode);
3575       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3576       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3577                            0, OPTAB_LIB_WIDEN);
3578       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3579     }
3580
3581 #ifdef HAVE_conditional_move
3582   if (BRANCH_COST >= 2)
3583     {
3584       rtx temp2;
3585
3586       /* ??? emit_conditional_move forces a stack adjustment via
3587          compare_from_rtx so, if the sequence is discarded, it will
3588          be lost.  Do it now instead.  */
3589       do_pending_stack_adjust ();
3590
3591       start_sequence ();
3592       temp2 = copy_to_mode_reg (mode, op0);
3593       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3594                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3595       temp = force_reg (mode, temp);
3596
3597       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3598       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3599                                      mode, temp, temp2, mode, 0);
3600       if (temp2)
3601         {
3602           rtx seq = get_insns ();
3603           end_sequence ();
3604           emit_insn (seq);
3605           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3606         }
3607       end_sequence ();
3608     }
3609 #endif
3610
3611   if (BRANCH_COST >= 2)
3612     {
3613       int ushift = GET_MODE_BITSIZE (mode) - logd;
3614
3615       temp = gen_reg_rtx (mode);
3616       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3617       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3618         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3619                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3620       else
3621         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3622                              build_int_cst (NULL_TREE, ushift),
3623                              NULL_RTX, 1);
3624       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3625                            0, OPTAB_LIB_WIDEN);
3626       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3627     }
3628
3629   label = gen_label_rtx ();
3630   temp = copy_to_mode_reg (mode, op0);
3631   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3632   expand_inc (temp, GEN_INT (d - 1));
3633   emit_label (label);
3634   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3635 }
3636 \f
3637 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3638    if that is convenient, and returning where the result is.
3639    You may request either the quotient or the remainder as the result;
3640    specify REM_FLAG nonzero to get the remainder.
3641
3642    CODE is the expression code for which kind of division this is;
3643    it controls how rounding is done.  MODE is the machine mode to use.
3644    UNSIGNEDP nonzero means do unsigned division.  */
3645
3646 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3647    and then correct it by or'ing in missing high bits
3648    if result of ANDI is nonzero.
3649    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3650    This could optimize to a bfexts instruction.
3651    But C doesn't use these operations, so their optimizations are
3652    left for later.  */
3653 /* ??? For modulo, we don't actually need the highpart of the first product,
3654    the low part will do nicely.  And for small divisors, the second multiply
3655    can also be a low-part only multiply or even be completely left out.
3656    E.g. to calculate the remainder of a division by 3 with a 32 bit
3657    multiply, multiply with 0x55555556 and extract the upper two bits;
3658    the result is exact for inputs up to 0x1fffffff.
3659    The input range can be reduced by using cross-sum rules.
3660    For odd divisors >= 3, the following table gives right shift counts
3661    so that if a number is shifted by an integer multiple of the given
3662    amount, the remainder stays the same:
3663    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3664    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3665    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3666    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3667    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3668
3669    Cross-sum rules for even numbers can be derived by leaving as many bits
3670    to the right alone as the divisor has zeros to the right.
3671    E.g. if x is an unsigned 32 bit number:
3672    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3673    */
3674
3675 rtx
3676 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3677                rtx op0, rtx op1, rtx target, int unsignedp)
3678 {
3679   enum machine_mode compute_mode;
3680   rtx tquotient;
3681   rtx quotient = 0, remainder = 0;
3682   rtx last;
3683   int size;
3684   rtx insn, set;
3685   optab optab1, optab2;
3686   int op1_is_constant, op1_is_pow2 = 0;
3687   int max_cost, extra_cost;
3688   static HOST_WIDE_INT last_div_const = 0;
3689   static HOST_WIDE_INT ext_op1;
3690
3691   op1_is_constant = GET_CODE (op1) == CONST_INT;
3692   if (op1_is_constant)
3693     {
3694       ext_op1 = INTVAL (op1);
3695       if (unsignedp)
3696         ext_op1 &= GET_MODE_MASK (mode);
3697       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3698                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3699     }
3700
3701   /*
3702      This is the structure of expand_divmod:
3703
3704      First comes code to fix up the operands so we can perform the operations
3705      correctly and efficiently.
3706
3707      Second comes a switch statement with code specific for each rounding mode.
3708      For some special operands this code emits all RTL for the desired
3709      operation, for other cases, it generates only a quotient and stores it in
3710      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3711      to indicate that it has not done anything.
3712
3713      Last comes code that finishes the operation.  If QUOTIENT is set and
3714      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3715      QUOTIENT is not set, it is computed using trunc rounding.
3716
3717      We try to generate special code for division and remainder when OP1 is a
3718      constant.  If |OP1| = 2**n we can use shifts and some other fast
3719      operations.  For other values of OP1, we compute a carefully selected
3720      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3721      by m.
3722
3723      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3724      half of the product.  Different strategies for generating the product are
3725      implemented in expand_mult_highpart.
3726
3727      If what we actually want is the remainder, we generate that by another
3728      by-constant multiplication and a subtraction.  */
3729
3730   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3731      code below will malfunction if we are, so check here and handle
3732      the special case if so.  */
3733   if (op1 == const1_rtx)
3734     return rem_flag ? const0_rtx : op0;
3735
3736     /* When dividing by -1, we could get an overflow.
3737      negv_optab can handle overflows.  */
3738   if (! unsignedp && op1 == constm1_rtx)
3739     {
3740       if (rem_flag)
3741         return const0_rtx;
3742       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3743                           ? negv_optab : neg_optab, op0, target, 0);
3744     }
3745
3746   if (target
3747       /* Don't use the function value register as a target
3748          since we have to read it as well as write it,
3749          and function-inlining gets confused by this.  */
3750       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3751           /* Don't clobber an operand while doing a multi-step calculation.  */
3752           || ((rem_flag || op1_is_constant)
3753               && (reg_mentioned_p (target, op0)
3754                   || (MEM_P (op0) && MEM_P (target))))
3755           || reg_mentioned_p (target, op1)
3756           || (MEM_P (op1) && MEM_P (target))))
3757     target = 0;
3758
3759   /* Get the mode in which to perform this computation.  Normally it will
3760      be MODE, but sometimes we can't do the desired operation in MODE.
3761      If so, pick a wider mode in which we can do the operation.  Convert
3762      to that mode at the start to avoid repeated conversions.
3763
3764      First see what operations we need.  These depend on the expression
3765      we are evaluating.  (We assume that divxx3 insns exist under the
3766      same conditions that modxx3 insns and that these insns don't normally
3767      fail.  If these assumptions are not correct, we may generate less
3768      efficient code in some cases.)
3769
3770      Then see if we find a mode in which we can open-code that operation
3771      (either a division, modulus, or shift).  Finally, check for the smallest
3772      mode for which we can do the operation with a library call.  */
3773
3774   /* We might want to refine this now that we have division-by-constant
3775      optimization.  Since expand_mult_highpart tries so many variants, it is
3776      not straightforward to generalize this.  Maybe we should make an array
3777      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3778
3779   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3780             ? (unsignedp ? lshr_optab : ashr_optab)
3781             : (unsignedp ? udiv_optab : sdiv_optab));
3782   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3783             ? optab1
3784             : (unsignedp ? udivmod_optab : sdivmod_optab));
3785
3786   for (compute_mode = mode; compute_mode != VOIDmode;
3787        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3788     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3789         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3790       break;
3791
3792   if (compute_mode == VOIDmode)
3793     for (compute_mode = mode; compute_mode != VOIDmode;
3794          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3795       if (optab1->handlers[compute_mode].libfunc
3796           || optab2->handlers[compute_mode].libfunc)
3797         break;
3798
3799   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3800      in expand_binop.  */
3801   if (compute_mode == VOIDmode)
3802     compute_mode = mode;
3803
3804   if (target && GET_MODE (target) == compute_mode)
3805     tquotient = target;
3806   else
3807     tquotient = gen_reg_rtx (compute_mode);
3808
3809   size = GET_MODE_BITSIZE (compute_mode);
3810 #if 0
3811   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3812      (mode), and thereby get better code when OP1 is a constant.  Do that
3813      later.  It will require going over all usages of SIZE below.  */
3814   size = GET_MODE_BITSIZE (mode);
3815 #endif
3816
3817   /* Only deduct something for a REM if the last divide done was
3818      for a different constant.   Then set the constant of the last
3819      divide.  */
3820   max_cost = div_cost[compute_mode]
3821     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3822                       && INTVAL (op1) == last_div_const)
3823        ? mul_cost[compute_mode] + add_cost[compute_mode]
3824        : 0);
3825
3826   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3827
3828   /* Now convert to the best mode to use.  */
3829   if (compute_mode != mode)
3830     {
3831       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3832       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3833
3834       /* convert_modes may have placed op1 into a register, so we
3835          must recompute the following.  */
3836       op1_is_constant = GET_CODE (op1) == CONST_INT;
3837       op1_is_pow2 = (op1_is_constant
3838                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3839                           || (! unsignedp
3840                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3841     }
3842
3843   /* If one of the operands is a volatile MEM, copy it into a register.  */
3844
3845   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3846     op0 = force_reg (compute_mode, op0);
3847   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3848     op1 = force_reg (compute_mode, op1);
3849
3850   /* If we need the remainder or if OP1 is constant, we need to
3851      put OP0 in a register in case it has any queued subexpressions.  */
3852   if (rem_flag || op1_is_constant)
3853     op0 = force_reg (compute_mode, op0);
3854
3855   last = get_last_insn ();
3856
3857   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3858   if (unsignedp)
3859     {
3860       if (code == FLOOR_DIV_EXPR)
3861         code = TRUNC_DIV_EXPR;
3862       if (code == FLOOR_MOD_EXPR)
3863         code = TRUNC_MOD_EXPR;
3864       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3865         code = TRUNC_DIV_EXPR;
3866     }
3867
3868   if (op1 != const0_rtx)
3869     switch (code)
3870       {
3871       case TRUNC_MOD_EXPR:
3872       case TRUNC_DIV_EXPR:
3873         if (op1_is_constant)
3874           {
3875             if (unsignedp)
3876               {
3877                 unsigned HOST_WIDE_INT mh;
3878                 int pre_shift, post_shift;
3879                 int dummy;
3880                 rtx ml;
3881                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3882                                             & GET_MODE_MASK (compute_mode));
3883
3884                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3885                   {
3886                     pre_shift = floor_log2 (d);
3887                     if (rem_flag)
3888                       {
3889                         remainder
3890                           = expand_binop (compute_mode, and_optab, op0,
3891                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3892                                           remainder, 1,
3893                                           OPTAB_LIB_WIDEN);
3894                         if (remainder)
3895                           return gen_lowpart (mode, remainder);
3896                       }
3897                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3898                                              build_int_cst (NULL_TREE,
3899                                                             pre_shift),
3900                                              tquotient, 1);
3901                   }
3902                 else if (size <= HOST_BITS_PER_WIDE_INT)
3903                   {
3904                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3905                       {
3906                         /* Most significant bit of divisor is set; emit an scc
3907                            insn.  */
3908                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3909                                                     compute_mode, 1, 1);
3910                         if (quotient == 0)
3911                           goto fail1;
3912                       }
3913                     else
3914                       {
3915                         /* Find a suitable multiplier and right shift count
3916                            instead of multiplying with D.  */
3917
3918                         mh = choose_multiplier (d, size, size,
3919                                                 &ml, &post_shift, &dummy);
3920
3921                         /* If the suggested multiplier is more than SIZE bits,
3922                            we can do better for even divisors, using an
3923                            initial right shift.  */
3924                         if (mh != 0 && (d & 1) == 0)
3925                           {
3926                             pre_shift = floor_log2 (d & -d);
3927                             mh = choose_multiplier (d >> pre_shift, size,
3928                                                     size - pre_shift,
3929                                                     &ml, &post_shift, &dummy);
3930                             gcc_assert (!mh);
3931                           }
3932                         else
3933                           pre_shift = 0;
3934
3935                         if (mh != 0)
3936                           {
3937                             rtx t1, t2, t3, t4;
3938
3939                             if (post_shift - 1 >= BITS_PER_WORD)
3940                               goto fail1;
3941
3942                             extra_cost
3943                               = (shift_cost[compute_mode][post_shift - 1]
3944                                  + shift_cost[compute_mode][1]
3945                                  + 2 * add_cost[compute_mode]);
3946                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3947                                                        NULL_RTX, 1,
3948                                                        max_cost - extra_cost);
3949                             if (t1 == 0)
3950                               goto fail1;
3951                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3952                                                                op0, t1),
3953                                                 NULL_RTX);
3954                             t3 = expand_shift
3955                               (RSHIFT_EXPR, compute_mode, t2,
3956                                build_int_cst (NULL_TREE, 1),
3957                                NULL_RTX,1);
3958                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3959                                                               t1, t3),
3960                                                 NULL_RTX);
3961                             quotient = expand_shift
3962                               (RSHIFT_EXPR, compute_mode, t4,
3963                                build_int_cst (NULL_TREE, post_shift - 1),
3964                                tquotient, 1);
3965                           }
3966                         else
3967                           {
3968                             rtx t1, t2;
3969
3970                             if (pre_shift >= BITS_PER_WORD
3971                                 || post_shift >= BITS_PER_WORD)
3972                               goto fail1;
3973
3974                             t1 = expand_shift
3975                               (RSHIFT_EXPR, compute_mode, op0,
3976                                build_int_cst (NULL_TREE, pre_shift),
3977                                NULL_RTX, 1);
3978                             extra_cost
3979                               = (shift_cost[compute_mode][pre_shift]
3980                                  + shift_cost[compute_mode][post_shift]);
3981                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3982                                                        NULL_RTX, 1,
3983                                                        max_cost - extra_cost);
3984                             if (t2 == 0)
3985                               goto fail1;
3986                             quotient = expand_shift
3987                               (RSHIFT_EXPR, compute_mode, t2,
3988                                build_int_cst (NULL_TREE, post_shift),
3989                                tquotient, 1);
3990                           }
3991                       }
3992                   }
3993                 else            /* Too wide mode to use tricky code */
3994                   break;
3995
3996                 insn = get_last_insn ();
3997                 if (insn != last
3998                     && (set = single_set (insn)) != 0
3999                     && SET_DEST (set) == quotient)
4000                   set_unique_reg_note (insn,
4001                                        REG_EQUAL,
4002                                        gen_rtx_UDIV (compute_mode, op0, op1));
4003               }
4004             else                /* TRUNC_DIV, signed */
4005               {
4006                 unsigned HOST_WIDE_INT ml;
4007                 int lgup, post_shift;
4008                 rtx mlr;
4009                 HOST_WIDE_INT d = INTVAL (op1);
4010                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4011
4012                 /* n rem d = n rem -d */
4013                 if (rem_flag && d < 0)
4014                   {
4015                     d = abs_d;
4016                     op1 = gen_int_mode (abs_d, compute_mode);
4017                   }
4018
4019                 if (d == 1)
4020                   quotient = op0;
4021                 else if (d == -1)
4022                   quotient = expand_unop (compute_mode, neg_optab, op0,
4023                                           tquotient, 0);
4024                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4025                   {
4026                     /* This case is not handled correctly below.  */
4027                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4028                                                 compute_mode, 1, 1);
4029                     if (quotient == 0)
4030                       goto fail1;
4031                   }
4032                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4033                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4034                                       : sdiv_pow2_cheap[compute_mode])
4035                          /* We assume that cheap metric is true if the
4036                             optab has an expander for this mode.  */
4037                          && (((rem_flag ? smod_optab : sdiv_optab)
4038                               ->handlers[compute_mode].insn_code
4039                               != CODE_FOR_nothing)
4040                              || (sdivmod_optab->handlers[compute_mode]
4041                                  .insn_code != CODE_FOR_nothing)))
4042                   ;
4043                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4044                   {
4045                     if (rem_flag)
4046                       {
4047                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4048                         if (remainder)
4049                           return gen_lowpart (mode, remainder);
4050                       }
4051
4052                     if (sdiv_pow2_cheap[compute_mode]
4053                         && ((sdiv_optab->handlers[compute_mode].insn_code
4054                              != CODE_FOR_nothing)
4055                             || (sdivmod_optab->handlers[compute_mode].insn_code
4056                                 != CODE_FOR_nothing)))
4057                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4058                                                 compute_mode, op0,
4059                                                 gen_int_mode (abs_d,
4060                                                               compute_mode),
4061                                                 NULL_RTX, 0);
4062                     else
4063                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4064
4065                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4066                        negate the quotient.  */
4067                     if (d < 0)
4068                       {
4069                         insn = get_last_insn ();
4070                         if (insn != last
4071                             && (set = single_set (insn)) != 0
4072                             && SET_DEST (set) == quotient
4073                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4074                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4075                           set_unique_reg_note (insn,
4076                                                REG_EQUAL,
4077                                                gen_rtx_DIV (compute_mode,
4078                                                             op0,
4079                                                             GEN_INT
4080                                                             (trunc_int_for_mode
4081                                                              (abs_d,
4082                                                               compute_mode))));
4083
4084                         quotient = expand_unop (compute_mode, neg_optab,
4085                                                 quotient, quotient, 0);
4086                       }
4087                   }
4088                 else if (size <= HOST_BITS_PER_WIDE_INT)
4089                   {
4090                     choose_multiplier (abs_d, size, size - 1,
4091                                        &mlr, &post_shift, &lgup);
4092                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4093                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4094                       {
4095                         rtx t1, t2, t3;
4096
4097                         if (post_shift >= BITS_PER_WORD
4098                             || size - 1 >= BITS_PER_WORD)
4099                           goto fail1;
4100
4101                         extra_cost = (shift_cost[compute_mode][post_shift]
4102                                       + shift_cost[compute_mode][size - 1]
4103                                       + add_cost[compute_mode]);
4104                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4105                                                    NULL_RTX, 0,
4106                                                    max_cost - extra_cost);
4107                         if (t1 == 0)
4108                           goto fail1;
4109                         t2 = expand_shift
4110                           (RSHIFT_EXPR, compute_mode, t1,
4111                            build_int_cst (NULL_TREE, post_shift),
4112                            NULL_RTX, 0);
4113                         t3 = expand_shift
4114                           (RSHIFT_EXPR, compute_mode, op0,
4115                            build_int_cst (NULL_TREE, size - 1),
4116                            NULL_RTX, 0);
4117                         if (d < 0)
4118                           quotient
4119                             = force_operand (gen_rtx_MINUS (compute_mode,
4120                                                             t3, t2),
4121                                              tquotient);
4122                         else
4123                           quotient
4124                             = force_operand (gen_rtx_MINUS (compute_mode,
4125                                                             t2, t3),
4126                                              tquotient);
4127                       }
4128                     else
4129                       {
4130                         rtx t1, t2, t3, t4;
4131
4132                         if (post_shift >= BITS_PER_WORD
4133                             || size - 1 >= BITS_PER_WORD)
4134                           goto fail1;
4135
4136                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4137                         mlr = gen_int_mode (ml, compute_mode);
4138                         extra_cost = (shift_cost[compute_mode][post_shift]
4139                                       + shift_cost[compute_mode][size - 1]
4140                                       + 2 * add_cost[compute_mode]);
4141                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4142                                                    NULL_RTX, 0,
4143                                                    max_cost - extra_cost);
4144                         if (t1 == 0)
4145                           goto fail1;
4146                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4147                                                           t1, op0),
4148                                             NULL_RTX);
4149                         t3 = expand_shift
4150                           (RSHIFT_EXPR, compute_mode, t2,
4151                            build_int_cst (NULL_TREE, post_shift),
4152                            NULL_RTX, 0);
4153                         t4 = expand_shift
4154                           (RSHIFT_EXPR, compute_mode, op0,
4155                            build_int_cst (NULL_TREE, size - 1),
4156                            NULL_RTX, 0);
4157                         if (d < 0)
4158                           quotient
4159                             = force_operand (gen_rtx_MINUS (compute_mode,
4160                                                             t4, t3),
4161                                              tquotient);
4162                         else
4163                           quotient
4164                             = force_operand (gen_rtx_MINUS (compute_mode,
4165                                                             t3, t4),
4166                                              tquotient);
4167                       }
4168                   }
4169                 else            /* Too wide mode to use tricky code */
4170                   break;
4171
4172                 insn = get_last_insn ();
4173                 if (insn != last
4174                     && (set = single_set (insn)) != 0
4175                     && SET_DEST (set) == quotient)
4176                   set_unique_reg_note (insn,
4177                                        REG_EQUAL,
4178                                        gen_rtx_DIV (compute_mode, op0, op1));
4179               }
4180             break;
4181           }
4182       fail1:
4183         delete_insns_since (last);
4184         break;
4185
4186       case FLOOR_DIV_EXPR:
4187       case FLOOR_MOD_EXPR:
4188       /* We will come here only for signed operations.  */
4189         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4190           {
4191             unsigned HOST_WIDE_INT mh;
4192             int pre_shift, lgup, post_shift;
4193             HOST_WIDE_INT d = INTVAL (op1);
4194             rtx ml;
4195
4196             if (d > 0)
4197               {
4198                 /* We could just as easily deal with negative constants here,
4199                    but it does not seem worth the trouble for GCC 2.6.  */
4200                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4201                   {
4202                     pre_shift = floor_log2 (d);
4203                     if (rem_flag)
4204                       {
4205                         remainder = expand_binop (compute_mode, and_optab, op0,
4206                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4207                                                   remainder, 0, OPTAB_LIB_WIDEN);
4208                         if (remainder)
4209                           return gen_lowpart (mode, remainder);
4210                       }
4211                     quotient = expand_shift
4212                       (RSHIFT_EXPR, compute_mode, op0,
4213                        build_int_cst (NULL_TREE, pre_shift),
4214                        tquotient, 0);
4215                   }
4216                 else
4217                   {
4218                     rtx t1, t2, t3, t4;
4219
4220                     mh = choose_multiplier (d, size, size - 1,
4221                                             &ml, &post_shift, &lgup);
4222                     gcc_assert (!mh);
4223
4224                     if (post_shift < BITS_PER_WORD
4225                         && size - 1 < BITS_PER_WORD)
4226                       {
4227                         t1 = expand_shift
4228                           (RSHIFT_EXPR, compute_mode, op0,
4229                            build_int_cst (NULL_TREE, size - 1),
4230                            NULL_RTX, 0);
4231                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4232                                            NULL_RTX, 0, OPTAB_WIDEN);
4233                         extra_cost = (shift_cost[compute_mode][post_shift]
4234                                       + shift_cost[compute_mode][size - 1]
4235                                       + 2 * add_cost[compute_mode]);
4236                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4237                                                    NULL_RTX, 1,
4238                                                    max_cost - extra_cost);
4239                         if (t3 != 0)
4240                           {
4241                             t4 = expand_shift
4242                               (RSHIFT_EXPR, compute_mode, t3,
4243                                build_int_cst (NULL_TREE, post_shift),
4244                                NULL_RTX, 1);
4245                             quotient = expand_binop (compute_mode, xor_optab,
4246                                                      t4, t1, tquotient, 0,
4247                                                      OPTAB_WIDEN);
4248                           }
4249                       }
4250                   }
4251               }
4252             else
4253               {
4254                 rtx nsign, t1, t2, t3, t4;
4255                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4256                                                   op0, constm1_rtx), NULL_RTX);
4257                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4258                                    0, OPTAB_WIDEN);
4259                 nsign = expand_shift
4260                   (RSHIFT_EXPR, compute_mode, t2,
4261                    build_int_cst (NULL_TREE, size - 1),
4262                    NULL_RTX, 0);
4263                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4264                                     NULL_RTX);
4265                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4266                                     NULL_RTX, 0);
4267                 if (t4)
4268                   {
4269                     rtx t5;
4270                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4271                                       NULL_RTX, 0);
4272                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4273                                                             t4, t5),
4274                                               tquotient);
4275                   }
4276               }
4277           }
4278
4279         if (quotient != 0)
4280           break;
4281         delete_insns_since (last);
4282
4283         /* Try using an instruction that produces both the quotient and
4284            remainder, using truncation.  We can easily compensate the quotient
4285            or remainder to get floor rounding, once we have the remainder.
4286            Notice that we compute also the final remainder value here,
4287            and return the result right away.  */
4288         if (target == 0 || GET_MODE (target) != compute_mode)
4289           target = gen_reg_rtx (compute_mode);
4290
4291         if (rem_flag)
4292           {
4293             remainder
4294               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4295             quotient = gen_reg_rtx (compute_mode);
4296           }
4297         else
4298           {
4299             quotient
4300               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4301             remainder = gen_reg_rtx (compute_mode);
4302           }
4303
4304         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4305                                  quotient, remainder, 0))
4306           {
4307             /* This could be computed with a branch-less sequence.
4308                Save that for later.  */
4309             rtx tem;
4310             rtx label = gen_label_rtx ();
4311             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4312             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4313                                 NULL_RTX, 0, OPTAB_WIDEN);
4314             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4315             expand_dec (quotient, const1_rtx);
4316             expand_inc (remainder, op1);
4317             emit_label (label);
4318             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4319           }
4320
4321         /* No luck with division elimination or divmod.  Have to do it
4322            by conditionally adjusting op0 *and* the result.  */
4323         {
4324           rtx label1, label2, label3, label4, label5;
4325           rtx adjusted_op0;
4326           rtx tem;
4327
4328           quotient = gen_reg_rtx (compute_mode);
4329           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4330           label1 = gen_label_rtx ();
4331           label2 = gen_label_rtx ();
4332           label3 = gen_label_rtx ();
4333           label4 = gen_label_rtx ();
4334           label5 = gen_label_rtx ();
4335           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4336           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4337           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4338                               quotient, 0, OPTAB_LIB_WIDEN);
4339           if (tem != quotient)
4340             emit_move_insn (quotient, tem);
4341           emit_jump_insn (gen_jump (label5));
4342           emit_barrier ();
4343           emit_label (label1);
4344           expand_inc (adjusted_op0, const1_rtx);
4345           emit_jump_insn (gen_jump (label4));
4346           emit_barrier ();
4347           emit_label (label2);
4348           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4349           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4350                               quotient, 0, OPTAB_LIB_WIDEN);
4351           if (tem != quotient)
4352             emit_move_insn (quotient, tem);
4353           emit_jump_insn (gen_jump (label5));
4354           emit_barrier ();
4355           emit_label (label3);
4356           expand_dec (adjusted_op0, const1_rtx);
4357           emit_label (label4);
4358           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4359                               quotient, 0, OPTAB_LIB_WIDEN);
4360           if (tem != quotient)
4361             emit_move_insn (quotient, tem);
4362           expand_dec (quotient, const1_rtx);
4363           emit_label (label5);
4364         }
4365         break;
4366
4367       case CEIL_DIV_EXPR:
4368       case CEIL_MOD_EXPR:
4369         if (unsignedp)
4370           {
4371             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4372               {
4373                 rtx t1, t2, t3;
4374                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4375                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4376                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4377                                    tquotient, 1);
4378                 t2 = expand_binop (compute_mode, and_optab, op0,
4379                                    GEN_INT (d - 1),
4380                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4381                 t3 = gen_reg_rtx (compute_mode);
4382                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4383                                       compute_mode, 1, 1);
4384                 if (t3 == 0)
4385                   {
4386                     rtx lab;
4387                     lab = gen_label_rtx ();
4388                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4389                     expand_inc (t1, const1_rtx);
4390                     emit_label (lab);
4391                     quotient = t1;
4392                   }
4393                 else
4394                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4395                                                           t1, t3),
4396                                             tquotient);
4397                 break;
4398               }
4399
4400             /* Try using an instruction that produces both the quotient and
4401                remainder, using truncation.  We can easily compensate the
4402                quotient or remainder to get ceiling rounding, once we have the
4403                remainder.  Notice that we compute also the final remainder
4404                value here, and return the result right away.  */
4405             if (target == 0 || GET_MODE (target) != compute_mode)
4406               target = gen_reg_rtx (compute_mode);
4407
4408             if (rem_flag)
4409               {
4410                 remainder = (REG_P (target)
4411                              ? target : gen_reg_rtx (compute_mode));
4412                 quotient = gen_reg_rtx (compute_mode);
4413               }
4414             else
4415               {
4416                 quotient = (REG_P (target)
4417                             ? target : gen_reg_rtx (compute_mode));
4418                 remainder = gen_reg_rtx (compute_mode);
4419               }
4420
4421             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4422                                      remainder, 1))
4423               {
4424                 /* This could be computed with a branch-less sequence.
4425                    Save that for later.  */
4426                 rtx label = gen_label_rtx ();
4427                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4428                                  compute_mode, label);
4429                 expand_inc (quotient, const1_rtx);
4430                 expand_dec (remainder, op1);
4431                 emit_label (label);
4432                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4433               }
4434
4435             /* No luck with division elimination or divmod.  Have to do it
4436                by conditionally adjusting op0 *and* the result.  */
4437             {
4438               rtx label1, label2;
4439               rtx adjusted_op0, tem;
4440
4441               quotient = gen_reg_rtx (compute_mode);
4442               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4443               label1 = gen_label_rtx ();
4444               label2 = gen_label_rtx ();
4445               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4446                                compute_mode, label1);
4447               emit_move_insn  (quotient, const0_rtx);
4448               emit_jump_insn (gen_jump (label2));
4449               emit_barrier ();
4450               emit_label (label1);
4451               expand_dec (adjusted_op0, const1_rtx);
4452               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4453                                   quotient, 1, OPTAB_LIB_WIDEN);
4454               if (tem != quotient)
4455                 emit_move_insn (quotient, tem);
4456               expand_inc (quotient, const1_rtx);
4457               emit_label (label2);
4458             }
4459           }
4460         else /* signed */
4461           {
4462             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4463                 && INTVAL (op1) >= 0)
4464               {
4465                 /* This is extremely similar to the code for the unsigned case
4466                    above.  For 2.7 we should merge these variants, but for
4467                    2.6.1 I don't want to touch the code for unsigned since that
4468                    get used in C.  The signed case will only be used by other
4469                    languages (Ada).  */
4470
4471                 rtx t1, t2, t3;
4472                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4473                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4474                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4475                                    tquotient, 0);
4476                 t2 = expand_binop (compute_mode, and_optab, op0,
4477                                    GEN_INT (d - 1),
4478                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4479                 t3 = gen_reg_rtx (compute_mode);
4480                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4481                                       compute_mode, 1, 1);
4482                 if (t3 == 0)
4483                   {
4484                     rtx lab;
4485                     lab = gen_label_rtx ();
4486                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4487                     expand_inc (t1, const1_rtx);
4488                     emit_label (lab);
4489                     quotient = t1;
4490                   }
4491                 else
4492                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4493                                                           t1, t3),
4494                                             tquotient);
4495                 break;
4496               }
4497
4498             /* Try using an instruction that produces both the quotient and
4499                remainder, using truncation.  We can easily compensate the
4500                quotient or remainder to get ceiling rounding, once we have the
4501                remainder.  Notice that we compute also the final remainder
4502                value here, and return the result right away.  */
4503             if (target == 0 || GET_MODE (target) != compute_mode)
4504               target = gen_reg_rtx (compute_mode);
4505             if (rem_flag)
4506               {
4507                 remainder= (REG_P (target)
4508                             ? target : gen_reg_rtx (compute_mode));
4509                 quotient = gen_reg_rtx (compute_mode);
4510               }
4511             else
4512               {
4513                 quotient = (REG_P (target)
4514                             ? target : gen_reg_rtx (compute_mode));
4515                 remainder = gen_reg_rtx (compute_mode);
4516               }
4517
4518             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4519                                      remainder, 0))
4520               {
4521                 /* This could be computed with a branch-less sequence.
4522                    Save that for later.  */
4523                 rtx tem;
4524                 rtx label = gen_label_rtx ();
4525                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4526                                  compute_mode, label);
4527                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4528                                     NULL_RTX, 0, OPTAB_WIDEN);
4529                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4530                 expand_inc (quotient, const1_rtx);
4531                 expand_dec (remainder, op1);
4532                 emit_label (label);
4533                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4534               }
4535
4536             /* No luck with division elimination or divmod.  Have to do it
4537                by conditionally adjusting op0 *and* the result.  */
4538             {
4539               rtx label1, label2, label3, label4, label5;
4540               rtx adjusted_op0;
4541               rtx tem;
4542
4543               quotient = gen_reg_rtx (compute_mode);
4544               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4545               label1 = gen_label_rtx ();
4546               label2 = gen_label_rtx ();
4547               label3 = gen_label_rtx ();
4548               label4 = gen_label_rtx ();
4549               label5 = gen_label_rtx ();
4550               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4551               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4552                                compute_mode, label1);
4553               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4554                                   quotient, 0, OPTAB_LIB_WIDEN);
4555               if (tem != quotient)
4556                 emit_move_insn (quotient, tem);
4557               emit_jump_insn (gen_jump (label5));
4558               emit_barrier ();
4559               emit_label (label1);
4560               expand_dec (adjusted_op0, const1_rtx);
4561               emit_jump_insn (gen_jump (label4));
4562               emit_barrier ();
4563               emit_label (label2);
4564               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4565                                compute_mode, label3);
4566               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4567                                   quotient, 0, OPTAB_LIB_WIDEN);
4568               if (tem != quotient)
4569                 emit_move_insn (quotient, tem);
4570               emit_jump_insn (gen_jump (label5));
4571               emit_barrier ();
4572               emit_label (label3);
4573               expand_inc (adjusted_op0, const1_rtx);
4574               emit_label (label4);
4575               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4576                                   quotient, 0, OPTAB_LIB_WIDEN);
4577               if (tem != quotient)
4578                 emit_move_insn (quotient, tem);
4579               expand_inc (quotient, const1_rtx);
4580               emit_label (label5);
4581             }
4582           }
4583         break;
4584
4585       case EXACT_DIV_EXPR:
4586         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4587           {
4588             HOST_WIDE_INT d = INTVAL (op1);
4589             unsigned HOST_WIDE_INT ml;
4590             int pre_shift;
4591             rtx t1;
4592
4593             pre_shift = floor_log2 (d & -d);
4594             ml = invert_mod2n (d >> pre_shift, size);
4595             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4596                                build_int_cst (NULL_TREE, pre_shift),
4597                                NULL_RTX, unsignedp);
4598             quotient = expand_mult (compute_mode, t1,
4599                                     gen_int_mode (ml, compute_mode),
4600                                     NULL_RTX, 1);
4601
4602             insn = get_last_insn ();
4603             set_unique_reg_note (insn,
4604                                  REG_EQUAL,
4605                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4606                                                  compute_mode,
4607                                                  op0, op1));
4608           }
4609         break;
4610
4611       case ROUND_DIV_EXPR:
4612       case ROUND_MOD_EXPR:
4613         if (unsignedp)
4614           {
4615             rtx tem;
4616             rtx label;
4617             label = gen_label_rtx ();
4618             quotient = gen_reg_rtx (compute_mode);
4619             remainder = gen_reg_rtx (compute_mode);
4620             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4621               {
4622                 rtx tem;
4623                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4624                                          quotient, 1, OPTAB_LIB_WIDEN);
4625                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4626                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4627                                           remainder, 1, OPTAB_LIB_WIDEN);
4628               }
4629             tem = plus_constant (op1, -1);
4630             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4631                                 build_int_cst (NULL_TREE, 1),
4632                                 NULL_RTX, 1);
4633             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4634             expand_inc (quotient, const1_rtx);
4635             expand_dec (remainder, op1);
4636             emit_label (label);
4637           }
4638         else
4639           {
4640             rtx abs_rem, abs_op1, tem, mask;
4641             rtx label;
4642             label = gen_label_rtx ();
4643             quotient = gen_reg_rtx (compute_mode);
4644             remainder = gen_reg_rtx (compute_mode);
4645             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4646               {
4647                 rtx tem;
4648                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4649                                          quotient, 0, OPTAB_LIB_WIDEN);
4650                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4651                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4652                                           remainder, 0, OPTAB_LIB_WIDEN);
4653               }
4654             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4655             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4656             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4657                                 build_int_cst (NULL_TREE, 1),
4658                                 NULL_RTX, 1);
4659             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4660             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4661                                 NULL_RTX, 0, OPTAB_WIDEN);
4662             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4663                                  build_int_cst (NULL_TREE, size - 1),
4664                                  NULL_RTX, 0);
4665             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4666                                 NULL_RTX, 0, OPTAB_WIDEN);
4667             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4668                                 NULL_RTX, 0, OPTAB_WIDEN);
4669             expand_inc (quotient, tem);
4670             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4671                                 NULL_RTX, 0, OPTAB_WIDEN);
4672             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4673                                 NULL_RTX, 0, OPTAB_WIDEN);
4674             expand_dec (remainder, tem);
4675             emit_label (label);
4676           }
4677         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4678
4679       default:
4680         gcc_unreachable ();
4681       }
4682
4683   if (quotient == 0)
4684     {
4685       if (target && GET_MODE (target) != compute_mode)
4686         target = 0;
4687
4688       if (rem_flag)
4689         {
4690           /* Try to produce the remainder without producing the quotient.
4691              If we seem to have a divmod pattern that does not require widening,
4692              don't try widening here.  We should really have a WIDEN argument
4693              to expand_twoval_binop, since what we'd really like to do here is
4694              1) try a mod insn in compute_mode
4695              2) try a divmod insn in compute_mode
4696              3) try a div insn in compute_mode and multiply-subtract to get
4697                 remainder
4698              4) try the same things with widening allowed.  */
4699           remainder
4700             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4701                                  op0, op1, target,
4702                                  unsignedp,
4703                                  ((optab2->handlers[compute_mode].insn_code
4704                                    != CODE_FOR_nothing)
4705                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4706           if (remainder == 0)
4707             {
4708               /* No luck there.  Can we do remainder and divide at once
4709                  without a library call?  */
4710               remainder = gen_reg_rtx (compute_mode);
4711               if (! expand_twoval_binop ((unsignedp
4712                                           ? udivmod_optab
4713                                           : sdivmod_optab),
4714                                          op0, op1,
4715                                          NULL_RTX, remainder, unsignedp))
4716                 remainder = 0;
4717             }
4718
4719           if (remainder)
4720             return gen_lowpart (mode, remainder);
4721         }
4722
4723       /* Produce the quotient.  Try a quotient insn, but not a library call.
4724          If we have a divmod in this mode, use it in preference to widening
4725          the div (for this test we assume it will not fail). Note that optab2
4726          is set to the one of the two optabs that the call below will use.  */
4727       quotient
4728         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4729                              op0, op1, rem_flag ? NULL_RTX : target,
4730                              unsignedp,
4731                              ((optab2->handlers[compute_mode].insn_code
4732                                != CODE_FOR_nothing)
4733                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4734
4735       if (quotient == 0)
4736         {
4737           /* No luck there.  Try a quotient-and-remainder insn,
4738              keeping the quotient alone.  */
4739           quotient = gen_reg_rtx (compute_mode);
4740           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4741                                      op0, op1,
4742                                      quotient, NULL_RTX, unsignedp))
4743             {
4744               quotient = 0;
4745               if (! rem_flag)
4746                 /* Still no luck.  If we are not computing the remainder,
4747                    use a library call for the quotient.  */
4748                 quotient = sign_expand_binop (compute_mode,
4749                                               udiv_optab, sdiv_optab,
4750                                               op0, op1, target,
4751                                               unsignedp, OPTAB_LIB_WIDEN);
4752             }
4753         }
4754     }
4755
4756   if (rem_flag)
4757     {
4758       if (target && GET_MODE (target) != compute_mode)
4759         target = 0;
4760
4761       if (quotient == 0)
4762         {
4763           /* No divide instruction either.  Use library for remainder.  */
4764           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4765                                          op0, op1, target,
4766                                          unsignedp, OPTAB_LIB_WIDEN);
4767           /* No remainder function.  Try a quotient-and-remainder
4768              function, keeping the remainder.  */
4769           if (!remainder)
4770             {
4771               remainder = gen_reg_rtx (compute_mode);
4772               if (!expand_twoval_binop_libfunc
4773                   (unsignedp ? udivmod_optab : sdivmod_optab,
4774                    op0, op1,
4775                    NULL_RTX, remainder,
4776                    unsignedp ? UMOD : MOD))
4777                 remainder = NULL_RTX;
4778             }
4779         }
4780       else
4781         {
4782           /* We divided.  Now finish doing X - Y * (X / Y).  */
4783           remainder = expand_mult (compute_mode, quotient, op1,
4784                                    NULL_RTX, unsignedp);
4785           remainder = expand_binop (compute_mode, sub_optab, op0,
4786                                     remainder, target, unsignedp,
4787                                     OPTAB_LIB_WIDEN);
4788         }
4789     }
4790
4791   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4792 }
4793 \f
4794 /* Return a tree node with data type TYPE, describing the value of X.
4795    Usually this is an VAR_DECL, if there is no obvious better choice.
4796    X may be an expression, however we only support those expressions
4797    generated by loop.c.  */
4798
4799 tree
4800 make_tree (tree type, rtx x)
4801 {
4802   tree t;
4803
4804   switch (GET_CODE (x))
4805     {
4806     case CONST_INT:
4807       {
4808         HOST_WIDE_INT hi = 0;
4809
4810         if (INTVAL (x) < 0
4811             && !(TYPE_UNSIGNED (type)
4812                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4813                      < HOST_BITS_PER_WIDE_INT)))
4814           hi = -1;
4815
4816         t = build_int_cst_wide (type, INTVAL (x), hi);
4817
4818         return t;
4819       }
4820
4821     case CONST_DOUBLE:
4822       if (GET_MODE (x) == VOIDmode)
4823         t = build_int_cst_wide (type,
4824                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4825       else
4826         {
4827           REAL_VALUE_TYPE d;
4828
4829           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4830           t = build_real (type, d);
4831         }
4832
4833       return t;
4834
4835     case CONST_VECTOR:
4836       {
4837         int i, units;
4838         rtx elt;
4839         tree t = NULL_TREE;
4840
4841         units = CONST_VECTOR_NUNITS (x);
4842
4843         /* Build a tree with vector elements.  */
4844         for (i = units - 1; i >= 0; --i)
4845           {
4846             elt = CONST_VECTOR_ELT (x, i);
4847             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4848           }
4849
4850         return build_vector (type, t);
4851       }
4852
4853     case PLUS:
4854       return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4855                            make_tree (type, XEXP (x, 1))));
4856
4857     case MINUS:
4858       return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4859                            make_tree (type, XEXP (x, 1))));
4860
4861     case NEG:
4862       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4863
4864     case MULT:
4865       return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4866                            make_tree (type, XEXP (x, 1))));
4867
4868     case ASHIFT:
4869       return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4870                            make_tree (type, XEXP (x, 1))));
4871
4872     case LSHIFTRT:
4873       t = lang_hooks.types.unsigned_type (type);
4874       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4875                                          make_tree (t, XEXP (x, 0)),
4876                                          make_tree (type, XEXP (x, 1))));
4877
4878     case ASHIFTRT:
4879       t = lang_hooks.types.signed_type (type);
4880       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4881                                          make_tree (t, XEXP (x, 0)),
4882                                          make_tree (type, XEXP (x, 1))));
4883
4884     case DIV:
4885       if (TREE_CODE (type) != REAL_TYPE)
4886         t = lang_hooks.types.signed_type (type);
4887       else
4888         t = type;
4889
4890       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4891                                          make_tree (t, XEXP (x, 0)),
4892                                          make_tree (t, XEXP (x, 1))));
4893     case UDIV:
4894       t = lang_hooks.types.unsigned_type (type);
4895       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4896                                          make_tree (t, XEXP (x, 0)),
4897                                          make_tree (t, XEXP (x, 1))));
4898
4899     case SIGN_EXTEND:
4900     case ZERO_EXTEND:
4901       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4902                                           GET_CODE (x) == ZERO_EXTEND);
4903       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4904
4905     default:
4906       t = build_decl (VAR_DECL, NULL_TREE, type);
4907
4908       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4909          ptr_mode.  So convert.  */
4910       if (POINTER_TYPE_P (type))
4911         x = convert_memory_address (TYPE_MODE (type), x);
4912
4913       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4914          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4915       t->decl.rtl = x;
4916
4917       return t;
4918     }
4919 }
4920
4921 /* Check whether the multiplication X * MULT + ADD overflows.
4922    X, MULT and ADD must be CONST_*.
4923    MODE is the machine mode for the computation.
4924    X and MULT must have mode MODE.  ADD may have a different mode.
4925    So can X (defaults to same as MODE).
4926    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4927
4928 bool
4929 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4930                            enum machine_mode mode, int unsignedp)
4931 {
4932   tree type, mult_type, add_type, result;
4933
4934   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4935
4936   /* In order to get a proper overflow indication from an unsigned
4937      type, we have to pretend that it's a sizetype.  */
4938   mult_type = type;
4939   if (unsignedp)
4940     {
4941       /* FIXME:It would be nice if we could step directly from this
4942          type to its sizetype equivalent.  */
4943       mult_type = build_distinct_type_copy (type);
4944       TYPE_IS_SIZETYPE (mult_type) = 1;
4945     }
4946
4947   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4948               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4949
4950   result = fold (build2 (PLUS_EXPR, mult_type,
4951                          fold (build2 (MULT_EXPR, mult_type,
4952                                        make_tree (mult_type, x),
4953                                        make_tree (mult_type, mult))),
4954                          make_tree (add_type, add)));
4955
4956   return TREE_CONSTANT_OVERFLOW (result);
4957 }
4958
4959 /* Return an rtx representing the value of X * MULT + ADD.
4960    TARGET is a suggestion for where to store the result (an rtx).
4961    MODE is the machine mode for the computation.
4962    X and MULT must have mode MODE.  ADD may have a different mode.
4963    So can X (defaults to same as MODE).
4964    UNSIGNEDP is nonzero to do unsigned multiplication.
4965    This may emit insns.  */
4966
4967 rtx
4968 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4969                  int unsignedp)
4970 {
4971   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4972   tree add_type = (GET_MODE (add) == VOIDmode
4973                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4974                                                            unsignedp));
4975   tree result =  fold (build2 (PLUS_EXPR, type,
4976                                fold (build2 (MULT_EXPR, type,
4977                                              make_tree (type, x),
4978                                              make_tree (type, mult))),
4979                                make_tree (add_type, add)));
4980
4981   return expand_expr (result, target, VOIDmode, 0);
4982 }
4983 \f
4984 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4985    and returning TARGET.
4986
4987    If TARGET is 0, a pseudo-register or constant is returned.  */
4988
4989 rtx
4990 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4991 {
4992   rtx tem = 0;
4993
4994   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4995     tem = simplify_binary_operation (AND, mode, op0, op1);
4996   if (tem == 0)
4997     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4998
4999   if (target == 0)
5000     target = tem;
5001   else if (tem != target)
5002     emit_move_insn (target, tem);
5003   return target;
5004 }
5005 \f
5006 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5007    and storing in TARGET.  Normally return TARGET.
5008    Return 0 if that cannot be done.
5009
5010    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5011    it is VOIDmode, they cannot both be CONST_INT.
5012
5013    UNSIGNEDP is for the case where we have to widen the operands
5014    to perform the operation.  It says to use zero-extension.
5015
5016    NORMALIZEP is 1 if we should convert the result to be either zero
5017    or one.  Normalize is -1 if we should convert the result to be
5018    either zero or -1.  If NORMALIZEP is zero, the result will be left
5019    "raw" out of the scc insn.  */
5020
5021 rtx
5022 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5023                  enum machine_mode mode, int unsignedp, int normalizep)
5024 {
5025   rtx subtarget;
5026   enum insn_code icode;
5027   enum machine_mode compare_mode;
5028   enum machine_mode target_mode = GET_MODE (target);
5029   rtx tem;
5030   rtx last = get_last_insn ();
5031   rtx pattern, comparison;
5032
5033   if (unsignedp)
5034     code = unsigned_condition (code);
5035
5036   /* If one operand is constant, make it the second one.  Only do this
5037      if the other operand is not constant as well.  */
5038
5039   if (swap_commutative_operands_p (op0, op1))
5040     {
5041       tem = op0;
5042       op0 = op1;
5043       op1 = tem;
5044       code = swap_condition (code);
5045     }
5046
5047   if (mode == VOIDmode)
5048     mode = GET_MODE (op0);
5049
5050   /* For some comparisons with 1 and -1, we can convert this to
5051      comparisons with zero.  This will often produce more opportunities for
5052      store-flag insns.  */
5053
5054   switch (code)
5055     {
5056     case LT:
5057       if (op1 == const1_rtx)
5058         op1 = const0_rtx, code = LE;
5059       break;
5060     case LE:
5061       if (op1 == constm1_rtx)
5062         op1 = const0_rtx, code = LT;
5063       break;
5064     case GE:
5065       if (op1 == const1_rtx)
5066         op1 = const0_rtx, code = GT;
5067       break;
5068     case GT:
5069       if (op1 == constm1_rtx)
5070         op1 = const0_rtx, code = GE;
5071       break;
5072     case GEU:
5073       if (op1 == const1_rtx)
5074         op1 = const0_rtx, code = NE;
5075       break;
5076     case LTU:
5077       if (op1 == const1_rtx)
5078         op1 = const0_rtx, code = EQ;
5079       break;
5080     default:
5081       break;
5082     }
5083
5084   /* If we are comparing a double-word integer with zero or -1, we can
5085      convert the comparison into one involving a single word.  */
5086   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5087       && GET_MODE_CLASS (mode) == MODE_INT
5088       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5089     {
5090       if ((code == EQ || code == NE)
5091           && (op1 == const0_rtx || op1 == constm1_rtx))
5092         {
5093           rtx op00, op01, op0both;
5094
5095           /* Do a logical OR or AND of the two words and compare the result.  */
5096           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5097           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5098           op0both = expand_binop (word_mode,
5099                                   op1 == const0_rtx ? ior_optab : and_optab,
5100                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5101
5102           if (op0both != 0)
5103             return emit_store_flag (target, code, op0both, op1, word_mode,
5104                                     unsignedp, normalizep);
5105         }
5106       else if ((code == LT || code == GE) && op1 == const0_rtx)
5107         {
5108           rtx op0h;
5109
5110           /* If testing the sign bit, can just test on high word.  */
5111           op0h = simplify_gen_subreg (word_mode, op0, mode,
5112                                       subreg_highpart_offset (word_mode, mode));
5113           return emit_store_flag (target, code, op0h, op1, word_mode,
5114                                   unsignedp, normalizep);
5115         }
5116     }
5117
5118   /* From now on, we won't change CODE, so set ICODE now.  */
5119   icode = setcc_gen_code[(int) code];
5120
5121   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5122      complement of A (for GE) and shifting the sign bit to the low bit.  */
5123   if (op1 == const0_rtx && (code == LT || code == GE)
5124       && GET_MODE_CLASS (mode) == MODE_INT
5125       && (normalizep || STORE_FLAG_VALUE == 1
5126           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5127               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5128                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5129     {
5130       subtarget = target;
5131
5132       /* If the result is to be wider than OP0, it is best to convert it
5133          first.  If it is to be narrower, it is *incorrect* to convert it
5134          first.  */
5135       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5136         {
5137           op0 = convert_modes (target_mode, mode, op0, 0);
5138           mode = target_mode;
5139         }
5140
5141       if (target_mode != mode)
5142         subtarget = 0;
5143
5144       if (code == GE)
5145         op0 = expand_unop (mode, one_cmpl_optab, op0,
5146                            ((STORE_FLAG_VALUE == 1 || normalizep)
5147                             ? 0 : subtarget), 0);
5148
5149       if (STORE_FLAG_VALUE == 1 || normalizep)
5150         /* If we are supposed to produce a 0/1 value, we want to do
5151            a logical shift from the sign bit to the low-order bit; for
5152            a -1/0 value, we do an arithmetic shift.  */
5153         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5154                             size_int (GET_MODE_BITSIZE (mode) - 1),
5155                             subtarget, normalizep != -1);
5156
5157       if (mode != target_mode)
5158         op0 = convert_modes (target_mode, mode, op0, 0);
5159
5160       return op0;
5161     }
5162
5163   if (icode != CODE_FOR_nothing)
5164     {
5165       insn_operand_predicate_fn pred;
5166
5167       /* We think we may be able to do this with a scc insn.  Emit the
5168          comparison and then the scc insn.  */
5169
5170       do_pending_stack_adjust ();
5171       last = get_last_insn ();
5172
5173       comparison
5174         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5175       if (CONSTANT_P (comparison))
5176         {
5177           switch (GET_CODE (comparison))
5178             {
5179             case CONST_INT:
5180               if (comparison == const0_rtx)
5181                 return const0_rtx;
5182               break;
5183
5184 #ifdef FLOAT_STORE_FLAG_VALUE
5185             case CONST_DOUBLE:
5186               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5187                 return const0_rtx;
5188               break;
5189 #endif
5190             default:
5191               gcc_unreachable ();
5192             }
5193
5194           if (normalizep == 1)
5195             return const1_rtx;
5196           if (normalizep == -1)
5197             return constm1_rtx;
5198           return const_true_rtx;
5199         }
5200
5201       /* The code of COMPARISON may not match CODE if compare_from_rtx
5202          decided to swap its operands and reverse the original code.
5203
5204          We know that compare_from_rtx returns either a CONST_INT or
5205          a new comparison code, so it is safe to just extract the
5206          code from COMPARISON.  */
5207       code = GET_CODE (comparison);
5208
5209       /* Get a reference to the target in the proper mode for this insn.  */
5210       compare_mode = insn_data[(int) icode].operand[0].mode;
5211       subtarget = target;
5212       pred = insn_data[(int) icode].operand[0].predicate;
5213       if (optimize || ! (*pred) (subtarget, compare_mode))
5214         subtarget = gen_reg_rtx (compare_mode);
5215
5216       pattern = GEN_FCN (icode) (subtarget);
5217       if (pattern)
5218         {
5219           emit_insn (pattern);
5220
5221           /* If we are converting to a wider mode, first convert to
5222              TARGET_MODE, then normalize.  This produces better combining
5223              opportunities on machines that have a SIGN_EXTRACT when we are
5224              testing a single bit.  This mostly benefits the 68k.
5225
5226              If STORE_FLAG_VALUE does not have the sign bit set when
5227              interpreted in COMPARE_MODE, we can do this conversion as
5228              unsigned, which is usually more efficient.  */
5229           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5230             {
5231               convert_move (target, subtarget,
5232                             (GET_MODE_BITSIZE (compare_mode)
5233                              <= HOST_BITS_PER_WIDE_INT)
5234                             && 0 == (STORE_FLAG_VALUE
5235                                      & ((HOST_WIDE_INT) 1
5236                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5237               op0 = target;
5238               compare_mode = target_mode;
5239             }
5240           else
5241             op0 = subtarget;
5242
5243           /* If we want to keep subexpressions around, don't reuse our
5244              last target.  */
5245
5246           if (optimize)
5247             subtarget = 0;
5248
5249           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5250              we don't have to do anything.  */
5251           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5252             ;
5253           /* STORE_FLAG_VALUE might be the most negative number, so write
5254              the comparison this way to avoid a compiler-time warning.  */
5255           else if (- normalizep == STORE_FLAG_VALUE)
5256             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5257
5258           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5259              makes it hard to use a value of just the sign bit due to
5260              ANSI integer constant typing rules.  */
5261           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5262                    && (STORE_FLAG_VALUE
5263                        & ((HOST_WIDE_INT) 1
5264                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5265             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5266                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5267                                 subtarget, normalizep == 1);
5268           else
5269             {
5270               gcc_assert (STORE_FLAG_VALUE & 1);
5271
5272               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5273               if (normalizep == -1)
5274                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5275             }
5276
5277           /* If we were converting to a smaller mode, do the
5278              conversion now.  */
5279           if (target_mode != compare_mode)
5280             {
5281               convert_move (target, op0, 0);
5282               return target;
5283             }
5284           else
5285             return op0;
5286         }
5287     }
5288
5289   delete_insns_since (last);
5290
5291   /* If optimizing, use different pseudo registers for each insn, instead
5292      of reusing the same pseudo.  This leads to better CSE, but slows
5293      down the compiler, since there are more pseudos */
5294   subtarget = (!optimize
5295                && (target_mode == mode)) ? target : NULL_RTX;
5296
5297   /* If we reached here, we can't do this with a scc insn.  However, there
5298      are some comparisons that can be done directly.  For example, if
5299      this is an equality comparison of integers, we can try to exclusive-or
5300      (or subtract) the two operands and use a recursive call to try the
5301      comparison with zero.  Don't do any of these cases if branches are
5302      very cheap.  */
5303
5304   if (BRANCH_COST > 0
5305       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5306       && op1 != const0_rtx)
5307     {
5308       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5309                           OPTAB_WIDEN);
5310
5311       if (tem == 0)
5312         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5313                             OPTAB_WIDEN);
5314       if (tem != 0)
5315         tem = emit_store_flag (target, code, tem, const0_rtx,
5316                                mode, unsignedp, normalizep);
5317       if (tem == 0)
5318         delete_insns_since (last);
5319       return tem;
5320     }
5321
5322   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5323      the constant zero.  Reject all other comparisons at this point.  Only
5324      do LE and GT if branches are expensive since they are expensive on
5325      2-operand machines.  */
5326
5327   if (BRANCH_COST == 0
5328       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5329       || (code != EQ && code != NE
5330           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5331     return 0;
5332
5333   /* See what we need to return.  We can only return a 1, -1, or the
5334      sign bit.  */
5335
5336   if (normalizep == 0)
5337     {
5338       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5339         normalizep = STORE_FLAG_VALUE;
5340
5341       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5342                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5343                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5344         ;
5345       else
5346         return 0;
5347     }
5348
5349   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5350      do the necessary operation below.  */
5351
5352   tem = 0;
5353
5354   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5355      the sign bit set.  */
5356
5357   if (code == LE)
5358     {
5359       /* This is destructive, so SUBTARGET can't be OP0.  */
5360       if (rtx_equal_p (subtarget, op0))
5361         subtarget = 0;
5362
5363       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5364                           OPTAB_WIDEN);
5365       if (tem)
5366         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5367                             OPTAB_WIDEN);
5368     }
5369
5370   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5371      number of bits in the mode of OP0, minus one.  */
5372
5373   if (code == GT)
5374     {
5375       if (rtx_equal_p (subtarget, op0))
5376         subtarget = 0;
5377
5378       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5379                           size_int (GET_MODE_BITSIZE (mode) - 1),
5380                           subtarget, 0);
5381       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5382                           OPTAB_WIDEN);
5383     }
5384
5385   if (code == EQ || code == NE)
5386     {
5387       /* For EQ or NE, one way to do the comparison is to apply an operation
5388          that converts the operand into a positive number if it is nonzero
5389          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5390          for NE we negate.  This puts the result in the sign bit.  Then we
5391          normalize with a shift, if needed.
5392
5393          Two operations that can do the above actions are ABS and FFS, so try
5394          them.  If that doesn't work, and MODE is smaller than a full word,
5395          we can use zero-extension to the wider mode (an unsigned conversion)
5396          as the operation.  */
5397
5398       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5399          that is compensated by the subsequent overflow when subtracting
5400          one / negating.  */
5401
5402       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5403         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5404       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5405         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5406       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5407         {
5408           tem = convert_modes (word_mode, mode, op0, 1);
5409           mode = word_mode;
5410         }
5411
5412       if (tem != 0)
5413         {
5414           if (code == EQ)
5415             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5416                                 0, OPTAB_WIDEN);
5417           else
5418             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5419         }
5420
5421       /* If we couldn't do it that way, for NE we can "or" the two's complement
5422          of the value with itself.  For EQ, we take the one's complement of
5423          that "or", which is an extra insn, so we only handle EQ if branches
5424          are expensive.  */
5425
5426       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5427         {
5428           if (rtx_equal_p (subtarget, op0))
5429             subtarget = 0;
5430
5431           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5432           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5433                               OPTAB_WIDEN);
5434
5435           if (tem && code == EQ)
5436             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5437         }
5438     }
5439
5440   if (tem && normalizep)
5441     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5442                         size_int (GET_MODE_BITSIZE (mode) - 1),
5443                         subtarget, normalizep == 1);
5444
5445   if (tem)
5446     {
5447       if (GET_MODE (tem) != target_mode)
5448         {
5449           convert_move (target, tem, 0);
5450           tem = target;
5451         }
5452       else if (!subtarget)
5453         {
5454           emit_move_insn (target, tem);
5455           tem = target;
5456         }
5457     }
5458   else
5459     delete_insns_since (last);
5460
5461   return tem;
5462 }
5463
5464 /* Like emit_store_flag, but always succeeds.  */
5465
5466 rtx
5467 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5468                        enum machine_mode mode, int unsignedp, int normalizep)
5469 {
5470   rtx tem, label;
5471
5472   /* First see if emit_store_flag can do the job.  */
5473   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5474   if (tem != 0)
5475     return tem;
5476
5477   if (normalizep == 0)
5478     normalizep = 1;
5479
5480   /* If this failed, we have to do this with set/compare/jump/set code.  */
5481
5482   if (!REG_P (target)
5483       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5484     target = gen_reg_rtx (GET_MODE (target));
5485
5486   emit_move_insn (target, const1_rtx);
5487   label = gen_label_rtx ();
5488   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5489                            NULL_RTX, label);
5490
5491   emit_move_insn (target, const0_rtx);
5492   emit_label (label);
5493
5494   return target;
5495 }
5496 \f
5497 /* Perform possibly multi-word comparison and conditional jump to LABEL
5498    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5499
5500    The algorithm is based on the code in expr.c:do_jump.
5501
5502    Note that this does not perform a general comparison.  Only variants
5503    generated within expmed.c are correctly handled, others abort (but could
5504    be handled if needed).  */
5505
5506 static void
5507 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5508                  rtx label)
5509 {
5510   /* If this mode is an integer too wide to compare properly,
5511      compare word by word.  Rely on cse to optimize constant cases.  */
5512
5513   if (GET_MODE_CLASS (mode) == MODE_INT
5514       && ! can_compare_p (op, mode, ccp_jump))
5515     {
5516       rtx label2 = gen_label_rtx ();
5517
5518       switch (op)
5519         {
5520         case LTU:
5521           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5522           break;
5523
5524         case LEU:
5525           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5526           break;
5527
5528         case LT:
5529           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5530           break;
5531
5532         case GT:
5533           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5534           break;
5535
5536         case GE:
5537           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5538           break;
5539
5540           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5541              that's the only equality operations we do */
5542         case EQ:
5543           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5544           do_jump_by_parts_equality_rtx (arg1, label2, label);
5545           break;
5546
5547         case NE:
5548           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5549           do_jump_by_parts_equality_rtx (arg1, label, label2);
5550           break;
5551
5552         default:
5553           gcc_unreachable ();
5554         }
5555
5556       emit_label (label2);
5557     }
5558   else
5559     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5560 }