X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;ds=sidebyside;f=gcc%2Fexpmed.c;h=d4b05837aeb99af1163c0aae9b222a370041c5b9;hb=dccaf904df39815109ff6bfd8cd8c5da2b23acf2;hp=0f3a14df509aaeeb72cf93df63418acbd342bc3b;hpb=99d2e279e9fc2f616f5dc829937723e337ed4e0e;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/expmed.c b/gcc/expmed.c index 0f3a14df509..d4b05837aeb 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -1,14 +1,14 @@ /* Medium-level subroutines: convert bit-field store and extract and shifts, multiplies and divides to rtl instructions. Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, - 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 + 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later +Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY @@ -17,9 +17,8 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301, USA. */ +along with GCC; see the file COPYING3. If not see +. */ #include "config.h" @@ -37,6 +36,8 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "real.h" #include "recog.h" #include "langhooks.h" +#include "df.h" +#include "target.h" static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, @@ -63,8 +64,8 @@ static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); Usually, this will mean that the MD file will emit non-branch sequences. */ -static bool sdiv_pow2_cheap[NUM_MACHINE_MODES]; -static bool smod_pow2_cheap[NUM_MACHINE_MODES]; +static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES]; +static bool smod_pow2_cheap[2][NUM_MACHINE_MODES]; #ifndef SLOW_UNALIGNED_ACCESS #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT @@ -97,17 +98,18 @@ static bool smod_pow2_cheap[NUM_MACHINE_MODES]; /* Cost of various pieces of RTL. Note that some of these are indexed by shift count and some by mode. 
*/ -static int zero_cost; -static int add_cost[NUM_MACHINE_MODES]; -static int neg_cost[NUM_MACHINE_MODES]; -static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; -static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; -static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; -static int mul_cost[NUM_MACHINE_MODES]; -static int sdiv_cost[NUM_MACHINE_MODES]; -static int udiv_cost[NUM_MACHINE_MODES]; -static int mul_widen_cost[NUM_MACHINE_MODES]; -static int mul_highpart_cost[NUM_MACHINE_MODES]; +static int zero_cost[2]; +static int add_cost[2][NUM_MACHINE_MODES]; +static int neg_cost[2][NUM_MACHINE_MODES]; +static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int shiftsub0_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int shiftsub1_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int mul_cost[2][NUM_MACHINE_MODES]; +static int sdiv_cost[2][NUM_MACHINE_MODES]; +static int udiv_cost[2][NUM_MACHINE_MODES]; +static int mul_widen_cost[2][NUM_MACHINE_MODES]; +static int mul_highpart_cost[2][NUM_MACHINE_MODES]; void init_expmed (void) @@ -129,27 +131,27 @@ init_expmed (void) struct rtx_def shift; rtunion shift_fld1; struct rtx_def shift_mult; rtunion shift_mult_fld1; struct rtx_def shift_add; rtunion shift_add_fld1; - struct rtx_def shift_sub; rtunion shift_sub_fld1; + struct rtx_def shift_sub0; rtunion shift_sub0_fld1; + struct rtx_def shift_sub1; rtunion shift_sub1_fld1; } all; rtx pow2[MAX_BITS_PER_WORD]; rtx cint[MAX_BITS_PER_WORD]; int m, n; enum machine_mode mode, wider_mode; + int speed; - zero_cost = rtx_cost (const0_rtx, 0); for (m = 1; m < MAX_BITS_PER_WORD; m++) { pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m); cint[m] = GEN_INT (m); } - memset (&all, 0, sizeof all); PUT_CODE (&all.reg, REG); /* Avoid using hard regs in ways which may be unsupported. 
*/ - REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1; + SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1); PUT_CODE (&all.plus, PLUS); XEXP (&all.plus, 0) = &all.reg; @@ -201,65 +203,81 @@ init_expmed (void) XEXP (&all.shift_add, 0) = &all.shift_mult; XEXP (&all.shift_add, 1) = &all.reg; - PUT_CODE (&all.shift_sub, MINUS); - XEXP (&all.shift_sub, 0) = &all.shift_mult; - XEXP (&all.shift_sub, 1) = &all.reg; + PUT_CODE (&all.shift_sub0, MINUS); + XEXP (&all.shift_sub0, 0) = &all.shift_mult; + XEXP (&all.shift_sub0, 1) = &all.reg; - for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); - mode != VOIDmode; - mode = GET_MODE_WIDER_MODE (mode)) + PUT_CODE (&all.shift_sub1, MINUS); + XEXP (&all.shift_sub1, 0) = &all.reg; + XEXP (&all.shift_sub1, 1) = &all.shift_mult; + + for (speed = 0; speed < 2; speed++) { - PUT_MODE (&all.reg, mode); - PUT_MODE (&all.plus, mode); - PUT_MODE (&all.neg, mode); - PUT_MODE (&all.mult, mode); - PUT_MODE (&all.sdiv, mode); - PUT_MODE (&all.udiv, mode); - PUT_MODE (&all.sdiv_32, mode); - PUT_MODE (&all.smod_32, mode); - PUT_MODE (&all.wide_trunc, mode); - PUT_MODE (&all.shift, mode); - PUT_MODE (&all.shift_mult, mode); - PUT_MODE (&all.shift_add, mode); - PUT_MODE (&all.shift_sub, mode); - - add_cost[mode] = rtx_cost (&all.plus, SET); - neg_cost[mode] = rtx_cost (&all.neg, SET); - mul_cost[mode] = rtx_cost (&all.mult, SET); - sdiv_cost[mode] = rtx_cost (&all.sdiv, SET); - udiv_cost[mode] = rtx_cost (&all.udiv, SET); - - sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET) - <= 2 * add_cost[mode]); - smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET) - <= 4 * add_cost[mode]); - - wider_mode = GET_MODE_WIDER_MODE (mode); - if (wider_mode != VOIDmode) - { - PUT_MODE (&all.zext, wider_mode); - PUT_MODE (&all.wide_mult, wider_mode); - PUT_MODE (&all.wide_lshr, wider_mode); - XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode)); + crtl->maybe_hot_insn_p = speed; + zero_cost[speed] = rtx_cost (const0_rtx, SET, speed); - mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET); - mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET); - } + for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); + mode != VOIDmode; + mode = GET_MODE_WIDER_MODE (mode)) + { + PUT_MODE (&all.reg, mode); + PUT_MODE (&all.plus, mode); + PUT_MODE (&all.neg, mode); + PUT_MODE (&all.mult, mode); + PUT_MODE (&all.sdiv, mode); + PUT_MODE (&all.udiv, mode); + PUT_MODE (&all.sdiv_32, mode); + PUT_MODE (&all.smod_32, mode); + PUT_MODE (&all.wide_trunc, mode); + PUT_MODE (&all.shift, mode); + PUT_MODE (&all.shift_mult, mode); + PUT_MODE (&all.shift_add, mode); + PUT_MODE (&all.shift_sub0, mode); + PUT_MODE (&all.shift_sub1, mode); + + add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed); + neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed); + mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed); + sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed); + udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed); + + sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed) + <= 2 * add_cost[speed][mode]); + smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed) + <= 4 * add_cost[speed][mode]); + + wider_mode = GET_MODE_WIDER_MODE (mode); + if (wider_mode != VOIDmode) + { + PUT_MODE (&all.zext, wider_mode); + PUT_MODE (&all.wide_mult, wider_mode); + PUT_MODE (&all.wide_lshr, wider_mode); + XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode)); + + mul_widen_cost[speed][wider_mode] + = rtx_cost (&all.wide_mult, SET, speed); + mul_highpart_cost[speed][mode] + = 
rtx_cost (&all.wide_trunc, SET, speed); + } - shift_cost[mode][0] = 0; - shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode]; + shift_cost[speed][mode][0] = 0; + shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0] + = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode]; - n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode)); - for (m = 1; m < n; m++) - { - XEXP (&all.shift, 1) = cint[m]; - XEXP (&all.shift_mult, 1) = pow2[m]; + n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode)); + for (m = 1; m < n; m++) + { + XEXP (&all.shift, 1) = cint[m]; + XEXP (&all.shift_mult, 1) = pow2[m]; - shift_cost[mode][m] = rtx_cost (&all.shift, SET); - shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET); - shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET); + shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed); + shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed); + shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed); + shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed); + } } } + default_rtl_profile (); } /* Return an rtx representing minus the value of X. @@ -326,26 +344,33 @@ mode_for_extraction (enum extraction_pattern pattern, int opno) return data->operand[opno].mode; } - -/* Generate code to store value from rtx VALUE - into a bit-field within structure STR_RTX - containing BITSIZE bits starting at bit BITNUM. - FIELDMODE is the machine-mode of the FIELD_DECL node for this field. - ALIGN is the alignment that STR_RTX is known to have. - TOTAL_SIZE is the size of the structure in bytes, or -1 if varying. */ +/* Return true if X, of mode MODE, matches the predicate for operand + OPNO of instruction ICODE. Allow volatile memories, regardless of + the ambient volatile_ok setting. */ + +static bool +check_predicate_volatile_ok (enum insn_code icode, int opno, + rtx x, enum machine_mode mode) +{ + bool save_volatile_ok, result; -/* ??? Note that there are two different ideas here for how - to determine the size to count bits within, for a register. - One is BITS_PER_WORD, and the other is the size of operand 3 - of the insv pattern. + save_volatile_ok = volatile_ok; + result = insn_data[(int) icode].operand[opno].predicate (x, mode); + volatile_ok = save_volatile_ok; + return result; +} + +/* A subroutine of store_bit_field, with the same arguments. Return true + if the operation could be implemented. - If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD - else, we use the mode of operand 3. */ + If FALLBACK_P is true, fall back to store_fixed_bit_field if we have + no other way of implementing the operation. If FALLBACK_P is false, + return false instead. */ -rtx -store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, - unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, - rtx value) +static bool +store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, + rtx value, bool fallback_p) { unsigned int unit = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD; @@ -365,7 +390,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, always get higher addresses. */ int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))); int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0)); - + byte_offset = 0; /* Paradoxical subregs need special handling on big endian machines. 
*/ @@ -389,13 +414,13 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, lies completely outside that register. This can occur if the source code contains an out-of-bounds access to a small array. */ if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) - return value; + return true; /* Use vec_set patterns for inserting parts of vectors whenever available. */ if (VECTOR_MODE_P (GET_MODE (op0)) && !MEM_P (op0) - && (vec_set_optab->handlers[GET_MODE (op0)].insn_code + && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code != CODE_FOR_nothing) && fieldmode == GET_MODE_INNER (GET_MODE (op0)) && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) @@ -403,7 +428,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, { enum machine_mode outermode = GET_MODE (op0); enum machine_mode innermode = GET_MODE_INNER (outermode); - int icode = (int) vec_set_optab->handlers[outermode].insn_code; + int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code; int pos = bitnum / GET_MODE_BITSIZE (innermode); rtx rtxpos = GEN_INT (pos); rtx src = value; @@ -433,7 +458,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, { emit_insn (seq); emit_insn (pat); - return dest; + return true; } } @@ -465,7 +490,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), byte_offset); emit_move_insn (op0, value); - return value; + return true; } /* Make sure we are playing with integral modes. Pun with subregs @@ -509,13 +534,16 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, if (!MEM_P (op0) && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0) && bitsize == GET_MODE_BITSIZE (fieldmode) - && (movstrict_optab->handlers[fieldmode].insn_code + && (optab_handler (movstrict_optab, fieldmode)->insn_code != CODE_FOR_nothing)) { - int icode = movstrict_optab->handlers[fieldmode].insn_code; + int icode = optab_handler (movstrict_optab, fieldmode)->insn_code; + rtx insn; + rtx start = get_last_insn (); + rtx arg0 = op0; /* Get appropriate low part of the value being stored. */ - if (GET_CODE (value) == CONST_INT || REG_P (value)) + if (CONST_INT_P (value) || REG_P (value)) value = gen_lowpart (fieldmode, value); else if (!(GET_CODE (value) == SYMBOL_REF || GET_CODE (value) == LABEL_REF @@ -533,16 +561,20 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode || GET_MODE_CLASS (fieldmode) == MODE_INT || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); - op0 = SUBREG_REG (op0); + arg0 = SUBREG_REG (op0); } - emit_insn (GEN_FCN (icode) - (gen_rtx_SUBREG (fieldmode, op0, + insn = (GEN_FCN (icode) + (gen_rtx_SUBREG (fieldmode, arg0, (bitnum % BITS_PER_WORD) / BITS_PER_UNIT + (offset * UNITS_PER_WORD)), value)); - - return value; + if (insn) + { + emit_insn (insn); + return true; + } + delete_insns_since (start); } /* Handle fields bigger than a word. */ @@ -558,6 +590,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; unsigned int i; + rtx last; /* This is the mode we must force value to, so that there will be enough subwords to extract. Note that fieldmode will often (always?) 
be @@ -568,6 +601,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, if (fieldmode == VOIDmode) fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); + last = get_last_insn (); for (i = 0; i < nwords; i++) { /* If I is 0, use the low-order word in both field and target; @@ -578,13 +612,18 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, * BITS_PER_WORD, 0) : (int) i * BITS_PER_WORD); + rtx value_word = operand_subword_force (value, wordnum, fieldmode); - store_bit_field (op0, MIN (BITS_PER_WORD, - bitsize - i * BITS_PER_WORD), - bitnum + bit_offset, word_mode, - operand_subword_force (value, wordnum, fieldmode)); + if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD, + bitsize - i * BITS_PER_WORD), + bitnum + bit_offset, word_mode, + value_word, fallback_p)) + { + delete_insns_since (last); + return false; + } } - return value; + return true; } /* From here on we can assume that the field to be stored in is @@ -638,74 +677,45 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))) && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize), - VOIDmode)) + VOIDmode) + && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode)) { int xbitpos = bitpos; rtx value1; rtx xop0 = op0; rtx last = get_last_insn (); rtx pat; - enum machine_mode maxmode = mode_for_extraction (EP_insv, 3); - int save_volatile_ok = volatile_ok; - - volatile_ok = 1; - - /* If this machine's insv can only insert into a register, copy OP0 - into a register and save it back later. */ - if (MEM_P (op0) - && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate) - (op0, VOIDmode))) - { - rtx tempreg; - enum machine_mode bestmode; - - /* Get the mode to use for inserting into this field. If OP0 is - BLKmode, get the smallest mode consistent with the alignment. If - OP0 is a non-BLKmode object that is no wider than MAXMODE, use its - mode. Otherwise, use the smallest mode containing the field. */ - - if (GET_MODE (op0) == BLKmode - || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode)) - bestmode - = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode, - MEM_VOLATILE_P (op0)); - else - bestmode = GET_MODE (op0); - - if (bestmode == VOIDmode - || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode) - || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) - && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) - goto insv_loses; - - /* Adjust address to point to the containing unit of that mode. - Compute offset as multiple of this unit, counting in bytes. */ - unit = GET_MODE_BITSIZE (bestmode); - offset = (bitnum / unit) * GET_MODE_SIZE (bestmode); - bitpos = bitnum % unit; - op0 = adjust_address (op0, bestmode, offset); - - /* Fetch that unit, store the bitfield in it, then store - the unit. */ - tempreg = copy_to_reg (op0); - store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value); - emit_move_insn (op0, tempreg); - return value; - } - volatile_ok = save_volatile_ok; + bool copy_back = false; /* Add OFFSET into OP0's address. */ if (MEM_P (xop0)) xop0 = adjust_address (xop0, byte_mode, offset); - /* If xop0 is a register, we need it in MAXMODE + /* If xop0 is a register, we need it in OP_MODE to make it acceptable to the format of insv. */ if (GET_CODE (xop0) == SUBREG) /* We can't just change the mode, because this might clobber op0, and we will need the original value of op0 if insv fails. 
*/ - xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); - if (REG_P (xop0) && GET_MODE (xop0) != maxmode) - xop0 = gen_rtx_SUBREG (maxmode, xop0, 0); + xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); + if (REG_P (xop0) && GET_MODE (xop0) != op_mode) + xop0 = gen_lowpart_SUBREG (op_mode, xop0); + + /* If the destination is a paradoxical subreg such that we need a + truncate to the inner mode, perform the insertion on a temporary and + truncate the result to the original destination. Note that we can't + just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N + X) 0)) is (reg:N X). */ + if (GET_CODE (xop0) == SUBREG + && REG_P (SUBREG_REG (xop0)) + && (!TRULY_NOOP_TRUNCATION + (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))), + GET_MODE_BITSIZE (op_mode)))) + { + rtx tem = gen_reg_rtx (op_mode); + emit_move_insn (tem, xop0); + xop0 = tem; + copy_back = true; + } /* On big-endian machines, we count bits from the most significant. If the bit field insn does not, we must invert. */ @@ -716,13 +726,13 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, /* We have been counting XBITPOS within UNIT. Count instead within the size of the register. */ if (BITS_BIG_ENDIAN && !MEM_P (xop0)) - xbitpos += GET_MODE_BITSIZE (maxmode) - unit; + xbitpos += GET_MODE_BITSIZE (op_mode) - unit; - unit = GET_MODE_BITSIZE (maxmode); + unit = GET_MODE_BITSIZE (op_mode); - /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */ + /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ value1 = value; - if (GET_MODE (value) != maxmode) + if (GET_MODE (value) != op_mode) { if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize) { @@ -730,23 +740,23 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, if it has all the bits we will actually use. However, if we must narrow it, be sure we do it correctly. */ - if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode)) + if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode)) { rtx tmp; - tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0); + tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0); if (! tmp) - tmp = simplify_gen_subreg (maxmode, + tmp = simplify_gen_subreg (op_mode, force_reg (GET_MODE (value), value1), GET_MODE (value), 0); value1 = tmp; } else - value1 = gen_lowpart (maxmode, value1); + value1 = gen_lowpart (op_mode, value1); } - else if (GET_CODE (value) == CONST_INT) - value1 = gen_int_mode (INTVAL (value), maxmode); + else if (CONST_INT_P (value)) + value1 = gen_int_mode (INTVAL (value), op_mode); else /* Parse phase is supposed to make VALUE's data type match that of the component reference, which is a type @@ -758,23 +768,92 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, /* If this machine's insv insists on a register, get VALUE1 into a register. */ if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate) - (value1, maxmode))) - value1 = force_reg (maxmode, value1); + (value1, op_mode))) + value1 = force_reg (op_mode, value1); pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1); if (pat) - emit_insn (pat); + { + emit_insn (pat); + + if (copy_back) + convert_move (op0, xop0, true); + return true; + } + delete_insns_since (last); + } + + /* If OP0 is a memory, try copying it to a register and seeing if a + cheap register alternative is available. */ + if (HAVE_insv && MEM_P (op0)) + { + enum machine_mode bestmode; + + /* Get the mode to use for inserting into this field. 
If OP0 is + BLKmode, get the smallest mode consistent with the alignment. If + OP0 is a non-BLKmode object that is no wider than OP_MODE, use its + mode. Otherwise, use the smallest mode containing the field. */ + + if (GET_MODE (op0) == BLKmode + || (op_mode != MAX_MACHINE_MODE + && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))) + bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), + (op_mode == MAX_MACHINE_MODE + ? VOIDmode : op_mode), + MEM_VOLATILE_P (op0)); else + bestmode = GET_MODE (op0); + + if (bestmode != VOIDmode + && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode) + && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) + && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) { + rtx last, tempreg, xop0; + unsigned HOST_WIDE_INT xoffset, xbitpos; + + last = get_last_insn (); + + /* Adjust address to point to the containing unit of + that mode. Compute the offset as a multiple of this unit, + counting in bytes. */ + unit = GET_MODE_BITSIZE (bestmode); + xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); + xbitpos = bitnum % unit; + xop0 = adjust_address (op0, bestmode, xoffset); + + /* Fetch that unit, store the bitfield in it, then store + the unit. */ + tempreg = copy_to_reg (xop0); + if (store_bit_field_1 (tempreg, bitsize, xbitpos, + fieldmode, orig_value, false)) + { + emit_move_insn (xop0, tempreg); + return true; + } delete_insns_since (last); - store_fixed_bit_field (op0, offset, bitsize, bitpos, value); } } - else - insv_loses: - /* Insv is not available; store using shifts and boolean ops. */ - store_fixed_bit_field (op0, offset, bitsize, bitpos, value); - return value; + + if (!fallback_p) + return false; + + store_fixed_bit_field (op0, offset, bitsize, bitpos, value); + return true; +} + +/* Generate code to store value from rtx VALUE + into a bit-field within structure STR_RTX + containing BITSIZE bits starting at bit BITNUM. + FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */ + +void +store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, + rtx value) +{ + if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true)) + gcc_unreachable (); } /* Use shifts and boolean operations to store VALUE @@ -878,7 +957,7 @@ store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset, /* Shift VALUE left by BITPOS bits. If VALUE is not constant, we must first convert its mode to MODE. */ - if (GET_CODE (value) == CONST_INT) + if (CONST_INT_P (value)) { HOST_WIDE_INT v = INTVAL (value); @@ -900,13 +979,7 @@ store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset, && bitpos + bitsize != GET_MODE_BITSIZE (mode)); if (GET_MODE (value) != mode) - { - if ((REG_P (value) || GET_CODE (value) == SUBREG) - && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value))) - value = gen_lowpart (mode, value); - else - value = convert_to_mode (mode, value, 1); - } + value = convert_to_mode (mode, value, 1); if (must_and) value = expand_binop (mode, and_optab, value, @@ -942,7 +1015,10 @@ store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset, } if (op0 != temp) - emit_move_insn (op0, temp); + { + op0 = copy_rtx (op0); + emit_move_insn (op0, temp); + } } /* Store a bit field that is split across multiple accessible memory objects. @@ -971,7 +1047,7 @@ store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, /* If VALUE is a constant other than a CONST_INT, get it into a register in WORD_MODE. If we can do this using gen_lowpart_common, do so. 
Note that VALUE might be a floating-point constant. */ - if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT) + if (CONSTANT_P (value) && !CONST_INT_P (value)) { rtx word = gen_lowpart_common (word_mode, value); @@ -1013,7 +1089,7 @@ store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, total_bits = GET_MODE_BITSIZE (GET_MODE (value)); /* Fetch successively less significant portions. */ - if (GET_CODE (value) == CONST_INT) + if (CONST_INT_P (value)) part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) >> (bitsize - bitsdone - thissize)) & (((HOST_WIDE_INT) 1 << thissize) - 1)); @@ -1028,7 +1104,7 @@ store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, else { /* Fetch successively more significant portions. */ - if (GET_CODE (value) == CONST_INT) + if (CONST_INT_P (value)) part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) >> bitsdone) & (((HOST_WIDE_INT) 1 << thissize) - 1)); @@ -1066,40 +1142,52 @@ store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, } } -/* Generate code to extract a byte-field from STR_RTX - containing BITSIZE bits, starting at BITNUM, - and put it in TARGET if possible (if TARGET is nonzero). - Regardless of TARGET, we return the rtx for where the value is placed. +/* A subroutine of extract_bit_field_1 that converts return value X + to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments + to extract_bit_field. */ - STR_RTX is the structure containing the byte (a REG or MEM). - UNSIGNEDP is nonzero if this is an unsigned bit field. - MODE is the natural mode of the field value once extracted. - TMODE is the mode the caller would like the value to have; - but the value may be returned with type MODE instead. +static rtx +convert_extracted_bit_field (rtx x, enum machine_mode mode, + enum machine_mode tmode, bool unsignedp) +{ + if (GET_MODE (x) == tmode || GET_MODE (x) == mode) + return x; - TOTAL_SIZE is the size in bytes of the containing structure, - or -1 if varying. + /* If the x mode is not a scalar integral, first convert to the + integer mode of that size and then access it as a floating-point + value via a SUBREG. */ + if (!SCALAR_INT_MODE_P (tmode)) + { + enum machine_mode smode; - If a TARGET is specified and we can store in it at no extra cost, - we do so, and return TARGET. - Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred - if they are equally easy. */ + smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0); + x = convert_to_mode (smode, x, unsignedp); + x = force_reg (smode, x); + return gen_lowpart (tmode, x); + } -rtx -extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, - unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, - enum machine_mode mode, enum machine_mode tmode) + return convert_to_mode (tmode, x, unsignedp); +} + +/* A subroutine of extract_bit_field, with the same arguments. + If FALLBACK_P is true, fall back to extract_fixed_bit_field + if we can find no other means of implementing the operation. + if FALLBACK_P is false, return NULL instead. */ + +static rtx +extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, + enum machine_mode mode, enum machine_mode tmode, + bool fallback_p) { unsigned int unit = (MEM_P (str_rtx)) ? 
BITS_PER_UNIT : BITS_PER_WORD; unsigned HOST_WIDE_INT offset, bitpos; rtx op0 = str_rtx; - rtx spec_target = target; - rtx spec_target_subreg = 0; enum machine_mode int_mode; - enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0); - enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0); + enum machine_mode ext_mode; enum machine_mode mode1; + enum insn_code icode; int byte_offset; if (tmode == VOIDmode) @@ -1126,18 +1214,48 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, return op0; } + /* See if we can get a better vector mode before extracting. */ + if (VECTOR_MODE_P (GET_MODE (op0)) + && !MEM_P (op0) + && GET_MODE_INNER (GET_MODE (op0)) != tmode) + { + enum machine_mode new_mode; + int nunits = GET_MODE_NUNITS (GET_MODE (op0)); + + if (GET_MODE_CLASS (tmode) == MODE_FLOAT) + new_mode = MIN_MODE_VECTOR_FLOAT; + else if (GET_MODE_CLASS (tmode) == MODE_FRACT) + new_mode = MIN_MODE_VECTOR_FRACT; + else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) + new_mode = MIN_MODE_VECTOR_UFRACT; + else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) + new_mode = MIN_MODE_VECTOR_ACCUM; + else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) + new_mode = MIN_MODE_VECTOR_UACCUM; + else + new_mode = MIN_MODE_VECTOR_INT; + + for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode)) + if (GET_MODE_NUNITS (new_mode) == nunits + && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) + && targetm.vector_mode_supported_p (new_mode)) + break; + if (new_mode != VOIDmode) + op0 = gen_lowpart (new_mode, op0); + } + /* Use vec_extract patterns for extracting parts of vectors whenever available. */ if (VECTOR_MODE_P (GET_MODE (op0)) && !MEM_P (op0) - && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code + && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code != CODE_FOR_nothing) && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) { enum machine_mode outermode = GET_MODE (op0); enum machine_mode innermode = GET_MODE_INNER (outermode); - int icode = (int) vec_extract_optab->handlers[outermode].insn_code; + int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code; unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); rtx rtxpos = GEN_INT (pos); rtx src = op0; @@ -1176,6 +1294,8 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, { emit_insn (seq); emit_insn (pat); + if (mode0 != mode) + return gen_lowpart (tmode, dest); return dest; } } @@ -1188,9 +1308,8 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, { if (MEM_P (op0)) op0 = adjust_address (op0, imode, 0); - else + else if (imode != BLKmode) { - gcc_assert (imode != BLKmode); op0 = gen_lowpart (imode, op0); /* If we got a SUBREG, force it into a register since we @@ -1198,6 +1317,24 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, if (GET_CODE (op0) == SUBREG) op0 = force_reg (imode, op0); } + else if (REG_P (op0)) + { + rtx reg, subreg; + imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)), + MODE_INT); + reg = gen_reg_rtx (imode); + subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg); + emit_move_insn (subreg, op0); + op0 = reg; + bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT; + } + else + { + rtx mem = assign_stack_temp (GET_MODE (op0), + GET_MODE_SIZE (GET_MODE (op0)), 0); + emit_move_insn (mem, op0); + op0 = adjust_address (mem, BLKmode, 0); + } } } @@ -1252,7 +1389,7 @@ extract_bit_field (rtx str_rtx, unsigned 
HOST_WIDE_INT bitsize, ? bitpos + bitsize == BITS_PER_WORD : bitpos == 0))) && ((!MEM_P (op0) - && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode), + && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1), GET_MODE_BITSIZE (GET_MODE (op0))) && GET_MODE_SIZE (mode1) != 0 && byte_offset % GET_MODE_SIZE (mode1) == 0) @@ -1261,18 +1398,15 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, || (offset * BITS_PER_UNIT % bitsize == 0 && MEM_ALIGN (op0) % bitsize == 0))))) { - if (mode1 != GET_MODE (op0)) + if (MEM_P (op0)) + op0 = adjust_address (op0, mode1, offset); + else if (mode1 != GET_MODE (op0)) { - if (MEM_P (op0)) - op0 = adjust_address (op0, mode1, offset); - else - { - rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), - byte_offset); - if (sub == NULL) - goto no_subreg_mode_swap; - op0 = sub; - } + rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), + byte_offset); + if (sub == NULL) + goto no_subreg_mode_swap; + op0 = sub; } if (mode1 != mode) return convert_to_mode (tmode, op0, unsignedp); @@ -1296,7 +1430,7 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, target = gen_reg_rtx (mode); /* Indicate for flow that the entire target reg is being set. */ - emit_insn (gen_rtx_CLOBBER (VOIDmode, target)); + emit_clobber (target); for (i = 0; i < nwords; i++) { @@ -1384,299 +1518,178 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, } /* Now OFFSET is nonzero only for memory operands. */ - - if (unsignedp) + ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0); + icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv; + if (ext_mode != MAX_MACHINE_MODE + && bitsize > 0 + && GET_MODE_BITSIZE (ext_mode) >= bitsize + /* If op0 is a register, we need it in EXT_MODE to make it + acceptable to the format of ext(z)v. */ + && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) + && !((REG_P (op0) || GET_CODE (op0) == SUBREG) + && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))) + && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0))) { - if (HAVE_extzv - && bitsize > 0 - && GET_MODE_BITSIZE (extzv_mode) >= bitsize - && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) - && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode)))) - { - unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset; - rtx bitsize_rtx, bitpos_rtx; - rtx last = get_last_insn (); - rtx xop0 = op0; - rtx xtarget = target; - rtx xspec_target = spec_target; - rtx xspec_target_subreg = spec_target_subreg; - rtx pat; - enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0); - - if (MEM_P (xop0)) - { - int save_volatile_ok = volatile_ok; - volatile_ok = 1; - - /* Is the memory operand acceptable? */ - if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate) - (xop0, GET_MODE (xop0)))) - { - /* No, load into a reg and extract from there. */ - enum machine_mode bestmode; - - /* Get the mode to use for inserting into this field. If - OP0 is BLKmode, get the smallest mode consistent with the - alignment. If OP0 is a non-BLKmode object that is no - wider than MAXMODE, use its mode. Otherwise, use the - smallest mode containing the field. 
*/ - - if (GET_MODE (xop0) == BLKmode - || (GET_MODE_SIZE (GET_MODE (op0)) - > GET_MODE_SIZE (maxmode))) - bestmode = get_best_mode (bitsize, bitnum, - MEM_ALIGN (xop0), maxmode, - MEM_VOLATILE_P (xop0)); - else - bestmode = GET_MODE (xop0); - - if (bestmode == VOIDmode - || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0)) - && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0))) - goto extzv_loses; - - /* Compute offset as multiple of this unit, - counting in bytes. */ - unit = GET_MODE_BITSIZE (bestmode); - xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); - xbitpos = bitnum % unit; - xop0 = adjust_address (xop0, bestmode, xoffset); - - /* Make sure register is big enough for the whole field. */ - if (xoffset * BITS_PER_UNIT + unit - < offset * BITS_PER_UNIT + bitsize) - goto extzv_loses; - - /* Fetch it to a register in that size. */ - xop0 = force_reg (bestmode, xop0); - - /* XBITPOS counts within UNIT, which is what is expected. */ - } - else - /* Get ref to first byte containing part of the field. */ - xop0 = adjust_address (xop0, byte_mode, xoffset); - - volatile_ok = save_volatile_ok; - } + unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset; + rtx bitsize_rtx, bitpos_rtx; + rtx last = get_last_insn (); + rtx xop0 = op0; + rtx xtarget = target; + rtx xspec_target = target; + rtx xspec_target_subreg = 0; + rtx pat; - /* If op0 is a register, we need it in MAXMODE (which is usually - SImode). to make it acceptable to the format of extzv. */ - if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode) - goto extzv_loses; - if (REG_P (xop0) && GET_MODE (xop0) != maxmode) - xop0 = gen_rtx_SUBREG (maxmode, xop0, 0); + /* If op0 is a register, we need it in EXT_MODE to make it + acceptable to the format of ext(z)v. */ + if (REG_P (xop0) && GET_MODE (xop0) != ext_mode) + xop0 = gen_lowpart_SUBREG (ext_mode, xop0); + if (MEM_P (xop0)) + /* Get ref to first byte containing part of the field. */ + xop0 = adjust_address (xop0, byte_mode, xoffset); - /* On big-endian machines, we count bits from the most significant. - If the bit field insn does not, we must invert. */ - if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) - xbitpos = unit - bitsize - xbitpos; + /* On big-endian machines, we count bits from the most significant. + If the bit field insn does not, we must invert. */ + if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) + xbitpos = unit - bitsize - xbitpos; - /* Now convert from counting within UNIT to counting in MAXMODE. */ - if (BITS_BIG_ENDIAN && !MEM_P (xop0)) - xbitpos += GET_MODE_BITSIZE (maxmode) - unit; + /* Now convert from counting within UNIT to counting in EXT_MODE. */ + if (BITS_BIG_ENDIAN && !MEM_P (xop0)) + xbitpos += GET_MODE_BITSIZE (ext_mode) - unit; - unit = GET_MODE_BITSIZE (maxmode); + unit = GET_MODE_BITSIZE (ext_mode); - if (xtarget == 0) - xtarget = xspec_target = gen_reg_rtx (tmode); + if (xtarget == 0) + xtarget = xspec_target = gen_reg_rtx (tmode); - if (GET_MODE (xtarget) != maxmode) + if (GET_MODE (xtarget) != ext_mode) + { + /* Don't use LHS paradoxical subreg if explicit truncation is needed + between the mode of the extraction (word_mode) and the target + mode. Instead, create a temporary and use convert_move to set + the target. 
*/ + if (REG_P (xtarget) + && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)), + GET_MODE_BITSIZE (ext_mode))) { - if (REG_P (xtarget)) - { - int wider = (GET_MODE_SIZE (maxmode) - > GET_MODE_SIZE (GET_MODE (xtarget))); - xtarget = gen_lowpart (maxmode, xtarget); - if (wider) - xspec_target_subreg = xtarget; - } - else - xtarget = gen_reg_rtx (maxmode); + xtarget = gen_lowpart (ext_mode, xtarget); + if (GET_MODE_SIZE (ext_mode) + > GET_MODE_SIZE (GET_MODE (xspec_target))) + xspec_target_subreg = xtarget; } + else + xtarget = gen_reg_rtx (ext_mode); + } - /* If this machine's extzv insists on a register target, - make sure we have one. */ - if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate) - (xtarget, maxmode))) - xtarget = gen_reg_rtx (maxmode); + /* If this machine's ext(z)v insists on a register target, + make sure we have one. */ + if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode)) + xtarget = gen_reg_rtx (ext_mode); - bitsize_rtx = GEN_INT (bitsize); - bitpos_rtx = GEN_INT (xbitpos); + bitsize_rtx = GEN_INT (bitsize); + bitpos_rtx = GEN_INT (xbitpos); - pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx); - if (pat) - { - emit_insn (pat); - target = xtarget; - spec_target = xspec_target; - spec_target_subreg = xspec_target_subreg; - } - else - { - delete_insns_since (last); - target = extract_fixed_bit_field (int_mode, op0, offset, bitsize, - bitpos, target, 1); - } + pat = (unsignedp + ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx) + : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx)); + if (pat) + { + emit_insn (pat); + if (xtarget == xspec_target) + return xtarget; + if (xtarget == xspec_target_subreg) + return xspec_target; + return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp); } - else - extzv_loses: - target = extract_fixed_bit_field (int_mode, op0, offset, bitsize, - bitpos, target, 1); + delete_insns_since (last); } - else - { - if (HAVE_extv - && bitsize > 0 - && GET_MODE_BITSIZE (extv_mode) >= bitsize - && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) - && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode)))) - { - int xbitpos = bitpos, xoffset = offset; - rtx bitsize_rtx, bitpos_rtx; - rtx last = get_last_insn (); - rtx xop0 = op0, xtarget = target; - rtx xspec_target = spec_target; - rtx xspec_target_subreg = spec_target_subreg; - rtx pat; - enum machine_mode maxmode = mode_for_extraction (EP_extv, 0); - - if (MEM_P (xop0)) - { - /* Is the memory operand acceptable? */ - if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate) - (xop0, GET_MODE (xop0)))) - { - /* No, load into a reg and extract from there. */ - enum machine_mode bestmode; - - /* Get the mode to use for inserting into this field. If - OP0 is BLKmode, get the smallest mode consistent with the - alignment. If OP0 is a non-BLKmode object that is no - wider than MAXMODE, use its mode. Otherwise, use the - smallest mode containing the field. */ - - if (GET_MODE (xop0) == BLKmode - || (GET_MODE_SIZE (GET_MODE (op0)) - > GET_MODE_SIZE (maxmode))) - bestmode = get_best_mode (bitsize, bitnum, - MEM_ALIGN (xop0), maxmode, - MEM_VOLATILE_P (xop0)); - else - bestmode = GET_MODE (xop0); - - if (bestmode == VOIDmode - || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0)) - && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0))) - goto extv_loses; - - /* Compute offset as multiple of this unit, - counting in bytes. 
*/ - unit = GET_MODE_BITSIZE (bestmode); - xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); - xbitpos = bitnum % unit; - xop0 = adjust_address (xop0, bestmode, xoffset); - - /* Make sure register is big enough for the whole field. */ - if (xoffset * BITS_PER_UNIT + unit - < offset * BITS_PER_UNIT + bitsize) - goto extv_loses; - - /* Fetch it to a register in that size. */ - xop0 = force_reg (bestmode, xop0); - - /* XBITPOS counts within UNIT, which is what is expected. */ - } - else - /* Get ref to first byte containing part of the field. */ - xop0 = adjust_address (xop0, byte_mode, xoffset); - } - - /* If op0 is a register, we need it in MAXMODE (which is usually - SImode) to make it acceptable to the format of extv. */ - if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode) - goto extv_loses; - if (REG_P (xop0) && GET_MODE (xop0) != maxmode) - xop0 = gen_rtx_SUBREG (maxmode, xop0, 0); - - /* On big-endian machines, we count bits from the most significant. - If the bit field insn does not, we must invert. */ - if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) - xbitpos = unit - bitsize - xbitpos; - /* XBITPOS counts within a size of UNIT. - Adjust to count within a size of MAXMODE. */ - if (BITS_BIG_ENDIAN && !MEM_P (xop0)) - xbitpos += (GET_MODE_BITSIZE (maxmode) - unit); + /* If OP0 is a memory, try copying it to a register and seeing if a + cheap register alternative is available. */ + if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0)) + { + enum machine_mode bestmode; + + /* Get the mode to use for inserting into this field. If + OP0 is BLKmode, get the smallest mode consistent with the + alignment. If OP0 is a non-BLKmode object that is no + wider than EXT_MODE, use its mode. Otherwise, use the + smallest mode containing the field. */ + + if (GET_MODE (op0) == BLKmode + || (ext_mode != MAX_MACHINE_MODE + && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))) + bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), + (ext_mode == MAX_MACHINE_MODE + ? VOIDmode : ext_mode), + MEM_VOLATILE_P (op0)); + else + bestmode = GET_MODE (op0); - unit = GET_MODE_BITSIZE (maxmode); + if (bestmode != VOIDmode + && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) + && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) + { + unsigned HOST_WIDE_INT xoffset, xbitpos; - if (xtarget == 0) - xtarget = xspec_target = gen_reg_rtx (tmode); + /* Compute the offset as a multiple of this unit, + counting in bytes. */ + unit = GET_MODE_BITSIZE (bestmode); + xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); + xbitpos = bitnum % unit; - if (GET_MODE (xtarget) != maxmode) + /* Make sure the register is big enough for the whole field. */ + if (xoffset * BITS_PER_UNIT + unit + >= offset * BITS_PER_UNIT + bitsize) { - if (REG_P (xtarget)) - { - int wider = (GET_MODE_SIZE (maxmode) - > GET_MODE_SIZE (GET_MODE (xtarget))); - xtarget = gen_lowpart (maxmode, xtarget); - if (wider) - xspec_target_subreg = xtarget; - } - else - xtarget = gen_reg_rtx (maxmode); - } + rtx last, result, xop0; - /* If this machine's extv insists on a register target, - make sure we have one. */ - if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate) - (xtarget, maxmode))) - xtarget = gen_reg_rtx (maxmode); + last = get_last_insn (); - bitsize_rtx = GEN_INT (bitsize); - bitpos_rtx = GEN_INT (xbitpos); + /* Fetch it to a register in that size. 
*/ + xop0 = adjust_address (op0, bestmode, xoffset); + xop0 = force_reg (bestmode, xop0); + result = extract_bit_field_1 (xop0, bitsize, xbitpos, + unsignedp, target, + mode, tmode, false); + if (result) + return result; - pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx); - if (pat) - { - emit_insn (pat); - target = xtarget; - spec_target = xspec_target; - spec_target_subreg = xspec_target_subreg; - } - else - { delete_insns_since (last); - target = extract_fixed_bit_field (int_mode, op0, offset, bitsize, - bitpos, target, 0); } } - else - extv_loses: - target = extract_fixed_bit_field (int_mode, op0, offset, bitsize, - bitpos, target, 0); } - if (target == spec_target) - return target; - if (target == spec_target_subreg) - return spec_target; - if (GET_MODE (target) != tmode && GET_MODE (target) != mode) - { - /* If the target mode is not a scalar integral, first convert to the - integer mode of that size and then access it as a floating-point - value via a SUBREG. */ - if (!SCALAR_INT_MODE_P (tmode)) - { - enum machine_mode smode - = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0); - target = convert_to_mode (smode, target, unsignedp); - target = force_reg (smode, target); - return gen_lowpart (tmode, target); - } - return convert_to_mode (tmode, target, unsignedp); - } - return target; + if (!fallback_p) + return NULL; + + target = extract_fixed_bit_field (int_mode, op0, offset, bitsize, + bitpos, target, unsignedp); + return convert_extracted_bit_field (target, mode, tmode, unsignedp); +} + +/* Generate code to extract a byte-field from STR_RTX + containing BITSIZE bits, starting at BITNUM, + and put it in TARGET if possible (if TARGET is nonzero). + Regardless of TARGET, we return the rtx for where the value is placed. + + STR_RTX is the structure containing the byte (a REG or MEM). + UNSIGNEDP is nonzero if this is an unsigned bit field. + MODE is the natural mode of the field value once extracted. + TMODE is the mode the caller would like the value to have; + but the value may be returned with type MODE instead. + + If a TARGET is specified and we can store in it at no extra cost, + we do so, and return TARGET. + Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred + if they are equally easy. 
*/ + +rtx +extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, + enum machine_mode mode, enum machine_mode tmode) +{ + return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, + target, mode, tmode, true); } /* Extract a bit field using shifts and boolean operations @@ -1826,39 +1839,15 @@ extract_fixed_bit_field (enum machine_mode tmode, rtx op0, static rtx mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement) { - HOST_WIDE_INT masklow, maskhigh; - - if (bitsize == 0) - masklow = 0; - else if (bitpos < HOST_BITS_PER_WIDE_INT) - masklow = (HOST_WIDE_INT) -1 << bitpos; - else - masklow = 0; - - if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT) - masklow &= ((unsigned HOST_WIDE_INT) -1 - >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize)); - - if (bitpos <= HOST_BITS_PER_WIDE_INT) - maskhigh = -1; - else - maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT); + double_int mask; - if (bitsize == 0) - maskhigh = 0; - else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT) - maskhigh &= ((unsigned HOST_WIDE_INT) -1 - >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize)); - else - maskhigh = 0; + mask = double_int_mask (bitsize); + mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false); if (complement) - { - maskhigh = ~maskhigh; - masklow = ~masklow; - } + mask = double_int_not (mask); - return immed_double_const (masklow, maskhigh, mode); + return immed_double_int_const (mask, mode); } /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value @@ -1867,172 +1856,14 @@ mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement) static rtx lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize) { - unsigned HOST_WIDE_INT v = INTVAL (value); - HOST_WIDE_INT low, high; - - if (bitsize < HOST_BITS_PER_WIDE_INT) - v &= ~((HOST_WIDE_INT) -1 << bitsize); - - if (bitpos < HOST_BITS_PER_WIDE_INT) - { - low = v << bitpos; - high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0); - } - else - { - low = 0; - high = v << (bitpos - HOST_BITS_PER_WIDE_INT); - } + double_int val; + + val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize); + val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false); - return immed_double_const (low, high, mode); + return immed_double_int_const (val, mode); } -/* Extract a bit field from a memory by forcing the alignment of the - memory. This efficient only if the field spans at least 4 boundaries. - - OP0 is the MEM. - BITSIZE is the field width; BITPOS is the position of the first bit. - UNSIGNEDP is true if the result should be zero-extended. */ - -static rtx -extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, - unsigned HOST_WIDE_INT bitpos, - int unsignedp) -{ - enum machine_mode mode, dmode; - unsigned int m_bitsize, m_size; - unsigned int sign_shift_up, sign_shift_dn; - rtx base, a1, a2, v1, v2, comb, shift, result, start; - - /* Choose a mode that will fit BITSIZE. */ - mode = smallest_mode_for_size (bitsize, MODE_INT); - m_size = GET_MODE_SIZE (mode); - m_bitsize = GET_MODE_BITSIZE (mode); - - /* Choose a mode twice as wide. Fail if no such mode exists. */ - dmode = mode_for_size (m_bitsize * 2, MODE_INT, false); - if (dmode == BLKmode) - return NULL; - - do_pending_stack_adjust (); - start = get_last_insn (); - - /* At the end, we'll need an additional shift to deal with sign/zero - extension. 
By default this will be a left+right shift of the - appropriate size. But we may be able to eliminate one of them. */ - sign_shift_up = sign_shift_dn = m_bitsize - bitsize; - - if (STRICT_ALIGNMENT) - { - base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT); - bitpos %= BITS_PER_UNIT; - - /* We load two values to be concatenate. There's an edge condition - that bears notice -- an aligned value at the end of a page can - only load one value lest we segfault. So the two values we load - are at "base & -size" and "(base + size - 1) & -size". If base - is unaligned, the addresses will be aligned and sequential; if - base is aligned, the addresses will both be equal to base. */ - - a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL), - GEN_INT (-(HOST_WIDE_INT)m_size), - NULL, true, OPTAB_LIB_WIDEN); - mark_reg_pointer (a1, m_bitsize); - v1 = gen_rtx_MEM (mode, a1); - set_mem_align (v1, m_bitsize); - v1 = force_reg (mode, validize_mem (v1)); - - a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1); - a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL), - GEN_INT (-(HOST_WIDE_INT)m_size), - NULL, true, OPTAB_LIB_WIDEN); - v2 = gen_rtx_MEM (mode, a2); - set_mem_align (v2, m_bitsize); - v2 = force_reg (mode, validize_mem (v2)); - - /* Combine these two values into a double-word value. */ - if (m_bitsize == BITS_PER_WORD) - { - comb = gen_reg_rtx (dmode); - emit_insn (gen_rtx_CLOBBER (VOIDmode, comb)); - emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1); - emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2); - } - else - { - if (BYTES_BIG_ENDIAN) - comb = v1, v1 = v2, v2 = comb; - v1 = convert_modes (dmode, mode, v1, true); - if (v1 == NULL) - goto fail; - v2 = convert_modes (dmode, mode, v2, true); - v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize), - NULL, true, OPTAB_LIB_WIDEN); - if (v2 == NULL) - goto fail; - comb = expand_simple_binop (dmode, IOR, v1, v2, NULL, - true, OPTAB_LIB_WIDEN); - if (comb == NULL) - goto fail; - } - - shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1), - NULL, true, OPTAB_LIB_WIDEN); - shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1); - - if (bitpos != 0) - { - if (sign_shift_up <= bitpos) - bitpos -= sign_shift_up, sign_shift_up = 0; - shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos), - NULL, true, OPTAB_LIB_WIDEN); - } - } - else - { - unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT; - bitpos %= BITS_PER_UNIT; - - /* When strict alignment is not required, we can just load directly - from memory without masking. If the remaining BITPOS offset is - small enough, we may be able to do all operations in MODE as - opposed to DMODE. */ - if (bitpos + bitsize <= m_bitsize) - dmode = mode; - comb = adjust_address (op0, dmode, offset); - - if (sign_shift_up <= bitpos) - bitpos -= sign_shift_up, sign_shift_up = 0; - shift = GEN_INT (bitpos); - } - - /* Shift down the double-word such that the requested value is at bit 0. */ - if (shift != const0_rtx) - comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT, - comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN); - if (comb == NULL) - goto fail; - - /* If the field exactly matches MODE, then all we need to do is return the - lowpart. Otherwise, shift to get the sign bits set properly. 
*/ - result = force_reg (mode, gen_lowpart (mode, comb)); - - if (sign_shift_up) - result = expand_simple_binop (mode, ASHIFT, result, - GEN_INT (sign_shift_up), - NULL_RTX, 0, OPTAB_LIB_WIDEN); - if (sign_shift_dn) - result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT, - result, GEN_INT (sign_shift_dn), - NULL_RTX, 0, OPTAB_LIB_WIDEN); - - return result; - - fail: - delete_insns_since (start); - return NULL; -} - /* Extract a bit field that is split across two words and return an RTX for the result. @@ -2054,16 +1885,7 @@ extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, if (REG_P (op0) || GET_CODE (op0) == SUBREG) unit = BITS_PER_WORD; else - { - unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); - if (0 && bitsize / unit > 2) - { - rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos, - unsignedp); - if (tmp) - return tmp; - } - } + unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); while (bitsdone < bitsize) { @@ -2150,6 +1972,80 @@ extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, NULL_RTX, 0); } +/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving + the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than + MODE, fill the upper bits with zeros. Fail if the layout of either + mode is unknown (as for CC modes) or if the extraction would involve + unprofitable mode punning. Return the value on success, otherwise + return null. + + This is different from gen_lowpart* in these respects: + + - the returned value must always be considered an rvalue + + - when MODE is wider than SRC_MODE, the extraction involves + a zero extension + + - when MODE is smaller than SRC_MODE, the extraction involves + a truncation (and is thus subject to TRULY_NOOP_TRUNCATION). + + In other words, this routine performs a computation, whereas the + gen_lowpart* routines are conceptually lvalue or rvalue subreg + operations. */ + +rtx +extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src) +{ + enum machine_mode int_mode, src_int_mode; + + if (mode == src_mode) + return src; + + if (CONSTANT_P (src)) + { + /* simplify_gen_subreg can't be used here, as if simplify_subreg + fails, it will happily create (subreg (symbol_ref)) or similar + invalid SUBREGs. */ + unsigned int byte = subreg_lowpart_offset (mode, src_mode); + rtx ret = simplify_subreg (mode, src, src_mode, byte); + if (ret) + return ret; + + if (GET_MODE (src) == VOIDmode + || !validate_subreg (mode, src_mode, src, byte)) + return NULL_RTX; + + src = force_reg (GET_MODE (src), src); + return gen_rtx_SUBREG (mode, src, byte); + } + + if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) + return NULL_RTX; + + if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) + && MODES_TIEABLE_P (mode, src_mode)) + { + rtx x = gen_lowpart_common (mode, src); + if (x) + return x; + } + + src_int_mode = int_mode_for_mode (src_mode); + int_mode = int_mode_for_mode (mode); + if (src_int_mode == BLKmode || int_mode == BLKmode) + return NULL_RTX; + + if (!MODES_TIEABLE_P (src_int_mode, src_mode)) + return NULL_RTX; + if (!MODES_TIEABLE_P (int_mode, mode)) + return NULL_RTX; + + src = gen_lowpart (src_int_mode, src); + src = convert_modes (int_mode, src_int_mode, src, true); + src = gen_lowpart (mode, src); + return src; +} + /* Add INC into TARGET. 
*/ void @@ -2188,23 +2084,43 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, rtx op1, temp = 0; int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); - int try; + optab lshift_optab = ashl_optab; + optab rshift_arith_optab = ashr_optab; + optab rshift_uns_optab = lshr_optab; + optab lrotate_optab = rotl_optab; + optab rrotate_optab = rotr_optab; + enum machine_mode op1_mode; + int attempt; + bool speed = optimize_insn_for_speed_p (); + + op1 = expand_normal (amount); + op1_mode = GET_MODE (op1); + + /* Determine whether the shift/rotate amount is a vector, or scalar. If the + shift amount is a vector, use the vector/vector shift patterns. */ + if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) + { + lshift_optab = vashl_optab; + rshift_arith_optab = vashr_optab; + rshift_uns_optab = vlshr_optab; + lrotate_optab = vrotl_optab; + rrotate_optab = vrotr_optab; + } /* Previously detected shift-counts computed by NEGATE_EXPR and shifted in the other direction; but that does not work on all machines. */ - op1 = expand_normal (amount); - if (SHIFT_COUNT_TRUNCATED) { - if (GET_CODE (op1) == CONST_INT + if (CONST_INT_P (op1) && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode))) op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) % GET_MODE_BITSIZE (mode)); else if (GET_CODE (op1) == SUBREG - && subreg_lowpart_p (op1)) + && subreg_lowpart_p (op1) + && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))) op1 = SUBREG_REG (op1); } @@ -2214,12 +2130,12 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, /* Check whether its cheaper to implement a left shift by a constant bit count by a sequence of additions. */ if (code == LSHIFT_EXPR - && GET_CODE (op1) == CONST_INT + && CONST_INT_P (op1) && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode) && INTVAL (op1) < MAX_BITS_PER_WORD - && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode] - && shift_cost[mode][INTVAL (op1)] != MAX_COST) + && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode] + && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST) { int i; for (i = 0; i < INTVAL (op1); i++) @@ -2231,13 +2147,13 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, return shifted; } - for (try = 0; temp == 0 && try < 3; try++) + for (attempt = 0; temp == 0 && attempt < 3; attempt++) { enum optab_methods methods; - if (try == 0) + if (attempt == 0) methods = OPTAB_DIRECT; - else if (try == 1) + else if (attempt == 1) methods = OPTAB_WIDEN; else methods = OPTAB_LIB_WIDEN; @@ -2285,12 +2201,12 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, } temp = expand_binop (mode, - left ? rotl_optab : rotr_optab, + left ? lrotate_optab : rrotate_optab, shifted, op1, target, unsignedp, methods); } else if (unsignedp) temp = expand_binop (mode, - left ? ashl_optab : lshr_optab, + left ? lshift_optab : rshift_uns_optab, shifted, op1, target, unsignedp, methods); /* Do arithmetic shifts. @@ -2309,7 +2225,7 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, /* Arithmetic shift */ temp = expand_binop (mode, - left ? ashl_optab : ashr_optab, + left ? lshift_optab : rshift_arith_optab, shifted, op1, target, unsignedp, methods1); } @@ -2413,6 +2329,9 @@ struct alg_hash_entry { Otherwise, the cost within which multiplication by T is impossible. */ struct mult_cost cost; + + /* OPtimized for speed? 
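[Editorial note] The expand_shift hunk above only replaces a constant left shift by a sequence of additions when INTVAL (op1) * add_cost beats shift_cost; the identity it leans on is simply that shifting left by N is N doublings. A minimal standalone check (names invented):

#include <stdio.h>

static unsigned
shift_left_by_adds (unsigned x, unsigned n)
{
  for (unsigned i = 0; i < n; i++)
    x = x + x;                     /* one addition per shifted bit */
  return x;
}

int
main (void)
{
  unsigned x = 0x1234u;
  for (unsigned n = 0; n < 8; n++)
    if (shift_left_by_adds (x, n) != x << n)
      {
        printf ("mismatch at n=%u\n", n);
        return 1;
      }
  printf ("ok\n");
  return 0;
}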
*/ + bool speed; }; /* The number of cache/hash entries. */ @@ -2461,11 +2380,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, struct mult_cost best_cost; struct mult_cost new_limit; int op_cost, op_latency; + unsigned HOST_WIDE_INT orig_t = t; unsigned HOST_WIDE_INT q; int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); int hash_index; bool cache_hit = false; enum alg_code cache_alg = alg_zero; + bool speed = optimize_insn_for_speed_p (); /* Indicate that no algorithm is yet found. If no algorithm is found, this value will be returned and indicate failure. */ @@ -2493,13 +2414,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, fail now. */ if (t == 0) { - if (MULT_COST_LESS (cost_limit, zero_cost)) + if (MULT_COST_LESS (cost_limit, zero_cost[speed])) return; else { alg_out->ops = 1; - alg_out->cost.cost = zero_cost; - alg_out->cost.latency = zero_cost; + alg_out->cost.cost = zero_cost[speed]; + alg_out->cost.latency = zero_cost[speed]; alg_out->op[0] = alg_zero; return; } @@ -2507,16 +2428,18 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, /* We'll be needing a couple extra algorithm structures now. */ - alg_in = alloca (sizeof (struct algorithm)); - best_alg = alloca (sizeof (struct algorithm)); + alg_in = XALLOCA (struct algorithm); + best_alg = XALLOCA (struct algorithm); best_cost = *cost_limit; /* Compute the hash index. */ - hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES; + hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; /* See if we already know what to do for T. */ if (alg_hash[hash_index].t == t && alg_hash[hash_index].mode == mode + && alg_hash[hash_index].mode == mode + && alg_hash[hash_index].speed == speed && alg_hash[hash_index].alg != alg_unknown) { cache_alg = alg_hash[hash_index].alg; @@ -2585,10 +2508,10 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, q = t >> m; /* The function expand_shift will choose between a shift and a sequence of additions, so the observed cost is given as - MIN (m * add_cost[mode], shift_cost[mode][m]). */ - op_cost = m * add_cost[mode]; - if (shift_cost[mode][m] < op_cost) - op_cost = shift_cost[mode][m]; + MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */ + op_cost = m * add_cost[speed][mode]; + if (shift_cost[speed][mode][m] < op_cost) + op_cost = shift_cost[speed][mode][m]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, q, &new_limit, mode); @@ -2603,6 +2526,38 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, best_alg->log[best_alg->ops] = m; best_alg->op[best_alg->ops] = alg_shift; } + + /* See if treating ORIG_T as a signed number yields a better + sequence. Try this sequence only for a negative ORIG_T + as it would be useless for a non-negative ORIG_T. */ + if ((HOST_WIDE_INT) orig_t < 0) + { + /* Shift ORIG_T as follows because a right shift of a + negative-valued signed type is implementation + defined. */ + q = ~(~orig_t >> m); + /* The function expand_shift will choose between a shift + and a sequence of additions, so the observed cost is + given as MIN (m * add_cost[speed][mode], + shift_cost[speed][mode][m]). 
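[Editorial note] The new synth_mult code below computes q as ~(~orig_t >> m) instead of right-shifting the signed value, because a right shift of a negative signed quantity is implementation-defined in C. On the unsigned representation, complementing before and after a logical shift refills the vacated high bits with ones, which is exactly the arithmetic-shift result the algorithm wants. A small self-check of that identity (helper names are mine; the conversion back to int32_t assumes the usual two's-complement wrap):

#include <stdint.h>
#include <stdio.h>

static int32_t
arith_shr_portable (int32_t t, unsigned m)
{
  uint32_t u = (uint32_t) t;
  return (int32_t) ~(~u >> m);     /* intended for t < 0, as in synth_mult */
}

int
main (void)
{
  struct { int32_t t; unsigned m; int32_t want; } tests[] = {
    { -7, 1, -4 },     /* floor (-3.5)  */
    { -7, 2, -2 },     /* floor (-1.75) */
    { -8, 3, -1 },     /* exact         */
    { -1, 5, -1 },     /* stays -1      */
  };
  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    if (arith_shr_portable (tests[i].t, tests[i].m) != tests[i].want)
      {
        printf ("mismatch at %u\n", i);
        return 1;
      }
  printf ("ok\n");
  return 0;
}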
*/ + op_cost = m * add_cost[speed][mode]; + if (shift_cost[speed][mode][m] < op_cost) + op_cost = shift_cost[speed][mode][m]; + new_limit.cost = best_cost.cost - op_cost; + new_limit.latency = best_cost.latency - op_cost; + synth_mult (alg_in, q, &new_limit, mode); + + alg_in->cost.cost += op_cost; + alg_in->cost.latency += op_cost; + if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) + { + struct algorithm *x; + best_cost = alg_in->cost; + x = alg_in, alg_in = best_alg, best_alg = x; + best_alg->log[best_alg->ops] = m; + best_alg->op[best_alg->ops] = alg_shift; + } + } } if (cache_hit) goto done; @@ -2629,7 +2584,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ - op_cost = add_cost[mode]; + op_cost = add_cost[speed][mode]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, t + 1, &new_limit, mode); @@ -2649,7 +2604,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */ - op_cost = add_cost[mode]; + op_cost = add_cost[speed][mode]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, t - 1, &new_limit, mode); @@ -2665,6 +2620,29 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, best_alg->op[best_alg->ops] = alg_add_t_m2; } } + + /* We may be able to calculate a * -7, a * -15, a * -31, etc + quickly with a - a * n for some appropriate constant n. */ + m = exact_log2 (-orig_t + 1); + if (m >= 0 && m < maxm) + { + op_cost = shiftsub1_cost[speed][mode][m]; + new_limit.cost = best_cost.cost - op_cost; + new_limit.latency = best_cost.latency - op_cost; + synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode); + + alg_in->cost.cost += op_cost; + alg_in->cost.latency += op_cost; + if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) + { + struct algorithm *x; + best_cost = alg_in->cost; + x = alg_in, alg_in = best_alg, best_alg = x; + best_alg->log[best_alg->ops] = m; + best_alg->op[best_alg->ops] = alg_sub_t_m2; + } + } + if (cache_hit) goto done; } @@ -2694,14 +2672,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, equal to its cost, otherwise assume that on superscalar hardware the shift may be executed concurrently with the earlier steps in the algorithm. */ - op_cost = add_cost[mode] + shift_cost[mode][m]; - if (shiftadd_cost[mode][m] < op_cost) + op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; + if (shiftadd_cost[speed][mode][m] < op_cost) { - op_cost = shiftadd_cost[mode][m]; + op_cost = shiftadd_cost[speed][mode][m]; op_latency = op_cost; } else - op_latency = add_cost[mode]; + op_latency = add_cost[speed][mode]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_latency; @@ -2733,14 +2711,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, equal to it's cost, otherwise assume that on superscalar hardware the shift may be executed concurrently with the earlier steps in the algorithm. 
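[Editorial note] The block added just below recognises multipliers of the form 1 - 2^m (-7, -15, -31, ...) and synthesises them as one shift and one subtract, a - (a << m). The identity, checked directly in plain C (illustrative only; small operands avoid signed overflow):

#include <stdint.h>
#include <stdio.h>

static int32_t
mul_by_one_minus_pow2 (int32_t a, unsigned m)
{
  return a - (int32_t) ((uint32_t) a << m);   /* a * (1 - 2^m) */
}

int
main (void)
{
  for (int32_t a = -5; a <= 5; a++)
    for (unsigned m = 1; m <= 6; m++)
      if (mul_by_one_minus_pow2 (a, m) != a * (1 - (1 << m)))
        {
          printf ("mismatch a=%d m=%u\n", (int) a, m);
          return 1;
        }
  printf ("ok\n");
  return 0;
}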
*/ - op_cost = add_cost[mode] + shift_cost[mode][m]; - if (shiftsub_cost[mode][m] < op_cost) + op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; + if (shiftsub0_cost[speed][mode][m] < op_cost) { - op_cost = shiftsub_cost[mode][m]; + op_cost = shiftsub0_cost[speed][mode][m]; op_latency = op_cost; } else - op_latency = add_cost[mode]; + op_latency = add_cost[speed][mode]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_latency; @@ -2774,7 +2752,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (q); if (m >= 0 && m < maxm) { - op_cost = shiftadd_cost[mode][m]; + op_cost = shiftadd_cost[speed][mode][m]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); @@ -2799,7 +2777,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (q); if (m >= 0 && m < maxm) { - op_cost = shiftsub_cost[mode][m]; + op_cost = shiftsub0_cost[speed][mode][m]; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); @@ -2830,6 +2808,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, caller. */ alg_hash[hash_index].t = t; alg_hash[hash_index].mode = mode; + alg_hash[hash_index].speed = speed; alg_hash[hash_index].alg = alg_impossible; alg_hash[hash_index].cost = *cost_limit; return; @@ -2840,6 +2819,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { alg_hash[hash_index].t = t; alg_hash[hash_index].mode = mode; + alg_hash[hash_index].speed = speed; alg_hash[hash_index].alg = best_alg->op[best_alg->ops]; alg_hash[hash_index].cost.cost = best_cost.cost; alg_hash[hash_index].cost.latency = best_cost.latency; @@ -2879,6 +2859,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, struct algorithm alg2; struct mult_cost limit; int op_cost; + bool speed = optimize_insn_for_speed_p (); /* Fail quickly for impossible bounds. */ if (mult_cost < 0) @@ -2887,7 +2868,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, /* Ensure that mult_cost provides a reasonable upper bound. Any constant multiplication can be performed with less than 2 * bits additions. */ - op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode]; + op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode]; if (mult_cost > op_cost) mult_cost = op_cost; @@ -2900,7 +2881,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, `unsigned int' */ if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode)) { - op_cost = neg_cost[mode]; + op_cost = neg_cost[speed][mode]; if (MULT_COST_LESS (&alg->cost, mult_cost)) { limit.cost = alg->cost.cost - op_cost; @@ -2920,7 +2901,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, } /* This proves very useful for division-by-constant. */ - op_cost = add_cost[mode]; + op_cost = add_cost[speed][mode]; if (MULT_COST_LESS (&alg->cost, mult_cost)) { limit.cost = alg->cost.cost - op_cost; @@ -2956,10 +2937,9 @@ expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val, int opno; enum machine_mode nmode; - /* Avoid referencing memory over and over. - For speed, but also for correctness when mem is volatile. */ - if (MEM_P (op0)) - op0 = force_reg (mode, op0); + /* Avoid referencing memory over and over and invalid sharing + on SUBREGs. 
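[Editorial note] choose_mult_variant, updated above, also weighs two derived forms of a synthesized constant multiplication: negate_variant (multiply by -VAL, then negate) and add_variant (multiply by VAL - 1, then add the operand once, which the comment notes proves useful for division by constants). The two identities in isolation, with arbitrary example values:

#include <stdio.h>

int
main (void)
{
  int x = 123, val = -45;

  int negate_variant = -(x * -val);        /* x * val via x * (-val)    */
  int add_variant    = x * (val - 1) + x;  /* x * val via x * (val - 1) */

  printf ("%d %d %d\n", x * val, negate_variant, add_variant);
  return !(negate_variant == x * val && add_variant == x * val);
}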
*/ + op0 = force_reg (mode, op0); /* ACCUM starts out either as OP0 or as a zero, depending on the first operation. */ @@ -3070,7 +3050,8 @@ expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val, insn = get_last_insn (); set_unique_reg_note (insn, REG_EQUAL, - gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far))); + gen_rtx_MULT (nmode, tem, + GEN_INT (val_so_far))); } if (variant == negate_variant) @@ -3108,6 +3089,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, enum mult_variant variant; struct algorithm algorithm; int max_cost; + bool speed = optimize_insn_for_speed_p (); /* Handling const0_rtx here allows us to use zero as a rogue value for coeff below. */ @@ -3137,7 +3119,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, any truncation. This means that multiplying by negative values does not work; results are off by 2^32 on a 32 bit machine. */ - if (GET_CODE (op1) == CONST_INT) + if (CONST_INT_P (op1)) { /* Attempt to handle multiplication of DImode values by negative coefficients, by performing the multiplication by a positive @@ -3149,8 +3131,8 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, result is interpreted as an unsigned coefficient. Exclude cost of op0 from max_cost to match the cost calculation of the synth_mult. */ - max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET) - - neg_cost[mode]; + max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed) + - neg_cost[speed][mode]; if (max_cost > 0 && choose_mult_variant (mode, -INTVAL (op1), &algorithm, &variant, max_cost)) @@ -3167,7 +3149,8 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, { /* If we are multiplying in DImode, it may still be a win to try to work with shifts and adds. */ - if (CONST_DOUBLE_HIGH (op1) == 0) + if (CONST_DOUBLE_HIGH (op1) == 0 + && CONST_DOUBLE_LOW (op1) > 0) coeff = CONST_DOUBLE_LOW (op1); else if (CONST_DOUBLE_LOW (op1) == 0 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1))) @@ -3179,7 +3162,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, target, unsignedp); } } - + /* We used to test optimize here, on the grounds that it's better to produce a smaller program when -O is not used. But this causes such a terrible slowdown sometimes that it seems better to always @@ -3194,7 +3177,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, /* Exclude cost of op0 from max_cost to match the cost calculation of the synth_mult. */ - max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET); + max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed); if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) return expand_mult_const (mode, op0, coeff, target, @@ -3437,6 +3420,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, optab moptab; rtx tem; int size; + bool speed = optimize_insn_for_speed_p (); gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); @@ -3445,7 +3429,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Firstly, try using a multiplication insn that only generates the needed high part of the product, and in the sign flavor of unsignedp. */ - if (mul_highpart_cost[mode] < max_cost) + if (mul_highpart_cost[speed][mode] < max_cost) { moptab = unsignedp ? 
umul_highpart_optab : smul_highpart_optab; tem = expand_binop (mode, moptab, op0, narrow_op1, target, @@ -3457,8 +3441,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Secondly, same as above, but use sign flavor opposite of unsignedp. Need to adjust the result after the multiplication. */ if (size - 1 < BITS_PER_WORD - && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1] - + 4 * add_cost[mode] < max_cost)) + && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1] + + 4 * add_cost[speed][mode] < max_cost)) { moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; tem = expand_binop (mode, moptab, op0, narrow_op1, target, @@ -3471,8 +3455,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening multiplication. */ moptab = unsignedp ? umul_widen_optab : smul_widen_optab; - if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing - && mul_widen_cost[wider_mode] < max_cost) + if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing + && mul_widen_cost[speed][wider_mode] < max_cost) { tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, unsignedp, OPTAB_WIDEN); @@ -3481,9 +3465,9 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, } /* Try widening the mode and perform a non-widening multiplication. */ - if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing + if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing && size - 1 < BITS_PER_WORD - && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost) + && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost) { rtx insns, wop0, wop1; @@ -3508,10 +3492,10 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening multiplication of opposite signedness, and adjust. */ moptab = unsignedp ? smul_widen_optab : umul_widen_optab; - if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing + if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing && size - 1 < BITS_PER_WORD - && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1] - + 4 * add_cost[mode] < max_cost)) + && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1] + + 4 * add_cost[speed][mode] < max_cost)) { tem = expand_binop (wider_mode, moptab, op0, narrow_op1, NULL_RTX, ! unsignedp, OPTAB_WIDEN); @@ -3549,6 +3533,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, enum mult_variant variant; struct algorithm alg; rtx tem; + bool speed = optimize_insn_for_speed_p (); gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); /* We can't support modes wider than HOST_BITS_PER_INT. */ @@ -3556,21 +3541,21 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); - /* We can't optimize modes wider than BITS_PER_WORD. - ??? We might be able to perform double-word arithmetic if + /* We can't optimize modes wider than BITS_PER_WORD. + ??? We might be able to perform double-word arithmetic if mode == word_mode, however all the cost calculations in synth_mult etc. assume single-word operations. */ if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) return expand_mult_highpart_optab (mode, op0, op1, target, unsignedp, max_cost); - extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1]; + extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1]; /* Check whether we try to multiply by a negative constant. 
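[Editorial note] The fallbacks in expand_mult_highpart_optab above have simple scalar counterparts: the high part of a product can come from a widening multiply followed by a shift, and a high-part multiply of the "wrong" signedness can be corrected afterwards (the role of expand_mult_highpart_adjust: subtract one operand for each negative input). Restated with plain C types for a hypothetical 32-bit mode; function names are made up, and the narrowing conversions assume two's-complement wrap.

#include <stdint.h>
#include <stdio.h>

static uint32_t
umulh (uint32_t a, uint32_t b)
{
  return (uint32_t) (((uint64_t) a * b) >> 32);  /* widening multiply + shift */
}

static int32_t
smulh_via_umulh (int32_t a, int32_t b)
{
  uint32_t t = umulh ((uint32_t) a, (uint32_t) b);
  if (a < 0)        /* fix-up: subtract b for a's sign, a for b's sign */
    t -= (uint32_t) b;
  if (b < 0)
    t -= (uint32_t) a;
  return (int32_t) t;
}

int
main (void)
{
  int32_t a = -123456789, b = 987654321;
  int64_t p = (int64_t) a * b;
  /* Reference: bits 32..63 of the product.  Strip the low half first so the
     division is exact and no shift of a negative value is needed.  */
  int32_t want = (int32_t) ((p - (int64_t) (uint32_t) p) / ((int64_t) 1 << 32));

  printf ("%ld %ld\n", (long) smulh_via_umulh (a, b), (long) want);
  return smulh_via_umulh (a, b) != want;
}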
*/ if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) { sign_adjust = true; - extra_cost += add_cost[mode]; + extra_cost += add_cost[speed][mode]; } /* See whether shift/add multiplication is cheap enough. */ @@ -3612,8 +3597,8 @@ expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) result = gen_reg_rtx (mode); /* Avoid conditional branches when they're expensive. */ - if (BRANCH_COST >= 2 - && !optimize_size) + if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2 + && optimize_insn_for_speed_p ()) { rtx signmask = emit_store_flag (result, LT, op0, const0_rtx, mode, 0, -1); @@ -3629,8 +3614,8 @@ expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */ temp = gen_rtx_LSHIFTRT (mode, result, shift); - if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing - || rtx_cost (temp, SET) > COSTS_N_INSNS (2)) + if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing + || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2)) { temp = expand_binop (mode, xor_optab, op0, signmask, NULL_RTX, 1, OPTAB_LIB_WIDEN); @@ -3712,7 +3697,9 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) logd = floor_log2 (d); shift = build_int_cst (NULL_TREE, logd); - if (d == 2 && BRANCH_COST >= 1) + if (d == 2 + && BRANCH_COST (optimize_insn_for_speed_p (), + false) >= 1) { temp = gen_reg_rtx (mode); temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1); @@ -3722,7 +3709,8 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) } #ifdef HAVE_conditional_move - if (BRANCH_COST >= 2) + if (BRANCH_COST (optimize_insn_for_speed_p (), false) + >= 2) { rtx temp2; @@ -3751,13 +3739,14 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) } #endif - if (BRANCH_COST >= 2) + if (BRANCH_COST (optimize_insn_for_speed_p (), + false) >= 2) { int ushift = GET_MODE_BITSIZE (mode) - logd; temp = gen_reg_rtx (mode); temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); - if (shift_cost[mode][ushift] > COSTS_N_INSNS (1)) + if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1)) temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1), NULL_RTX, 0, OPTAB_LIB_WIDEN); else @@ -3830,8 +3819,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, int max_cost, extra_cost; static HOST_WIDE_INT last_div_const = 0; static HOST_WIDE_INT ext_op1; + bool speed = optimize_insn_for_speed_p (); - op1_is_constant = GET_CODE (op1) == CONST_INT; + op1_is_constant = CONST_INT_P (op1); if (op1_is_constant) { ext_op1 = INTVAL (op1); @@ -3928,15 +3918,15 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, for (compute_mode = mode; compute_mode != VOIDmode; compute_mode = GET_MODE_WIDER_MODE (compute_mode)) - if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing - || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing) + if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing + || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing) break; if (compute_mode == VOIDmode) for (compute_mode = mode; compute_mode != VOIDmode; compute_mode = GET_MODE_WIDER_MODE (compute_mode)) - if (optab1->handlers[compute_mode].libfunc - || optab2->handlers[compute_mode].libfunc) + if (optab_libfunc (optab1, compute_mode) + || optab_libfunc (optab2, compute_mode)) break; /* If we still couldn't find a mode, use MODE, but expand_binop will @@ -3960,10 +3950,10 @@ 
expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, /* Only deduct something for a REM if the last divide done was for a different constant. Then set the constant of the last divide. */ - max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode]; + max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode]; if (rem_flag && ! (last_div_const != 0 && op1_is_constant && INTVAL (op1) == last_div_const)) - max_cost -= mul_cost[compute_mode] + add_cost[compute_mode]; + max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode]; last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; @@ -3975,7 +3965,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, /* convert_modes may have placed op1 into a register, so we must recompute the following. */ - op1_is_constant = GET_CODE (op1) == CONST_INT; + op1_is_constant = CONST_INT_P (op1); op1_is_pow2 = (op1_is_constant && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) || (! unsignedp @@ -4047,10 +4037,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, { /* Most significant bit of divisor is set; emit an scc insn. */ - quotient = emit_store_flag (tquotient, GEU, op0, op1, - compute_mode, 1, 1); - if (quotient == 0) - goto fail1; + quotient = emit_store_flag_force (tquotient, GEU, op0, op1, + compute_mode, 1, 1); } else { @@ -4082,9 +4070,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, goto fail1; extra_cost - = (shift_cost[compute_mode][post_shift - 1] - + shift_cost[compute_mode][1] - + 2 * add_cost[compute_mode]); + = (shift_cost[speed][compute_mode][post_shift - 1] + + shift_cost[speed][compute_mode][1] + + 2 * add_cost[speed][compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -4118,8 +4106,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, build_int_cst (NULL_TREE, pre_shift), NULL_RTX, 1); extra_cost - = (shift_cost[compute_mode][pre_shift] - + shift_cost[compute_mode][post_shift]); + = (shift_cost[speed][compute_mode][pre_shift] + + shift_cost[speed][compute_mode][post_shift]); t2 = expand_mult_highpart (compute_mode, t1, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -4149,7 +4137,14 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, int lgup, post_shift; rtx mlr; HOST_WIDE_INT d = INTVAL (op1); - unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d; + unsigned HOST_WIDE_INT abs_d; + + /* Since d might be INT_MIN, we have to cast to + unsigned HOST_WIDE_INT before negating to avoid + undefined signed overflow. */ + abs_d = (d >= 0 + ? (unsigned HOST_WIDE_INT) d + : - (unsigned HOST_WIDE_INT) d); /* n rem d = n rem -d */ if (rem_flag && d < 0) @@ -4163,7 +4158,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, else if (d == -1) quotient = expand_unop (compute_mode, neg_optab, op0, tquotient, 0); - else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1)) + else if (HOST_BITS_PER_WIDE_INT >= size + && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1)) { /* This case is not handled correctly below. */ quotient = emit_store_flag (tquotient, EQ, op0, op1, @@ -4172,15 +4168,17 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, goto fail1; } else if (EXACT_POWER_OF_2_OR_ZERO_P (d) - && (rem_flag ? smod_pow2_cheap[compute_mode] - : sdiv_pow2_cheap[compute_mode]) + && (rem_flag ? 
smod_pow2_cheap[speed][compute_mode] + : sdiv_pow2_cheap[speed][compute_mode]) /* We assume that cheap metric is true if the optab has an expander for this mode. */ - && (((rem_flag ? smod_optab : sdiv_optab) - ->handlers[compute_mode].insn_code + && ((optab_handler ((rem_flag ? smod_optab + : sdiv_optab), + compute_mode)->insn_code != CODE_FOR_nothing) - || (sdivmod_optab->handlers[compute_mode] - .insn_code != CODE_FOR_nothing))) + || (optab_handler(sdivmod_optab, + compute_mode) + ->insn_code != CODE_FOR_nothing))) ; else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) { @@ -4191,10 +4189,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, return gen_lowpart (mode, remainder); } - if (sdiv_pow2_cheap[compute_mode] - && ((sdiv_optab->handlers[compute_mode].insn_code + if (sdiv_pow2_cheap[speed][compute_mode] + && ((optab_handler (sdiv_optab, compute_mode)->insn_code != CODE_FOR_nothing) - || (sdivmod_optab->handlers[compute_mode].insn_code + || (optab_handler (sdivmod_optab, compute_mode)->insn_code != CODE_FOR_nothing))) quotient = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, op0, @@ -4240,9 +4238,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, || size - 1 >= BITS_PER_WORD) goto fail1; - extra_cost = (shift_cost[compute_mode][post_shift] - + shift_cost[compute_mode][size - 1] - + add_cost[compute_mode]); + extra_cost = (shift_cost[speed][compute_mode][post_shift] + + shift_cost[speed][compute_mode][size - 1] + + add_cost[speed][compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, mlr, NULL_RTX, 0, max_cost - extra_cost); @@ -4277,9 +4275,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); mlr = gen_int_mode (ml, compute_mode); - extra_cost = (shift_cost[compute_mode][post_shift] - + shift_cost[compute_mode][size - 1] - + 2 * add_cost[compute_mode]); + extra_cost = (shift_cost[speed][compute_mode][post_shift] + + shift_cost[speed][compute_mode][size - 1] + + 2 * add_cost[speed][compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, mlr, NULL_RTX, 0, max_cost - extra_cost); @@ -4372,9 +4370,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, NULL_RTX, 0); t2 = expand_binop (compute_mode, xor_optab, op0, t1, NULL_RTX, 0, OPTAB_WIDEN); - extra_cost = (shift_cost[compute_mode][post_shift] - + shift_cost[compute_mode][size - 1] - + 2 * add_cost[compute_mode]); + extra_cost = (shift_cost[speed][compute_mode][post_shift] + + shift_cost[speed][compute_mode][size - 1] + + 2 * add_cost[speed][compute_mode]); t3 = expand_mult_highpart (compute_mode, t2, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -4842,7 +4840,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, = sign_expand_binop (compute_mode, umod_optab, smod_optab, op0, op1, target, unsignedp, - ((optab2->handlers[compute_mode].insn_code + ((optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing) ? OPTAB_DIRECT : OPTAB_WIDEN)); if (remainder == 0) @@ -4870,7 +4868,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, op0, op1, rem_flag ? NULL_RTX : target, unsignedp, - ((optab2->handlers[compute_mode].insn_code + ((optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing) ? 
OPTAB_DIRECT : OPTAB_WIDEN)); @@ -4911,7 +4909,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, if (!remainder) { remainder = gen_reg_rtx (compute_mode); - if (!expand_twoval_binop_libfunc + if (!expand_twoval_binop_libfunc (unsignedp ? udivmod_optab : sdivmod_optab, op0, op1, NULL_RTX, remainder, @@ -4954,12 +4952,12 @@ make_tree (tree type, rtx x) && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))) hi = -1; - + t = build_int_cst_wide (type, INTVAL (x), hi); - + return t; } - + case CONST_DOUBLE: if (GET_MODE (x) == VOIDmode) t = build_int_cst_wide (type, @@ -4976,17 +4974,17 @@ make_tree (tree type, rtx x) case CONST_VECTOR: { - int i, units; - rtx elt; + int units = CONST_VECTOR_NUNITS (x); + tree itype = TREE_TYPE (type); tree t = NULL_TREE; + int i; - units = CONST_VECTOR_NUNITS (x); /* Build a tree with vector elements. */ for (i = units - 1; i >= 0; --i) { - elt = CONST_VECTOR_ELT (x, i); - t = tree_cons (NULL_TREE, make_tree (type, elt), t); + rtx elt = CONST_VECTOR_ELT (x, i); + t = tree_cons (NULL_TREE, make_tree (itype, elt), t); } return build_vector (type, t); @@ -5012,20 +5010,20 @@ make_tree (tree type, rtx x) make_tree (type, XEXP (x, 1))); case LSHIFTRT: - t = lang_hooks.types.unsigned_type (type); + t = unsigned_type_for (type); return fold_convert (type, build2 (RSHIFT_EXPR, t, make_tree (t, XEXP (x, 0)), make_tree (type, XEXP (x, 1)))); case ASHIFTRT: - t = lang_hooks.types.signed_type (type); + t = signed_type_for (type); return fold_convert (type, build2 (RSHIFT_EXPR, t, make_tree (t, XEXP (x, 0)), make_tree (type, XEXP (x, 1)))); case DIV: if (TREE_CODE (type) != REAL_TYPE) - t = lang_hooks.types.signed_type (type); + t = signed_type_for (type); else t = type; @@ -5033,7 +5031,7 @@ make_tree (tree type, rtx x) make_tree (t, XEXP (x, 0)), make_tree (t, XEXP (x, 1)))); case UDIV: - t = lang_hooks.types.unsigned_type (type); + t = unsigned_type_for (type); return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, make_tree (t, XEXP (x, 0)), make_tree (t, XEXP (x, 1)))); @@ -5044,13 +5042,23 @@ make_tree (tree type, rtx x) GET_CODE (x) == ZERO_EXTEND); return fold_convert (type, make_tree (t, XEXP (x, 0))); + case CONST: + return make_tree (type, XEXP (x, 0)); + + case SYMBOL_REF: + t = SYMBOL_REF_DECL (x); + if (t) + return fold_convert (type, build_fold_addr_expr (t)); + /* else fall through. */ + default: - t = build_decl (VAR_DECL, NULL_TREE, type); + t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); - /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being - ptr_mode. So convert. */ + /* If TYPE is a POINTER_TYPE, we might need to convert X from + address mode to pointer mode. */ if (POINTER_TYPE_P (type)) - x = convert_memory_address (TYPE_MODE (type), x); + x = convert_memory_address_addr_space + (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); /* Note that we do *not* use SET_DECL_RTL here, because we do not want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ @@ -5081,36 +5089,131 @@ expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target) emit_move_insn (target, tem); return target; } - -/* Emit a store-flags instruction for comparison CODE on OP0 and OP1 - and storing in TARGET. Normally return TARGET. - Return 0 if that cannot be done. - MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If - it is VOIDmode, they cannot both be CONST_INT. +/* Helper function for emit_store_flag. 
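[Editorial note] Looking back at the power-of-two paths in expand_sdiv_pow2 and expand_divmod above, the branch-free shape they aim for is: build an all-ones mask from the sign, add d - 1 to negative operands only, then shift right arithmetically. A compact sketch, assuming the host's signed ">>" is an arithmetic shift (the common case); the function name is invented:

#include <stdint.h>
#include <stdio.h>

static int32_t
sdiv_pow2 (int32_t x, unsigned logd)
{
  int32_t mask = -(int32_t) (x < 0);          /* 0, or -1 when x is negative */
  int32_t bias = mask & ((1 << logd) - 1);    /* d - 1 for negative x        */
  return (x + bias) >> logd;                  /* truncating quotient         */
}

int
main (void)
{
  for (int32_t x = -40; x <= 40; x++)
    for (unsigned logd = 1; logd <= 4; logd++)
      if (sdiv_pow2 (x, logd) != x / (1 << logd))
        {
          printf ("mismatch x=%d logd=%u\n", (int) x, logd);
          return 1;
        }
  printf ("ok\n");
  return 0;
}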
*/ +static rtx +emit_cstore (rtx target, enum insn_code icode, enum rtx_code code, + enum machine_mode mode, enum machine_mode compare_mode, + int unsignedp, rtx x, rtx y, int normalizep, + enum machine_mode target_mode) +{ + rtx op0, last, comparison, subtarget, pattern; + enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode; - UNSIGNEDP is for the case where we have to widen the operands - to perform the operation. It says to use zero-extension. + last = get_last_insn (); + x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp); + y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp); + comparison = gen_rtx_fmt_ee (code, result_mode, x, y); + if (!x || !y + || !insn_data[icode].operand[2].predicate + (x, insn_data[icode].operand[2].mode) + || !insn_data[icode].operand[3].predicate + (y, insn_data[icode].operand[3].mode) + || !insn_data[icode].operand[1].predicate (comparison, VOIDmode)) + { + delete_insns_since (last); + return NULL_RTX; + } - NORMALIZEP is 1 if we should convert the result to be either zero - or one. Normalize is -1 if we should convert the result to be - either zero or -1. If NORMALIZEP is zero, the result will be left - "raw" out of the scc insn. */ + if (target_mode == VOIDmode) + target_mode = result_mode; + if (!target) + target = gen_reg_rtx (target_mode); -rtx -emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, - enum machine_mode mode, int unsignedp, int normalizep) + if (optimize + || !(insn_data[(int) icode].operand[0].predicate (target, result_mode))) + subtarget = gen_reg_rtx (result_mode); + else + subtarget = target; + + pattern = GEN_FCN (icode) (subtarget, comparison, x, y); + if (!pattern) + return NULL_RTX; + emit_insn (pattern); + + /* If we are converting to a wider mode, first convert to + TARGET_MODE, then normalize. This produces better combining + opportunities on machines that have a SIGN_EXTRACT when we are + testing a single bit. This mostly benefits the 68k. + + If STORE_FLAG_VALUE does not have the sign bit set when + interpreted in MODE, we can do this conversion as unsigned, which + is usually more efficient. */ + if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode)) + { + convert_move (target, subtarget, + (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT) + && 0 == (STORE_FLAG_VALUE + & ((HOST_WIDE_INT) 1 + << (GET_MODE_BITSIZE (result_mode) -1)))); + op0 = target; + result_mode = target_mode; + } + else + op0 = subtarget; + + /* If we want to keep subexpressions around, don't reuse our last + target. */ + if (optimize) + subtarget = 0; + + /* Now normalize to the proper value in MODE. Sometimes we don't + have to do anything. */ + if (normalizep == 0 || normalizep == STORE_FLAG_VALUE) + ; + /* STORE_FLAG_VALUE might be the most negative number, so write + the comparison this way to avoid a compiler-time warning. */ + else if (- normalizep == STORE_FLAG_VALUE) + op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0); + + /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes + it hard to use a value of just the sign bit due to ANSI integer + constant typing rules. 
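[Editorial note] emit_cstore's normalisation cases below can be pictured with concrete numbers: a raw store-flag value either carries its information in the sign bit (shift bit 31 down, logically for 0/1 or negate for 0/-1) or in bit 0 (mask with 1, then negate for 0/-1). An illustrative sketch with invented helpers, for a 32-bit mode:

#include <stdint.h>
#include <stdio.h>

static int32_t
normalize_signbit_flag (uint32_t raw, int normalizep)
{
  /* raw is 0 or 0x80000000.  */
  uint32_t bit = raw >> 31;                       /* 0 or 1 */
  return normalizep == -1 ? -(int32_t) bit : (int32_t) bit;
}

static int32_t
normalize_lowbit_flag (uint32_t raw, int normalizep)
{
  /* raw is 0 or some value with bit 0 set.  */
  uint32_t bit = raw & 1;
  return normalizep == -1 ? -(int32_t) bit : (int32_t) bit;
}

int
main (void)
{
  printf ("%d %d\n", normalize_signbit_flag (0x80000000u, 1),
          normalize_signbit_flag (0x80000000u, -1));     /* 1 -1 */
  printf ("%d %d\n", normalize_lowbit_flag (5, 1),
          normalize_lowbit_flag (0, -1));                /* 1 0  */
  return 0;
}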
*/ + else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT + && (STORE_FLAG_VALUE + & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1)))) + op0 = expand_shift (RSHIFT_EXPR, result_mode, op0, + size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget, + normalizep == 1); + else + { + gcc_assert (STORE_FLAG_VALUE & 1); + + op0 = expand_and (result_mode, op0, const1_rtx, subtarget); + if (normalizep == -1) + op0 = expand_unop (result_mode, neg_optab, op0, op0, 0); + } + + /* If we were converting to a smaller mode, do the conversion now. */ + if (target_mode != result_mode) + { + convert_move (target, op0, 0); + return target; + } + else + return op0; +} + + +/* A subroutine of emit_store_flag only including "tricks" that do not + need a recursive call. These are kept separate to avoid infinite + loops. */ + +static rtx +emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1, + enum machine_mode mode, int unsignedp, int normalizep, + enum machine_mode target_mode) { rtx subtarget; enum insn_code icode; enum machine_mode compare_mode; - enum machine_mode target_mode = GET_MODE (target); + enum mode_class mclass; + enum rtx_code scode; rtx tem; - rtx last = get_last_insn (); - rtx pattern, comparison; if (unsignedp) code = unsigned_condition (code); + scode = swap_condition (code); /* If one operand is constant, make it the second one. Only do this if the other operand is not constant as well. */ @@ -5169,18 +5272,20 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, if ((code == EQ || code == NE) && (op1 == const0_rtx || op1 == constm1_rtx)) { - rtx op00, op01, op0both; + rtx op00, op01; - /* Do a logical OR or AND of the two words and compare the result. */ + /* Do a logical OR or AND of the two words and compare the + result. */ op00 = simplify_gen_subreg (word_mode, op0, mode, 0); op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD); - op0both = expand_binop (word_mode, - op1 == const0_rtx ? ior_optab : and_optab, - op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT); - - if (op0both != 0) - return emit_store_flag (target, code, op0both, op1, word_mode, - unsignedp, normalizep); + tem = expand_binop (word_mode, + op1 == const0_rtx ? ior_optab : and_optab, + op00, op01, NULL_RTX, unsignedp, + OPTAB_DIRECT); + + if (tem != 0) + tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode, + unsignedp, normalizep); } else if ((code == LT || code == GE) && op1 == const0_rtx) { @@ -5188,14 +5293,28 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, /* If testing the sign bit, can just test on high word. */ op0h = simplify_gen_subreg (word_mode, op0, mode, - subreg_highpart_offset (word_mode, mode)); - return emit_store_flag (target, code, op0h, op1, word_mode, - unsignedp, normalizep); + subreg_highpart_offset (word_mode, + mode)); + tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode, + unsignedp, normalizep); } - } + else + tem = NULL_RTX; - /* From now on, we won't change CODE, so set ICODE now. */ - icode = setcc_gen_code[(int) code]; + if (tem) + { + if (target_mode == VOIDmode || GET_MODE (tem) == target_mode) + return tem; + if (!target) + target = gen_reg_rtx (target_mode); + + convert_move (target, tem, + 0 == ((normalizep ? normalizep : STORE_FLAG_VALUE) + & ((HOST_WIDE_INT) 1 + << (GET_MODE_BITSIZE (word_mode) -1)))); + return target; + } + } /* If this is A < 0 or A >= 0, we can do this by taking the ones complement of A (for GE) and shifting the sign bit to the low bit. 
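[Editorial note] The double-word and sign-bit special cases handled above have simple scalar counterparts. For a 64-bit value split into two 32-bit words: equality with 0 or -1 needs only an OR or AND of the halves, and A < 0 / A >= 0 need only the sign bit of the high word, moved to bit 0 with a single logical shift (complementing first for the >= case). For example:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int64_t x = -42;
  uint32_t lo = (uint32_t) x;
  uint32_t hi = (uint32_t) ((uint64_t) x >> 32);

  int is_zero   = ((lo | hi) == 0);
  int is_m1     = ((lo & hi) == 0xffffffffu);
  int is_neg    = (int) (hi >> 31);           /* x < 0  */
  int is_nonneg = (int) (~hi >> 31);          /* x >= 0 */

  printf ("%d %d %d %d\n", is_zero, is_m1, is_neg, is_nonneg);  /* 0 0 1 0 */
  return 0;
}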
*/ @@ -5204,14 +5323,18 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, && (normalizep || STORE_FLAG_VALUE == 1 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) - == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))) + == ((unsigned HOST_WIDE_INT) 1 + << (GET_MODE_BITSIZE (mode) - 1)))))) { subtarget = target; + if (!target) + target_mode = mode; + /* If the result is to be wider than OP0, it is best to convert it first. If it is to be narrower, it is *incorrect* to convert it first. */ - if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode)) + else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode)) { op0 = convert_modes (target_mode, mode, op0, 0); mode = target_mode; @@ -5239,150 +5362,185 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, return op0; } - if (icode != CODE_FOR_nothing) + mclass = GET_MODE_CLASS (mode); + for (compare_mode = mode; compare_mode != VOIDmode; + compare_mode = GET_MODE_WIDER_MODE (compare_mode)) { - insn_operand_predicate_fn pred; - - /* We think we may be able to do this with a scc insn. Emit the - comparison and then the scc insn. */ - - do_pending_stack_adjust (); - last = get_last_insn (); - - comparison - = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX); - if (CONSTANT_P (comparison)) + enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode; + icode = optab_handler (cstore_optab, optab_mode)->insn_code; + if (icode != CODE_FOR_nothing) { - switch (GET_CODE (comparison)) + do_pending_stack_adjust (); + tem = emit_cstore (target, icode, code, mode, compare_mode, + unsignedp, op0, op1, normalizep, target_mode); + if (tem) + return tem; + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) { - case CONST_INT: - if (comparison == const0_rtx) - return const0_rtx; - break; - -#ifdef FLOAT_STORE_FLAG_VALUE - case CONST_DOUBLE: - if (comparison == CONST0_RTX (GET_MODE (comparison))) - return const0_rtx; - break; -#endif - default: - gcc_unreachable (); + tem = emit_cstore (target, icode, scode, mode, compare_mode, + unsignedp, op1, op0, normalizep, target_mode); + if (tem) + return tem; } - - if (normalizep == 1) - return const1_rtx; - if (normalizep == -1) - return constm1_rtx; - return const_true_rtx; + break; } + } - /* The code of COMPARISON may not match CODE if compare_from_rtx - decided to swap its operands and reverse the original code. + return 0; +} - We know that compare_from_rtx returns either a CONST_INT or - a new comparison code, so it is safe to just extract the - code from COMPARISON. */ - code = GET_CODE (comparison); +/* Emit a store-flags instruction for comparison CODE on OP0 and OP1 + and storing in TARGET. Normally return TARGET. + Return 0 if that cannot be done. - /* Get a reference to the target in the proper mode for this insn. */ - compare_mode = insn_data[(int) icode].operand[0].mode; - subtarget = target; - pred = insn_data[(int) icode].operand[0].predicate; - if (optimize || ! (*pred) (subtarget, compare_mode)) - subtarget = gen_reg_rtx (compare_mode); + MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If + it is VOIDmode, they cannot both be CONST_INT. - pattern = GEN_FCN (icode) (subtarget); - if (pattern) - { - emit_insn (pattern); + UNSIGNEDP is for the case where we have to widen the operands + to perform the operation. It says to use zero-extension. - /* If we are converting to a wider mode, first convert to - TARGET_MODE, then normalize. 
This produces better combining - opportunities on machines that have a SIGN_EXTRACT when we are - testing a single bit. This mostly benefits the 68k. + NORMALIZEP is 1 if we should convert the result to be either zero + or one. Normalize is -1 if we should convert the result to be + either zero or -1. If NORMALIZEP is zero, the result will be left + "raw" out of the scc insn. */ - If STORE_FLAG_VALUE does not have the sign bit set when - interpreted in COMPARE_MODE, we can do this conversion as - unsigned, which is usually more efficient. */ - if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode)) - { - convert_move (target, subtarget, - (GET_MODE_BITSIZE (compare_mode) - <= HOST_BITS_PER_WIDE_INT) - && 0 == (STORE_FLAG_VALUE - & ((HOST_WIDE_INT) 1 - << (GET_MODE_BITSIZE (compare_mode) -1)))); - op0 = target; - compare_mode = target_mode; - } - else - op0 = subtarget; +rtx +emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, + enum machine_mode mode, int unsignedp, int normalizep) +{ + enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode; + enum rtx_code rcode; + rtx subtarget; + rtx tem, last, trueval; - /* If we want to keep subexpressions around, don't reuse our - last target. */ + tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep, + target_mode); + if (tem) + return tem; - if (optimize) - subtarget = 0; + /* If we reached here, we can't do this with a scc insn, however there + are some comparisons that can be done in other ways. Don't do any + of these cases if branches are very cheap. */ + if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0) + return 0; - /* Now normalize to the proper value in COMPARE_MODE. Sometimes - we don't have to do anything. */ - if (normalizep == 0 || normalizep == STORE_FLAG_VALUE) - ; - /* STORE_FLAG_VALUE might be the most negative number, so write - the comparison this way to avoid a compiler-time warning. */ - else if (- normalizep == STORE_FLAG_VALUE) - op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0); - - /* We don't want to use STORE_FLAG_VALUE < 0 below since this - makes it hard to use a value of just the sign bit due to - ANSI integer constant typing rules. */ - else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT - && (STORE_FLAG_VALUE - & ((HOST_WIDE_INT) 1 - << (GET_MODE_BITSIZE (compare_mode) - 1)))) - op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0, - size_int (GET_MODE_BITSIZE (compare_mode) - 1), - subtarget, normalizep == 1); - else - { - gcc_assert (STORE_FLAG_VALUE & 1); - - op0 = expand_and (compare_mode, op0, const1_rtx, subtarget); - if (normalizep == -1) - op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0); - } + /* See what we need to return. We can only return a 1, -1, or the + sign bit. */ - /* If we were converting to a smaller mode, do the - conversion now. */ - if (target_mode != compare_mode) - { - convert_move (target, op0, 0); - return target; - } - else - return op0; - } + if (normalizep == 0) + { + if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1) + normalizep = STORE_FLAG_VALUE; + + else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT + && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) + == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))) + ; + else + return 0; } - delete_insns_since (last); + last = get_last_insn (); /* If optimizing, use different pseudo registers for each insn, instead of reusing the same pseudo. 
This leads to better CSE, but slows down the compiler, since there are more pseudos */ subtarget = (!optimize && (target_mode == mode)) ? target : NULL_RTX; + trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE); + + /* For floating-point comparisons, try the reverse comparison or try + changing the "orderedness" of the comparison. */ + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + enum rtx_code first_code; + bool and_them; + + rcode = reverse_condition_maybe_unordered (code); + if (can_compare_p (rcode, mode, ccp_store_flag) + && (code == ORDERED || code == UNORDERED + || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) + || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) + { + int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1) + || (STORE_FLAG_VALUE == -1 && normalizep == 1)); + + /* For the reverse comparison, use either an addition or a XOR. */ + if (want_add + && rtx_cost (GEN_INT (normalizep), PLUS, + optimize_insn_for_speed_p ()) == 0) + { + tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, + STORE_FLAG_VALUE, target_mode); + if (tem) + return expand_binop (target_mode, add_optab, tem, + GEN_INT (normalizep), + target, 0, OPTAB_WIDEN); + } + else if (!want_add + && rtx_cost (trueval, XOR, + optimize_insn_for_speed_p ()) == 0) + { + tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, + normalizep, target_mode); + if (tem) + return expand_binop (target_mode, xor_optab, tem, trueval, + target, INTVAL (trueval) >= 0, OPTAB_WIDEN); + } + } + + delete_insns_since (last); + + /* Cannot split ORDERED and UNORDERED, only try the above trick. */ + if (code == ORDERED || code == UNORDERED) + return 0; + + and_them = split_comparison (code, mode, &first_code, &code); + + /* If there are no NaNs, the first comparison should always fall through. + Effectively change the comparison to the other one. */ + if (!HONOR_NANS (mode)) + { + gcc_assert (first_code == (and_them ? ORDERED : UNORDERED)); + return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep, + target_mode); + } + +#ifdef HAVE_conditional_move + /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a + conditional move. */ + tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0, + normalizep, target_mode); + if (tem == 0) + return 0; + + if (and_them) + tem = emit_conditional_move (target, code, op0, op1, mode, + tem, const0_rtx, GET_MODE (tem), 0); + else + tem = emit_conditional_move (target, code, op0, op1, mode, + trueval, tem, GET_MODE (tem), 0); + + if (tem == 0) + delete_insns_since (last); + return tem; +#else + return 0; +#endif + } + + /* The remaining tricks only apply to integer comparisons. */ + + if (GET_MODE_CLASS (mode) != MODE_INT) + return 0; - /* If we reached here, we can't do this with a scc insn. However, there - are some comparisons that can be done directly. For example, if - this is an equality comparison of integers, we can try to exclusive-or + /* If this is an equality comparison of integers, we can try to exclusive-or (or subtract) the two operands and use a recursive call to try the comparison with zero. Don't do any of these cases if branches are very cheap. 
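[Editorial note] The reverse-comparison rewrite used below (for floating point here, and again later for integers) relies on two small identities: if the reversed condition is available as 0/1, the original condition as 0/-1 is that value minus 1; and the original condition normalised to 0/N is the reversed value, normalised to 0/N, XORed with N. Checked with plain ints:

#include <stdio.h>

int
main (void)
{
  int a = 3, b = 7;

  int orig = (a < b);          /* condition we actually want        */
  int rev  = (a >= b);         /* the only one the "target" offers  */

  int as_minus1 = rev - 1;     /* 0 / -1 form of (a < b)  */
  int as_one    = rev ^ 1;     /* 0 /  1 form of (a < b)  */

  printf ("%d %d %d\n", orig, as_minus1, as_one);   /* 1 -1 1 */
  return !(as_minus1 == -orig && as_one == orig);
}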
*/ - if (BRANCH_COST > 0 - && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE) - && op1 != const0_rtx) + if ((code == EQ || code == NE) && op1 != const0_rtx) { tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1, OPTAB_WIDEN); @@ -5393,9 +5551,50 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, if (tem != 0) tem = emit_store_flag (target, code, tem, const0_rtx, mode, unsignedp, normalizep); - if (tem == 0) - delete_insns_since (last); - return tem; + if (tem != 0) + return tem; + + delete_insns_since (last); + } + + /* For integer comparisons, try the reverse comparison. However, for + small X and if we'd have anyway to extend, implementing "X != 0" + as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */ + rcode = reverse_condition (code); + if (can_compare_p (rcode, mode, ccp_store_flag) + && ! (optab_handler (cstore_optab, mode)->insn_code == CODE_FOR_nothing + && code == NE + && GET_MODE_SIZE (mode) < UNITS_PER_WORD + && op1 == const0_rtx)) + { + int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1) + || (STORE_FLAG_VALUE == -1 && normalizep == 1)); + + /* Again, for the reverse comparison, use either an addition or a XOR. */ + if (want_add + && rtx_cost (GEN_INT (normalizep), PLUS, + optimize_insn_for_speed_p ()) == 0) + { + tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, + STORE_FLAG_VALUE, target_mode); + if (tem != 0) + tem = expand_binop (target_mode, add_optab, tem, + GEN_INT (normalizep), target, 0, OPTAB_WIDEN); + } + else if (!want_add + && rtx_cost (trueval, XOR, + optimize_insn_for_speed_p ()) == 0) + { + tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, + normalizep, target_mode); + if (tem != 0) + tem = expand_binop (target_mode, xor_optab, tem, trueval, target, + INTVAL (trueval) >= 0, OPTAB_WIDEN); + } + + if (tem != 0) + return tem; + delete_insns_since (last); } /* Some other cases we can do are EQ, NE, LE, and GT comparisons with @@ -5403,28 +5602,12 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, do LE and GT if branches are expensive since they are expensive on 2-operand machines. */ - if (BRANCH_COST == 0 - || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx + if (op1 != const0_rtx || (code != EQ && code != NE - && (BRANCH_COST <= 1 || (code != LE && code != GT)))) + && (BRANCH_COST (optimize_insn_for_speed_p (), + false) <= 1 || (code != LE && code != GT)))) return 0; - /* See what we need to return. We can only return a 1, -1, or the - sign bit. */ - - if (normalizep == 0) - { - if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1) - normalizep = STORE_FLAG_VALUE; - - else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT - && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) - == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))) - ; - else - return 0; - } - /* Try to put the result of the comparison in the sign bit. Assume we can't do the necessary operation below. */ @@ -5478,9 +5661,9 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, that is compensated by the subsequent overflow when subtracting one / negating. 
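[Editorial note] The sign-bit tricks for EQ/NE against zero alluded to above, including the abs(INT_MIN) anomaly that the later subtract-one wrap-around cancels, look like this when written out for a 32-bit operand. This is only one concrete instance of the idea, done on the unsigned representation to keep the arithmetic well defined, not the exact RTL sequence:

#include <stdint.h>
#include <stdio.h>

static int
is_zero_via_abs (int32_t x)
{
  uint32_t a = x < 0 ? -(uint32_t) x : (uint32_t) x;   /* |x|, wraps for INT_MIN */
  return (int) ((a - 1) >> 31);                        /* sign bit of |x| - 1    */
}

static int
is_nonzero_via_or_neg (int32_t x)
{
  uint32_t u = (uint32_t) x;
  return (int) ((u | -u) >> 31);                       /* sign bit of x | -x     */
}

int
main (void)
{
  int32_t vals[] = { 0, 1, -1, 42, INT32_MIN };
  for (unsigned i = 0; i < sizeof vals / sizeof vals[0]; i++)
    printf ("%d: eq0=%d ne0=%d\n", (int) vals[i],
            is_zero_via_abs (vals[i]), is_nonzero_via_or_neg (vals[i]));
  return 0;
}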
*/ - if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing) + if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing) tem = expand_unop (mode, abs_optab, op0, subtarget, 1); - else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing) + else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing) tem = expand_unop (mode, ffs_optab, op0, subtarget, 1); else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) { @@ -5502,7 +5685,10 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, that "or", which is an extra insn, so we only handle EQ if branches are expensive. */ - if (tem == 0 && (code == NE || BRANCH_COST > 1)) + if (tem == 0 + && (code == NE + || BRANCH_COST (optimize_insn_for_speed_p (), + false) > 1)) { if (rtx_equal_p (subtarget, op0)) subtarget = 0; @@ -5523,7 +5709,9 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, if (tem) { - if (GET_MODE (tem) != target_mode) + if (!target) + ; + else if (GET_MODE (tem) != target_mode) { convert_move (target, tem, 0); tem = target; @@ -5547,27 +5735,68 @@ emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1, enum machine_mode mode, int unsignedp, int normalizep) { rtx tem, label; + rtx trueval, falseval; /* First see if emit_store_flag can do the job. */ tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep); if (tem != 0) return tem; - if (normalizep == 0) - normalizep = 1; + if (!target) + target = gen_reg_rtx (word_mode); - /* If this failed, we have to do this with set/compare/jump/set code. */ + /* If this failed, we have to do this with set/compare/jump/set code. + For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */ + trueval = normalizep ? GEN_INT (normalizep) : const1_rtx; + if (code == NE + && GET_MODE_CLASS (mode) == MODE_INT + && REG_P (target) + && op0 == target + && op1 == const0_rtx) + { + label = gen_label_rtx (); + do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, + mode, NULL_RTX, NULL_RTX, label, -1); + emit_move_insn (target, trueval); + emit_label (label); + return target; + } if (!REG_P (target) || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1)) target = gen_reg_rtx (GET_MODE (target)); - emit_move_insn (target, const1_rtx); + /* Jump in the right direction if the target cannot implement CODE + but can jump on its reverse condition. */ + falseval = const0_rtx; + if (! can_compare_p (code, mode, ccp_jump) + && (! FLOAT_MODE_P (mode) + || code == ORDERED || code == UNORDERED + || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) + || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) + { + enum rtx_code rcode; + if (FLOAT_MODE_P (mode)) + rcode = reverse_condition_maybe_unordered (code); + else + rcode = reverse_condition (code); + + /* Canonicalize to UNORDERED for the libcall. */ + if (can_compare_p (rcode, mode, ccp_jump) + || (code == ORDERED && ! 
can_compare_p (ORDERED, mode, ccp_jump))) + { + falseval = trueval; + trueval = const0_rtx; + code = rcode; + } + } + + emit_move_insn (target, trueval); label = gen_label_rtx (); do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, - NULL_RTX, label); + NULL_RTX, label, -1); - emit_move_insn (target, const0_rtx); + emit_move_insn (target, falseval); emit_label (label); return target; @@ -5583,5 +5812,5 @@ do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode, { int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, - NULL_RTX, NULL_RTX, label); + NULL_RTX, NULL_RTX, label, -1); }