X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Ftree-ssa-loop-ivopts.c;h=7556f53d3e491b0b2df7e51609bbcdfb24a28ac3;hb=3b01567f28b62f8d753edb3a490064eeeabb5420;hp=bda640f3e0eafb11b834b423d38cc006a80cfb9d;hpb=134c053efb725c8dfa8238c3329ea0439f96df09;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index bda640f3e0e..7556f53d3e4 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -1,5 +1,5 @@ /* Induction variable optimizations. - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. This file is part of GCC. @@ -70,14 +70,12 @@ along with GCC; see the file COPYING3. If not see #include "tm_p.h" #include "basic-block.h" #include "output.h" -#include "diagnostic.h" #include "tree-pretty-print.h" #include "gimple-pretty-print.h" #include "tree-flow.h" #include "tree-dump.h" #include "timevar.h" #include "cfgloop.h" -#include "expr.h" #include "tree-pass.h" #include "ggc.h" #include "insn-config.h" @@ -91,14 +89,38 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "tree-affine.h" #include "target.h" +#include "tree-inline.h" +#include "tree-ssa-propagate.h" + +/* FIXME: add_cost and zero_cost defined in exprmed.h conflict with local uses. + */ +#include "expmed.h" +#undef add_cost +#undef zero_cost + +/* FIXME: Expressions are expanded to RTL in this pass to determine the + cost of different addressing modes. This should be moved to a TBD + interface between the GIMPLE and RTL worlds. */ +#include "expr.h" /* The infinite cost. */ #define INFTY 10000000 -/* The expected number of loop iterations. TODO -- use profiling instead of - this. */ #define AVG_LOOP_NITER(LOOP) 5 +/* Returns the expected number of loop iterations for LOOP. + The average trip count is computed from profile data if it + exists. */ + +static inline HOST_WIDE_INT +avg_loop_niter (struct loop *loop) +{ + HOST_WIDE_INT niter = max_stmt_executions_int (loop, false); + if (niter == -1) + return AVG_LOOP_NITER (loop); + + return niter; +} /* Representation of the induction variable. */ struct iv @@ -154,6 +176,8 @@ struct cost_pair tree value; /* For final value elimination, the expression for the final value of the iv. For iv elimination, the new bound to compare with. */ + enum tree_code comp; /* For iv elimination, the comparison. */ + int inv_expr_id; /* Loop invariant expression id. */ }; /* Use. */ @@ -208,6 +232,14 @@ struct iv_cand biv. */ }; +/* Loop invariant expression hashtable entry. */ +struct iv_inv_expr_ent +{ + tree expr; + int id; + hashval_t hash; +}; + /* The data used by the induction variable optimizations. */ typedef struct iv_use *iv_use_p; @@ -235,6 +267,13 @@ struct ivopts_data /* The array of information for the ssa names. */ struct version_info *version_info; + /* The hashtable of loop invariant expressions created + by ivopt. */ + htab_t inv_expr_tab; + + /* Loop invariant expression id. */ + int inv_expr_id; + /* The bitmap of indices in version_info whose value was changed. */ bitmap relevant; @@ -256,6 +295,12 @@ struct ivopts_data /* Are we optimizing for speed? */ bool speed; + + /* Whether the loop body includes any function calls. */ + bool body_includes_call; + + /* Whether the loop body can only be exited via single exit. */ + bool loop_single_exit_p; }; /* An assignment of iv candidates to uses. */ @@ -292,6 +337,13 @@ struct iv_ca /* Number of times each invariant is used. */ unsigned *n_invariant_uses; + /* The array holding the number of uses of each loop + invariant expressions created by ivopt. */ + unsigned *used_inv_expr; + + /* The number of created loop invariants. */ + unsigned num_used_inv_expr; + /* Total cost of the assignment. */ comp_cost cost; }; @@ -335,6 +387,8 @@ struct iv_ca_delta static VEC(tree,heap) *decl_rtl_to_reset; +static comp_cost force_expr_to_var_cost (tree, bool); + /* Number of uses recorded in DATA. */ static inline unsigned @@ -513,6 +567,19 @@ dump_cand (FILE *file, struct iv_cand *cand) return; } + if (cand->var_before) + { + fprintf (file, " var_before "); + print_generic_expr (file, cand->var_before, TDF_SLIM); + fprintf (file, "\n"); + } + if (cand->var_after) + { + fprintf (file, " var_after "); + print_generic_expr (file, cand->var_after, TDF_SLIM); + fprintf (file, "\n"); + } + switch (cand->pos) { case IP_NORMAL: @@ -707,14 +774,13 @@ contains_abnormal_ssa_name_p (tree expr) return false; } -/* Returns tree describing number of iterations determined from +/* Returns the structure describing number of iterations determined from EXIT of DATA->current_loop, or NULL if something goes wrong. */ -static tree +static struct tree_niter_desc * niter_for_exit (struct ivopts_data *data, edge exit) { - struct tree_niter_desc desc; - tree niter; + struct tree_niter_desc *desc; void **slot; if (!data->niters) @@ -727,32 +793,31 @@ niter_for_exit (struct ivopts_data *data, edge exit) if (!slot) { - /* Try to determine number of iterations. We must know it - unconditionally (i.e., without possibility of # of iterations - being zero). Also, we cannot safely work with ssa names that - appear in phi nodes on abnormal edges, so that we do not create - overlapping life ranges for them (PR 27283). */ - if (number_of_iterations_exit (data->current_loop, - exit, &desc, true) - && integer_zerop (desc.may_be_zero) - && !contains_abnormal_ssa_name_p (desc.niter)) - niter = desc.niter; - else - niter = NULL_TREE; - - *pointer_map_insert (data->niters, exit) = niter; + /* Try to determine number of iterations. We cannot safely work with ssa + names that appear in phi nodes on abnormal edges, so that we do not + create overlapping life ranges for them (PR 27283). */ + desc = XNEW (struct tree_niter_desc); + if (!number_of_iterations_exit (data->current_loop, + exit, desc, true) + || contains_abnormal_ssa_name_p (desc->niter)) + { + XDELETE (desc); + desc = NULL; + } + slot = pointer_map_insert (data->niters, exit); + *slot = desc; } else - niter = (tree) *slot; + desc = (struct tree_niter_desc *) *slot; - return niter; + return desc; } -/* Returns tree describing number of iterations determined from +/* Returns the structure describing number of iterations determined from single dominating exit of DATA->current_loop, or NULL if something goes wrong. */ -static tree +static struct tree_niter_desc * niter_for_single_dom_exit (struct ivopts_data *data) { edge exit = single_dom_exit (data->current_loop); @@ -763,6 +828,30 @@ niter_for_single_dom_exit (struct ivopts_data *data) return niter_for_exit (data, exit); } +/* Hash table equality function for expressions. */ + +static int +htab_inv_expr_eq (const void *ent1, const void *ent2) +{ + const struct iv_inv_expr_ent *expr1 = + (const struct iv_inv_expr_ent *)ent1; + const struct iv_inv_expr_ent *expr2 = + (const struct iv_inv_expr_ent *)ent2; + + return expr1->hash == expr2->hash + && operand_equal_p (expr1->expr, expr2->expr, 0); +} + +/* Hash function for loop invariant expressions. */ + +static hashval_t +htab_inv_expr_hash (const void *ent) +{ + const struct iv_inv_expr_ent *expr = + (const struct iv_inv_expr_ent *)ent; + return expr->hash; +} + /* Initializes data structures used by the iv optimization pass, stored in DATA. */ @@ -777,6 +866,9 @@ tree_ssa_iv_optimize_init (struct ivopts_data *data) data->niters = NULL; data->iv_uses = VEC_alloc (iv_use_p, heap, 20); data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20); + data->inv_expr_tab = htab_create (10, htab_inv_expr_hash, + htab_inv_expr_eq, free); + data->inv_expr_id = 0; decl_rtl_to_reset = VEC_alloc (tree, heap, 20); } @@ -810,7 +902,7 @@ determine_base_object (tree expr) if (!base) return expr; - if (TREE_CODE (base) == INDIRECT_REF) + if (TREE_CODE (base) == MEM_REF) return determine_base_object (TREE_OPERAND (base, 0)); return fold_convert (ptr_type_node, @@ -939,7 +1031,7 @@ find_bivs (struct ivopts_data *data) if (step) { if (POINTER_TYPE_P (type)) - step = fold_convert (sizetype, step); + step = convert_to_ptrofftype (step); else step = fold_convert (type, step); } @@ -1014,6 +1106,12 @@ find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv) || contains_abnormal_ssa_name_p (iv->step)) return false; + /* If STMT could throw, then do not consider STMT as defining a GIV. + While this will suppress optimizations, we can not safely delete this + GIV and associated statements, even if it appears it is not used. */ + if (stmt_could_throw_p (stmt)) + return false; + return true; } @@ -1072,12 +1170,17 @@ find_induction_variables (struct ivopts_data *data) if (dump_file && (dump_flags & TDF_DETAILS)) { - tree niter = niter_for_single_dom_exit (data); + struct tree_niter_desc *niter = niter_for_single_dom_exit (data); if (niter) { fprintf (dump_file, " number of iterations "); - print_generic_expr (dump_file, niter, TDF_SLIM); + print_generic_expr (dump_file, niter->niter, TDF_SLIM); + if (!integer_zerop (niter->may_be_zero)) + { + fprintf (dump_file, "; zero if "); + print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM); + } fprintf (dump_file, "\n\n"); }; @@ -1356,10 +1459,6 @@ idx_find_step (tree base, tree *idx, void *data) tree step, iv_base, iv_step, lbound, off; struct loop *loop = dta->ivopts_data->current_loop; - if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF - || TREE_CODE (base) == ALIGN_INDIRECT_REF) - return false; - /* If base is a component ref, require that the offset of the reference be invariant. */ if (TREE_CODE (base) == COMPONENT_REF) @@ -1412,7 +1511,7 @@ idx_find_step (tree base, tree *idx, void *data) } else /* The step for pointer arithmetics already is 1 byte. */ - step = build_int_cst (sizetype, 1); + step = size_one_node; iv_base = iv->base; iv_step = iv->step; @@ -1537,7 +1636,8 @@ may_be_unaligned_p (tree ref, tree step) base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode, &unsignedp, &volatilep, true); base_type = TREE_TYPE (base); - base_align = TYPE_ALIGN (base_type); + base_align = get_object_alignment (base); + base_align = MAX (base_align, TYPE_ALIGN (base_type)); if (mode != BLKmode) { @@ -1561,7 +1661,7 @@ may_be_unaligned_p (tree ref, tree step) /* Return true if EXPR may be non-addressable. */ -static bool +bool may_be_nonaddressable_p (tree expr) { switch (TREE_CODE (expr)) @@ -1605,7 +1705,7 @@ may_be_nonaddressable_p (tree expr) static void find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p) { - tree base = *op_p, step = build_int_cst (sizetype, 0); + tree base = *op_p, step = size_zero_node; struct iv *civ; struct ifs_ivopts_data ifs_ivopts_data; @@ -1636,6 +1736,16 @@ find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p TMR_BASE (base) = civ->base; step = civ->step; } + if (TMR_INDEX2 (base) + && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME) + { + civ = get_iv (data, TMR_INDEX2 (base)); + if (!civ) + goto fail; + + TMR_INDEX2 (base) = civ->base; + step = civ->step; + } if (TMR_INDEX (base) && TREE_CODE (TMR_INDEX (base)) == SSA_NAME) { @@ -1663,15 +1773,12 @@ find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p { ifs_ivopts_data.ivopts_data = data; ifs_ivopts_data.stmt = stmt; - ifs_ivopts_data.step = build_int_cst (sizetype, 0); + ifs_ivopts_data.step = size_zero_node; if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data) || integer_zerop (ifs_ivopts_data.step)) goto fail; step = ifs_ivopts_data.step; - gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF); - gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF); - /* Check that the base expression is addressable. This needs to be done after substituting bases of IVs into it. */ if (may_be_nonaddressable_p (base)) @@ -1691,9 +1798,11 @@ find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p tree *ref = &TREE_OPERAND (base, 0); while (handled_component_p (*ref)) ref = &TREE_OPERAND (*ref, 0); - if (TREE_CODE (*ref) == INDIRECT_REF) + if (TREE_CODE (*ref) == MEM_REF) { - tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0)); + tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref), + TREE_OPERAND (*ref, 0), + TREE_OPERAND (*ref, 1)); if (tem) *ref = tem; } @@ -1827,7 +1936,7 @@ find_interesting_uses_outside (struct ivopts_data *data, edge exit) phi = gsi_stmt (psi); def = PHI_ARG_DEF_FROM_EDGE (phi, exit); if (is_gimple_reg (def)) - find_interesting_uses_op (data, def); + find_interesting_uses_op (data, def); } } @@ -2015,7 +2124,8 @@ strip_offset_1 (tree expr, bool inside_addr, bool top_compref, expr = build_fold_addr_expr (op0); return fold_convert (orig_type, expr); - case INDIRECT_REF: + case MEM_REF: + /* ??? Offset operand? */ inside_addr = false; break; @@ -2108,7 +2218,10 @@ add_candidate_1 (struct ivopts_data *data, struct iv_cand *cand = NULL; tree type, orig_type; - if (base) + /* For non-original variables, make sure their values are computed in a type + that does not invoke undefined behavior on overflows (since in general, + we cannot prove that these induction variables are non-wrapping). */ + if (pos != IP_ORIGINAL) { orig_type = TREE_TYPE (base); type = generic_type_for (orig_type); @@ -2143,7 +2256,9 @@ add_candidate_1 (struct ivopts_data *data, continue; if (operand_equal_p (base, cand->iv->base, 0) - && operand_equal_p (step, cand->iv->step, 0)) + && operand_equal_p (step, cand->iv->step, 0) + && (TYPE_PRECISION (TREE_TYPE (base)) + == TYPE_PRECISION (TREE_TYPE (cand->iv->base)))) break; } @@ -2552,12 +2667,13 @@ infinite_cost_p (comp_cost cost) /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends on invariants DEPENDS_ON and that the value used in expressing it - is VALUE. */ + is VALUE, and in case of iv elimination the comparison operator is COMP. */ static void set_use_iv_cost (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand, - comp_cost cost, bitmap depends_on, tree value) + comp_cost cost, bitmap depends_on, tree value, + enum tree_code comp, int inv_expr_id) { unsigned i, s; @@ -2573,6 +2689,8 @@ set_use_iv_cost (struct ivopts_data *data, use->cost_map[cand->id].cost = cost; use->cost_map[cand->id].depends_on = depends_on; use->cost_map[cand->id].value = value; + use->cost_map[cand->id].comp = comp; + use->cost_map[cand->id].inv_expr_id = inv_expr_id; return; } @@ -2592,6 +2710,8 @@ found: use->cost_map[i].cost = cost; use->cost_map[i].depends_on = depends_on; use->cost_map[i].value = value; + use->cost_map[i].comp = comp; + use->cost_map[i].inv_expr_id = inv_expr_id; } /* Gets cost of (USE, CANDIDATE) pair. */ @@ -2640,7 +2760,7 @@ seq_cost (rtx seq, bool speed) { set = single_set (seq); if (set) - cost += rtx_cost (set, SET,speed); + cost += set_src_cost (SET_SRC (set), speed); else cost++; } @@ -2744,7 +2864,7 @@ computation_cost (tree expr, bool speed) unsigned cost; /* Avoid using hard regs in ways which may be unsupported. */ int regno = LAST_VIRTUAL_REGISTER + 1; - struct cgraph_node *node = cgraph_node (current_function_decl); + struct cgraph_node *node = cgraph_get_node (current_function_decl); enum node_frequency real_frequency = node->frequency; node->frequency = NODE_FREQUENCY_NORMAL; @@ -2761,6 +2881,8 @@ computation_cost (tree expr, bool speed) if (MEM_P (rslt)) cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), TYPE_ADDR_SPACE (type), speed); + else if (!REG_P (rslt)) + cost += set_src_cost (rslt, speed); return cost; } @@ -2776,26 +2898,6 @@ var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt) return cand->var_before; } -/* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb, - but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */ - -int -tree_int_cst_sign_bit (const_tree t) -{ - unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1; - unsigned HOST_WIDE_INT w; - - if (bitno < HOST_BITS_PER_WIDE_INT) - w = TREE_INT_CST_LOW (t); - else - { - w = TREE_INT_CST_HIGH (t); - bitno -= HOST_BITS_PER_WIDE_INT; - } - - return (w >> bitno) & 1; -} - /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the same precision that is at least as wide as the precision of TYPE, stores BA to A and BB to B, and returns the type of BA. Otherwise, returns the @@ -2933,6 +3035,20 @@ get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand) return get_computation_at (loop, use, cand, use->stmt); } +/* Adjust the cost COST for being in loop setup rather than loop body. + If we're optimizing for space, the loop setup overhead is constant; + if we're optimizing for speed, amortize it over the per-iteration cost. */ +static unsigned +adjust_setup_cost (struct ivopts_data *data, unsigned cost) +{ + if (cost == INFTY) + return cost; + else if (optimize_loop_for_speed_p (data->current_loop)) + return cost / avg_loop_niter (data->current_loop); + else + return cost; +} + /* Returns cost of addition in MODE. */ static unsigned @@ -3141,9 +3257,8 @@ get_address_cost (bool symbol_present, bool var_present, if (!data) { HOST_WIDE_INT i; - HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT; - HOST_WIDE_INT rat, off; - int old_cse_not_expected; + HOST_WIDE_INT rat, off = 0; + int old_cse_not_expected, width; unsigned sym_p, var_p, off_p, rat_p, add_c; rtx seq, addr, base; rtx reg0, reg1; @@ -3152,33 +3267,40 @@ get_address_cost (bool symbol_present, bool var_present, reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); + width = GET_MODE_BITSIZE (address_mode) - 1; + if (width > (HOST_BITS_PER_WIDE_INT - 1)) + width = HOST_BITS_PER_WIDE_INT - 1; addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX); - for (i = start; i <= 1 << 20; i <<= 1) + + for (i = width; i >= 0; i--) { - XEXP (addr, 1) = gen_int_mode (i, address_mode); - if (!memory_address_addr_space_p (mem_mode, addr, as)) + off = -((HOST_WIDE_INT) 1 << i); + XEXP (addr, 1) = gen_int_mode (off, address_mode); + if (memory_address_addr_space_p (mem_mode, addr, as)) break; } - data->max_offset = i == start ? 0 : i >> 1; - off = data->max_offset; + data->min_offset = (i == -1? 0 : off); - for (i = start; i <= 1 << 20; i <<= 1) + for (i = width; i >= 0; i--) { - XEXP (addr, 1) = gen_int_mode (-i, address_mode); - if (!memory_address_addr_space_p (mem_mode, addr, as)) + off = ((HOST_WIDE_INT) 1 << i) - 1; + XEXP (addr, 1) = gen_int_mode (off, address_mode); + if (memory_address_addr_space_p (mem_mode, addr, as)) break; } - data->min_offset = i == start ? 0 : -(i >> 1); + if (i == -1) + off = 0; + data->max_offset = off; if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "get_address_cost:\n"); - fprintf (dump_file, " min offset %s %d\n", + fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n", GET_MODE_NAME (mem_mode), - (int) data->min_offset); - fprintf (dump_file, " max offset %s %d\n", + data->min_offset); + fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n", GET_MODE_NAME (mem_mode), - (int) data->max_offset); + data->max_offset); } rat = 1; @@ -3379,6 +3501,42 @@ get_address_cost (bool symbol_present, bool var_present, return new_cost (cost + acost, complexity); } + /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the + the EXPR operand holding the shift. COST0 and COST1 are the costs for + calculating the operands of EXPR. Returns true if successful, and returns + the cost in COST. */ + +static bool +get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0, + comp_cost cost1, tree mult, bool speed, comp_cost *cost) +{ + comp_cost res; + tree op1 = TREE_OPERAND (expr, 1); + tree cst = TREE_OPERAND (mult, 1); + tree multop = TREE_OPERAND (mult, 0); + int m = exact_log2 (int_cst_value (cst)); + int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); + int sa_cost; + + if (!(m >= 0 && m < maxm)) + return false; + + sa_cost = (TREE_CODE (expr) != MINUS_EXPR + ? shiftadd_cost[speed][mode][m] + : (mult == op1 + ? shiftsub1_cost[speed][mode][m] + : shiftsub0_cost[speed][mode][m])); + res = new_cost (sa_cost, 0); + res = add_costs (res, mult == op1 ? cost0 : cost1); + + STRIP_NOPS (multop); + if (!is_gimple_val (multop)) + res = add_costs (res, force_expr_to_var_cost (multop, speed)); + + *cost = res; + return true; +} + /* Estimates cost of forcing expression EXPR into a variable. */ static comp_cost @@ -3415,9 +3573,7 @@ force_expr_to_var_cost (tree expr, bool speed) symbol_cost[i] = computation_cost (addr, i) + 1; address_cost[i] - = computation_cost (build2 (POINTER_PLUS_EXPR, type, - addr, - build_int_cst (sizetype, 2000)), i) + 1; + = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1; if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size"); @@ -3504,6 +3660,21 @@ force_expr_to_var_cost (tree expr, bool speed) case MINUS_EXPR: case NEGATE_EXPR: cost = new_cost (add_cost (mode, speed), 0); + if (TREE_CODE (expr) != NEGATE_EXPR) + { + tree mult = NULL_TREE; + comp_cost sa_cost; + if (TREE_CODE (op1) == MULT_EXPR) + mult = op1; + else if (TREE_CODE (op0) == MULT_EXPR) + mult = op0; + + if (mult != NULL_TREE + && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1)) + && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed, + &sa_cost)) + return sa_cost; + } break; case MULT_EXPR: @@ -3689,6 +3860,153 @@ difference_cost (struct ivopts_data *data, return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on); } +/* Returns true if AFF1 and AFF2 are identical. */ + +static bool +compare_aff_trees (aff_tree *aff1, aff_tree *aff2) +{ + unsigned i; + + if (aff1->n != aff2->n) + return false; + + for (i = 0; i < aff1->n; i++) + { + if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0) + return false; + + if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0)) + return false; + } + return true; +} + +/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */ + +static int +get_expr_id (struct ivopts_data *data, tree expr) +{ + struct iv_inv_expr_ent ent; + struct iv_inv_expr_ent **slot; + + ent.expr = expr; + ent.hash = iterative_hash_expr (expr, 0); + slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab, + &ent, INSERT); + if (*slot) + return (*slot)->id; + + *slot = XNEW (struct iv_inv_expr_ent); + (*slot)->expr = expr; + (*slot)->hash = ent.hash; + (*slot)->id = data->inv_expr_id++; + return (*slot)->id; +} + +/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE + requires a new compiler generated temporary. Returns -1 otherwise. + ADDRESS_P is a flag indicating if the expression is for address + computation. */ + +static int +get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase, + tree cbase, HOST_WIDE_INT ratio, + bool address_p) +{ + aff_tree ubase_aff, cbase_aff; + tree expr, ub, cb; + + STRIP_NOPS (ubase); + STRIP_NOPS (cbase); + ub = ubase; + cb = cbase; + + if ((TREE_CODE (ubase) == INTEGER_CST) + && (TREE_CODE (cbase) == INTEGER_CST)) + return -1; + + /* Strips the constant part. */ + if (TREE_CODE (ubase) == PLUS_EXPR + || TREE_CODE (ubase) == MINUS_EXPR + || TREE_CODE (ubase) == POINTER_PLUS_EXPR) + { + if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST) + ubase = TREE_OPERAND (ubase, 0); + } + + /* Strips the constant part. */ + if (TREE_CODE (cbase) == PLUS_EXPR + || TREE_CODE (cbase) == MINUS_EXPR + || TREE_CODE (cbase) == POINTER_PLUS_EXPR) + { + if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST) + cbase = TREE_OPERAND (cbase, 0); + } + + if (address_p) + { + if (((TREE_CODE (ubase) == SSA_NAME) + || (TREE_CODE (ubase) == ADDR_EXPR + && is_gimple_min_invariant (ubase))) + && (TREE_CODE (cbase) == INTEGER_CST)) + return -1; + + if (((TREE_CODE (cbase) == SSA_NAME) + || (TREE_CODE (cbase) == ADDR_EXPR + && is_gimple_min_invariant (cbase))) + && (TREE_CODE (ubase) == INTEGER_CST)) + return -1; + } + + if (ratio == 1) + { + if(operand_equal_p (ubase, cbase, 0)) + return -1; + + if (TREE_CODE (ubase) == ADDR_EXPR + && TREE_CODE (cbase) == ADDR_EXPR) + { + tree usym, csym; + + usym = TREE_OPERAND (ubase, 0); + csym = TREE_OPERAND (cbase, 0); + if (TREE_CODE (usym) == ARRAY_REF) + { + tree ind = TREE_OPERAND (usym, 1); + if (TREE_CODE (ind) == INTEGER_CST + && host_integerp (ind, 0) + && TREE_INT_CST_LOW (ind) == 0) + usym = TREE_OPERAND (usym, 0); + } + if (TREE_CODE (csym) == ARRAY_REF) + { + tree ind = TREE_OPERAND (csym, 1); + if (TREE_CODE (ind) == INTEGER_CST + && host_integerp (ind, 0) + && TREE_INT_CST_LOW (ind) == 0) + csym = TREE_OPERAND (csym, 0); + } + if (operand_equal_p (usym, csym, 0)) + return -1; + } + /* Now do more complex comparison */ + tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff); + tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff); + if (compare_aff_trees (&ubase_aff, &cbase_aff)) + return -1; + } + + tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff); + tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff); + + aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio)); + aff_combination_add (&ubase_aff, &cbase_aff); + expr = aff_combination_to_tree (&ubase_aff); + return get_expr_id (data, expr); +} + + + /* Determines the cost of the computation by that USE is expressed from induction variable CAND. If ADDRESS_P is true, we just need to create an address from it, otherwise we want to get it into @@ -3701,7 +4019,8 @@ static comp_cost get_computation_cost_at (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand, bool address_p, bitmap *depends_on, gimple at, - bool *can_autoinc) + bool *can_autoinc, + int *inv_expr_id) { tree ubase = use->iv->base, ustep = use->iv->step; tree cbase, cstep; @@ -3729,7 +4048,11 @@ get_computation_cost_at (struct ivopts_data *data, return infinite_cost; } - if (address_p) + if (address_p + || (use->iv->base_object + && cand->iv->base_object + && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object)) + && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object)))) { /* Do not try to express address of an object with computation based on address of a different object. This may cause problems in rtl @@ -3770,6 +4093,8 @@ get_computation_cost_at (struct ivopts_data *data, STRIP_NOPS (cbase); ctype = TREE_TYPE (cbase); + stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at); + /* use = ubase + ratio * (var - cbase). If either cbase is a constant or ratio == 1, it is better to handle this like @@ -3784,13 +4109,31 @@ get_computation_cost_at (struct ivopts_data *data, ubase, build_int_cst (utype, 0), &symbol_present, &var_present, &offset, depends_on); + cost.cost /= avg_loop_niter (data->current_loop); } else if (ratio == 1) { + tree real_cbase = cbase; + + /* Check to see if any adjustment is needed. */ + if (cstepi == 0 && stmt_is_after_inc) + { + aff_tree real_cbase_aff; + aff_tree cstep_aff; + + tree_to_aff_combination (cbase, TREE_TYPE (real_cbase), + &real_cbase_aff); + tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff); + + aff_combination_add (&real_cbase_aff, &cstep_aff); + real_cbase = aff_combination_to_tree (&real_cbase_aff); + } + cost = difference_cost (data, - ubase, cbase, + ubase, real_cbase, &symbol_present, &var_present, &offset, depends_on); + cost.cost /= avg_loop_niter (data->current_loop); } else if (address_p && !POINTER_TYPE_P (ctype) @@ -3804,21 +4147,31 @@ get_computation_cost_at (struct ivopts_data *data, ubase, cbase, &symbol_present, &var_present, &offset, depends_on); + cost.cost /= avg_loop_niter (data->current_loop); } else { cost = force_var_cost (data, cbase, depends_on); - cost.cost += add_cost (TYPE_MODE (ctype), data->speed); cost = add_costs (cost, difference_cost (data, ubase, build_int_cst (utype, 0), &symbol_present, &var_present, &offset, depends_on)); + cost.cost /= avg_loop_niter (data->current_loop); + cost.cost += add_cost (TYPE_MODE (ctype), data->speed); + } + + if (inv_expr_id) + { + *inv_expr_id = + get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p); + /* Clear depends on. */ + if (*inv_expr_id != -1 && depends_on && *depends_on) + bitmap_clear (*depends_on); } /* If we are after the increment, the value of the candidate is higher by one iteration. */ - stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at); if (stmt_is_after_inc) offset -= ratio * cstepi; @@ -3845,8 +4198,8 @@ get_computation_cost_at (struct ivopts_data *data, /* Symbol + offset should be compile-time computable so consider that they are added once to the variable, if present. */ if (var_present && (symbol_present || offset)) - cost.cost += add_cost (TYPE_MODE (ctype), speed) - / AVG_LOOP_NITER (data->current_loop); + cost.cost += adjust_setup_cost (data, + add_cost (TYPE_MODE (ctype), speed)); /* Having offset does not affect runtime cost in case it is added to symbol, but it increases complexity. */ @@ -3858,6 +4211,7 @@ get_computation_cost_at (struct ivopts_data *data, aratio = ratio > 0 ? ratio : -ratio; if (aratio != 1) cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed); + return cost; fallback: if (can_autoinc) @@ -3871,7 +4225,7 @@ fallback: return infinite_cost; if (address_p) - comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp); + comp = build_simple_mem_ref (comp); return new_cost (computation_cost (comp, speed), 0); } @@ -3887,11 +4241,12 @@ fallback: static comp_cost get_computation_cost (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand, - bool address_p, bitmap *depends_on, bool *can_autoinc) + bool address_p, bitmap *depends_on, + bool *can_autoinc, int *inv_expr_id) { return get_computation_cost_at (data, use, cand, address_p, depends_on, use->stmt, - can_autoinc); + can_autoinc, inv_expr_id); } /* Determines cost of basing replacement of USE on CAND in a generic @@ -3903,6 +4258,7 @@ determine_use_iv_cost_generic (struct ivopts_data *data, { bitmap depends_on; comp_cost cost; + int inv_expr_id = -1; /* The simple case first -- if we need to express value of the preserved original biv, the cost is 0. This also prevents us from counting the @@ -3911,12 +4267,16 @@ determine_use_iv_cost_generic (struct ivopts_data *data, if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) { - set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE); + set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, + ERROR_MARK, -1); return true; } - cost = get_computation_cost (data, use, cand, false, &depends_on, NULL); - set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE); + cost = get_computation_cost (data, use, cand, false, &depends_on, + NULL, &inv_expr_id); + + set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK, + inv_expr_id); return !infinite_cost_p (cost); } @@ -3929,8 +4289,9 @@ determine_use_iv_cost_address (struct ivopts_data *data, { bitmap depends_on; bool can_autoinc; + int inv_expr_id = -1; comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on, - &can_autoinc); + &can_autoinc, &inv_expr_id); if (cand->ainc_use == use) { @@ -3942,7 +4303,8 @@ determine_use_iv_cost_address (struct ivopts_data *data, else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE) cost = infinite_cost; } - set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE); + set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK, + inv_expr_id); return !infinite_cost_p (cost); } @@ -3982,15 +4344,20 @@ iv_period (struct iv *iv) gcc_assert (step && TREE_CODE (step) == INTEGER_CST); - /* Period of the iv is gcd (step, type range). Since type range is power - of two, it suffices to determine the maximum power of two that divides - step. */ - pow2div = num_ending_zeros (step); type = unsigned_type_for (TREE_TYPE (step)); + /* Period of the iv is lcm (step, type_range)/step -1, + i.e., N*type_range/step - 1. Since type range is power + of two, N == (step >> num_of_ending_zeros_binary (step), + so the final result is + + (type_range >> num_of_ending_zeros_binary (step)) - 1 + + */ + pow2div = num_ending_zeros (step); period = build_low_bits_mask (type, - (TYPE_PRECISION (type) - - tree_low_cst (pow2div, 1))); + (TYPE_PRECISION (type) + - tree_low_cst (pow2div, 1))); return period; } @@ -4012,18 +4379,264 @@ iv_elimination_compare (struct ivopts_data *data, struct iv_use *use) return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR); } +static tree +strip_wrap_conserving_type_conversions (tree exp) +{ + while (tree_ssa_useless_type_conversion (exp) + && (nowrap_type_p (TREE_TYPE (exp)) + == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0))))) + exp = TREE_OPERAND (exp, 0); + return exp; +} + +/* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we + check for an exact match. */ + +static bool +expr_equal_p (tree e, tree what) +{ + gimple stmt; + enum tree_code code; + + e = strip_wrap_conserving_type_conversions (e); + what = strip_wrap_conserving_type_conversions (what); + + code = TREE_CODE (what); + if (TREE_TYPE (e) != TREE_TYPE (what)) + return false; + + if (operand_equal_p (e, what, 0)) + return true; + + if (TREE_CODE (e) != SSA_NAME) + return false; + + stmt = SSA_NAME_DEF_STMT (e); + if (gimple_code (stmt) != GIMPLE_ASSIGN + || gimple_assign_rhs_code (stmt) != code) + return false; + + switch (get_gimple_rhs_class (code)) + { + case GIMPLE_BINARY_RHS: + if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1))) + return false; + /* Fallthru. */ + + case GIMPLE_UNARY_RHS: + case GIMPLE_SINGLE_RHS: + return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0)); + default: + return false; + } +} + +/* Returns true if we can prove that BASE - OFFSET does not overflow. For now, + we only detect the situation that BASE = SOMETHING + OFFSET, where the + calculation is performed in non-wrapping type. + + TODO: More generally, we could test for the situation that + BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero. + This would require knowing the sign of OFFSET. + + Also, we only look for the first addition in the computation of BASE. + More complex analysis would be better, but introducing it just for + this optimization seems like an overkill. */ + +static bool +difference_cannot_overflow_p (tree base, tree offset) +{ + enum tree_code code; + tree e1, e2; + + if (!nowrap_type_p (TREE_TYPE (base))) + return false; + + base = expand_simple_operations (base); + + if (TREE_CODE (base) == SSA_NAME) + { + gimple stmt = SSA_NAME_DEF_STMT (base); + + if (gimple_code (stmt) != GIMPLE_ASSIGN) + return false; + + code = gimple_assign_rhs_code (stmt); + if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) + return false; + + e1 = gimple_assign_rhs1 (stmt); + e2 = gimple_assign_rhs2 (stmt); + } + else + { + code = TREE_CODE (base); + if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) + return false; + e1 = TREE_OPERAND (base, 0); + e2 = TREE_OPERAND (base, 1); + } + + /* TODO: deeper inspection may be necessary to prove the equality. */ + switch (code) + { + case PLUS_EXPR: + return expr_equal_p (e1, offset) || expr_equal_p (e2, offset); + case POINTER_PLUS_EXPR: + return expr_equal_p (e2, offset); + + default: + return false; + } +} + +/* Tries to replace loop exit by one formulated in terms of a LT_EXPR + comparison with CAND. NITER describes the number of iterations of + the loops. If successful, the comparison in COMP_P is altered accordingly. + + We aim to handle the following situation: + + sometype *base, *p; + int a, b, i; + + i = a; + p = p_0 = base + a; + + do + { + bla (*p); + p++; + i++; + } + while (i < b); + + Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1. + We aim to optimize this to + + p = p_0 = base + a; + do + { + bla (*p); + p++; + } + while (p < p_0 - a + b); + + This preserves the correctness, since the pointer arithmetics does not + overflow. More precisely: + + 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no + overflow in computing it or the values of p. + 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not + overflow. To prove this, we use the fact that p_0 = base + a. */ + +static bool +iv_elimination_compare_lt (struct ivopts_data *data, + struct iv_cand *cand, enum tree_code *comp_p, + struct tree_niter_desc *niter) +{ + tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset; + struct affine_tree_combination nit, tmpa, tmpb; + enum tree_code comp; + HOST_WIDE_INT step; + + /* We need to know that the candidate induction variable does not overflow. + While more complex analysis may be used to prove this, for now just + check that the variable appears in the original program and that it + is computed in a type that guarantees no overflows. */ + cand_type = TREE_TYPE (cand->iv->base); + if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type)) + return false; + + /* Make sure that the loop iterates till the loop bound is hit, as otherwise + the calculation of the BOUND could overflow, making the comparison + invalid. */ + if (!data->loop_single_exit_p) + return false; + + /* We need to be able to decide whether candidate is increasing or decreasing + in order to choose the right comparison operator. */ + if (!cst_and_fits_in_hwi (cand->iv->step)) + return false; + step = int_cst_value (cand->iv->step); + + /* Check that the number of iterations matches the expected pattern: + a + 1 > b ? 0 : b - a - 1. */ + mbz = niter->may_be_zero; + if (TREE_CODE (mbz) == GT_EXPR) + { + /* Handle a + 1 > b. */ + tree op0 = TREE_OPERAND (mbz, 0); + if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1))) + { + a = TREE_OPERAND (op0, 0); + b = TREE_OPERAND (mbz, 1); + } + else + return false; + } + else if (TREE_CODE (mbz) == LT_EXPR) + { + tree op1 = TREE_OPERAND (mbz, 1); + + /* Handle b < a + 1. */ + if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1))) + { + a = TREE_OPERAND (op1, 0); + b = TREE_OPERAND (mbz, 0); + } + else + return false; + } + else + return false; + + /* Expected number of iterations is B - A - 1. Check that it matches + the actual number, i.e., that B - A - NITER = 1. */ + tree_to_aff_combination (niter->niter, nit_type, &nit); + tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa); + tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb); + aff_combination_scale (&nit, double_int_minus_one); + aff_combination_scale (&tmpa, double_int_minus_one); + aff_combination_add (&tmpb, &tmpa); + aff_combination_add (&tmpb, &nit); + if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one)) + return false; + + /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not + overflow. */ + offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step), + cand->iv->step, + fold_convert (TREE_TYPE (cand->iv->step), a)); + if (!difference_cannot_overflow_p (cand->iv->base, offset)) + return false; + + /* Determine the new comparison operator. */ + comp = step < 0 ? GT_EXPR : LT_EXPR; + if (*comp_p == NE_EXPR) + *comp_p = comp; + else if (*comp_p == EQ_EXPR) + *comp_p = invert_tree_comparison (comp, false); + else + gcc_unreachable (); + + return true; +} + /* Check whether it is possible to express the condition in USE by comparison - of candidate CAND. If so, store the value compared with to BOUND. */ + of candidate CAND. If so, store the value compared with to BOUND, and the + comparison operator to COMP. */ static bool may_eliminate_iv (struct ivopts_data *data, - struct iv_use *use, struct iv_cand *cand, tree *bound) + struct iv_use *use, struct iv_cand *cand, tree *bound, + enum tree_code *comp) { basic_block ex_bb; edge exit; - tree nit, period; + tree period; struct loop *loop = data->current_loop; aff_tree bnd; + struct tree_niter_desc *desc = NULL; if (TREE_CODE (cand->iv->step) != INTEGER_CST) return false; @@ -4042,8 +4655,8 @@ may_eliminate_iv (struct ivopts_data *data, if (flow_bb_inside_loop_p (loop, exit->dest)) return false; - nit = niter_for_exit (data, exit); - if (!nit) + desc = niter_for_exit (data, exit); + if (!desc) return false; /* Determine whether we can use the variable to test the exit condition. @@ -4052,39 +4665,89 @@ may_eliminate_iv (struct ivopts_data *data, period = iv_period (cand->iv); /* If the number of iterations is constant, compare against it directly. */ - if (TREE_CODE (nit) == INTEGER_CST) - { - if (!tree_int_cst_lt (nit, period)) - return false; + if (TREE_CODE (desc->niter) == INTEGER_CST) + { + /* See cand_value_at. */ + if (stmt_after_increment (loop, cand, use->stmt)) + { + if (!tree_int_cst_lt (desc->niter, period)) + return false; + } + else + { + if (tree_int_cst_lt (period, desc->niter)) + return false; + } } /* If not, and if this is the only possible exit of the loop, see whether we can get a conservative estimate on the number of iterations of the entire loop and compare against that instead. */ - else if (loop_only_exit_p (loop, exit)) + else { double_int period_value, max_niter; - if (!estimated_loop_iterations (loop, true, &max_niter)) - return false; - period_value = tree_to_double_int (period); - if (double_int_ucmp (max_niter, period_value) >= 0) - return false; - } - - /* Otherwise, punt. */ - else - return false; - cand_value_at (loop, cand, use->stmt, nit, &bnd); + max_niter = desc->max; + if (stmt_after_increment (loop, cand, use->stmt)) + max_niter = double_int_add (max_niter, double_int_one); + period_value = tree_to_double_int (period); + if (double_int_ucmp (max_niter, period_value) > 0) + { + /* See if we can take advantage of infered loop bound information. */ + if (data->loop_single_exit_p) + { + if (!estimated_loop_iterations (loop, true, &max_niter)) + return false; + /* The loop bound is already adjusted by adding 1. */ + if (double_int_ucmp (max_niter, period_value) > 0) + return false; + } + else + return false; + } + } + + cand_value_at (loop, cand, use->stmt, desc->niter, &bnd); *bound = aff_combination_to_tree (&bnd); + *comp = iv_elimination_compare (data, use); + /* It is unlikely that computing the number of iterations using division would be more profitable than keeping the original induction variable. */ if (expression_expensive_p (*bound)) return false; + + /* Sometimes, it is possible to handle the situation that the number of + iterations may be zero unless additional assumtions by using < + instead of != in the exit condition. + + TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and + base the exit condition on it. However, that is often too + expensive. */ + if (!integer_zerop (desc->may_be_zero)) + return iv_elimination_compare_lt (data, cand, comp, desc); + return true; } + /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must + be copied, if is is used in the loop body and DATA->body_includes_call. */ + +static int +parm_decl_cost (struct ivopts_data *data, tree bound) +{ + tree sbound = bound; + STRIP_NOPS (sbound); + + if (TREE_CODE (sbound) == SSA_NAME + && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL + && gimple_nop_p (SSA_NAME_DEF_STMT (sbound)) + && data->body_includes_call) + return COSTS_N_INSNS (1); + + return 0; +} + /* Determines cost of basing replacement of USE on CAND in a condition. */ static bool @@ -4094,24 +4757,42 @@ determine_use_iv_cost_condition (struct ivopts_data *data, tree bound = NULL_TREE; struct iv *cmp_iv; bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on; - comp_cost elim_cost, express_cost, cost; + comp_cost elim_cost, express_cost, cost, bound_cost; bool ok; + int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id; tree *control_var, *bound_cst; + enum tree_code comp = ERROR_MARK; /* Only consider real candidates. */ if (!cand->iv) { - set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE); + set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, + ERROR_MARK, -1); return false; } /* Try iv elimination. */ - if (may_eliminate_iv (data, use, cand, &bound)) + if (may_eliminate_iv (data, use, cand, &bound, &comp)) { elim_cost = force_var_cost (data, bound, &depends_on_elim); + if (elim_cost.cost == 0) + elim_cost.cost = parm_decl_cost (data, bound); + else if (TREE_CODE (bound) == INTEGER_CST) + elim_cost.cost = 0; + /* If we replace a loop condition 'i < n' with 'p < base + n', + depends_on_elim will have 'base' and 'n' set, which implies + that both 'base' and 'n' will be live during the loop. More likely, + 'base + n' will be loop invariant, resulting in only one live value + during the loop. So in that case we clear depends_on_elim and set + elim_inv_expr_id instead. */ + if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1) + { + elim_inv_expr_id = get_expr_id (data, bound); + bitmap_clear (depends_on_elim); + } /* The bound is a loop invariant, so it will be only computed once. */ - elim_cost.cost /= AVG_LOOP_NITER (data->current_loop); + elim_cost.cost = adjust_setup_cost (data, elim_cost.cost); } else elim_cost = infinite_cost; @@ -4135,16 +4816,26 @@ determine_use_iv_cost_condition (struct ivopts_data *data, elim_cost.cost -= 1; express_cost = get_computation_cost (data, use, cand, false, - &depends_on_express, NULL); + &depends_on_express, NULL, + &express_inv_expr_id); fd_ivopts_data = data; walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL); + /* Count the cost of the original bound as well. */ + bound_cost = force_var_cost (data, *bound_cst, NULL); + if (bound_cost.cost == 0) + bound_cost.cost = parm_decl_cost (data, *bound_cst); + else if (TREE_CODE (*bound_cst) == INTEGER_CST) + bound_cost.cost = 0; + express_cost.cost += bound_cost.cost; + /* Choose the better approach, preferring the eliminated IV. */ if (compare_costs (elim_cost, express_cost) <= 0) { cost = elim_cost; depends_on = depends_on_elim; depends_on_elim = NULL; + inv_expr_id = elim_inv_expr_id; } else { @@ -4152,9 +4843,11 @@ determine_use_iv_cost_condition (struct ivopts_data *data, depends_on = depends_on_express; depends_on_express = NULL; bound = NULL_TREE; + comp = ERROR_MARK; + inv_expr_id = express_inv_expr_id; } - set_use_iv_cost (data, use, cand, cost, depends_on, bound); + set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id); if (depends_on_elim) BITMAP_FREE (depends_on_elim); @@ -4202,7 +4895,7 @@ autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use, return false; cost = get_computation_cost (data, use, cand, true, &depends_on, - &can_autoinc); + &can_autoinc, NULL); BITMAP_FREE (depends_on); @@ -4326,6 +5019,8 @@ determine_use_iv_costs (struct ivopts_data *data) if (use->cost_map[j].depends_on) bitmap_print (dump_file, use->cost_map[j].depends_on, "",""); + if (use->cost_map[j].inv_expr_id != -1) + fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id); fprintf (dump_file, "\n"); } @@ -4356,9 +5051,14 @@ determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) base = cand->iv->base; cost_base = force_var_cost (data, base, NULL); + /* It will be exceptional that the iv register happens to be initialized with + the proper value at no cost. In general, there will at least be a regcopy + or a const set. */ + if (cost_base.cost == 0) + cost_base.cost = COSTS_N_INSNS (1); cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed); - cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop); + cost = cost_step + adjust_setup_cost (data, cost_base.cost); /* Prefer the original ivs unless we may gain something by replacing it. The reason is to make debugging simpler; so this is not relevant for @@ -4411,7 +5111,8 @@ ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size) { /* We add size to the cost, so that we prefer eliminating ivs if possible. */ - return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed); + return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed, + data->body_includes_call); } /* For each size of the induction variable set determine the penalty. */ @@ -4426,30 +5127,11 @@ determine_set_costs (struct ivopts_data *data) struct loop *loop = data->current_loop; bitmap_iterator bi; - /* We use the following model (definitely improvable, especially the - cost function -- TODO): - - We estimate the number of registers available (using MD data), name it A. - - We estimate the number of registers used by the loop, name it U. This - number is obtained as the number of loop phi nodes (not counting virtual - registers and bivs) + the number of variables from outside of the loop. - - We set a reserve R (free regs that are used for temporary computations, - etc.). For now the reserve is a constant 3. - - Let I be the number of induction variables. - - -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage - make a lot of ivs without a reason). - -- if A - R < U + I <= A, the cost is I * PRES_COST - -- if U + I > A, the cost is I * PRES_COST and - number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */ - if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Global costs:\n"); fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); + fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); } @@ -4519,14 +5201,26 @@ cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b) return false; } + +/* Returns candidate by that USE is expressed in IVS. */ + +static struct cost_pair * +iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use) +{ + return ivs->cand_for_use[use->id]; +} + /* Computes the cost field of IVS structure. */ static void iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs) { comp_cost cost = ivs->cand_use_cost; + cost.cost += ivs->cand_cost; - cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs); + + cost.cost += ivopts_global_cost_for_size (data, + ivs->n_regs + ivs->num_used_inv_expr); ivs->cost = cost; } @@ -4546,7 +5240,7 @@ iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs) { ivs->n_invariant_uses[iid]--; if (ivs->n_invariant_uses[iid] == 0) - ivs->n_regs--; + ivs->n_regs--; } } @@ -4583,6 +5277,13 @@ iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs, ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost); iv_ca_set_remove_invariants (ivs, cp->depends_on); + + if (cp->inv_expr_id != -1) + { + ivs->used_inv_expr[cp->inv_expr_id]--; + if (ivs->used_inv_expr[cp->inv_expr_id] == 0) + ivs->num_used_inv_expr--; + } iv_ca_recount_cost (data, ivs); } @@ -4601,7 +5302,7 @@ iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs) { ivs->n_invariant_uses[iid]++; if (ivs->n_invariant_uses[iid] == 1) - ivs->n_regs++; + ivs->n_regs++; } } @@ -4640,19 +5341,28 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs, ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost); iv_ca_set_add_invariants (ivs, cp->depends_on); + + if (cp->inv_expr_id != -1) + { + ivs->used_inv_expr[cp->inv_expr_id]++; + if (ivs->used_inv_expr[cp->inv_expr_id] == 1) + ivs->num_used_inv_expr++; + } iv_ca_recount_cost (data, ivs); } } /* Extend set IVS by expressing USE by some of the candidates in it - if possible. */ + if possible. All important candidates will be considered + if IMPORTANT_CANDIDATES is true. */ static void iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs, - struct iv_use *use) + struct iv_use *use, bool important_candidates) { struct cost_pair *best_cp = NULL, *cp; bitmap_iterator bi; + bitmap cands; unsigned i; gcc_assert (ivs->upto >= use->id); @@ -4663,9 +5373,12 @@ iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs, ivs->bad_uses++; } - EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) + cands = (important_candidates ? data->important_candidates : ivs->cands); + EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi) { - cp = get_use_iv_cost (data, use, iv_cand (data, i)); + struct iv_cand *cand = iv_cand (data, i); + + cp = get_use_iv_cost (data, use, cand); if (cheaper_cost_pair (cp, best_cp)) best_cp = cp; @@ -4745,14 +5458,6 @@ iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2) return l1; } -/* Returns candidate by that USE is expressed in IVS. */ - -static struct cost_pair * -iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use) -{ - return ivs->cand_for_use[use->id]; -} - /* Reverse the list of changes DELTA, forming the inverse to it. */ static struct iv_ca_delta * @@ -4850,6 +5555,8 @@ iv_ca_new (struct ivopts_data *data) nw->cand_cost = 0; nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1); nw->cost = zero_cost; + nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1); + nw->num_used_inv_expr = 0; return nw; } @@ -4863,6 +5570,7 @@ iv_ca_free (struct iv_ca **ivs) free ((*ivs)->n_cand_uses); BITMAP_FREE ((*ivs)->cands); free ((*ivs)->n_invariant_uses); + free ((*ivs)->used_inv_expr); free (*ivs); *ivs = NULL; } @@ -4876,8 +5584,21 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) unsigned i; comp_cost cost = iv_ca_cost (ivs); - fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity); - bitmap_print (file, ivs->cands, " candidates ","\n"); + fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity); + fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n", + ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity); + bitmap_print (file, ivs->cands, " candidates: ","\n"); + + for (i = 0; i < ivs->upto; i++) + { + struct iv_use *use = iv_use (data, i); + struct cost_pair *cp = iv_ca_cand_for_use (ivs, use); + if (cp) + fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n", + use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity); + else + fprintf (file, " use:%d --> ??\n", use->id); + } for (i = 1; i <= data->max_inv_id; i++) if (ivs->n_invariant_uses[i]) @@ -4885,17 +5606,18 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) fprintf (file, "%s%d", pref, i); pref = ", "; } - fprintf (file, "\n"); + fprintf (file, "\n\n"); } /* Try changing candidate in IVS to CAND for each use. Return cost of the new set, and store differences in DELTA. Number of induction variables - in the new set is stored to N_IVS. */ + in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true + the function will try to find a solution with mimimal iv candidates. */ static comp_cost iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, struct iv_cand *cand, struct iv_ca_delta **delta, - unsigned *n_ivs) + unsigned *n_ivs, bool min_ncand) { unsigned i; comp_cost cost; @@ -4916,11 +5638,11 @@ iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, if (!new_cp) continue; - if (!iv_ca_has_deps (ivs, new_cp)) + if (!min_ncand && !iv_ca_has_deps (ivs, new_cp)) continue; - if (!cheaper_cost_pair (new_cp, old_cp)) - continue; + if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp)) + continue; *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta); } @@ -4971,8 +5693,9 @@ iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs, cp = get_use_iv_cost (data, use, cnd); if (!cp) continue; + if (!iv_ca_has_deps (ivs, cp)) - continue; + continue; if (!cheaper_cost_pair (cp, new_cp)) continue; @@ -5069,11 +5792,13 @@ iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs, } /* Tries to extend the sets IVS in the best possible way in order - to express the USE. */ + to express the USE. If ORIGINALP is true, prefer candidates from + the original set of IVs, otherwise favor important candidates not + based on any memory object. */ static bool try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, - struct iv_use *use) + struct iv_use *use, bool originalp) { comp_cost best_cost, act_cost; unsigned i; @@ -5082,17 +5807,26 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, struct iv_ca_delta *best_delta = NULL, *act_delta; struct cost_pair *cp; - iv_ca_add_use (data, ivs, use); + iv_ca_add_use (data, ivs, use, false); best_cost = iv_ca_cost (ivs); cp = iv_ca_cand_for_use (ivs, use); + if (!cp) + { + ivs->upto--; + ivs->bad_uses--; + iv_ca_add_use (data, ivs, use, true); + best_cost = iv_ca_cost (ivs); + cp = iv_ca_cand_for_use (ivs, use); + } if (cp) { best_delta = iv_ca_delta_add (use, NULL, cp, NULL); iv_ca_set_no_cp (data, ivs, use); } - /* First try important candidates not based on any memory object. Only if + /* If ORIGINALP is true, try to find the original IV for the use. Otherwise + first try important candidates not based on any memory object. Only if this fails, try the specific ones. Rationale -- in loops with many variables the best choice often is to use just one generic biv. If we added here many ivs specific to the uses, the optimization algorithm later @@ -5104,18 +5838,22 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, { cand = iv_cand (data, i); - if (cand->iv->base_object != NULL_TREE) + if (originalp && cand->pos !=IP_ORIGINAL) continue; - if (iv_ca_cand_used_p (ivs, cand)) + if (!originalp && cand->iv->base_object != NULL_TREE) continue; + if (iv_ca_cand_used_p (ivs, cand)) + continue; + cp = get_use_iv_cost (data, use, cand); if (!cp) continue; iv_ca_set_cp (data, ivs, use, cp); - act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL); + act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, + true); iv_ca_set_no_cp (data, ivs, use); act_delta = iv_ca_delta_add (use, NULL, cp, act_delta); @@ -5140,15 +5878,20 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, continue; /* Already tried this. */ - if (cand->important && cand->iv->base_object == NULL_TREE) - continue; + if (cand->important) + { + if (originalp && cand->pos == IP_ORIGINAL) + continue; + if (!originalp && cand->iv->base_object == NULL_TREE) + continue; + } if (iv_ca_cand_used_p (ivs, cand)) continue; act_delta = NULL; iv_ca_set_cp (data, ivs, use, cp); - act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL); + act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true); iv_ca_set_no_cp (data, ivs, use); act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use), cp, act_delta); @@ -5175,13 +5918,13 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, /* Finds an initial assignment of candidates to uses. */ static struct iv_ca * -get_initial_solution (struct ivopts_data *data) +get_initial_solution (struct ivopts_data *data, bool originalp) { struct iv_ca *ivs = iv_ca_new (data); unsigned i; for (i = 0; i < n_iv_uses (data); i++) - if (!try_add_cand_for (data, ivs, iv_use (data, i))) + if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp)) { iv_ca_free (&ivs); return NULL; @@ -5208,7 +5951,7 @@ try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs) if (iv_ca_cand_used_p (ivs, cand)) continue; - acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs); + acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false); if (!act_delta) continue; @@ -5253,14 +5996,12 @@ try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs) solution and remove the unused ivs while this improves the cost. */ static struct iv_ca * -find_optimal_iv_set (struct ivopts_data *data) +find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) { - unsigned i; struct iv_ca *set; - struct iv_use *use; /* Get the initial solution. */ - set = get_initial_solution (data); + set = get_initial_solution (data, originalp); if (!set) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -5283,11 +6024,46 @@ find_optimal_iv_set (struct ivopts_data *data) } } + return set; +} + +static struct iv_ca * +find_optimal_iv_set (struct ivopts_data *data) +{ + unsigned i; + struct iv_ca *set, *origset; + struct iv_use *use; + comp_cost cost, origcost; + + /* Determine the cost based on a strategy that starts with original IVs, + and try again using a strategy that prefers candidates not based + on any IVs. */ + origset = find_optimal_iv_set_1 (data, true); + set = find_optimal_iv_set_1 (data, false); + + if (!origset && !set) + return NULL; + + origcost = origset ? iv_ca_cost (origset) : infinite_cost; + cost = set ? iv_ca_cost (set) : infinite_cost; + if (dump_file && (dump_flags & TDF_DETAILS)) { - comp_cost cost = iv_ca_cost (set); - fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity); + fprintf (dump_file, "Original cost %d (complexity %d)\n\n", + origcost.cost, origcost.complexity); + fprintf (dump_file, "Final cost %d (complexity %d)\n\n", + cost.cost, cost.complexity); + } + + /* Choose the one with the best cost. */ + if (compare_costs (origcost, cost) <= 0) + { + if (set) + iv_ca_free (&set); + set = origset; } + else if (origset) + iv_ca_free (&origset); for (i = 0; i < n_iv_uses (data); i++) { @@ -5335,7 +6111,6 @@ create_new_iv (struct ivopts_data *data, struct iv_cand *cand) /* Rewrite the increment so that it uses var_before directly. */ find_interesting_uses_op (data, cand->var_after)->selected = cand; - return; } @@ -5363,8 +6138,18 @@ create_new_ivs (struct ivopts_data *data, struct iv_ca *set) cand = iv_cand (data, i); create_new_iv (data, cand); } -} + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nSelected IV set: \n"); + EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) + { + cand = iv_cand (data, i); + dump_cand (dump_file, cand); + } + fprintf (dump_file, "\n"); + } +} /* Rewrites USE (definition of iv used in a nonlinear expression) using candidate CAND. */ @@ -5385,35 +6170,24 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data, if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) { - tree step, ctype, utype; - enum tree_code incr_code = PLUS_EXPR, old_code; + enum tree_code stmt_code; gcc_assert (is_gimple_assign (use->stmt)); gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); - step = cand->iv->step; - ctype = TREE_TYPE (step); - utype = TREE_TYPE (cand->var_after); - if (TREE_CODE (step) == NEGATE_EXPR) - { - incr_code = MINUS_EXPR; - step = TREE_OPERAND (step, 0); - } - /* Check whether we may leave the computation unchanged. This is the case only if it does not rely on other computations in the loop -- otherwise, the computation we rely upon may be removed in remove_unused_ivs, thus leading to ICE. */ - old_code = gimple_assign_rhs_code (use->stmt); - if (old_code == PLUS_EXPR - || old_code == MINUS_EXPR - || old_code == POINTER_PLUS_EXPR) + stmt_code = gimple_assign_rhs_code (use->stmt); + if (stmt_code == PLUS_EXPR + || stmt_code == MINUS_EXPR + || stmt_code == POINTER_PLUS_EXPR) { if (gimple_assign_rhs1 (use->stmt) == cand->var_before) op = gimple_assign_rhs2 (use->stmt); - else if (old_code != MINUS_EXPR - && gimple_assign_rhs2 (use->stmt) == cand->var_before) + else if (gimple_assign_rhs2 (use->stmt) == cand->var_before) op = gimple_assign_rhs1 (use->stmt); else op = NULL_TREE; @@ -5421,24 +6195,13 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data, else op = NULL_TREE; - if (op - && (TREE_CODE (op) == INTEGER_CST - || operand_equal_p (op, step, 0))) + if (op && expr_invariant_in_loop_p (data->current_loop, op)) return; - - /* Otherwise, add the necessary computations to express - the iv. */ - op = fold_convert (ctype, cand->var_before); - comp = fold_convert (utype, - build2 (incr_code, ctype, op, - unshare_expr (step))); - } - else - { - comp = get_computation (data->current_loop, use, cand); - gcc_assert (comp != NULL_TREE); } + comp = get_computation (data->current_loop, use, cand); + gcc_assert (comp != NULL_TREE); + switch (gimple_code (use->stmt)) { case GIMPLE_PHI: @@ -5460,12 +6223,31 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data, gcc_unreachable (); } - op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt), - true, GSI_SAME_STMT); + if (!valid_gimple_rhs_p (comp) + || (gimple_code (use->stmt) != GIMPLE_PHI + /* We can't allow re-allocating the stmt as it might be pointed + to still. */ + && (get_gimple_rhs_num_ops (TREE_CODE (comp)) + >= gimple_num_ops (gsi_stmt (bsi))))) + { + comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE, + true, GSI_SAME_STMT); + if (POINTER_TYPE_P (TREE_TYPE (tgt))) + { + duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt)); + /* As this isn't a plain copy we have to reset alignment + information. */ + if (SSA_NAME_PTR_INFO (comp)) + { + SSA_NAME_PTR_INFO (comp)->align = 1; + SSA_NAME_PTR_INFO (comp)->misalign = 0; + } + } + } if (gimple_code (use->stmt) == GIMPLE_PHI) { - ass = gimple_build_assign (tgt, op); + ass = gimple_build_assign (tgt, comp); gsi_insert_before (&bsi, ass, GSI_SAME_STMT); bsi = gsi_for_stmt (use->stmt); @@ -5473,62 +6255,92 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data, } else { - gimple_assign_set_rhs_from_tree (&bsi, op); + gimple_assign_set_rhs_from_tree (&bsi, comp); use->stmt = gsi_stmt (bsi); } } -/* Replaces ssa name in index IDX by its basic variable. Callback for - for_each_index. */ +/* Performs a peephole optimization to reorder the iv update statement with + a mem ref to enable instruction combining in later phases. The mem ref uses + the iv value before the update, so the reordering transformation requires + adjustment of the offset. CAND is the selected IV_CAND. -static bool -idx_remove_ssa_names (tree base, tree *idx, - void *data ATTRIBUTE_UNUSED) -{ - tree *op; + Example: - if (TREE_CODE (*idx) == SSA_NAME) - *idx = SSA_NAME_VAR (*idx); + t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset + iv2 = iv1 + 1; - if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) - { - op = &TREE_OPERAND (base, 2); - if (*op - && TREE_CODE (*op) == SSA_NAME) - *op = SSA_NAME_VAR (*op); - op = &TREE_OPERAND (base, 3); - if (*op - && TREE_CODE (*op) == SSA_NAME) - *op = SSA_NAME_VAR (*op); - } + if (t < val) (1) + goto L; + goto Head; - return true; -} -/* Unshares REF and replaces ssa names inside it by their basic variables. */ + directly propagating t over to (1) will introduce overlapping live range + thus increase register pressure. This peephole transform it into: -static tree -unshare_and_remove_ssa_names (tree ref) -{ - ref = unshare_expr (ref); - for_each_index (&ref, idx_remove_ssa_names, NULL); - - return ref; -} -/* Copies the reference information from OLD_REF to NEW_REF. */ + iv2 = iv1 + 1; + t = MEM_REF (base, iv2, 8, 8); + if (t < val) + goto L; + goto Head; +*/ static void -copy_ref_info (tree new_ref, tree old_ref) +adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use) { - if (TREE_CODE (old_ref) == TARGET_MEM_REF) - copy_mem_ref_info (new_ref, old_ref); - else + tree var_after; + gimple iv_update, stmt; + basic_block bb; + gimple_stmt_iterator gsi, gsi_iv; + + if (cand->pos != IP_NORMAL) + return; + + var_after = cand->var_after; + iv_update = SSA_NAME_DEF_STMT (var_after); + + bb = gimple_bb (iv_update); + gsi = gsi_last_nondebug_bb (bb); + stmt = gsi_stmt (gsi); + + /* Only handle conditional statement for now. */ + if (gimple_code (stmt) != GIMPLE_COND) + return; + + gsi_prev_nondebug (&gsi); + stmt = gsi_stmt (gsi); + if (stmt != iv_update) + return; + + gsi_prev_nondebug (&gsi); + if (gsi_end_p (gsi)) + return; + + stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_ASSIGN) + return; + + if (stmt != use->stmt) + return; + + if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) + return; + + if (dump_file && (dump_flags & TDF_DETAILS)) { - TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref); - TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref); - TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref); + fprintf (dump_file, "Reordering \n"); + print_gimple_stmt (dump_file, iv_update, 0, 0); + print_gimple_stmt (dump_file, use->stmt, 0, 0); + fprintf (dump_file, "\n"); } + + gsi = gsi_for_stmt (use->stmt); + gsi_iv = gsi_for_stmt (iv_update); + gsi_move_before (&gsi_iv, &gsi); + + cand->pos = IP_BEFORE_USE; + cand->incremented_at = use->stmt; } /* Rewrites USE (address that is an iv) using candidate CAND. */ @@ -5540,9 +6352,10 @@ rewrite_use_address (struct ivopts_data *data, aff_tree aff; gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); tree base_hint = NULL_TREE; - tree ref; + tree ref, iv; bool ok; + adjust_iv_update_pos (cand, use); ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff); gcc_assert (ok); unshare_aff_combination (&aff); @@ -5561,8 +6374,10 @@ rewrite_use_address (struct ivopts_data *data, if (cand->iv->base_object) base_hint = var_at_stmt (data->current_loop, cand, use->stmt); - ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint, - data->speed); + iv = var_at_stmt (data->current_loop, cand, use->stmt); + ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, + reference_alias_ptr_type (*use->op_p), + iv, base_hint, data->speed); copy_ref_info (ref, *use->op_p); *use->op_p = ref; } @@ -5587,7 +6402,12 @@ rewrite_use_compare (struct ivopts_data *data, tree var_type = TREE_TYPE (var); gimple_seq stmts; - compare = iv_elimination_compare (data, use); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Replacing exit test: "); + print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM); + } + compare = cp->comp; bound = unshare_expr (fold_convert (var_type, bound)); op = force_gimple_operand (bound, &stmts, true, NULL_TREE); if (stmts) @@ -5688,6 +6508,19 @@ remove_unused_ivs (struct ivopts_data *data) BITMAP_FREE (toremove); } +/* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback + for pointer_map_traverse. */ + +static bool +free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value, + void *data ATTRIBUTE_UNUSED) +{ + struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value; + + free (niter); + return true; +} + /* Frees data allocated by the optimization of a single loop. */ static void @@ -5699,6 +6532,7 @@ free_loop_data (struct ivopts_data *data) if (data->niters) { + pointer_map_traverse (data->niters, free_tree_niter_desc, NULL); pointer_map_destroy (data->niters); data->niters = NULL; } @@ -5708,8 +6542,7 @@ free_loop_data (struct ivopts_data *data) struct version_info *info; info = ver_info (data, i); - if (info->iv) - free (info->iv); + free (info->iv); info->iv = NULL; info->has_nonlin_use = false; info->preserve_biv = false; @@ -5736,8 +6569,7 @@ free_loop_data (struct ivopts_data *data) { struct iv_cand *cand = iv_cand (data, i); - if (cand->iv) - free (cand->iv); + free (cand->iv); if (cand->depends_on) BITMAP_FREE (cand->depends_on); free (cand); @@ -5753,10 +6585,13 @@ free_loop_data (struct ivopts_data *data) data->max_inv_id = 0; - for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++) + FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj) SET_DECL_RTL (obj, NULL_RTX); VEC_truncate (tree, decl_rtl_to_reset, 0); + + htab_empty (data->inv_expr_tab); + data->inv_expr_id = 0; } /* Finalizes data structures used by the iv optimization pass. LOOPS is the @@ -5773,6 +6608,26 @@ tree_ssa_iv_optimize_finalize (struct ivopts_data *data) VEC_free (tree, heap, decl_rtl_to_reset); VEC_free (iv_use_p, heap, data->iv_uses); VEC_free (iv_cand_p, heap, data->iv_candidates); + htab_delete (data->inv_expr_tab); +} + +/* Returns true if the loop body BODY includes any function calls. */ + +static bool +loop_body_includes_call (basic_block *body, unsigned num_nodes) +{ + gimple_stmt_iterator gsi; + unsigned i; + + for (i = 0; i < num_nodes; i++) + for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (is_gimple_call (stmt) + && !is_inexpensive_builtin (gimple_call_fndecl (stmt))) + return true; + } + return false; } /* Optimizes the LOOP. Returns true if anything changed. */ @@ -5782,7 +6637,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) { bool changed = false; struct iv_ca *iv_ca; - edge exit; + edge exit = single_dom_exit (loop); basic_block *body; gcc_assert (!data->niters); @@ -5793,7 +6648,6 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) { fprintf (dump_file, "Processing loop %d\n", loop->num); - exit = single_dom_exit (loop); if (exit) { fprintf (dump_file, " single exit %d -> %d, exit condition ", @@ -5806,9 +6660,12 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) } body = get_loop_body (loop); + data->body_includes_call = loop_body_includes_call (body, loop->num_nodes); renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes); free (body); + data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit); + /* For each ssa name determines whether it behaves as an induction variable in some loop. */ if (!find_induction_variables (data))