static enum machine_mode hfa_element_mode (tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
tree, int *, int);
-static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
- tree, bool);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
-#undef TARGET_PASS_BY_REFERENCE
-#define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
- HOST_WIDE_INT addend)
+ rtx orig_op1, HOST_WIDE_INT addend)
{
rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
- rtx orig_op0 = op0, orig_op1 = op1;
+ rtx orig_op0 = op0;
HOST_WIDE_INT addend_lo, addend_hi;
- addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
- addend_hi = addend - addend_lo;
-
switch (tls_kind)
{
case TLS_MODEL_GLOBAL_DYNAMIC:
break;
case TLS_MODEL_INITIAL_EXEC:
+ addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
+ addend_hi = addend - addend_lo;
+
op1 = plus_constant (op1, addend_hi);
addend = addend_lo;
tls_kind = tls_symbolic_operand_type (sym);
if (tls_kind)
- return ia64_expand_tls_address (tls_kind, op0, sym, addend);
+ return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
if (any_offset_symbol_operand (sym, mode))
addend = 0;
return true;
}
+/* Emit an integral vector widening sum operation: accumulate the
+   widened elements of operands[1] into operands[0], starting from the
+   running sum in operands[2].  UNSIGNEDP selects zero extension of
+   the narrow elements; otherwise they are sign extended.  */
+
+void
+ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
+{
+ rtx l, h, x, s;
+ enum machine_mode wmode, mode;
+ rtx (*unpack_l) (rtx, rtx, rtx);
+ rtx (*unpack_h) (rtx, rtx, rtx);
+ rtx (*plus) (rtx, rtx, rtx);
+
+ wmode = GET_MODE (operands[0]);
+ mode = GET_MODE (operands[1]);
+
+ /* Select the unpack and add patterns for the narrow element mode;
+    only V8QI->V4HI and V4HI->V2SI widenings are supported.  */
+ switch (mode)
+ {
+ case V8QImode:
+ unpack_l = gen_unpack1_l;
+ unpack_h = gen_unpack1_h;
+ plus = gen_addv4hi3;
+ break;
+ case V4HImode:
+ unpack_l = gen_unpack2_l;
+ unpack_h = gen_unpack2_h;
+ plus = gen_addv2si3;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Fill in x with the sign extension of each element in op1: zero for
+    the unsigned case, otherwise an all-ones-per-negative-element mask
+    computed by the LT-zero vector compare.  */
+ if (unsignedp)
+ x = CONST0_RTX (mode);
+ else
+ {
+ bool neg;
+
+ x = gen_reg_rtx (mode);
+
+ /* The compare must not require its operands swapped (NEG false).  */
+ neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
+ CONST0_RTX (mode));
+ gcc_assert (!neg);
+ }
+
+ l = gen_reg_rtx (wmode);
+ h = gen_reg_rtx (wmode);
+ s = gen_reg_rtx (wmode);
+
+ /* Interleave op1 with its extension X to widen the low and high
+    halves, then fold both widened halves into the accumulator.  */
+ emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
+ emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
+ emit_insn (plus (s, l, operands[2]));
+ emit_insn (plus (operands[0], h, s));
+}
+
+/* Emit a signed or unsigned V8QI dot product operation: multiply the
+   elements of operands[1] and operands[2] pairwise, widen, and sum the
+   products together with the V2SI accumulator operands[3], storing the
+   result in operands[0].  UNSIGNEDP selects zero extension.  */
+
+void
+ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
+{
+ rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
+
+ /* Fill in x1 and x2 with the sign extension of each element:
+    zero for unsigned, else LT-zero compare masks of op1 and op2.  */
+ if (unsignedp)
+ x1 = x2 = CONST0_RTX (V8QImode);
+ else
+ {
+ bool neg;
+
+ x1 = gen_reg_rtx (V8QImode);
+ x2 = gen_reg_rtx (V8QImode);
+
+ /* Neither compare may require swapped operands (NEG false).  */
+ neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
+ CONST0_RTX (V8QImode));
+ gcc_assert (!neg);
+ neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
+ CONST0_RTX (V8QImode));
+ gcc_assert (!neg);
+ }
+
+ l1 = gen_reg_rtx (V4HImode);
+ l2 = gen_reg_rtx (V4HImode);
+ h1 = gen_reg_rtx (V4HImode);
+ h2 = gen_reg_rtx (V4HImode);
+
+ /* Widen both inputs to V4HI: low and high halves separately,
+    interleaved with their extension masks.  */
+ emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
+ emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
+ emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
+ emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
+
+ /* Four V2SI partial products from the parallel multiplies.  */
+ p1 = gen_reg_rtx (V2SImode);
+ p2 = gen_reg_rtx (V2SImode);
+ p3 = gen_reg_rtx (V2SImode);
+ p4 = gen_reg_rtx (V2SImode);
+ emit_insn (gen_pmpy2_r (p1, l1, l2));
+ emit_insn (gen_pmpy2_l (p2, l1, l2));
+ emit_insn (gen_pmpy2_r (p3, h1, h2));
+ emit_insn (gen_pmpy2_l (p4, h1, h2));
+
+ /* Sum the partial products and the incoming accumulator op3.  */
+ s1 = gen_reg_rtx (V2SImode);
+ s2 = gen_reg_rtx (V2SImode);
+ s3 = gen_reg_rtx (V2SImode);
+ emit_insn (gen_addv2si3 (s1, p1, p2));
+ emit_insn (gen_addv2si3 (s2, p3, p4));
+ emit_insn (gen_addv2si3 (s3, s1, operands[3]));
+ emit_insn (gen_addv2si3 (operands[0], s2, s3));
+}
+
/* Emit the appropriate sequence for a call. */
void
enum insn_code icode;
/* Special case for using fetchadd. */
- if ((mode == SImode || mode == DImode) && fetchadd_operand (val, mode))
+ if ((mode == SImode || mode == DImode)
+ && (code == PLUS || code == MINUS)
+ && fetchadd_operand (val, mode))
{
+ if (code == MINUS)
+ val = GEN_INT (-INTVAL (val));
+
if (!old_dst)
old_dst = gen_reg_rtx (mode);
emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
- emit_cmp_and_jump_insns (cmp_reg, old_reg, EQ, NULL, DImode, true, label);
+ emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
}
\f
/* Begin the assembly file. */
return PARM_BOUNDARY;
}
-/* Variable sized types are passed by reference. */
-/* ??? At present this is a GCC extension to the IA-64 ABI. */
-
-static bool
-ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
- tree type, bool named ATTRIBUTE_UNUSED)
-{
- return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
-}
-
/* True if it is OK to do sibling call optimization for the specified
call expression EXP. DECL will be the called function, or NULL if
this is an indirect call. */
if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
{
- tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
- build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
- t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
- build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
- t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
+ tree t = build2 (PLUS_EXPR, TREE_TYPE (valist), valist,
+ build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
+ t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
+ build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
gimplify_and_add (t, pre_p);
}
\f
-/* If the following function returns TRUE, we will use the the DFA
+/* If the following function returns TRUE, we will use the DFA
insn scheduler. */
static int
set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
+ /* HP-UX 11.23 libc does not have a function for unsigned
+ SImode-to-TFmode conversion. */
+ set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}
/* Rename all the TFmode libfuncs using the HPUX conventions. */
}
}
+/* Implement the FUNCTION_PROFILER macro: emit the assembly that calls
+   _mcount at function entry, either directly or through a function
+   descriptor when r15 must be preserved.  */
+
void
ia64_output_function_profiler (FILE *file, int labelno)
{
+ bool indirect_call;
+
+ /* If the function needs a static chain and the static chain
+ register is r15, we use an indirect call so as to bypass
+ the PLT stub in case the executable is dynamically linked,
+ because the stub clobbers r15 as per 5.3.6 of the psABI.
+ We don't need to do that in non-canonical PIC mode.  */
+
+ if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
+ {
+ gcc_assert (STATIC_CHAIN_REGNUM == 15);
+ indirect_call = true;
+ }
+ else
+ indirect_call = false;
+
if (TARGET_GNU_AS)
fputs ("\t.prologue 4, r40\n", file);
else
fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
if (NO_PROFILE_COUNTERS)
- fputs ("\tmov out3 = r0\n\t;;\n", file);
+ fputs ("\tmov out3 = r0\n", file);
else
{
char buf[20];
fputs ("\taddl out3 = @ltoff(", file);
assemble_name (file, buf);
if (TARGET_AUTO_PIC)
- fputs (")\n\t;;\n", file);
+ fputs (")\n", file);
else
- fputs ("), r1\n\t;;\n", file);
+ fputs ("), r1\n", file);
}
+ /* For the indirect call, fetch the GOT slot holding _mcount's
+ function descriptor address into r14.  */
+ if (indirect_call)
+ fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
+ fputs ("\t;;\n", file);
+
fputs ("\t.save rp, r42\n", file);
fputs ("\tmov out2 = b0\n", file);
+ if (indirect_call)
+ fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
fputs ("\t.body\n", file);
fputs ("\tmov out1 = r1\n", file);
- fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
+ if (indirect_call)
+ {
+ /* Load the descriptor: entry point into b6, then gp into r1
+ (descriptor layout per the psABI — see note above).  */
+ fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
+ fputs ("\tmov b6 = r16\n", file);
+ fputs ("\tld8 r1 = [r14]\n", file);
+ fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
+ }
+ else
+ fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}
static GTY(()) rtx mcount_func_rtx;