From 9c42dd286eb8af0872f07596312c4f3c6ef6af03 Mon Sep 17 00:00:00 2001
From: rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Sun, 29 Oct 2006 15:18:24 +0000
Subject: [PATCH] 2006-10-29  Richard Guenther  <rguenther@suse.de>

	* genopinit.c (optabs): Change lfloor_optab and lceil_optab
	to conversion optabs.
	* optabs.c (init_optabs): Initialize lfloor_optab and lceil_optab
	as conversion optab.
	* optabs.h (enum optab_index): Remove OTI_lfloor and OTI_lceil.
	(enum convert_optab_index): Add COI_lfloor and COI_lceil.
	(lfloor_optab, lceil_optab): Adjust defines.
	* builtins.c (expand_builtin_int_roundingfn): Adjust for
	lfloor and lceil optabs now being conversion optabs.
	* config/i386/i386-protos.h (ix86_expand_lfloorceil): Declare.
	* config/i386/i386.c (ix86_expand_sse_compare_and_jump):
	New static helper function.
	(ix86_expand_lfloorceil): New function to expand lfloor and
	lceil inline.
	* config/i386/i386.md (lfloor<mode>2): Split into ...
	(lfloorxf<mode>2): ... x87 variant
	(lfloor<mode>di2, lfloor<mode>si2): ... and SSE variants
	using ix86_expand_lfloorceil.
	(lceil<mode>2, lceilxf<mode>2, lceil<mode>di2, lceil<mode>si2):
	Likewise.
	* doc/md.texi (lfloorMN, lceilMN): Document.

	* gcc.target/i386/math-torture/lfloor.c: New testcase.
	* gcc.target/i386/math-torture/lceil.c: Likewise.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@118143 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                                      | 24 +++++++++
 gcc/builtins.c                                     | 57 +++++++++-----------
 gcc/config/i386/i386-protos.h                      |  1 +
 gcc/config/i386/i386.c                             | 63 ++++++++++++++++++++++
 gcc/config/i386/i386.md                            | 44 ++++++++++++++-
 gcc/doc/md.texi                                    | 12 +++++
 gcc/genopinit.c                                    |  4 +-
 gcc/optabs.c                                       |  8 ++-
 gcc/optabs.h                                       |  8 +--
 gcc/testsuite/ChangeLog                            |  5 ++
 gcc/testsuite/gcc.target/i386/math-torture/lceil.c | 26 +++++++++
 .../gcc.target/i386/math-torture/lfloor.c          | 26 +++++++++
 12 files changed, 236 insertions(+), 42 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/math-torture/lceil.c
 create mode 100644 gcc/testsuite/gcc.target/i386/math-torture/lfloor.c
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ce69fe704c5..9289589ae7c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,27 @@
+2006-10-29  Richard Guenther  <rguenther@suse.de>
+
+	* genopinit.c (optabs): Change lfloor_optab and lceil_optab
+	to conversion optabs.
+	* optabs.c (init_optabs): Initialize lfloor_optab and lceil_optab
+	as conversion optab.
+	* optabs.h (enum optab_index): Remove OTI_lfloor and OTI_lceil.
+	(enum convert_optab_index): Add COI_lfloor and COI_lceil.
+	(lfloor_optab, lceil_optab): Adjust defines.
+	* builtins.c (expand_builtin_int_roundingfn): Adjust for
+	lfloor and lceil optabs now being conversion optabs.
+	* config/i386/i386-protos.h (ix86_expand_lfloorceil): Declare.
+	* config/i386/i386.c (ix86_expand_sse_compare_and_jump):
+	New static helper function.
+	(ix86_expand_lfloorceil): New function to expand lfloor and
+	lceil inline.
+	* config/i386/i386.md (lfloor<mode>2): Split into ...
+	(lfloorxf<mode>2): ... x87 variant
+	(lfloor<mode>di2, lfloor<mode>si2): ... and SSE variants
+	using ix86_expand_lfloorceil.
+	(lceil<mode>2, lceilxf<mode>2, lceil<mode>di2, lceil<mode>si2):
+	Likewise.
+	* doc/md.texi (lfloorMN, lceilMN): Document.
+
 2006-10-29  Richard Sandiford  <richard@codesourcery.com>
 
 	* configure.ac (HAVE_AS_NO_SHARED): New AC_DEFINE.  Test for the
diff --git a/gcc/builtins.c b/gcc/builtins.c
index aedecc9afd0..b7d23c379bd 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -2222,7 +2222,7 @@ expand_builtin_sincos (tree exp)
 static rtx
 expand_builtin_int_roundingfn (tree exp, rtx target, rtx subtarget)
 {
-  optab builtin_optab;
+  convert_optab builtin_optab;
   rtx op0, insns, tmp;
   tree fndecl = get_callee_fndecl (exp);
   tree arglist = TREE_OPERAND (exp, 1);
@@ -2257,44 +2257,37 @@ expand_builtin_int_roundingfn (tree exp, rtx target, rtx subtarget)
   /* Make a suitable register to place result in.  */
   mode = TYPE_MODE (TREE_TYPE (exp));
 
-  /* Before working hard, check whether the instruction is available.  */
-  if (builtin_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
-    {
-      target = gen_reg_rtx (mode);
-
-      /* Wrap the computation of the argument in a SAVE_EXPR, as we may
-	 need to expand the argument again.  This way, we will not perform
-	 side-effects more the once.  */
-      narg = builtin_save_expr (arg);
-      if (narg != arg)
-	{
-	  arg = narg;
-	  arglist = build_tree_list (NULL_TREE, arg);
-	  exp = build_function_call_expr (fndecl, arglist);
-	}
-
-      op0 = expand_expr (arg, subtarget, VOIDmode, 0);
+  target = gen_reg_rtx (mode);
 
-      start_sequence ();
+  /* Wrap the computation of the argument in a SAVE_EXPR, as we may
+     need to expand the argument again.  This way, we will not perform
+     side-effects more the once.  */
+  narg = builtin_save_expr (arg);
+  if (narg != arg)
+    {
+      arg = narg;
+      arglist = build_tree_list (NULL_TREE, arg);
+      exp = build_function_call_expr (fndecl, arglist);
+    }
 
-      /* Compute into TARGET.
-	 Set TARGET to wherever the result comes back.  */
-      target = expand_unop (mode, builtin_optab, op0, target, 0);
+  op0 = expand_expr (arg, subtarget, VOIDmode, 0);
 
-      if (target != 0)
-	{
-	  /* Output the entire sequence.  */
-	  insns = get_insns ();
-	  end_sequence ();
-	  emit_insn (insns);
-	  return target;
-	}
+  start_sequence ();
 
-      /* If we were unable to expand via the builtin, stop the sequence
-	 (without outputting the insns).  */
+  /* Compute into TARGET.  */
+  if (expand_sfix_optab (target, op0, builtin_optab))
+    {
+      /* Output the entire sequence.  */
+      insns = get_insns ();
       end_sequence ();
+      emit_insn (insns);
+      return target;
     }
 
+  /* If we were unable to expand via the builtin, stop the sequence
+     (without outputting the insns).  */
+  end_sequence ();
+
   /* Fall back to floating point rounding optab.  */
   fallback_fndecl = mathfn_built_in (TREE_TYPE (arg), fallback_fn);
   /* We shouldn't get here on targets without TARGET_C99_FUNCTIONS.
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 13e80641ba8..996dcdbd097 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -158,6 +158,7 @@ extern void ix86_emit_i387_log1p (rtx, rtx);
 extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
 
 extern void ix86_expand_lround (rtx, rtx);
+extern void ix86_expand_lfloorceil (rtx, rtx, bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a535b45f12c..0291a97a8ae 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19246,6 +19246,36 @@ ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign)
 			  gen_rtx_IOR (mode, abs_value, sgn)));
 }
 
+/* Expands a comparison of OP0 with OP1 using comparison code CODE,
+   swapping the operands if SWAP_OPERANDS is true.  The expanded
+   code is a forward jump to a newly created label in case the
+   comparison is true.  The generated label rtx is returned.  */
+static rtx
+ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
+                                  bool swap_operands)
+{
+  rtx label, tmp;
+
+  if (swap_operands)
+    {
+      tmp = op0;
+      op0 = op1;
+      op1 = tmp;
+    }
+
+  label = gen_label_rtx ();
+  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
+  emit_insn (gen_rtx_SET (VOIDmode, tmp,
+			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
+  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  return label;
+}
+
 /* Expand SSE sequence for computing lround from OP1 storing
    into OP0.  */
 void
@@ -19276,4 +19306,37 @@ ix86_expand_lround (rtx op0, rtx op1)
   expand_fix (op0, adj, 0);
 }
 
+/* Expand SSE2 sequence for computing lround from OPERAND1 storing
+   into OPERAND0.  */
+void
+ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
+{
+  /* C code for the stuff we're doing below (for do_floor):
+	xi = (long)op1;
+        xi -= (double)xi > op1 ? 1 : 0;
+        return xi;
+   */
+  enum machine_mode fmode = GET_MODE (op1);
+  enum machine_mode imode = GET_MODE (op0);
+  rtx ireg, freg, label;
+
+  /* reg = (long)op1 */
+  ireg = gen_reg_rtx (imode);
+  expand_fix (ireg, op1, 0);
+
+  /* freg = (double)reg */
+  freg = gen_reg_rtx (fmode);
+  expand_float (freg, ireg, 0);
+
+  /* ireg = (freg > op1) ? ireg - 1 : ireg */
+  label = ix86_expand_sse_compare_and_jump (UNLE,
+					    freg, op1, !do_floor);
+  expand_simple_binop (imode, do_floor ? MINUS : PLUS,
+                       ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (op0, ireg);
+}
+
 #include "gt-i386.h"
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d7c273377fc..abceb151c67 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17598,7 +17598,7 @@
 	      (use (match_dup 3))])]
   "")
 
-(define_expand "lfloor<mode>2"
+(define_expand "lfloorxf<mode>2"
   [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
 		   (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
 		    UNSPEC_FIST_FLOOR))
@@ -17608,6 +17608,26 @@
    && flag_unsafe_math_optimizations"
   "")
 
+(define_expand "lfloor<mode>di2"
+  [(match_operand:DI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, true);
+  DONE;
+})
+
+(define_expand "lfloor<mode>si2"
+  [(match_operand:SI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, true);
+  DONE;
+})
+
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "frndintxf2_ceil"
   [(set (match_operand:XF 0 "register_operand" "=f")
@@ -17839,7 +17859,7 @@
 	      (use (match_dup 3))])]
   "")
 
-(define_expand "lceil<mode>2"
+(define_expand "lceilxf<mode>2"
   [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
 		   (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
 		    UNSPEC_FIST_CEIL))
@@ -17849,6 +17869,26 @@
    && flag_unsafe_math_optimizations"
   "")
 
+(define_expand "lceil<mode>di2"
+  [(match_operand:DI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, false);
+  DONE;
+})
+
+(define_expand "lceil<mode>si2"
+  [(match_operand:SI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, false);
+  DONE;
+})
+
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "frndintxf2_trunc"
   [(set (match_operand:XF 0 "register_operand" "=f")
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7b55db73c5e..e5f12c4b0bc 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3714,6 +3714,18 @@ Convert operand 1 (valid for floating point mode @var{m}) to fixed
 point mode @var{n} as a signed number rounding to nearest and away
 from zero and store in operand 0 (which has mode @var{n}).
 
+@cindex @code{lfloor@var{m}@var{n}2}
+@item @samp{lfloor@var{m}2}
+Convert operand 1 (valid for floating point mode @var{m}) to fixed
+point mode @var{n} as a signed number rounding down and store in
+operand 0 (which has mode @var{n}).
+
+@cindex @code{lceil@var{m}@var{n}2}
+@item @samp{lceil@var{m}2}
+Convert operand 1 (valid for floating point mode @var{m}) to fixed
+point mode @var{n} as a signed number rounding up and store in
+operand 0 (which has mode @var{n}).
+
 @cindex @code{copysign@var{m}3} instruction pattern
 @item @samp{copysign@var{m}3}
 Store a value with the magnitude of operand 1 and the sign of operand
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index fcd2a68ff14..eb7a193dd95 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -120,9 +120,9 @@ static const char * const optabs[] =
   "copysign_optab->handlers[$A].insn_code = CODE_FOR_$(copysign$F$a3$)",
   "sqrt_optab->handlers[$A].insn_code = CODE_FOR_$(sqrt$a2$)",
   "floor_optab->handlers[$A].insn_code = CODE_FOR_$(floor$a2$)",
-  "lfloor_optab->handlers[$A].insn_code = CODE_FOR_$(lfloor$a2$)",
+  "lfloor_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lfloor$F$a$I$b2$)",
   "ceil_optab->handlers[$A].insn_code = CODE_FOR_$(ceil$a2$)",
-  "lceil_optab->handlers[$A].insn_code = CODE_FOR_$(lceil$a2$)",
+  "lceil_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lceil$F$a$I$b2$)",
   "round_optab->handlers[$A].insn_code = CODE_FOR_$(round$a2$)",
   "btrunc_optab->handlers[$A].insn_code = CODE_FOR_$(btrunc$a2$)",
   "nearbyint_optab->handlers[$A].insn_code = CODE_FOR_$(nearbyint$a2$)",
diff --git a/gcc/optabs.c b/gcc/optabs.c
index ea6b7f3a6ca..4f9374aa132 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5299,9 +5299,7 @@ init_optabs (void)
   parity_optab = init_optab (PARITY);
   sqrt_optab = init_optab (SQRT);
   floor_optab = init_optab (UNKNOWN);
-  lfloor_optab = init_optab (UNKNOWN);
   ceil_optab = init_optab (UNKNOWN);
-  lceil_optab = init_optab (UNKNOWN);
   round_optab = init_optab (UNKNOWN);
   btrunc_optab = init_optab (UNKNOWN);
   nearbyint_optab = init_optab (UNKNOWN);
@@ -5366,6 +5364,8 @@ init_optabs (void)
   ufloat_optab = init_convert_optab (UNSIGNED_FLOAT);
   lrint_optab = init_convert_optab (UNKNOWN);
   lround_optab = init_convert_optab (UNKNOWN);
+  lfloor_optab = init_convert_optab (UNKNOWN);
+  lceil_optab = init_convert_optab (UNKNOWN);
 
   for (i = 0; i < NUM_MACHINE_MODES; i++)
     {
@@ -5489,6 +5489,10 @@ init_optabs (void)
 				 MODE_INT, MODE_FLOAT);
   init_interclass_conv_libfuncs (lround_optab, "lround",
 				 MODE_INT, MODE_FLOAT);
+  init_interclass_conv_libfuncs (lfloor_optab, "lfloor",
+				 MODE_INT, MODE_FLOAT);
+  init_interclass_conv_libfuncs (lceil_optab, "lceil",
+				 MODE_INT, MODE_FLOAT);
 
   /* sext_optab is also used for FLOAT_EXTEND.  */
   init_intraclass_conv_libfuncs (sext_optab, "extend", MODE_FLOAT, true);
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 0afc2ecc68c..0f84c9828d5 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -189,9 +189,7 @@ enum optab_index
   OTI_log1p,
   /* Rounding functions */
   OTI_floor,
-  OTI_lfloor,
   OTI_ceil,
-  OTI_lceil,
   OTI_btrunc,
   OTI_round,
   OTI_nearbyint,
@@ -337,9 +335,7 @@ extern GTY(()) optab optab_table[OTI_MAX];
 #define log2_optab (optab_table[OTI_log2])
 #define log1p_optab (optab_table[OTI_log1p])
 #define floor_optab (optab_table[OTI_floor])
-#define lfloor_optab (optab_table[OTI_lfloor])
 #define ceil_optab (optab_table[OTI_ceil])
-#define lceil_optab (optab_table[OTI_lceil])
 #define btrunc_optab (optab_table[OTI_btrunc])
 #define round_optab (optab_table[OTI_round])
 #define nearbyint_optab (optab_table[OTI_nearbyint])
@@ -407,6 +403,8 @@ enum convert_optab_index
 
   COI_lrint,
   COI_lround,
+  COI_lfloor,
+  COI_lceil,
 
   COI_MAX
 };
@@ -424,6 +422,8 @@ extern GTY(()) convert_optab convert_optab_table[COI_MAX];
 #define ufloat_optab (convert_optab_table[COI_ufloat])
 #define lrint_optab (convert_optab_table[COI_lrint])
 #define lround_optab (convert_optab_table[COI_lround])
+#define lfloor_optab (convert_optab_table[COI_lfloor])
+#define lceil_optab (convert_optab_table[COI_lceil])
 
 /* These arrays record the insn_code of insns that may be needed to
    perform input and output reloads of special objects.  They provide a
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 173233cb302..caad33e4252 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2006-10-29  Richard Guenther  <rguenther@suse.de>
+
+	* gcc.target/i386/math-torture/lfloor.c: New testcase.
+	* gcc.target/i386/math-torture/lceil.c: Likewise.
+
 2006-10-28  Tobias Burnus  <burnus@net-b.de>
 
 	PR libgfortran/24313
diff --git a/gcc/testsuite/gcc.target/i386/math-torture/lceil.c b/gcc/testsuite/gcc.target/i386/math-torture/lceil.c
new file mode 100644
index 00000000000..d09847904e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/math-torture/lceil.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+
+long testlf (float x)
+{
+  return __builtin_lceilf (x);
+}
+long testl (double x)
+{
+  return __builtin_lceil (x);
+}
+long testll (long double x)
+{
+  return __builtin_lceill (x);
+}
+long long testllf (float x)
+{
+  return __builtin_llceilf (x);
+}
+long long testll_ (double x)
+{
+  return __builtin_llceil (x);
+}
+long long testlll (long double x)
+{
+  return __builtin_llceill (x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/math-torture/lfloor.c b/gcc/testsuite/gcc.target/i386/math-torture/lfloor.c
new file mode 100644
index 00000000000..2c2e96f2e53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/math-torture/lfloor.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+
+long testlf (float x)
+{
+  return __builtin_lfloorf (x);
+}
+long testl (double x)
+{
+  return __builtin_lfloor (x);
+}
+long testll (long double x)
+{
+  return __builtin_lfloorl (x);
+}
+long long testllf (float x)
+{
+  return __builtin_llfloorf (x);
+}
+long long testll_ (double x)
+{
+  return __builtin_llfloor (x);
+}
+long long testlll (long double x)
+{
+  return __builtin_llfloorl (x);
+}
-- 
2.11.0