DEF_BUILTIN (SI_FRDS, CODE_FOR_spu_frds, "si_frds", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_FESD, CODE_FOR_spu_fesd, "si_fesd", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_FCEQ, CODE_FOR_ceq_v4sf, "si_fceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCEQ, CODE_FOR_ceq_v2df, "si_dfceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_FCMEQ, CODE_FOR_cmeq_v4sf, "si_fcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCMEQ, CODE_FOR_cmeq_v2df, "si_dfcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_FCGT, CODE_FOR_cgt_v4sf, "si_fcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCGT, CODE_FOR_cgt_v2df, "si_dfcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_FCMGT, CODE_FOR_cmgt_v4sf, "si_fcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCMGT, CODE_FOR_cmgt_v2df, "si_dfcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFTSV, CODE_FOR_dftsv, "si_dftsv", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7))
DEF_BUILTIN (SI_STOP, CODE_FOR_spu_stop, "si_stop", B_INSN, _A2(SPU_BTI_VOID, SPU_BTI_U14))
DEF_BUILTIN (SI_STOPD, CODE_FOR_spu_stopd, "si_stopd", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_LNOP, CODE_FOR_lnop, "si_lnop", B_INSN, _A1(SPU_BTI_VOID))
DEF_BUILTIN (SPU_BISLED, CODE_FOR_spu_bisled, "spu_bisled", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
DEF_BUILTIN (SPU_BISLED_D, CODE_FOR_spu_bisledd, "spu_bisled_d", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
DEF_BUILTIN (SPU_BISLED_E, CODE_FOR_spu_bislede, "spu_bisled_e", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
-DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_cmeq_v4sf, "spu_cmpabseq", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
-DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
DEF_BUILTIN (SPU_IDISABLE, CODE_FOR_spu_idisable, "spu_idisable", B_INSN, _A1(SPU_BTI_VOID))
DEF_BUILTIN (SPU_IENABLE, CODE_FOR_spu_ienable, "spu_ienable", B_INSN, _A1(SPU_BTI_VOID))
DEF_BUILTIN (SPU_MASK_FOR_LOAD, CODE_FOR_spu_lvsr, "spu_lvsr", B_INSN, _A2(SPU_BTI_V16QI, SPU_BTI_PTR))
+DEF_BUILTIN (SPU_TESTSV, CODE_FOR_dftsv, "spu_testsv", B_INSN, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_U7))
/* definitions to support overloaded generic builtin functions: */
DEF_BUILTIN (SPU_CMPEQ_10, CODE_FOR_ceq_v8hi, "spu_cmpeq_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
DEF_BUILTIN (SPU_CMPEQ_11, CODE_FOR_ceq_v4si, "spu_cmpeq_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
DEF_BUILTIN (SPU_CMPEQ_12, CODE_FOR_ceq_v4si, "spu_cmpeq_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_CMPEQ_13, CODE_FOR_ceq_v2df, "spu_cmpeq_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_nothing, "spu_cmpabseq", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CMPABSEQ_0, CODE_FOR_cmeq_v4sf, "spu_cmpabseq_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_CMPABSEQ_1, CODE_FOR_cmeq_v2df, "spu_cmpabseq_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
DEF_BUILTIN (SPU_CMPGT, CODE_FOR_nothing, "spu_cmpgt", B_OVERLOAD, _A1(SPU_BTI_VOID))
DEF_BUILTIN (SPU_CMPGT_0, CODE_FOR_clgt_v16qi, "spu_cmpgt_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
DEF_BUILTIN (SPU_CMPGT_1, CODE_FOR_cgt_v16qi, "spu_cmpgt_1", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
DEF_BUILTIN (SPU_CMPGT_10, CODE_FOR_cgt_v8hi, "spu_cmpgt_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
DEF_BUILTIN (SPU_CMPGT_11, CODE_FOR_cgt_v4si, "spu_cmpgt_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
DEF_BUILTIN (SPU_CMPGT_12, CODE_FOR_clgt_v4si, "spu_cmpgt_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_CMPGT_13, CODE_FOR_cgt_v2df, "spu_cmpgt_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_nothing, "spu_cmpabsgt", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CMPABSGT_0, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_CMPABSGT_1, CODE_FOR_cmgt_v2df, "spu_cmpabsgt_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
DEF_BUILTIN (SPU_HCMPEQ, CODE_FOR_nothing, "spu_hcmpeq", B_OVERLOAD, _A1(SPU_BTI_VOID))
DEF_BUILTIN (SPU_HCMPEQ_0, CODE_FOR_spu_heq, "spu_hcmpeq_0", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_INTSI, SPU_BTI_INTSI))
DEF_BUILTIN (SPU_HCMPEQ_1, CODE_FOR_spu_heq, "spu_hcmpeq_1", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_UINTSI, SPU_BTI_UINTSI))
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
int distance);
+static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
+ enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void insert_branch_hints (void);
static void insert_nops (void);
extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
+/* Which instruction set architecture to use. */
+int spu_arch;
+/* Which cpu are we tuning for. */
+int spu_tune;
+
enum spu_immediate {
SPU_NONE,
SPU_IL,
if (spu_fixed_range_string)
fix_range (spu_fixed_range_string);
+
+ /* Determine processor architectural level. */
+ if (spu_arch_string)
+ {
+ if (strcmp (&spu_arch_string[0], "cell") == 0)
+ spu_arch = PROCESSOR_CELL;
+ else if (strcmp (&spu_arch_string[0], "celledp") == 0)
+ spu_arch = PROCESSOR_CELLEDP;
+ else
+ error ("Unknown architecture '%s'", &spu_arch_string[0]);
+ }
+
+ /* Determine processor to tune for. */
+ if (spu_tune_string)
+ {
+ if (strcmp (&spu_tune_string[0], "cell") == 0)
+ spu_tune = PROCESSOR_CELL;
+ else if (strcmp (&spu_tune_string[0], "celledp") == 0)
+ spu_tune = PROCESSOR_CELLEDP;
+ else
+ error ("Unknown architecture '%s'", &spu_tune_string[0]);
+ }
}
\f
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };
-
-int spu_comp_icode[8][3] = {
- {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
- {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
- {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
- {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
- {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
- {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
- {0, 0, 0},
- {CODE_FOR_ceq_vec, 0, 0},
+int spu_comp_icode[12][3] = {
+ {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
+ {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
+ {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
+ {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
+ {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
+ {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
+ {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
+ {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
+ {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
+ {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
+ {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
+ {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
/* Generate a compare for CODE. Return a brand-new rtx that represents
index = 6;
break;
case V16QImode:
+ index = 7;
+ comp_mode = op_mode;
+ break;
case V8HImode:
+ index = 8;
+ comp_mode = op_mode;
+ break;
case V4SImode:
- case V2DImode:
+ index = 9;
+ comp_mode = op_mode;
+ break;
case V4SFmode:
+ index = 10;
+ comp_mode = V4SImode;
+ break;
case V2DFmode:
- index = 7;
+ index = 11;
+ comp_mode = V2DImode;
break;
+ case V2DImode:
default:
abort ();
}
if (GET_MODE (spu_compare_op1) == DFmode)
{
rtx reg = gen_reg_rtx (DFmode);
- if (!flag_unsafe_math_optimizations
+ if ((!flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
|| (scode != SPU_GT && scode != SPU_EQ))
abort ();
- if (reverse_compare)
- emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
- else
- emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
- reverse_compare = 0;
- spu_compare_op0 = reg;
- spu_compare_op1 = CONST0_RTX (DFmode);
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ if (reverse_compare)
+ emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
+ else
+ emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
+ reverse_compare = 0;
+ spu_compare_op0 = reg;
+ spu_compare_op1 = CONST0_RTX (DFmode);
+ }
}
if (is_set == 0 && spu_compare_op1 == const0_rtx
size.) */
int spu_hint_dist = (8 * 4);
+/* Create a MODE vector constant from 4 ints. */
+rtx
+spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
+{
+ unsigned char arr[16];
+ arr[0] = (a >> 24) & 0xff;
+ arr[1] = (a >> 16) & 0xff;
+ arr[2] = (a >> 8) & 0xff;
+ arr[3] = (a >> 0) & 0xff;
+ arr[4] = (b >> 24) & 0xff;
+ arr[5] = (b >> 16) & 0xff;
+ arr[6] = (b >> 8) & 0xff;
+ arr[7] = (b >> 0) & 0xff;
+ arr[8] = (c >> 24) & 0xff;
+ arr[9] = (c >> 16) & 0xff;
+ arr[10] = (c >> 8) & 0xff;
+ arr[11] = (c >> 0) & 0xff;
+ arr[12] = (d >> 24) & 0xff;
+ arr[13] = (d >> 16) & 0xff;
+ arr[14] = (d >> 8) & 0xff;
+ arr[15] = (d >> 0) & 0xff;
+ return array_to_constant(mode, arr);
+}
+
/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
}
}
+/* Return insn index for the vector compare instruction for given CODE,
+ and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
+
+static int
+get_vec_cmp_insn (enum rtx_code code,
+ enum machine_mode dest_mode,
+ enum machine_mode op_mode)
+
+{
+ switch (code)
+ {
+ case EQ:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_ceq_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_ceq_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_ceq_v4si;
+ if (dest_mode == V4SImode && op_mode == V4SFmode)
+ return CODE_FOR_ceq_v4sf;
+ if (dest_mode == V2DImode && op_mode == V2DFmode)
+ return CODE_FOR_ceq_v2df;
+ break;
+ case GT:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_cgt_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_cgt_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_cgt_v4si;
+ if (dest_mode == V4SImode && op_mode == V4SFmode)
+ return CODE_FOR_cgt_v4sf;
+ if (dest_mode == V2DImode && op_mode == V2DFmode)
+ return CODE_FOR_cgt_v2df;
+ break;
+ case GTU:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_clgt_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_clgt_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_clgt_v4si;
+ break;
+ default:
+ break;
+ }
+ return -1;
+}
+
+/* Emit vector compare for operands OP0 and OP1 using code RCODE.
+ DMODE is expected destination mode. This is a recursive function. */
+
+static rtx
+spu_emit_vector_compare (enum rtx_code rcode,
+ rtx op0, rtx op1,
+ enum machine_mode dmode)
+{
+ int vec_cmp_insn;
+ rtx mask;
+ enum machine_mode dest_mode;
+ enum machine_mode op_mode = GET_MODE (op1);
+
+ gcc_assert (GET_MODE (op0) == GET_MODE (op1));
+
+ /* Floating point vector compare instructions uses destination V4SImode.
+ Double floating point vector compare instructions uses destination V2DImode.
+ Move destination to appropriate mode later. */
+ if (dmode == V4SFmode)
+ dest_mode = V4SImode;
+ else if (dmode == V2DFmode)
+ dest_mode = V2DImode;
+ else
+ dest_mode = dmode;
+
+ mask = gen_reg_rtx (dest_mode);
+ vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
+
+ if (vec_cmp_insn == -1)
+ {
+ bool swap_operands = false;
+ bool try_again = false;
+ switch (rcode)
+ {
+ case LT:
+ rcode = GT;
+ swap_operands = true;
+ try_again = true;
+ break;
+ case LTU:
+ rcode = GTU;
+ swap_operands = true;
+ try_again = true;
+ break;
+ case NE:
+ /* Treat A != B as ~(A==B). */
+ {
+ enum insn_code nor_code;
+ rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
+ nor_code = one_cmpl_optab->handlers[(int)dest_mode].insn_code;
+ gcc_assert (nor_code != CODE_FOR_nothing);
+ emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+ }
+ break;
+ case GE:
+ case GEU:
+ case LE:
+ case LEU:
+ /* Try GT/GTU/LT/LTU OR EQ */
+ {
+ rtx c_rtx, eq_rtx;
+ enum insn_code ior_code;
+ enum rtx_code new_code;
+
+ switch (rcode)
+ {
+ case GE: new_code = GT; break;
+ case GEU: new_code = GTU; break;
+ case LE: new_code = LT; break;
+ case LEU: new_code = LTU; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
+ eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
+
+ ior_code = ior_optab->handlers[(int)dest_mode].insn_code;
+ gcc_assert (ior_code != CODE_FOR_nothing);
+ emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* You only get two chances. */
+ if (try_again)
+ vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
+
+ gcc_assert (vec_cmp_insn != -1);
+
+ if (swap_operands)
+ {
+ rtx tmp;
+ tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+ }
+
+ emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+}
+
+
+/* Emit vector conditional expression.
+ DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
+ CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
+
+int
+spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
+ rtx cond, rtx cc_op0, rtx cc_op1)
+{
+ enum machine_mode dest_mode = GET_MODE (dest);
+ enum rtx_code rcode = GET_CODE (cond);
+ rtx mask;
+
+ /* Get the vector mask for the given relational operations. */
+ mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
+
+ emit_insn(gen_selb (dest, op2, op1, mask));
+
+ return 1;
+}
+
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
(define_attr "length" ""
(const_int 4))
+(define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune")))
;; Processor type -- this attribute must exactly match the processor_type
;; enumeration in spu.h.
;; for 6 cycles and the rest of the operation pipelines for
;; 7 cycles. The simplest way to model this is to simply ignore
;; the 6 cyle stall.
-(define_insn_reservation "FPD" 7 (eq_attr "type" "fpd")
+(define_insn_reservation "FPD" 7
+ (and (eq_attr "tune" "cell")
+ (eq_attr "type" "fpd"))
"pipe0 + pipe1, fp, nothing*5")
+;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined
+(define_insn_reservation "FPD_CELLEDP" 9
+ (and (eq_attr "tune" "celledp")
+ (eq_attr "type" "fpd"))
+ "pipe0 + fp, nothing*8")
+
(define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop")
"pipe1")
(UNSPEC_WRCH 48)
(UNSPEC_SPU_REALIGN_LOAD 49)
(UNSPEC_SPU_MASK_FOR_LOAD 50)
+ (UNSPEC_DFTSV 51)
])
(include "predicates.md")
(define_mode_macro VSF [SF V4SF])
(define_mode_macro VDF [DF V2DF])
+(define_mode_macro VCMP [V16QI
+ V8HI
+ V4SI
+ V4SF
+ V2DF])
+
+(define_mode_macro VCMPU [V16QI
+ V8HI
+ V4SI])
+
(define_mode_attr bh [(QI "b") (V16QI "b")
(HI "h") (V8HI "h")
(SI "") (V4SI "")])
(DF "d") (V2DF "d")])
(define_mode_attr d6 [(SF "6") (V4SF "6")
(DF "d") (V2DF "d")])
-(define_mode_attr f2i [(SF "SI") (V4SF "V4SI")
+
+(define_mode_attr f2i [(SF "si") (V4SF "v4si")
+ (DF "di") (V2DF "v2di")])
+(define_mode_attr F2I [(SF "SI") (V4SF "V4SI")
(DF "DI") (V2DF "V2DI")])
+(define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")])
+
(define_mode_attr umask [(HI "f") (V8HI "f")
(SI "g") (V4SI "g")])
(define_mode_attr nmask [(HI "F") (V8HI "F")
(neg:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
(use (match_dup 2))])]
""
- "operands[2] = gen_reg_rtx (<f2i>mode);
- emit_move_insn (operands[2], spu_const (<f2i>mode, -0x80000000ull));")
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, -0x80000000ull));")
(define_expand "neg<mode>2"
[(parallel
(neg:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
(use (match_dup 2))])]
""
- "operands[2] = gen_reg_rtx (<f2i>mode);
- emit_move_insn (operands[2], spu_const (<f2i>mode, -0x8000000000000000ull));")
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, -0x8000000000000000ull));")
(define_insn_and_split "_neg<mode>2"
[(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
(neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
- (use (match_operand:<f2i> 2 "spu_reg_operand" "r"))]
+ (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
""
"#"
""
- [(set (match_dup:<f2i> 3)
- (xor:<f2i> (match_dup:<f2i> 4)
- (match_dup:<f2i> 2)))]
+ [(set (match_dup:<F2I> 3)
+ (xor:<F2I> (match_dup:<F2I> 4)
+ (match_dup:<F2I> 2)))]
{
- operands[3] = spu_gen_subreg (<f2i>mode, operands[0]);
- operands[4] = spu_gen_subreg (<f2i>mode, operands[1]);
+ operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
+ operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
})
\f
(abs:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
(use (match_dup 2))])]
""
- "operands[2] = gen_reg_rtx (<f2i>mode);
- emit_move_insn (operands[2], spu_const (<f2i>mode, 0x7fffffffull));")
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffull));")
(define_expand "abs<mode>2"
[(parallel
(abs:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
(use (match_dup 2))])]
""
- "operands[2] = gen_reg_rtx (<f2i>mode);
- emit_move_insn (operands[2], spu_const (<f2i>mode, 0x7fffffffffffffffull));")
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffffffffffull));")
(define_insn_and_split "_abs<mode>2"
[(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
(abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
- (use (match_operand:<f2i> 2 "spu_reg_operand" "r"))]
+ (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
""
"#"
""
- [(set (match_dup:<f2i> 3)
- (and:<f2i> (match_dup:<f2i> 4)
- (match_dup:<f2i> 2)))]
+ [(set (match_dup:<F2I> 3)
+ (and:<F2I> (match_dup:<F2I> 4)
+ (match_dup:<F2I> 2)))]
{
- operands[3] = spu_gen_subreg (<f2i>mode, operands[0]);
- operands[4] = spu_gen_subreg (<f2i>mode, operands[1]);
+ operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
+ operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
})
\f
(set_attr "length" "12")])
(define_insn "ceq_<mode>"
- [(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
- (eq:<f2i> (match_operand:VSF 1 "spu_reg_operand" "r")
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (eq:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
(match_operand:VSF 2 "spu_reg_operand" "r")))]
""
"fceq\t%0,%1,%2")
(define_insn "cmeq_<mode>"
- [(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
- (eq:<f2i> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (eq:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
(abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
""
"fcmeq\t%0,%1,%2")
-(define_insn "ceq_vec"
+;; These implementations of ceq_df and cgt_df do not correctly handle
+;; NAN or INF. We will also get incorrect results when the result
+;; of the double subtract is too small.
+(define_expand "ceq_df"
[(set (match_operand:SI 0 "spu_reg_operand" "=r")
- (eq:SI (match_operand 1 "spu_reg_operand" "r")
- (match_operand 2 "spu_reg_operand" "r")))]
- "VECTOR_MODE_P(GET_MODE(operands[1]))
- && GET_MODE(operands[1]) == GET_MODE(operands[2])"
- "ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15"
- [(set_attr "length" "12")])
+ (eq:SI (match_operand:DF 1 "spu_reg_operand" "r")
+ (match_operand:DF 2 "const_zero_operand" "i")))]
+ ""
+{
+ if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
+ {
+ rtx s0_ti = gen_reg_rtx(TImode);
+ rtx s1_v4 = gen_reg_rtx(V4SImode);
+ rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
+ rtx to_ti = gen_reg_rtx(TImode);
+ rtx to_v4 = gen_reg_rtx(V4SImode);
+ rtx l_v4 = gen_reg_rtx(V4SImode);
+ emit_insn (gen_spu_convert (l_v4, operands[1]));
+ emit_insn (gen_movv4si(s1_v4, spu_const(V4SImode, -0x80000000ll)));
+ emit_insn (gen_ceq_v4si(s0_v4, l_v4, CONST0_RTX(V4SImode)));
+ emit_insn (gen_ceq_v4si(s1_v4, l_v4, s1_v4));
+ emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
+ emit_insn (gen_spu_convert (to_v4, to_ti));
+ emit_insn (gen_iorv4si3(s1_v4, s0_v4, s1_v4));
+ emit_insn (gen_andv4si3(to_v4, to_v4, s1_v4));
+ emit_insn (gen_spu_convert (operands[0], to_v4));
+ DONE;
+ }
+})
+
+(define_insn "ceq_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (eq:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfceq\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_insn "cmeq_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (eq:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
+ (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfcmeq\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_expand "ceq_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:V2DF 2 "spu_reg_operand" "r")))]
+ ""
+{
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx biteq = gen_reg_rtx (V4SImode);
+ rtx ahi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx iszero = gen_reg_rtx (V4SImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hihi_promote = gen_reg_rtx (TImode);
+
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
+ 0x08090A0B, 0x18191A1B);
+ emit_move_insn (hihi_promote, pat);
+
+ emit_insn (gen_ceq_v4si (biteq, ra, rb));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
+ emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
+ DONE;
+ }
+})
+
+(define_expand "cmeq_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
+ (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
+ ""
+{
+ if(spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx biteq = gen_reg_rtx (V4SImode);
+ rtx ahi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hihi_promote = gen_reg_rtx (TImode);
+
+ emit_move_insn (sign_mask, pat);
+
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
+ 0x08090A0B, 0x18191A1B);
+ emit_move_insn (hihi_promote, pat);
+
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_andc_v4si (temp2, biteq, a_nan));
+ emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
+ DONE;
+ }
+})
\f
;; cgt
(set_attr "length" "36")])
(define_insn "cgt_<mode>"
- [(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
- (gt:<f2i> (match_operand:VSF 1 "spu_reg_operand" "r")
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (gt:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
(match_operand:VSF 2 "spu_reg_operand" "r")))]
""
"fcgt\t%0,%1,%2")
(define_insn "cmgt_<mode>"
- [(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
- (gt:<f2i> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (gt:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
(abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
""
"fcmgt\t%0,%1,%2")
+(define_expand "cgt_df"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (gt:SI (match_operand:DF 1 "spu_reg_operand" "r")
+ (match_operand:DF 2 "const_zero_operand" "i")))]
+ ""
+{
+ if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
+ {
+ rtx s0_ti = gen_reg_rtx(TImode);
+ rtx s1_v4 = gen_reg_rtx(V4SImode);
+ rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
+ rtx to_ti = gen_reg_rtx(TImode);
+ rtx to_v4 = gen_reg_rtx(V4SImode);
+ rtx l_v4 = gen_reg_rtx(V4SImode);
+ emit_insn (gen_spu_convert(l_v4, operands[1]));
+ emit_insn (gen_ceq_v4si(s0_v4, l_v4, const0_rtx));
+ emit_insn (gen_cgt_v4si(s1_v4, l_v4, const0_rtx));
+ emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
+ emit_insn (gen_spu_convert(to_v4, to_ti));
+ emit_insn (gen_andc_v4si(to_v4, s0_v4, to_v4));
+ emit_insn (gen_iorv4si3(to_v4, to_v4, s1_v4));
+ emit_insn (gen_spu_convert(operands[0], to_v4));
+ DONE;
+ }
+})
+
+(define_insn "cgt_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (gt:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfcgt\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_insn "cmgt_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (gt:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
+ (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfcmgt\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_expand "cgt_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:V2DF 2 "spu_reg_operand" "r")))]
+ ""
+{
+ if(spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx zero = gen_reg_rtx (V4SImode);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx hi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx b_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx asel = gen_reg_rtx (V4SImode);
+ rtx bsel = gen_reg_rtx (V4SImode);
+ rtx abor = gen_reg_rtx (V4SImode);
+ rtx bbor = gen_reg_rtx (V4SImode);
+ rtx gt_hi = gen_reg_rtx (V4SImode);
+ rtx gt_lo = gen_reg_rtx (V4SImode);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ rtx borrow_shuffle = gen_reg_rtx (TImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+ pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
+ 0x0C0D0E0F, 0xC0C0C0C0);
+ emit_move_insn (borrow_shuffle, pat);
+
+ emit_insn (gen_andv4si3 (a_nan, ra, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, a_nan, nan_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_nan, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
+ emit_insn (gen_andv4si3 (b_nan, rb, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, b_nan, nan_mask));
+ emit_insn (gen_clgt_v4si (b_nan, b_nan, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
+ emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
+ emit_move_insn (zero, CONST0_RTX (V4SImode));
+ emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (asel, asel, asel, hi_promote));
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_bg_v4si (abor, zero, a_abs));
+ emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
+ emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
+ emit_insn (gen_selb (abor, a_abs, abor, asel));
+ emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_bg_v4si (bbor, zero, b_abs));
+ emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
+ emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
+ emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
+ emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
+ emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
+ emit_insn (gen_ceq_v4si (temp2, abor, bbor));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
+
+ emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
+ DONE;
+ }
+})
+
+(define_expand "cmgt_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
+ (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
+ ""
+{
+ if(spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx hi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx b_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx gt_hi = gen_reg_rtx (V4SImode);
+ rtx gt_lo = gen_reg_rtx (V4SImode);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
+ emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
+
+ emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs));
+ emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs));
+ emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
+ emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
+ DONE;
+ }
+})
+
\f
;; clgt
(set_attr "length" "32")])
\f
+;; dftsv
+(define_insn "dftsv_celledp"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (unspec [(match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")] UNSPEC_DFTSV))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dftsv\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_expand "dftsv"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (unspec [(match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")] UNSPEC_DFTSV))]
+ ""
+{
+ if(spu_arch == PROCESSOR_CELL)
+ {
+ rtx result = gen_reg_rtx (V4SImode);
+ emit_move_insn (result, CONST0_RTX (V4SImode));
+
+ if (INTVAL (operands[2]))
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx abs = gen_reg_rtx (V4SImode);
+ rtx sign = gen_reg_rtx (V4SImode);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+
+ emit_insn (gen_ashrv4si3 (sign, ra, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (sign, sign, sign, hi_promote));
+ emit_insn (gen_andv4si3 (abs, ra, sign_mask));
+
+ /* NaN or +inf or -inf */
+ if (INTVAL (operands[2]) & 0x70)
+ {
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx isinf = gen_reg_rtx (V4SImode);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ emit_insn (gen_ceq_v4si (isinf, abs, nan_mask));
+
+ /* NaN */
+ if (INTVAL (operands[2]) & 0x40)
+ {
+ rtx isnan = gen_reg_rtx (V4SImode);
+ emit_insn (gen_clgt_v4si (isnan, abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf));
+ emit_insn (gen_iorv4si3 (isnan, isnan, temp2));
+ emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote));
+ emit_insn (gen_iorv4si3 (result, result, isnan));
+ }
+ /* +inf or -inf */
+ if (INTVAL (operands[2]) & 0x30)
+ {
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si));
+ emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote));
+
+ /* +inf */
+ if (INTVAL (operands[2]) & 0x20)
+ {
+ emit_insn (gen_andc_v4si (temp2, isinf, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ /* -inf */
+ if (INTVAL (operands[2]) & 0x10)
+ {
+ emit_insn (gen_andv4si3 (temp2, isinf, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ }
+ }
+
+ /* 0 or denorm */
+ if (INTVAL (operands[2]) & 0xF)
+ {
+ rtx iszero = gen_reg_rtx (V4SImode);
+ emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode)));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
+
+ /* denorm */
+ if (INTVAL (operands[2]) & 0x3)
+ {
+ rtx isdenorm = gen_reg_rtx (V4SImode);
+ rtx denorm_mask = gen_reg_rtx (V4SImode);
+ emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF));
+ emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask));
+ emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero));
+ emit_insn (gen_shufb (isdenorm, isdenorm,
+ isdenorm, hi_promote));
+ /* +denorm */
+ if (INTVAL (operands[2]) & 0x2)
+ {
+ emit_insn (gen_andc_v4si (temp2, isdenorm, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ /* -denorm */
+ if (INTVAL (operands[2]) & 0x1)
+ {
+ emit_insn (gen_andv4si3 (temp2, isdenorm, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ }
+
+ /* 0 */
+ if (INTVAL (operands[2]) & 0xC)
+ {
+ emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote));
+ /* +0 */
+ if (INTVAL (operands[2]) & 0x8)
+ {
+ emit_insn (gen_andc_v4si (temp2, iszero, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ /* -0 */
+ if (INTVAL (operands[2]) & 0x4)
+ {
+ emit_insn (gen_andv4si3 (temp2, iszero, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ }
+ }
+ }
+ emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result));
+ DONE;
+ }
+})
+
+
;; branches
(define_insn ""
DONE;
})
+(define_expand "cmpdf"
+ [(set (cc0)
+ (compare (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" "")))]
+ "(flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
+ || spu_arch == PROCESSOR_CELLEDP "
+ "{
+ spu_compare_op0 = operands[0];
+ spu_compare_op1 = operands[1];
+ DONE;
+}")
+
+;; vector conditional compare patterns
+(define_expand "vcond<mode>"
+ [(set (match_operand:VCMP 0 "spu_reg_operand" "=r")
+ (if_then_else:VCMP
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VCMP 4 "spu_reg_operand" "r")
+ (match_operand:VCMP 5 "spu_reg_operand" "r")])
+ (match_operand:VCMP 1 "spu_reg_operand" "r")
+ (match_operand:VCMP 2 "spu_reg_operand" "r")))]
+ ""
+ {
+ if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
+ operands[3], operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+ })
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:VCMPU 0 "spu_reg_operand" "=r")
+ (if_then_else:VCMPU
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VCMPU 4 "spu_reg_operand" "r")
+ (match_operand:VCMPU 5 "spu_reg_operand" "r")])
+ (match_operand:VCMPU 1 "spu_reg_operand" "r")
+ (match_operand:VCMPU 2 "spu_reg_operand" "r")))]
+ ""
+ {
+ if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
+ operands[3], operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+ })
+
\f
;; branch on condition
(define_expand "sminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=r")
- (smax:V4SF (match_operand:V4SF 1 "register_operand" "r")
+ (smin:V4SF (match_operand:V4SF 1 "register_operand" "r")
(match_operand:V4SF 2 "register_operand" "r")))]
""
"
DONE;
}")
+(define_expand "smaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=r")
+ (smax:V2DF (match_operand:V2DF 1 "register_operand" "r")
+ (match_operand:V2DF 2 "register_operand" "r")))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (V2DImode);
+ emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
+ emit_insn (gen_selb (operands[0], operands[2], operands[1],
+ spu_gen_subreg (V4SImode, mask)));
+ DONE;
+}")
+
+(define_expand "sminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=r")
+ (smin:V2DF (match_operand:V2DF 1 "register_operand" "r")
+ (match_operand:V2DF 2 "register_operand" "r")))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (V2DImode);
+ emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
+ emit_insn (gen_selb (operands[0], operands[1], operands[2],
+ spu_gen_subreg (V4SImode, mask)));
+ DONE;
+}")
+
(define_expand "vec_widen_umult_hi_v8hi"
[(set (match_operand:V4SI 0 "register_operand" "=r")
(mult:V4SI