(UNSPEC_MFENCE 44)
(UNSPEC_LFENCE 45)
(UNSPEC_PSADBW 46)
- (UNSPEC_LDQQU 47)
+ (UNSPEC_LDDQU 47)
; Generic math support
(UNSPEC_COPYSIGN 50)
}
operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
}
- if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
- operands[1] = force_reg (SFmode, operands[1]);
})
(define_insn "*extendsfdf2_mixed"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,Y")
(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f,mY")))]
- "TARGET_SSE2 && TARGET_MIX_SSE_I387
- && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387"
{
switch (which_alternative)
{
(define_insn "*extendsfdf2_sse"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y")
(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "mY")))]
- "TARGET_SSE2 && TARGET_SSE_MATH
- && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "TARGET_SSE2 && TARGET_SSE_MATH"
"cvtss2sd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
(define_insn "*extendsfdf2_i387"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m")
(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
- "TARGET_80387
- && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "TARGET_80387"
{
switch (which_alternative)
{
}
operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
}
- if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
- operands[1] = force_reg (SFmode, operands[1]);
})
(define_insn "*extendsfxf2_i387"
[(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
(float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
- "TARGET_80387
- && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "TARGET_80387"
{
switch (which_alternative)
{
}
operands[1] = validize_mem (force_const_mem (DFmode, operands[1]));
}
- if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
- operands[1] = force_reg (DFmode, operands[1]);
})
(define_insn "*extenddfxf2_i387"
[(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
(float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))]
- "TARGET_80387
- && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "TARGET_80387"
{
switch (which_alternative)
{
(match_operand:DF 1 "nonimmediate_operand" "")))]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
- if (MEM_P (operands[0]) && MEM_P (operands[1]))
- operands[1] = force_reg (DFmode, operands[1]);
-
if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
;
else if (flag_unsafe_math_optimizations)
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,vector")])
+;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
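+;; For example (illustrative registers), this turns a sequence like
+;;	movsd	mem, %xmm0
+;;	cvttsd2si	%xmm0, %eax
+;; into the single instruction
+;;	cvttsd2si	mem, %eax
+;; whenever the intermediate SSE register is dead after the conversion.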
+(define_peephole2
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" ""))
+ (set (match_operand:SSEMODEI24 2 "register_operand" "")
+ (fix:SSEMODEI24 (match_dup 0)))]
+ "!TARGET_K8
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "memory_operand" ""))
+ (set (match_operand:SSEMODEI24 2 "register_operand" "")
+ (fix:SSEMODEI24 (match_dup 0)))]
+ "!TARGET_K8
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]
+ "")
+
;; Avoid vector decoded forms of the instruction.
(define_peephole2
[(match_scratch:DF 2 "Y")
(match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
&& ix86_binary_operator_ok (MINUS, SImode, operands)"
- "sub{q}\t{%2, %0|%0, %2}"
+ "sub{l}\t{%2, %1|%1, %2}"
[(set_attr "type" "alu")
(set_attr "mode" "DI")])
(define_insn "*negsf2_1"
[(set (match_operand:SF 0 "register_operand" "=f")
(neg:SF (match_operand:SF 1 "register_operand" "0")))]
- "TARGET_80387 && reload_completed"
+ "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)"
"fchs"
[(set_attr "type" "fsgn")
(set_attr "mode" "SF")])
(define_insn "*negdf2_1"
[(set (match_operand:DF 0 "register_operand" "=f")
(neg:DF (match_operand:DF 1 "register_operand" "0")))]
- "TARGET_80387 && reload_completed"
+ "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))"
"fchs"
[(set_attr "type" "fsgn")
(set_attr "mode" "DF")])
(define_insn "*negxf2_1"
[(set (match_operand:XF 0 "register_operand" "=f")
(neg:XF (match_operand:XF 1 "register_operand" "0")))]
- "TARGET_80387 && reload_completed"
+ "TARGET_80387"
"fchs"
[(set_attr "type" "fsgn")
(set_attr "mode" "XF")])
(define_insn "*abssf2_1"
[(set (match_operand:SF 0 "register_operand" "=f")
(abs:SF (match_operand:SF 1 "register_operand" "0")))]
- "TARGET_80387 && reload_completed"
+ "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)"
"fabs"
[(set_attr "type" "fsgn")
(set_attr "mode" "SF")])
(define_insn "*absdf2_1"
[(set (match_operand:DF 0 "register_operand" "=f")
(abs:DF (match_operand:DF 1 "register_operand" "0")))]
- "TARGET_80387 && reload_completed"
+ "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))"
"fabs"
[(set_attr "type" "fsgn")
(set_attr "mode" "DF")])
(define_insn "*absxf2_1"
[(set (match_operand:XF 0 "register_operand" "=f")
(abs:XF (match_operand:XF 1 "register_operand" "0")))]
- "TARGET_80387 && reload_completed"
+ "TARGET_80387"
"fabs"
[(set_attr "type" "fsgn")
(set_attr "mode" "DF")])
(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
(ashift:DI (match_dup 1) (match_dup 2)))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+ && ix86_binary_operator_ok (ASHIFT, DImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{q}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{q}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{q}\t%0";
+ else
+ return "sal{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashldi3_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "immediate_operand" "e"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, DImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))"
{
switch (get_attr_type (insn))
{
(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
(ashift:SI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{l}\t%0";
+ else
+ return "sal{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashlsi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))"
{
switch (get_attr_type (insn))
{
(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))"
{
switch (get_attr_type (insn))
{
(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
(ashift:HI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{w}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "HI")])
+
+(define_insn "*ashlhi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))"
{
switch (get_attr_type (insn))
{
(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
(ashift:QI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{b}\t{%0, %0|%0, %0}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{b}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_size))
+ return "sal{b}\t%0";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*ashlqi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))"
{
switch (get_attr_type (insn))
{
(const_string "2")
(const_string "*")))])
+(define_insn "*ashrdi3_one_bit_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "sar{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags. We assume that shifts by constant
;; zero are optimized away.
(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
(ashiftrt:DI (match_dup 1) (match_dup 2)))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashrdi3_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "n"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
"sar{q}\t{%2, %0|%0, %2}"
[(set_attr "type" "ishift")
(set_attr "mode" "DI")])
(const_string "2")
(const_string "*")))])
+(define_insn "*ashrsi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
(define_insn "*ashrsi3_one_bit_cmp_zext"
[(set (reg FLAGS_REG)
(compare
(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
(ashiftrt:SI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
"sar{l}\t{%2, %0|%0, %2}"
[(set_attr "type" "ishift")
(set_attr "mode" "SI")])
(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
"sar{l}\t{%2, %k0|%k0, %2}"
[(set_attr "type" "ishift")
(set_attr "mode" "SI")])
(const_string "2")
(const_string "*")))])
+(define_insn "*ashrhi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ "sar{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags. We assume that shifts by constant
;; zero are optimized away.
(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
(ashiftrt:HI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+(define_insn "*ashrhi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
"sar{w}\t{%2, %0|%0, %2}"
[(set_attr "type" "ishift")
(set_attr "mode" "HI")])
(const_string "2")
(const_string "*")))])
+(define_insn "*ashrqi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "sar{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags. We assume that shifts by constant
;; zero are optimized away.
(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
(ashiftrt:QI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "sar{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+(define_insn "*ashrqi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
"sar{b}\t{%2, %0|%0, %2}"
[(set_attr "type" "ishift")
(set_attr "mode" "QI")])
+
\f
;; Logical shift instructions
(const_string "2")
(const_string "*")))])
+(define_insn "*lshrdi3_cconly_one_bit_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags. We assume that shifts by constant
;; zero are optimized away.
(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
(lshiftrt:DI (match_dup 1) (match_dup 2)))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+(define_insn "*lshrdi3_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "e"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
"shr{q}\t{%2, %0|%0, %2}"
[(set_attr "type" "ishift")
(set_attr "mode" "DI")])
(const_string "2")
(const_string "*")))])
+(define_insn "*lshrsi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
(define_insn "*lshrsi3_cmp_one_bit_zext"
[(set (reg FLAGS_REG)
(compare
(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
(lshiftrt:SI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
"shr{l}\t{%2, %0|%0, %2}"
[(set_attr "type" "ishift")
(set_attr "mode" "SI")])
(set (match_operand:DI 0 "register_operand" "=r")
(lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
"shr{l}\t{%2, %k0|%k0, %2}"
[(set_attr "type" "ishift")
(set_attr "mode" "SI")])
(const_string "2")
(const_string "*")))])
+(define_insn "*lshrhi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags. We assume that shifts by constant
;; zero are optimized away.
(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
(lshiftrt:HI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+(define_insn "*lshrhi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
"shr{w}\t{%2, %0|%0, %2}"
[(set_attr "type" "ishift")
(set_attr "mode" "HI")])
(const_string "2")
(const_string "*")))])
+(define_insn "*lshrqi2_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && (TARGET_SHIFT1 || optimize_size)
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "shr{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags. We assume that shifts by constant
;; zero are optimized away.
(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
(lshiftrt:QI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
+ "shr{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+(define_insn "*lshrqi2_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
+ && (optimize_size
+ || !TARGET_PARTIAL_FLAG_REG_STALL)"
"shr{b}\t{%2, %0|%0, %2}"
[(set_attr "type" "ishift")
(set_attr "mode" "QI")])
#else
/* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that.
The align insn is used to avoid 3 jump instructions in the row to improve
- branch prediction and the benefits hardly outweight the cost of extra 8
+ branch prediction and the benefits hardly outweigh the cost of extra 8
nops on the average inserted by full alignment pseudo operation. */
#endif
return "";
(define_insn_and_split "*tls_dynamic_gnu2_combine_32"
[(set (match_operand:SI 0 "register_operand" "=&a")
(plus:SI
- (plus:SI (match_operand:SI 3 "tp_or_register_operand" "ir")
- (unspec:SI [(match_operand:SI 4 "tls_modbase_operand" "")
- (match_operand:SI 5 "" "")
- (match_operand:SI 2 "register_operand" "b")
- (reg:SI SP_REG)]
- UNSPEC_TLSDESC))
+ (unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "")
+ (match_operand:SI 4 "" "")
+ (match_operand:SI 2 "register_operand" "b")
+ (reg:SI SP_REG)]
+ UNSPEC_TLSDESC)
(const:SI (unspec:SI
[(match_operand:SI 1 "tls_symbolic_operand" "")]
UNSPEC_DTPOFF))))
"!TARGET_64BIT && TARGET_GNU2_TLS"
"#"
""
- [(parallel
- [(set (match_dup 0)
- (plus:SI (match_dup 3)
- (match_dup 5)))
- (clobber (reg:CC FLAGS_REG))])]
+ [(set (match_dup 0) (match_dup 5))]
{
operands[5] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
[(set (match_operand:DI 0 "register_operand" "=&a")
(plus:DI
- (plus:DI (match_operand:DI 2 "tp_or_register_operand" "ir")
- (unspec:DI [(match_operand:DI 3 "tls_modbase_operand" "")
- (match_operand:DI 4 "" "")
- (reg:DI SP_REG)]
- UNSPEC_TLSDESC))
+ (unspec:DI [(match_operand:DI 2 "tls_modbase_operand" "")
+ (match_operand:DI 3 "" "")
+ (reg:DI SP_REG)]
+ UNSPEC_TLSDESC)
(const:DI (unspec:DI
[(match_operand:DI 1 "tls_symbolic_operand" "")]
UNSPEC_DTPOFF))))
"TARGET_64BIT && TARGET_GNU2_TLS"
"#"
""
- [(parallel
- [(set (match_dup 0)
- (plus:DI (match_dup 2)
- (match_dup 4)))
- (clobber (reg:CC FLAGS_REG))])]
+ [(set (match_dup 0) (match_dup 4))]
{
operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
(if_then_else (eq_attr "alternative" "1")
- (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (if_then_else (match_operand:DF 3 "mult_operator" "")
(const_string "ssemul")
(const_string "sseadd"))
- (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (if_then_else (match_operand:DF 3 "mult_operator" "")
(const_string "fmul")
(const_string "fop"))))
(set_attr "mode" "DF")])
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
- (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (if_then_else (match_operand:DF 3 "mult_operator" "")
(const_string "ssemul")
(const_string "sseadd")))
(set_attr "mode" "DF")])
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
- (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (if_then_else (match_operand:DF 3 "mult_operator" "")
(const_string "fmul")
(const_string "fop")))
(set_attr "mode" "DF")])
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(and (eq_attr "alternative" "2")
- (match_operand:SF 3 "mult_operator" ""))
+ (match_operand:DF 3 "mult_operator" ""))
(const_string "ssemul")
(and (eq_attr "alternative" "2")
- (match_operand:SF 3 "div_operator" ""))
+ (match_operand:DF 3 "div_operator" ""))
(const_string "ssediv")
(eq_attr "alternative" "2")
(const_string "sseadd")
"* return output_387_binary_op (insn, operands);"
[(set_attr "mode" "DF")
(set (attr "type")
- (cond [(match_operand:SF 3 "mult_operator" "")
+ (cond [(match_operand:DF 3 "mult_operator" "")
(const_string "ssemul")
- (match_operand:SF 3 "div_operator" "")
+ (match_operand:DF 3 "div_operator" "")
(const_string "ssediv")
]
(const_string "sseadd")))])
(define_insn "sqrtxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
(sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
- "TARGET_USE_FANCY_MATH_387
- && (TARGET_IEEE_FP || flag_unsafe_math_optimizations) "
+ "TARGET_USE_FANCY_MATH_387"
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
[(set_attr "type" "sseadd")
(set_attr "mode" "DF")])
+;; Make two stack loads independent:
+;; fld aa fld aa
+;; fld %st(0) -> fld bb
+;; fmul bb fmul %st(1), %st
+;;
+;; Actually we only match the last two instructions for simplicity.
+(define_peephole2
+ [(set (match_operand 0 "fp_register_operand" "")
+ (match_operand 1 "fp_register_operand" ""))
+ (set (match_dup 0)
+ (match_operator 2 "binary_fp_operator"
+ [(match_dup 0)
+ (match_operand 3 "memory_operand" "")]))]
+ "REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_dup 0) (match_dup 3))
+ (set (match_dup 0) (match_dup 4))]
+
+ ;; The % modifier is not operational anymore in peephole2's, so we have to
+ ;; swap the operands manually in the case of addition and multiplication.
+ "if (COMMUTATIVE_ARITH_P (operands[2]))
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+ operands[0], operands[1]);
+ else
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+ operands[1], operands[0]);")
+
;; Conditional addition patterns
(define_expand "addqicc"
[(match_operand:QI 0 "register_operand" "")
(set (match_scratch:DI 2 "=&r") (const_int 0))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT"
- "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR %%fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
+ {
+ /* The kernel uses a different segment register for performance reasons:
+ this way a system call does not have to trash the userspace segment
+ register, which would be expensive.  */
+ if (ix86_cmodel != CM_KERNEL)
+ return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR %%fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+ else
+ return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR %%gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+ }
[(set_attr "type" "multi")])
(define_expand "stack_protect_test"
UNSPEC_SP_TLS_TEST))
(clobber (match_scratch:DI 3 "=r"))]
"TARGET_64BIT"
- "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}"
+ {
+ /* The kernel uses a different segment register for performance reasons:
+ this way a system call does not have to trash the userspace segment
+ register, which would be expensive.  */
+ if (ix86_cmodel != CM_KERNEL)
+ return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}";
+ else
+ return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR %%gs:%P2}";
+ }
[(set_attr "type" "multi")])
(include "sse.md")