X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fi386.md;h=f9be68f9e75521317c94337b5344859763977173;hp=923bdf90660bc5aa441bf7ec1d5a65bddf58109b;hb=062db9f39ed31bb2c570996cf3ee884dfaf7df21;hpb=c949bae8f38cc068f69372c427b1d18f3a007305 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 923bdf90660..f9be68f9e75 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1,6 +1,6 @@ ;; GCC machine description for IA-32 and x86-64. ;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -;; 2001, 2002, 2003, 2004 +;; 2001, 2002, 2003, 2004, 2005, 2006 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka @@ -19,8 +19,8 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with GCC; see the file COPYING. If not, write to -;; the Free Software Foundation, 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. */ +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. */ ;; ;; The original PO technology requires these to be ordered by speed, ;; so that assigner will pick the fastest. 
@@ -66,59 +66,68 @@ (UNSPEC_STACK_ALLOC 11) (UNSPEC_SET_GOT 12) (UNSPEC_SSE_PROLOGUE_SAVE 13) + (UNSPEC_REG_SAVE 14) + (UNSPEC_DEF_CFA 15) ; TLS support - (UNSPEC_TP 15) - (UNSPEC_TLS_GD 16) - (UNSPEC_TLS_LD_BASE 17) + (UNSPEC_TP 16) + (UNSPEC_TLS_GD 17) + (UNSPEC_TLS_LD_BASE 18) + (UNSPEC_TLSDESC 19) ; Other random patterns (UNSPEC_SCAS 20) - (UNSPEC_SIN 21) - (UNSPEC_COS 22) - (UNSPEC_FNSTSW 24) - (UNSPEC_SAHF 25) - (UNSPEC_FSTCW 26) - (UNSPEC_ADD_CARRY 27) - (UNSPEC_FLDCW 28) + (UNSPEC_FNSTSW 21) + (UNSPEC_SAHF 22) + (UNSPEC_FSTCW 23) + (UNSPEC_ADD_CARRY 24) + (UNSPEC_FLDCW 25) + (UNSPEC_REP 26) + (UNSPEC_EH_RETURN 27) + (UNSPEC_LD_MPIC 28) ; load_macho_picbase ; For SSE/MMX support: - (UNSPEC_FIX 30) - (UNSPEC_MASKMOV 32) - (UNSPEC_MOVMSK 33) - (UNSPEC_MOVNT 34) - (UNSPEC_MOVA 38) - (UNSPEC_MOVU 39) - (UNSPEC_SHUFFLE 41) - (UNSPEC_RCP 42) - (UNSPEC_RSQRT 43) - (UNSPEC_SFENCE 44) - (UNSPEC_NOP 45) ; prevents combiner cleverness - (UNSPEC_PAVGUSB 49) - (UNSPEC_PFRCP 50) - (UNSPEC_PFRCPIT1 51) - (UNSPEC_PFRCPIT2 52) - (UNSPEC_PFRSQRT 53) - (UNSPEC_PFRSQIT1 54) - (UNSPEC_PSHUFLW 55) - (UNSPEC_PSHUFHW 56) - (UNSPEC_MFENCE 59) - (UNSPEC_LFENCE 60) - (UNSPEC_PSADBW 61) - (UNSPEC_ADDSUB 71) - (UNSPEC_HADD 72) - (UNSPEC_HSUB 73) - (UNSPEC_MOVSHDUP 74) - (UNSPEC_MOVSLDUP 75) - (UNSPEC_LDQQU 76) - (UNSPEC_MOVDDUP 77) + (UNSPEC_FIX_NOTRUNC 30) + (UNSPEC_MASKMOV 31) + (UNSPEC_MOVMSK 32) + (UNSPEC_MOVNT 33) + (UNSPEC_MOVU 34) + (UNSPEC_RCP 35) + (UNSPEC_RSQRT 36) + (UNSPEC_SFENCE 37) + (UNSPEC_NOP 38) ; prevents combiner cleverness + (UNSPEC_PFRCP 39) + (UNSPEC_PFRCPIT1 40) + (UNSPEC_PFRCPIT2 41) + (UNSPEC_PFRSQRT 42) + (UNSPEC_PFRSQIT1 43) + (UNSPEC_MFENCE 44) + (UNSPEC_LFENCE 45) + (UNSPEC_PSADBW 46) + (UNSPEC_LDQQU 47) + + ; Generic math support + (UNSPEC_COPYSIGN 50) + (UNSPEC_IEEE_MIN 51) ; not commutative + (UNSPEC_IEEE_MAX 52) ; not commutative ; x87 Floating point - (UNSPEC_FPATAN 65) - (UNSPEC_FYL2X 66) - (UNSPEC_FYL2XP1 67) - (UNSPEC_FRNDINT 68) - (UNSPEC_F2XM1 69) 
+ (UNSPEC_SIN 60) + (UNSPEC_COS 61) + (UNSPEC_FPATAN 62) + (UNSPEC_FYL2X 63) + (UNSPEC_FYL2XP1 64) + (UNSPEC_FRNDINT 65) + (UNSPEC_FIST 66) + (UNSPEC_F2XM1 67) + + ; x87 Rounding + (UNSPEC_FRNDINT_FLOOR 70) + (UNSPEC_FRNDINT_CEIL 71) + (UNSPEC_FRNDINT_TRUNC 72) + (UNSPEC_FRNDINT_MASK_PM 73) + (UNSPEC_FIST_FLOOR 74) + (UNSPEC_FIST_CEIL 75) ; x87 Double output FP (UNSPEC_SINCOS_COS 80) @@ -134,29 +143,28 @@ (UNSPEC_FPREM1_F 90) (UNSPEC_FPREM1_U 91) - ; x87 Rounding - (UNSPEC_FRNDINT_FLOOR 96) - (UNSPEC_FRNDINT_CEIL 97) - (UNSPEC_FRNDINT_TRUNC 98) - (UNSPEC_FRNDINT_MASK_PM 99) - - ; REP instruction - (UNSPEC_REP 75) - - (UNSPEC_EH_RETURN 76) + ; SSP patterns + (UNSPEC_SP_SET 100) + (UNSPEC_SP_TEST 101) + (UNSPEC_SP_TLS_SET 102) + (UNSPEC_SP_TLS_TEST 103) ]) (define_constants [(UNSPECV_BLOCKAGE 0) - (UNSPECV_STACK_PROBE 10) - (UNSPECV_EMMS 31) - (UNSPECV_LDMXCSR 37) - (UNSPECV_STMXCSR 40) - (UNSPECV_FEMMS 46) - (UNSPECV_CLFLUSH 57) - (UNSPECV_ALIGN 68) - (UNSPECV_MONITOR 69) - (UNSPECV_MWAIT 70) + (UNSPECV_STACK_PROBE 1) + (UNSPECV_EMMS 2) + (UNSPECV_LDMXCSR 3) + (UNSPECV_STMXCSR 4) + (UNSPECV_FEMMS 5) + (UNSPECV_CLFLUSH 6) + (UNSPECV_ALIGN 7) + (UNSPECV_MONITOR 8) + (UNSPECV_MWAIT 9) + (UNSPECV_CMPXCHG_1 10) + (UNSPECV_CMPXCHG_2 11) + (UNSPECV_XCHG 12) + (UNSPECV_LOCK 13) ]) ;; Registers by name. @@ -179,7 +187,7 @@ ;; Processor type. This attribute must exactly match the processor_type ;; enumeration in i386.h. -(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona" +(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona,generic32,generic64" (const (symbol_ref "ix86_tune"))) ;; A basic instruction type. 
Refinements due to arguments to be @@ -191,8 +199,8 @@ icmp,test,ibr,setcc,icmov, push,pop,call,callv,leave, str,cld, - fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,frndint, - sselog,sseiadd,sseishft,sseimul, + fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, + sselog,sselog1,sseiadd,sseishft,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) @@ -204,9 +212,9 @@ ;; The CPU unit operations uses. (define_attr "unit" "integer,i387,sse,mmx,unknown" - (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,frndint") + (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") (const_string "i387") - (eq_attr "type" "sselog,sseiadd,sseishft,sseimul, + (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv") (const_string "sse") (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") @@ -240,7 +248,7 @@ (const_int 1) ] (symbol_ref "/* Update immediate_length and other attributes! */ - abort(),1"))) + gcc_unreachable (),1"))) ;; The (bounding maximum) length of an instruction address. 
(define_attr "length_address" "" @@ -308,8 +316,12 @@ (not (match_operand 0 "memory_operand" ""))) (const_int 0) (and (eq_attr "type" "imov") - (and (match_operand 0 "register_operand" "") - (match_operand 1 "immediate_operand" ""))) + (ior (and (match_operand 0 "register_operand" "") + (match_operand 1 "immediate_operand" "")) + (ior (and (match_operand 0 "ax_reg_operand" "") + (match_operand 1 "memory_displacement_only_operand" "")) + (and (match_operand 0 "memory_displacement_only_operand" "") + (match_operand 1 "ax_reg_operand" ""))))) (const_int 0) (and (eq_attr "type" "call") (match_operand 0 "constant_call_address_operand" "")) @@ -384,7 +396,7 @@ (if_then_else (match_operand 1 "constant_call_address_operand" "") (const_string "none") (const_string "load")) - (and (eq_attr "type" "alu1,negnot,ishift1") + (and (eq_attr "type" "alu1,negnot,ishift1,sselog1") (match_operand 1 "memory_operand" "")) (const_string "both") (and (match_operand 0 "memory_operand" "") @@ -398,7 +410,7 @@ "!alu1,negnot,ishift1, imov,imovx,icmp,test, fmov,fcmp,fsgn, - sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt, + sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1, mmx,mmxmov,mmxcmp,mmxcvt") (match_operand 2 "memory_operand" "")) (const_string "load") @@ -431,13 +443,29 @@ ;; Defines rounding mode of an FP operation. -(define_attr "i387_cw" "floor,ceil,trunc,mask_pm,uninitialized,any" +(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any" (const_string "any")) ;; Describe a user's asm statement. (define_asm_attributes [(set_attr "length" "128") (set_attr "type" "multi")]) + +;; All x87 floating point modes +(define_mode_macro X87MODEF [SF DF XF]) + +;; All integer modes handled by x87 fisttp operator. +(define_mode_macro X87MODEI [HI SI DI]) + +;; All integer modes handled by integer x87 operators. +(define_mode_macro X87MODEI12 [HI SI]) + +;; All SSE floating point modes +(define_mode_macro SSEMODEF [SF DF]) + +;; All integer modes handled by SSE cvtts?2si* operators. 
+(define_mode_macro SSEMODEI24 [SI DI]) + ;; Scheduling descriptions @@ -447,9 +475,10 @@ (include "athlon.md") -;; Operand and operator predicates +;; Operand and operator predicates and constraints (include "predicates.md") +(include "constraints.md") ;; Compare instructions. @@ -458,6 +487,19 @@ ;; actually generating RTL. The bCOND or sCOND (emitted immediately ;; after the cmp) will actually emit the cmpM. +(define_expand "cmpti" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "x86_64_general_operand" "")))] + "TARGET_64BIT" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[0] = force_reg (TImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + (define_expand "cmpdi" [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:DI 0 "nonimmediate_operand" "") @@ -760,8 +802,8 @@ (define_expand "cmpxf" [(set (reg:CC FLAGS_REG) - (compare:CC (match_operand:XF 0 "cmp_fp_expander_operand" "") - (match_operand:XF 1 "cmp_fp_expander_operand" "")))] + (compare:CC (match_operand:XF 0 "nonmemory_operand" "") + (match_operand:XF 1 "nonmemory_operand" "")))] "TARGET_80387" { ix86_compare_op0 = operands[0]; @@ -773,7 +815,7 @@ [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:DF 0 "cmp_fp_expander_operand" "") (match_operand:DF 1 "cmp_fp_expander_operand" "")))] - "TARGET_80387 || TARGET_SSE2" + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" { ix86_compare_op0 = operands[0]; ix86_compare_op1 = operands[1]; @@ -784,7 +826,7 @@ [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:SF 0 "cmp_fp_expander_operand" "") (match_operand:SF 1 "cmp_fp_expander_operand" "")))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" { ix86_compare_op0 = operands[0]; ix86_compare_op1 = operands[1]; @@ -800,41 +842,26 @@ ;; We may not use "#" to split and emit these, since the REG_DEAD notes ;; used to manage the reg stack popping would not 
be preserved. -(define_insn "*cmpfp_0_sf" - [(set (match_operand:HI 0 "register_operand" "=a") - (unspec:HI - [(compare:CCFP - (match_operand:SF 1 "register_operand" "f") - (match_operand:SF 2 "const0_operand" "X"))] - UNSPEC_FNSTSW))] - "TARGET_80387" - "* return output_fp_compare (insn, operands, 0, 0);" - [(set_attr "type" "multi") - (set_attr "mode" "SF")]) - -(define_insn "*cmpfp_0_df" - [(set (match_operand:HI 0 "register_operand" "=a") - (unspec:HI - [(compare:CCFP - (match_operand:DF 1 "register_operand" "f") - (match_operand:DF 2 "const0_operand" "X"))] - UNSPEC_FNSTSW))] - "TARGET_80387" - "* return output_fp_compare (insn, operands, 0, 0);" - [(set_attr "type" "multi") - (set_attr "mode" "DF")]) - -(define_insn "*cmpfp_0_xf" +(define_insn "*cmpfp_0" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP - (match_operand:XF 1 "register_operand" "f") - (match_operand:XF 2 "const0_operand" "X"))] + (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" "X"))] UNSPEC_FNSTSW))] - "TARGET_80387" + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2])" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") - (set_attr "mode" "XF")]) + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) (define_insn "*cmpfp_sf" [(set (match_operand:HI 0 "register_operand" "=a") @@ -846,6 +873,7 @@ "TARGET_80387" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") + (set_attr "unit" "i387") (set_attr "mode" "SF")]) (define_insn "*cmpfp_df" @@ -858,6 +886,7 @@ "TARGET_80387" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") + (set_attr "unit" "i387") (set_attr "mode" "DF")]) (define_insn "*cmpfp_xf" @@ -870,6 +899,7 @@ "TARGET_80387" "* return output_fp_compare (insn, 
operands, 0, 0);" [(set_attr "type" "multi") + (set_attr "unit" "i387") (set_attr "mode" "XF")]) (define_insn "*cmpfp_u" @@ -884,6 +914,7 @@ && GET_MODE (operands[1]) == GET_MODE (operands[2])" "* return output_fp_compare (insn, operands, 0, 1);" [(set_attr "type" "multi") + (set_attr "unit" "i387") (set (attr "mode") (cond [(match_operand:SF 1 "" "") (const_string "SF") @@ -892,21 +923,22 @@ ] (const_string "XF")))]) -(define_insn "*cmpfp_si" +(define_insn "*cmpfp_<mode>" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP (match_operand 1 "register_operand" "f") (match_operator 3 "float_operator" - [(match_operand:SI 2 "memory_operand" "m")]))] + [(match_operand:X87MODEI12 2 "memory_operand" "m")]))] UNSPEC_FNSTSW))] - "TARGET_80387 && TARGET_USE_FIOP + "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP && FLOAT_MODE_P (GET_MODE (operands[1])) && (GET_MODE (operands [3]) == GET_MODE (operands[1]))" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") + (set_attr "unit" "i387") (set_attr "fp_int_src" "true") - (set_attr "mode" "SI")]) + (set_attr "mode" "<MODE>")]) ;; FP compares, step 2 ;; Move the fpsw to ax. @@ -934,30 +966,11 @@ ;; Pentium Pro can do steps 1 through 3 in one go. 
-(define_insn "*cmpfp_i" +(define_insn "*cmpfp_i_mixed" [(set (reg:CCFP FLAGS_REG) - (compare:CCFP (match_operand 0 "register_operand" "f") - (match_operand 1 "register_operand" "f")))] - "TARGET_80387 && TARGET_CMOVE - && !SSE_FLOAT_MODE_P (GET_MODE (operands[0])) - && FLOAT_MODE_P (GET_MODE (operands[0])) - && GET_MODE (operands[0]) == GET_MODE (operands[1])" - "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "fcmp") - (set (attr "mode") - (cond [(match_operand:SF 1 "" "") - (const_string "SF") - (match_operand:DF 1 "" "") - (const_string "DF") - ] - (const_string "XF"))) - (set_attr "athlon_decode" "vector")]) - -(define_insn "*cmpfp_i_sse" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP (match_operand 0 "register_operand" "f#x,x#f") - (match_operand 1 "nonimmediate_operand" "f#x,xm#f")))] - "TARGET_80387 + (compare:CCFP (match_operand 0 "register_operand" "f,x") + (match_operand 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 0);" @@ -968,11 +981,12 @@ (const_string "DF"))) (set_attr "athlon_decode" "vector")]) -(define_insn "*cmpfp_i_sse_only" +(define_insn "*cmpfp_i_sse" [(set (reg:CCFP FLAGS_REG) (compare:CCFP (match_operand 0 "register_operand" "x") (match_operand 1 "nonimmediate_operand" "xm")))] - "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 0);" [(set_attr "type" "ssecomi") @@ -982,15 +996,15 @@ (const_string "DF"))) (set_attr "athlon_decode" "vector")]) -(define_insn "*cmpfp_iu" - [(set (reg:CCFPU FLAGS_REG) - (compare:CCFPU (match_operand 0 "register_operand" "f") - (match_operand 1 "register_operand" "f")))] +(define_insn "*cmpfp_i_i387" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 
"register_operand" "f") + (match_operand 1 "register_operand" "f")))] "TARGET_80387 && TARGET_CMOVE - && !SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))) && FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" - "* return output_fp_compare (insn, operands, 1, 1);" + "* return output_fp_compare (insn, operands, 1, 0);" [(set_attr "type" "fcmp") (set (attr "mode") (cond [(match_operand:SF 1 "" "") @@ -1001,11 +1015,11 @@ (const_string "XF"))) (set_attr "athlon_decode" "vector")]) -(define_insn "*cmpfp_iu_sse" +(define_insn "*cmpfp_iu_mixed" [(set (reg:CCFPU FLAGS_REG) - (compare:CCFPU (match_operand 0 "register_operand" "f#x,x#f") - (match_operand 1 "nonimmediate_operand" "f#x,xm#f")))] - "TARGET_80387 + (compare:CCFPU (match_operand 0 "register_operand" "f,x") + (match_operand 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" @@ -1016,11 +1030,12 @@ (const_string "DF"))) (set_attr "athlon_decode" "vector")]) -(define_insn "*cmpfp_iu_sse_only" +(define_insn "*cmpfp_iu_sse" [(set (reg:CCFPU FLAGS_REG) (compare:CCFPU (match_operand 0 "register_operand" "x") (match_operand 1 "nonimmediate_operand" "xm")))] - "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" [(set_attr "type" "ssecomi") @@ -1029,6 +1044,25 @@ (const_string "SF") (const_string "DF"))) (set_attr "athlon_decode" "vector")]) + +(define_insn "*cmpfp_iu_387" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "TARGET_80387 && TARGET_CMOVE + && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))) + 
&& FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "fcmp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) + (set_attr "athlon_decode" "vector")]) ;; Move instructions. @@ -1123,59 +1157,35 @@ (define_insn "*movsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r ,m ,!*y,!rm,!*y,!*x,!rm,!*x") + "=r ,m ,*y,*y,?rm,?*y,*x,*x,?r,m ,?*Y,*x") (match_operand:SI 1 "general_operand" - "rinm,rin,*y ,*y ,rm ,*x ,*x ,rm"))] - "(TARGET_INTER_UNIT_MOVES || optimize_size) - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "rinm,rin,C ,*y,*y ,rm ,C ,*x,*Y,*x,r ,m "))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { - case TYPE_SSEMOV: + case TYPE_SSELOG1: if (get_attr_mode (insn) == MODE_TI) - return "movdqa\t{%1, %0|%0, %1}"; - return "movd\t{%1, %0|%0, %1}"; - - case TYPE_MMXMOV: - if (get_attr_mode (insn) == MODE_DI) - return "movq\t{%1, %0|%0, %1}"; - return "movd\t{%1, %0|%0, %1}"; - - case TYPE_LEA: - return "lea{l}\t{%1, %0|%0, %1}"; - - default: - if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) - abort(); - return "mov{l}\t{%1, %0|%0, %1}"; - } -} - [(set (attr "type") - (cond [(eq_attr "alternative" "2,3,4") - (const_string "mmxmov") - (eq_attr "alternative" "5,6,7") - (const_string "ssemov") - (and (ne (symbol_ref "flag_pic") (const_int 0)) - (match_operand:SI 1 "symbolic_operand" "")) - (const_string "lea") - ] - (const_string "imov"))) - (set_attr "mode" "SI,SI,DI,SI,SI,TI,SI,SI")]) + return "pxor\t%0, %0"; + return "xorps\t%0, %0"; -(define_insn "*movsi_1_nointernunit" - [(set (match_operand:SI 0 "nonimmediate_operand" - "=r ,m ,!*y,!m,!*y,!*x,!m,!*x") - (match_operand:SI 1 "general_operand" - "rinm,rin,*y ,*y,m ,*x ,*x,m"))] - "(!TARGET_INTER_UNIT_MOVES && 
!optimize_size) - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (get_attr_type (insn)) - { case TYPE_SSEMOV: - if (get_attr_mode (insn) == MODE_TI) - return "movdqa\t{%1, %0|%0, %1}"; - return "movd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_TI: + return "movdqa\t{%1, %0|%0, %1}"; + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_SI: + return "movd\t{%1, %0|%0, %1}"; + case MODE_SF: + return "movss\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + case TYPE_MMXADD: + return "pxor\t%0, %0"; case TYPE_MMXMOV: if (get_attr_mode (insn) == MODE_DI) @@ -1186,22 +1196,36 @@ return "lea{l}\t{%1, %0|%0, %1}"; default: - if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) - abort(); + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); return "mov{l}\t{%1, %0|%0, %1}"; } } [(set (attr "type") - (cond [(eq_attr "alternative" "2,3,4") + (cond [(eq_attr "alternative" "2") + (const_string "mmxadd") + (eq_attr "alternative" "3,4,5") (const_string "mmxmov") - (eq_attr "alternative" "5,6,7") + (eq_attr "alternative" "6") + (const_string "sselog1") + (eq_attr "alternative" "7,8,9,10,11") (const_string "ssemov") - (and (ne (symbol_ref "flag_pic") (const_int 0)) - (match_operand:SI 1 "symbolic_operand" "")) + (match_operand:DI 1 "pic_32bit_operand" "") (const_string "lea") ] (const_string "imov"))) - (set_attr "mode" "SI,SI,DI,SI,SI,TI,SI,SI")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (const_string "DI") + (eq_attr "alternative" "6,7") + (if_then_else + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (and (eq_attr "alternative" "8,9,10,11") + (eq (symbol_ref "TARGET_SSE2") (const_int 0))) + (const_string "SF") + ] + (const_string "SI")))]) ;; Stores and loads of ax to arbitrary constant address. 
;; We fake an second form of instruction to force reload to load address @@ -1253,14 +1277,12 @@ "ix86_expand_move (HImode, operands); DONE;") (define_insn "*pushhi2" - [(set (match_operand:HI 0 "push_operand" "=<,<") - (match_operand:HI 1 "general_no_elim_operand" "n,r*m"))] + [(set (match_operand:HI 0 "push_operand" "=X") + (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))] "!TARGET_64BIT" - "@ - push{w}\t{|WORD PTR }%1 - push{w}\t%1" + "push{l}\t%k1" [(set_attr "type" "push") - (set_attr "mode" "HI")]) + (set_attr "mode" "SI")]) ;; For 64BIT abi we always round up to 8 bytes. (define_insn "*pushhi2_rex64" @@ -1269,7 +1291,7 @@ "TARGET_64BIT" "push{q}\t%q1" [(set_attr "type" "push") - (set_attr "mode" "QI")]) + (set_attr "mode" "DI")]) (define_insn "*movhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") @@ -1414,18 +1436,16 @@ "ix86_expand_move (QImode, operands); DONE;") ;; emit_push_insn when it calls move_by_pieces requires an insn to -;; "push a byte". But actually we use pushw, which has the effect -;; of rounding the amount pushed up to a halfword. +;; "push a byte". But actually we use pushl, which has the effect +;; of rounding the amount pushed up to a word. (define_insn "*pushqi2" - [(set (match_operand:QI 0 "push_operand" "=X,X") - (match_operand:QI 1 "nonmemory_no_elim_operand" "n,r"))] + [(set (match_operand:QI 0 "push_operand" "=X") + (match_operand:QI 1 "nonmemory_no_elim_operand" "rn"))] "!TARGET_64BIT" - "@ - push{w}\t{|word ptr }%1 - push{w}\t%w1" + "push{l}\t%k1" [(set_attr "type" "push") - (set_attr "mode" "HI")]) + (set_attr "mode" "SI")]) ;; For 64BIT abi we always round up to 8 bytes. (define_insn "*pushqi2_rex64" @@ -1434,7 +1454,7 @@ "TARGET_64BIT" "push{q}\t%q1" [(set_attr "type" "push") - (set_attr "mode" "QI")]) + (set_attr "mode" "DI")]) ;; Situation is quite tricky about when to choose full sized (SImode) move ;; over QImode moves. 
For Q_REG -> Q_REG move we use full size only for @@ -1454,8 +1474,7 @@ switch (get_attr_type (insn)) { case TYPE_IMOVX: - if (!ANY_QI_REG_P (operands[1]) && GET_CODE (operands[1]) != MEM) - abort (); + gcc_assert (ANY_QI_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM); return "movz{bl|x}\t{%1, %k0|%k0, %1}"; default: if (get_attr_mode (insn) == MODE_SI) @@ -1465,7 +1484,10 @@ } } [(set (attr "type") - (cond [(ne (symbol_ref "optimize_size") (const_int 0)) + (cond [(and (eq_attr "alternative" "5") + (not (match_operand:QI 1 "aligned_operand" ""))) + (const_string "imovx") + (ne (symbol_ref "optimize_size") (const_int 0)) (const_string "imov") (and (eq_attr "alternative" "3") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") @@ -1490,8 +1512,12 @@ (const_string "SI") (and (eq_attr "type" "imov") (and (eq_attr "alternative" "0,1") - (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY") - (const_int 0)))) + (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (and (eq (symbol_ref "optimize_size") + (const_int 0)) + (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)))))) (const_string "SI") ;; Avoid partial register stalls when not using QImode arithmetic (and (eq_attr "type" "imov") @@ -1513,8 +1539,7 @@ rtx op0, op1, op2; op0 = operands[0]; op1 = operands[1]; op2 = operands[2]; - if (reg_overlap_mentioned_p (op2, op0)) - abort (); + gcc_assert (!reg_overlap_mentioned_p (op2, op0)); if (! 
q_regs_operand (op1, QImode)) { emit_insn (gen_movqi (op2, op1)); @@ -1682,6 +1707,16 @@ (set_attr "memory" "load") (set_attr "mode" "QI")]) +(define_insn "*movdi_extzv_1" + [(set (match_operand:DI 0 "register_operand" "=R") + (zero_extract:DI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "TARGET_64BIT" + "movz{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + (define_insn "*movsi_extzv_1" [(set (match_operand:SI 0 "register_operand" "=R") (zero_extract:SI (match_operand 1 "ext_register_operand" "Q") @@ -1831,7 +1866,8 @@ (define_split [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "immediate_operand" ""))] - "TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed) + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? flow2_completed : reload_completed) && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 0) (match_dup 1)) @@ -1899,20 +1935,27 @@ (set_attr "length_immediate" "1")]) (define_insn "*movdi_2" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y,!m,!*x,!*x") - (match_operand:DI 1 "general_operand" "riFo,riF,*y,m,*x,*x,m"))] - "!TARGET_64BIT - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r ,o ,*y,m*y,*y,*Y,m ,*Y,*Y,*x,m ,*x,*x") + (match_operand:DI 1 "general_operand" + "riFo,riF,C ,*y ,m ,C ,*Y,*Y,m ,C ,*x,*x,m "))] + "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ # # + pxor\t%0, %0 movq\t{%1, %0|%0, %1} movq\t{%1, %0|%0, %1} + pxor\t%0, %0 movq\t{%1, %0|%0, %1} movdqa\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1}" - [(set_attr "type" "*,*,mmx,mmx,ssemov,ssemov,ssemov") - (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI")]) + movq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movlps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" 
"*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov") + (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")]) (define_split [(set (match_operand:DI 0 "push_operand" "") @@ -1934,17 +1977,15 @@ (define_insn "*movdi_1_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r,r ,r,mr,!mr,!*y,!rm,!*y,!*x,!rm,!*x,!*x,!*y") + "=r,r ,r,m ,!m,*y,*y,?rm,?*y,*x,*x,?rm,?*x,?*x,?*y") (match_operand:DI 1 "general_operand" - "Z ,rem,i,re,n ,*y ,*y ,rm ,*x ,*x ,rm ,*y ,*x"))] - "TARGET_64BIT - && (TARGET_INTER_UNIT_MOVES || optimize_size) - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "Z ,rem,i,re,n ,C ,*y,*y ,rm ,C ,*x,*x ,rm ,*y ,*x"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { case TYPE_SSECVT: - if (which_alternative == 11) + if (which_alternative == 13) return "movq2dq\t{%1, %0|%0, %1}"; else return "movdq2q\t{%1, %0|%0, %1}"; @@ -1958,13 +1999,15 @@ if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])) return "movd\t{%1, %0|%0, %1}"; return "movq\t{%1, %0|%0, %1}"; + case TYPE_SSELOG1: + case TYPE_MMXADD: + return "pxor\t%0, %0"; case TYPE_MULTI: return "#"; case TYPE_LEA: return "lea{q}\t{%a1, %0|%0, %a1}"; default: - if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) - abort (); + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); if (get_attr_mode (insn) == MODE_SI) return "mov{l}\t{%k1, %k0|%k0, %k1}"; else if (which_alternative == 2) @@ -1974,70 +2017,25 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "5,6,7") + (cond [(eq_attr "alternative" "5") + (const_string "mmxadd") + (eq_attr "alternative" "6,7,8") (const_string "mmxmov") - (eq_attr "alternative" "8,9,10") + (eq_attr "alternative" "9") + (const_string "sselog1") + (eq_attr "alternative" "10,11,12") (const_string "ssemov") - (eq_attr "alternative" "11,12") + (eq_attr "alternative" "13,14") (const_string "ssecvt") (eq_attr "alternative" "4") 
(const_string "multi") - (and (ne (symbol_ref "flag_pic") (const_int 0)) - (match_operand:DI 1 "symbolic_operand" "")) + (match_operand:DI 1 "pic_32bit_operand" "") (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*") - (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*") - (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI,DI,DI")]) - -(define_insn "*movdi_1_rex64_nointerunit" - [(set (match_operand:DI 0 "nonimmediate_operand" - "=r,r ,r,mr,!mr,!*y,!m,!*y,!*Y,!m,!*Y") - (match_operand:DI 1 "general_operand" - "Z,rem,i,re,n ,*y ,*y,m ,*Y ,*Y,m"))] - "TARGET_64BIT - && (!TARGET_INTER_UNIT_MOVES && !optimize_size) - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (get_attr_type (insn)) - { - case TYPE_SSEMOV: - if (get_attr_mode (insn) == MODE_TI) - return "movdqa\t{%1, %0|%0, %1}"; - /* FALLTHRU */ - case TYPE_MMXMOV: - return "movq\t{%1, %0|%0, %1}"; - case TYPE_MULTI: - return "#"; - case TYPE_LEA: - return "lea{q}\t{%a1, %0|%0, %a1}"; - default: - if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) - abort (); - if (get_attr_mode (insn) == MODE_SI) - return "mov{l}\t{%k1, %k0|%k0, %k1}"; - else if (which_alternative == 2) - return "movabs{q}\t{%1, %0|%0, %1}"; - else - return "mov{q}\t{%1, %0|%0, %1}"; - } -} - [(set (attr "type") - (cond [(eq_attr "alternative" "5,6,7") - (const_string "mmxmov") - (eq_attr "alternative" "8,9,10") - (const_string "ssemov") - (eq_attr "alternative" "4") - (const_string "multi") - (and (ne (symbol_ref "flag_pic") (const_int 0)) - (match_operand:DI 1 "symbolic_operand" "")) - (const_string "lea") - ] - (const_string "imov"))) - (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*") - (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*") - (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI")]) + (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "mode" 
"SI,DI,DI,DI,SI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI")]) ;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address @@ -2098,7 +2096,8 @@ (define_split [(set (match_operand:DI 0 "memory_operand" "") (match_operand:DI 1 "immediate_operand" ""))] - "TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed) + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? flow2_completed : reload_completed) && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 2) (match_dup 3)) @@ -2117,46 +2116,134 @@ (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector")]) -(define_expand "movsf" - [(set (match_operand:SF 0 "nonimmediate_operand" "") - (match_operand:SF 1 "general_operand" ""))] - "" - "ix86_expand_move (SFmode, operands); DONE;") +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] + "TARGET_SSE || TARGET_64BIT" +{ + if (TARGET_64BIT) + ix86_expand_move (TImode, operands); + else + ix86_expand_vector_move (TImode, operands); + DONE; +}) -(define_insn "*pushsf" - [(set (match_operand:SF 0 "push_operand" "=<,<,<") - (match_operand:SF 1 "general_no_elim_operand" "f#rx,rFm#fx,x#rf"))] - "!TARGET_64BIT" +(define_insn "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (which_alternative) { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; case 1: - return "push{l}\t%1"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set (attr "mode") + (cond [(ior 
(eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (ne (symbol_ref "optimize_size") (const_int 0))) + (const_string "V4SF") + (and (eq_attr "alternative" "2") + (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0))) + (const_string "V4SF")] + (const_string "TI")))]) +(define_insn "*movti_rex64" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; default: - /* This insn should be already split before reg-stack. */ - abort (); + gcc_unreachable (); } } + [(set_attr "type" "*,*,sselog1,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "reload_completed && !SSE_REG_P (operands[0]) + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "ix86_expand_move (SFmode, operands); DONE;") + +(define_insn "*pushsf" + [(set (match_operand:SF 0 "push_operand" "=<,<,<") + (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))] + 
"!TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{l}\t%1"; +} [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") (set_attr "mode" "SF,SI,SF")]) (define_insn "*pushsf_rex64" [(set (match_operand:SF 0 "push_operand" "=X,X,X") - (match_operand:SF 1 "nonmemory_no_elim_operand" "f#rx,rF#fx,x#rf"))] + (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] "TARGET_64BIT" { - switch (which_alternative) - { - case 1: - return "push{q}\t%q1"; - - default: - /* This insn should be already split before reg-stack. */ - abort (); - } + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{q}\t%q1"; } [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") (set_attr "mode" "SF,DI,SF")]) (define_split @@ -2164,11 +2251,10 @@ (match_operand:SF 1 "memory_operand" ""))] "reload_completed && GET_CODE (operands[1]) == MEM - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))" + && constant_pool_reference_p (operands[1])" [(set (match_dup 0) (match_dup 1))] - "operands[1] = get_pool_constant (XEXP (operands[1], 0));") + "operands[1] = avoid_constant_pool_reference (operands[1]);") ;; %%% Kill this when call knows how to work this out. 
@@ -2188,101 +2274,10 @@ (define_insn "*movsf_1" [(set (match_operand:SF 0 "nonimmediate_operand" - "=f#xr,m ,f#xr,r#xf ,m ,x#rf,x#rf,x#rf ,m ,!*y,!rm,!*y") - (match_operand:SF 1 "general_operand" - "fm#rx,f#rx,G ,rmF#fx,Fr#fx,C ,x ,xm#rf,x#rf,rm ,*y ,*y"))] - "(TARGET_INTER_UNIT_MOVES || optimize_size) - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && (reload_in_progress || reload_completed - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], SFmode))" -{ - switch (which_alternative) - { - case 0: - return output_387_reg_move (insn, operands); - - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - - case 2: - return standard_80387_constant_opcode (operands[1]); - - case 3: - case 4: - return "mov{l}\t{%1, %0|%0, %1}"; - case 5: - if (get_attr_mode (insn) == MODE_TI) - return "pxor\t%0, %0"; - else - return "xorps\t%0, %0"; - case 6: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movss\t{%1, %0|%0, %1}"; - case 7: - case 8: - return "movss\t{%1, %0|%0, %1}"; - - case 9: - case 10: - return "movd\t{%1, %0|%0, %1}"; - - case 11: - return "movq\t{%1, %0|%0, %1}"; - - default: - abort(); - } -} - [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") - (set (attr "mode") - (cond [(eq_attr "alternative" "3,4,9,10") - (const_string "SI") - (eq_attr "alternative" "5") - (if_then_else - (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") - (const_int 0)) - (ne (symbol_ref "TARGET_SSE2") - (const_int 0))) - (eq (symbol_ref "optimize_size") - (const_int 0))) - (const_string "TI") - (const_string "V4SF")) - /* For architectures resolving dependencies on - whole SSE registers use APS move to break dependency - chains, otherwise use short move to avoid extra work. 
- - Do the same for architectures resolving dependencies on - the parts. While in DF mode it is better to always handle - just register parts, the SF mode is different due to lack - of instructions to load just part of the register. It is - better to maintain the whole registers in single format - to avoid problems on using packed logical operations. */ - (eq_attr "alternative" "6") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") - (const_int 0)) - (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") - (const_int 0))) - (const_string "V4SF") - (const_string "SF")) - (eq_attr "alternative" "11") - (const_string "DI")] - (const_string "SF")))]) - -(define_insn "*movsf_1_nointerunit" - [(set (match_operand:SF 0 "nonimmediate_operand" - "=f#xr,m ,f#xr,r#xf ,m ,x#rf,x#rf,x#rf ,m ,!*y,!m,!*y") + "=f,m ,f,r ,m ,x,x,x ,m ,!*y,!rm,!*y") (match_operand:SF 1 "general_operand" - "fm#rx,f#rx,G ,rmF#fx,Fr#fx,C ,x ,xm#rf,x#rf,m ,*y,*y"))] - "(!TARGET_INTER_UNIT_MOVES && !optimize_size) - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + "fm,f,G ,rmF,Fr,C ,x ,xm,x,rm ,*y ,*y"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE @@ -2327,10 +2322,10 @@ return "movq\t{%1, %0|%0, %1}"; default: - abort(); + gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") + [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") (set (attr "mode") (cond [(eq_attr "alternative" "3,4,9,10") (const_string "SI") @@ -2394,24 +2389,26 @@ (define_insn "*pushdf_nointeger" [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") - (match_operand:DF 1 "general_no_elim_operand" "f#Y,Fo#fY,*r#fY,Y#f"))] + (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y"))] "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES" { /* This insn 
should be already split before reg-stack. */ - abort (); + gcc_unreachable (); } [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*,*") (set_attr "mode" "DF,SI,SI,DF")]) (define_insn "*pushdf_integer" [(set (match_operand:DF 0 "push_operand" "=<,<,<") - (match_operand:DF 1 "general_no_elim_operand" "f#rY,rFo#fY,Y#rf"))] + (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y"))] "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" { /* This insn should be already split before reg-stack. */ - abort (); + gcc_unreachable (); } [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") (set_attr "mode" "DF,SI,DF")]) ;; %%% Kill this when call knows how to work this out. @@ -2444,9 +2441,9 @@ (define_insn "*movdf_nointeger" [(set (match_operand:DF 0 "nonimmediate_operand" - "=f#x,m ,f#x,*r ,o ,x#f,x#f,x#f ,m") + "=f,m,f,*r ,o ,Y*x,Y*x,Y*x ,m ") (match_operand:DF 1 "general_operand" - "fm#x,f#x,G ,*roF,F*r,C ,x#f,xHm#f,x#f"))] + "fm,f,G,*roF,F*r,C ,Y*x,mY*x,Y*x"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) && (reload_in_progress || reload_completed @@ -2481,7 +2478,7 @@ case MODE_TI: return "pxor\t%0, %0"; default: - abort (); + gcc_unreachable (); } case 6: case 7: @@ -2500,28 +2497,29 @@ return "movsd\t{%1, %0|%0, %1}"; case MODE_V1DF: return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + return "movlps\t{%1, %0|%0, %1}"; default: - abort (); + gcc_unreachable (); } default: - abort(); + gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") (set (attr "mode") - (cond [(eq_attr "alternative" "3,4") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") (const_string "SI") /* For SSE1, we have many fewer alternatives. 
*/ (eq (symbol_ref "TARGET_SSE2") (const_int 0)) (cond [(eq_attr "alternative" "5,6") - (if_then_else - (ne (symbol_ref "optimize_size") (const_int 0)) - (const_string "V4SF") - (const_string "TI")) + (const_string "V4SF") ] - (const_string "DI")) + (const_string "V2SF")) /* xorps is one byte shorter. */ (eq_attr "alternative" "5") @@ -2563,9 +2561,9 @@ (define_insn "*movdf_integer" [(set (match_operand:DF 0 "nonimmediate_operand" - "=f#Yr,m ,f#Yr,r#Yf ,o ,Y#rf,Y#rf,Y#rf ,m") + "=f,m,f,r ,o ,Y*x,Y*x,Y*x,m ") (match_operand:DF 1 "general_operand" - "fm#Yr,f#Yr,G ,roF#Yf,Fr#Yf,C ,Y#rf,Ym#rf,Y#rf"))] + "fm,f,G,roF,Fr,C ,Y*x,m ,Y*x"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT) && (reload_in_progress || reload_completed @@ -2601,7 +2599,7 @@ case MODE_TI: return "pxor\t%0, %0"; default: - abort (); + gcc_unreachable (); } case 6: case 7: @@ -2620,28 +2618,29 @@ return "movsd\t{%1, %0|%0, %1}"; case MODE_V1DF: return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + return "movlps\t{%1, %0|%0, %1}"; default: - abort (); + gcc_unreachable (); } default: - abort(); + gcc_unreachable(); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") (set (attr "mode") - (cond [(eq_attr "alternative" "3,4") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") (const_string "SI") /* For SSE1, we have many fewer alternatives. */ (eq (symbol_ref "TARGET_SSE2") (const_int 0)) (cond [(eq_attr "alternative" "5,6") - (if_then_else - (ne (symbol_ref "optimize_size") (const_int 0)) - (const_string "V4SF") - (const_string "TI")) + (const_string "V4SF") ] - (const_string "DI")) + (const_string "V2SF")) /* xorps is one byte shorter. */ (eq_attr "alternative" "5") @@ -2729,20 +2728,22 @@ "optimize_size" { /* This insn should be already split before reg-stack. 
*/ - abort (); + gcc_unreachable (); } [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") (set_attr "mode" "XF,SI,SI")]) (define_insn "*pushxf_integer" [(set (match_operand:XF 0 "push_operand" "=<,<") - (match_operand:XF 1 "general_no_elim_operand" "f#r,ro#f"))] + (match_operand:XF 1 "general_no_elim_operand" "f,ro"))] "!optimize_size" { /* This insn should be already split before reg-stack. */ - abort (); + gcc_unreachable (); } [(set_attr "type" "multi") + (set_attr "unit" "i387,*") (set_attr "mode" "XF,SI")]) (define_split @@ -2799,15 +2800,16 @@ case 3: case 4: return "#"; + default: + gcc_unreachable (); } - abort(); } [(set_attr "type" "fmov,fmov,fmov,multi,multi") (set_attr "mode" "XF,XF,XF,SI,SI")]) (define_insn "*movxf_integer" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o") - (match_operand:XF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))] + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))] "!optimize_size && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed @@ -2832,8 +2834,10 @@ case 3: case 4: return "#"; + + default: + gcc_unreachable (); } - abort(); } [(set_attr "type" "fmov,fmov,fmov,multi,multi") (set_attr "mode" "XF,XF,XF,SI,SI")]) @@ -2860,11 +2864,10 @@ && GET_CODE (operands[1]) == MEM && (GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode) - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))" + && constant_pool_reference_p (operands[1])" [(set (match_dup 0) (match_dup 1))] { - rtx c = get_pool_constant (XEXP (operands[1], 0)); + rtx c = avoid_constant_pool_reference (operands[1]); rtx r = operands[0]; if (GET_CODE (r) == SUBREG) @@ -2900,6 +2903,67 @@ } [(set_attr "type" "fxch") (set_attr "mode" "XF")]) + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") 
+ (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_64BIT" +{ + ix86_expand_move (TFmode, operands); + DONE; +}) + +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "*,*,sselog1,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "reload_completed && !SSE_REG_P (operands[0]) + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") ;; Zero extension instructions @@ -2968,13 +3032,14 @@ [(set_attr "type" "imovx,alu1") (set_attr "mode" "HI")]) -(define_insn "*zero_extendqihi2_movzbw" +; zero extend to SImode here to avoid partial register stalls +(define_insn "*zero_extendqihi2_movzbl" [(set (match_operand:HI 0 "register_operand" "=r") (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed" - "movz{bw|x}\t{%1, %0|%0, %1}" + "movz{bl|x}\t{%1, %k0|%k0, %k1}" [(set_attr "type" "imovx") - 
(set_attr "mode" "HI")]) + (set_attr "mode" "SI")]) ;; For the movzbw case strip only the clobber (define_split @@ -3096,24 +3161,10 @@ ") (define_insn "zero_extendsidi2_32" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,!?y,!?Y") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r,m,m"))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && !TARGET_INTER_UNIT_MOVES" - "@ - # - # - # - movd\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1}" - [(set_attr "mode" "SI,SI,SI,DI,TI") - (set_attr "type" "multi,multi,multi,mmxmov,ssemov")]) - -(define_insn "*zero_extendsidi2_32_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,!?y,!?Y") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,?*y,?*Y") (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r,rm,rm"))) (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES" + "!TARGET_64BIT" "@ # # @@ -3124,21 +3175,9 @@ (set_attr "type" "multi,multi,multi,mmxmov,ssemov")]) (define_insn "zero_extendsidi2_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!?y,!?Y") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0,m,m")))] - "TARGET_64BIT && !TARGET_INTER_UNIT_MOVES" - "@ - mov\t{%k1, %k0|%k0, %k1} - # - movd\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1}" - [(set_attr "type" "imovx,imov,mmxmov,ssemov") - (set_attr "mode" "SI,DI,DI,TI")]) - -(define_insn "*zero_extendsidi2_rex64_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!?y,!*?") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*y,?*Y") (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0,rm,rm")))] - "TARGET_64BIT && TARGET_INTER_UNIT_MOVES" + "TARGET_64BIT" "@ mov\t{%k1, %k0|%k0, %k1} # @@ -3174,24 +3213,20 @@ "split_di (&operands[0], 1, &operands[3], &operands[4]);") (define_insn "zero_extendhidi2" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + [(set 
(match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))] "TARGET_64BIT" - "@ - movz{wl|x}\t{%1, %k0|%k0, %1} - movz{wq|x}\t{%1, %0|%0, %1}" + "movz{wl|x}\t{%1, %k0|%k0, %1}" [(set_attr "type" "imovx") - (set_attr "mode" "SI,DI")]) + (set_attr "mode" "DI")]) (define_insn "zero_extendqidi2" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "Q,m")))] + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "rm")))] "TARGET_64BIT" - "@ - movz{bl|x}\t{%1, %k0|%k0, %1} - movz{bq|x}\t{%1, %0|%0, %1}" + "movz{bl|x}\t{%1, %k0|%k0, %1}" [(set_attr "type" "imovx") - (set_attr "mode" "SI,DI")]) + (set_attr "mode" "DI")]) ;; Sign extension instructions @@ -3487,14 +3522,24 @@ /* ??? Needed for compress_float_constant since all fp constants are LEGITIMATE_CONSTANT_P. */ if (GET_CODE (operands[1]) == CONST_DOUBLE) - operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (SFmode, operands[1]); -}) + { + if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387) + && standard_80387_constant_p (operands[1]) > 0) + { + operands[1] = simplify_const_unary_operation + (FLOAT_EXTEND, DFmode, operands[1], SFmode); + emit_move_insn_1 (operands[0], operands[1]); + DONE; + } + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); + } + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (SFmode, operands[1]); +}) (define_insn "*extendsfdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m#fY,Y#f") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm#Y,f#Y,mY#f")))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,Y") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f,mY")))] "TARGET_SSE2 && TARGET_MIX_SSE_I387 && 
(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { @@ -3513,14 +3558,14 @@ return "cvtss2sd\t{%1, %0|%0, %1}"; default: - abort (); + gcc_unreachable (); } } [(set_attr "type" "fmov,fmov,ssecvt") (set_attr "mode" "SF,XF,DF")]) (define_insn "*extendsfdf2_sse" - [(set (match_operand:DF 0 "register_operand" "=Y") + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y") (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "mY")))] "TARGET_SSE2 && TARGET_SSE_MATH && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" @@ -3546,7 +3591,7 @@ return "fst%z0\t%y0"; default: - abort (); + gcc_unreachable (); } } [(set_attr "type" "fmov") @@ -3560,7 +3605,16 @@ /* ??? Needed for compress_float_constant since all fp constants are LEGITIMATE_CONSTANT_P. */ if (GET_CODE (operands[1]) == CONST_DOUBLE) - operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); + { + if (standard_80387_constant_p (operands[1]) > 0) + { + operands[1] = simplify_const_unary_operation + (FLOAT_EXTEND, XFmode, operands[1], SFmode); + emit_move_insn_1 (operands[0], operands[1]); + DONE; + } + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); + } if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (SFmode, operands[1]); }) @@ -3585,7 +3639,7 @@ return "fstp%z0\t%y0\n\tfld%z0\t%y0"; default: - abort (); + gcc_unreachable (); } } [(set_attr "type" "fmov") @@ -3599,7 +3653,16 @@ /* ??? Needed for compress_float_constant since all fp constants are LEGITIMATE_CONSTANT_P. 
*/ if (GET_CODE (operands[1]) == CONST_DOUBLE) - operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); + { + if (standard_80387_constant_p (operands[1]) > 0) + { + operands[1] = simplify_const_unary_operation + (FLOAT_EXTEND, XFmode, operands[1], DFmode); + emit_move_insn_1 (operands[0], operands[1]); + DONE; + } + operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); + } if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (DFmode, operands[1]); }) @@ -3624,7 +3687,7 @@ return "fstp%z0\t%y0"; default: - abort (); + gcc_unreachable (); } } [(set_attr "type" "fmov") @@ -3653,7 +3716,7 @@ ; else { - rtx temp = assign_386_stack_local (SFmode, 0); + rtx temp = assign_386_stack_local (SFmode, SLOT_TEMP); emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp)); DONE; } @@ -3683,7 +3746,7 @@ case 2: return "cvtsd2ss\t{%1, %0|%0, %1}"; default: - abort (); + gcc_unreachable (); } } [(set_attr "type" "fmov,fmov,ssecvt") @@ -3728,10 +3791,11 @@ case 2: return "cvtsd2ss\t{%1, %0|%0, %1}"; default: - abort (); + gcc_unreachable (); } } [(set_attr "type" "fmov,multi,ssecvt") + (set_attr "unit" "*,i387,*") (set_attr "mode" "SF")]) (define_insn "*truncdfsf_i387" @@ -3751,10 +3815,27 @@ case 1: return "#"; default: - abort (); + gcc_unreachable (); } } [(set_attr "type" "fmov,multi") + (set_attr "unit" "*,i387") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf2_i387_1" + [(set (match_operand:SF 0 "memory_operand" "=m") + (float_truncate:SF + (match_operand:DF 1 "register_operand" "f")))] + "TARGET_80387 + && !(TARGET_SSE2 && TARGET_SSE_MATH) + && !TARGET_MIX_SSE_I387" +{ + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov") (set_attr "mode" "SF")]) (define_split @@ -3787,28 +3868,24 @@ DONE; } else - operands[2] = assign_386_stack_local (SFmode, 0); + operands[2] = assign_386_stack_local (SFmode, 
SLOT_TEMP); }) (define_insn "*truncxfsf2_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf") + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r,?x") (float_truncate:SF (match_operand:XF 1 "register_operand" "f,f,f,f"))) (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] "TARGET_MIX_SSE_I387" { - switch (which_alternative) - { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort(); - } + gcc_assert (!which_alternative); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; } [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "unit" "*,i387,i387,i387") (set_attr "mode" "SF")]) (define_insn "truncxfsf2_i387_noop" @@ -3822,24 +3899,20 @@ (set_attr "mode" "SF")]) (define_insn "*truncxfsf2_i387" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#r,?r#f") + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r") (float_truncate:SF (match_operand:XF 1 "register_operand" "f,f,f"))) (clobber (match_operand:SF 2 "memory_operand" "=X,m,m"))] "TARGET_80387" { - switch (which_alternative) - { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort (); - } + gcc_assert (!which_alternative); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; } [(set_attr "type" "fmov,multi,multi") + (set_attr "unit" "*,i387,i387") (set_attr "mode" "SF")]) (define_insn "*truncxfsf2_i387_1" @@ -3893,29 +3966,24 @@ DONE; } else - operands[2] = assign_386_stack_local (DFmode, 0); + operands[2] = assign_386_stack_local (DFmode, SLOT_TEMP); }) (define_insn "*truncxfdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#rY,?r#fY,?Y#rf") + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r,?Y") (float_truncate:DF 
(match_operand:XF 1 "register_operand" "f,f,f,f"))) (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] "TARGET_SSE2 && TARGET_MIX_SSE_I387" { - switch (which_alternative) - { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort(); - } - abort (); + gcc_assert (!which_alternative); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; } [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "unit" "*,i387,i387,i387") (set_attr "mode" "DF")]) (define_insn "truncxfdf2_i387_noop" @@ -3929,24 +3997,20 @@ (set_attr "mode" "DF")]) (define_insn "*truncxfdf2_i387" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#r,?r#f") + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r") (float_truncate:DF (match_operand:XF 1 "register_operand" "f,f,f"))) (clobber (match_operand:DF 2 "memory_operand" "=X,m,m"))] "TARGET_80387" { - switch (which_alternative) - { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort (); - } + gcc_assert (!which_alternative); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; } [(set_attr "type" "fmov,multi,multi") + (set_attr "unit" "*,i387,i387") (set_attr "mode" "DF")]) (define_insn "*truncxfdf2_i387_1" @@ -3982,8 +4046,6 @@ [(set (match_dup 0) (float_truncate:DF (match_dup 1)))] "") -;; %%% Break up all these bad boys. - ;; Signed conversion to DImode. 
(define_expand "fix_truncxfdi2" @@ -3991,99 +4053,287 @@ (fix:DI (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "TARGET_80387" - "") +{ + if (TARGET_FISTTP) + { + emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) -(define_expand "fix_truncdfdi2" +(define_expand "fix_truncdi2" [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:DF 1 "register_operand" ""))) + (fix:DI (match_operand:SSEMODEF 1 "register_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 || (TARGET_64BIT && TARGET_SSE2)" + "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (mode))" { - if (TARGET_64BIT && TARGET_SSE2) + if (TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } + if (TARGET_64BIT && SSE_FLOAT_MODE_P (mode)) { rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode); - emit_insn (gen_fix_truncdfdi_sse (out, operands[1])); + emit_insn (gen_fix_truncdi_sse (out, operands[1])); if (out != operands[0]) emit_move_insn (operands[0], out); DONE; } }) -(define_expand "fix_truncsfdi2" - [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:SF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 || (TARGET_64BIT && TARGET_SSE)" +;; Signed conversion to SImode. + +(define_expand "fix_truncxfsi2" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387" { - if (TARGET_64BIT && TARGET_SSE) + if (TARGET_FISTTP) { - rtx out = REG_P (operands[0]) ? 
operands[0] : gen_reg_rtx (DImode); - emit_insn (gen_fix_truncsfdi_sse (out, operands[1])); + emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) + +(define_expand "fix_truncsi2" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:SSEMODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 || SSE_FLOAT_MODE_P (mode)" +{ + if (TARGET_FISTTP + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } + if (SSE_FLOAT_MODE_P (mode)) + { + rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); + emit_insn (gen_fix_truncsi_sse (out, operands[1])); if (out != operands[0]) emit_move_insn (operands[0], out); DONE; } }) +;; Signed conversion to HImode. + +(define_expand "fix_trunchi2" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (fix:HI (match_operand:X87MODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 + && !(SSE_FLOAT_MODE_P (mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))" +{ + if (TARGET_FISTTP) + { + emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) + +;; When SSE is available, it is always faster to use it! 
+(define_insn "fix_truncsfdi_sse" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] + "TARGET_64BIT && TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvttss2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "fix_truncdfdi_sse" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))] + "TARGET_64BIT && TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvttsd2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "fix_truncsfsi_sse" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] + "TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvttss2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "fix_truncdfsi_sse" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))] + "TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvttsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) + +;; Avoid vector decoded forms of the instruction. 
+(define_peephole2 + [(match_scratch:DF 2 "Y") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))] + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] + "") + +(define_peephole2 + [(match_scratch:SF 2 "x") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))] + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] + "") + +(define_insn_and_split "fix_trunc_fisttp_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))] + "TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_trunc_i387_fisttp (operands[0], operands[1])); + else + { + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fix_trunc_i387_fisttp_with_temp (operands[0], + operands[1], + operands[2])); + } + DONE; +} + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_fisttp" + [(set (match_operand:X87MODEI 0 "memory_operand" "=m") + (fix:X87MODEI (match_operand 1 "register_operand" "f"))) + (clobber (match_scratch:XF 2 "=&1f"))] + "TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH)" + "* return output_fix_trunc (insn, operands, 1);" + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_fisttp_with_temp" + [(set (match_operand:X87MODEI 0 
"nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:XF 3 "=&1f,&1f"))] + "TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH)" + "#" + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI 0 "register_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))]) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:X87MODEI 0 "memory_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))])] + "") + ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description -;; of the machinery. -(define_insn_and_split "*fix_truncdi_i387" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") - (fix:DI (match_operand 1 "register_operand" "f,f"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !reload_completed && !reload_in_progress - && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" +;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control +;; word calculation (inserted by LCM in mode switching pass) a FLAGS_REG +;; clobbering insns can be used. Look at emit_i387_cw_initialization () +;; function in i386.c. 
+(define_insn_and_split "*fix_trunc_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && !(reload_completed || reload_in_progress)" "#" "&& 1" [(const_int 0)] { - ix86_optimize_mode_switching = 1; - operands[2] = assign_386_stack_local (HImode, 1); - operands[3] = assign_386_stack_local (HImode, 2); + ix86_optimize_mode_switching[I387_TRUNC] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_truncdi_memory (operands[0], operands[1], - operands[2], operands[3])); + emit_insn (gen_fix_trunc_i387 (operands[0], operands[1], + operands[2], operands[3])); else { - operands[4] = assign_386_stack_local (DImode, 0); - emit_insn (gen_fix_truncdi_nomemory (operands[0], operands[1], - operands[2], operands[3], - operands[4])); + operands[4] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fix_trunc_i387_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); } DONE; } [(set_attr "type" "fistp") (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) + +(define_insn "fix_truncdi_i387" + [(set (match_operand:DI 0 "memory_operand" "=m") + (fix:DI (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") (set_attr "mode" "DI")]) -(define_insn 
"fix_truncdi_nomemory" +(define_insn "fix_truncdi_i387_with_temp" [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") (fix:DI (match_operand 1 "register_operand" "f,f"))) (use (match_operand:HI 2 "memory_operand" "m,m")) (use (match_operand:HI 3 "memory_operand" "m,m")) (clobber (match_operand:DI 4 "memory_operand" "=m,m")) - (clobber (match_scratch:DF 5 "=&1f,&1f"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" "#" [(set_attr "type" "fistp") (set_attr "i387_cw" "trunc") (set_attr "mode" "DI")]) -(define_insn "fix_truncdi_memory" - [(set (match_operand:DI 0 "memory_operand" "=m") - (fix:DI (match_operand 1 "register_operand" "f"))) - (use (match_operand:HI 2 "memory_operand" "m")) - (use (match_operand:HI 3 "memory_operand" "m")) - (clobber (match_scratch:DF 4 "=&1f"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "DI")]) - (define_split [(set (match_operand:DI 0 "register_operand" "") (fix:DI (match_operand 1 "register_operand" ""))) @@ -4113,313 +4363,58 @@ (clobber (match_dup 5))])] "") -;; When SSE available, it is always faster to use it! -(define_insn "fix_truncsfdi_sse" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_64BIT && TARGET_SSE" - "cvttss2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "double,vector")]) - -;; Avoid vector decoded form of the instruction. 
-(define_peephole2 - [(match_scratch:SF 2 "x") - (set (match_operand:DI 0 "register_operand" "") - (fix:DI (match_operand:SF 1 "memory_operand" "")))] - "TARGET_K8 && !optimize_size" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:DI (match_dup 2)))] - "") +(define_insn "fix_trunc_i387" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) -(define_insn "fix_truncdfdi_sse" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))] - "TARGET_64BIT && TARGET_SSE2" - "cvttsd2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt,sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,vector")]) +(define_insn "fix_trunc_i387_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) -;; Avoid vector decoded form of the instruction. 
-(define_peephole2 - [(match_scratch:DF 2 "Y") - (set (match_operand:DI 0 "register_operand" "") - (fix:DI (match_operand:DF 1 "memory_operand" "")))] - "TARGET_K8 && !optimize_size" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:DI (match_dup 2)))] +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))] "") -;; Signed conversion to SImode. - -(define_expand "fix_truncxfsi2" - [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:XF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387" - "") - -(define_expand "fix_truncdfsi2" - [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:DF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 || TARGET_SSE2" -{ - if (TARGET_SSE2) - { - rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); - emit_insn (gen_fix_truncdfsi_sse (out, operands[1])); - if (out != operands[0]) - emit_move_insn (operands[0], out); - DONE; - } -}) - -(define_expand "fix_truncsfsi2" - [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:SF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 || TARGET_SSE" -{ - if (TARGET_SSE) - { - rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); - emit_insn (gen_fix_truncsfsi_sse (out, operands[1])); - if (out != operands[0]) - emit_move_insn (operands[0], out); - DONE; - } -}) - -;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description -;; of the machinery. 
-(define_insn_and_split "*fix_truncsi_i387" - [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r") - (fix:SI (match_operand 1 "register_operand" "f,f"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !reload_completed && !reload_in_progress - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "#" - "&& 1" - [(const_int 0)] -{ - ix86_optimize_mode_switching = 1; - operands[2] = assign_386_stack_local (HImode, 1); - operands[3] = assign_386_stack_local (HImode, 2); - if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_truncsi_memory (operands[0], operands[1], - operands[2], operands[3])); - else - { - operands[4] = assign_386_stack_local (SImode, 0); - emit_insn (gen_fix_truncsi_nomemory (operands[0], operands[1], - operands[2], operands[3], - operands[4])); - } - DONE; -} - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "SI")]) - -(define_insn "fix_truncsi_nomemory" - [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r") - (fix:SI (match_operand 1 "register_operand" "f,f"))) - (use (match_operand:HI 2 "memory_operand" "m,m")) - (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:SI 4 "memory_operand" "=m,m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "#" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "SI")]) - -(define_insn "fix_truncsi_memory" - [(set (match_operand:SI 0 "memory_operand" "=m") - (fix:SI (match_operand 1 "register_operand" "f"))) - (use (match_operand:HI 2 "memory_operand" "m")) - (use (match_operand:HI 3 "memory_operand" "m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "SI")]) - -;; When SSE available, it is always faster to use it! 
-(define_insn "fix_truncsfsi_sse" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_SSE" - "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,vector")]) - -;; Avoid vector decoded form of the instruction. -(define_peephole2 - [(match_scratch:SF 2 "x") - (set (match_operand:SI 0 "register_operand" "") - (fix:SI (match_operand:SF 1 "memory_operand" "")))] - "TARGET_K8 && !optimize_size" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:SI (match_dup 2)))] - "") - -(define_insn "fix_truncdfsi_sse" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))] - "TARGET_SSE2" - "cvttsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,vector")]) - -;; Avoid vector decoded form of the instruction. -(define_peephole2 - [(match_scratch:DF 2 "Y") - (set (match_operand:SI 0 "register_operand" "") - (fix:SI (match_operand:DF 1 "memory_operand" "")))] - "TARGET_K8 && !optimize_size" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:SI (match_dup 2)))] - "") - -(define_split - [(set (match_operand:SI 0 "register_operand" "") - (fix:SI (match_operand 1 "register_operand" ""))) - (use (match_operand:HI 2 "memory_operand" "")) - (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:SI 4 "memory_operand" ""))] - "reload_completed" - [(parallel [(set (match_dup 4) (fix:SI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3))]) - (set (match_dup 0) (match_dup 4))] - "") - -(define_split - [(set (match_operand:SI 0 "memory_operand" "") - (fix:SI (match_operand 1 "register_operand" ""))) - (use (match_operand:HI 2 "memory_operand" "")) - (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:SI 4 "memory_operand" ""))] - "reload_completed" - [(parallel 
[(set (match_dup 0) (fix:SI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3))])] - "") - -;; Signed conversion to HImode. - -(define_expand "fix_truncxfhi2" - [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:XF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387" - "") - -(define_expand "fix_truncdfhi2" - [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:DF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 && !TARGET_SSE2" - "") - -(define_expand "fix_truncsfhi2" - [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:SF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 && !TARGET_SSE" - "") - -;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description -;; of the machinery. -(define_insn_and_split "*fix_trunchi_i387" - [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r") - (fix:HI (match_operand 1 "register_operand" "f,f"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !reload_completed && !reload_in_progress - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "#" - "&& 1" - [(const_int 0)] -{ - ix86_optimize_mode_switching = 1; - operands[2] = assign_386_stack_local (HImode, 1); - operands[3] = assign_386_stack_local (HImode, 2); - if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_trunchi_memory (operands[0], operands[1], - operands[2], operands[3])); - else - { - operands[4] = assign_386_stack_local (HImode, 0); - emit_insn (gen_fix_trunchi_nomemory (operands[0], operands[1], - operands[2], operands[3], - operands[4])); - } - DONE; -} - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "HI")]) - -(define_insn "fix_trunchi_nomemory" - [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r") - (fix:HI (match_operand 1 "register_operand" "f,f"))) - (use 
(match_operand:HI 2 "memory_operand" "m,m")) - (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:HI 4 "memory_operand" "=m,m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "#" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "HI")]) - -(define_insn "fix_trunchi_memory" - [(set (match_operand:HI 0 "memory_operand" "=m") - (fix:HI (match_operand 1 "register_operand" "f"))) - (use (match_operand:HI 2 "memory_operand" "m")) - (use (match_operand:HI 3 "memory_operand" "m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "HI")]) - (define_split - [(set (match_operand:HI 0 "memory_operand" "") - (fix:HI (match_operand 1 "register_operand" ""))) + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) (use (match_operand:HI 2 "memory_operand" "")) (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:HI 4 "memory_operand" ""))] + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] "reload_completed" - [(parallel [(set (match_dup 0) (fix:HI (match_dup 1))) + [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1))) (use (match_dup 2)) (use (match_dup 3))])] "") -(define_split - [(set (match_operand:HI 0 "register_operand" "") - (fix:HI (match_operand 1 "register_operand" ""))) - (use (match_operand:HI 2 "memory_operand" "")) - (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:HI 4 "memory_operand" ""))] - "reload_completed" - [(parallel [(set (match_dup 4) (fix:HI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3)) - (clobber (match_dup 4))]) - (set (match_dup 0) (match_dup 4))] - "") - (define_insn "x86_fnstcw_1" [(set (match_operand:HI 0 
"memory_operand" "=m") (unspec:HI [(reg:HI FPSR_REG)] UNSPEC_FSTCW))] @@ -4466,6 +4461,7 @@ #" [(set_attr "type" "fmov,multi") (set_attr "mode" "SF") + (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) (define_expand "floatsisf2" @@ -4475,7 +4471,7 @@ "") (define_insn "*floatsisf2_mixed" - [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f,x#f") + [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))] "TARGET_MIX_SSE_I387" "@ @@ -4485,6 +4481,7 @@ cvtsi2ss\t{%1, %0|%0, %1}" [(set_attr "type" "fmov,multi,sseicvt,sseicvt") (set_attr "mode" "SF") + (set_attr "unit" "*,i387,*,*") (set_attr "athlon_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) @@ -4507,6 +4504,7 @@ #" [(set_attr "type" "fmov,multi") (set_attr "mode" "SF") + (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) (define_expand "floatdisf2" @@ -4516,7 +4514,7 @@ "") (define_insn "*floatdisf2_mixed" - [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f,x#f") + [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))] "TARGET_64BIT && TARGET_MIX_SSE_I387" "@ @@ -4526,6 +4524,7 @@ cvtsi2ss{q}\t{%1, %0|%0, %1}" [(set_attr "type" "fmov,multi,sseicvt,sseicvt") (set_attr "mode" "SF") + (set_attr "unit" "*,i387,*,*") (set_attr "athlon_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) @@ -4548,6 +4547,7 @@ #" [(set_attr "type" "fmov,multi") (set_attr "mode" "SF") + (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) (define_expand "floathidf2" @@ -4572,6 +4572,7 @@ #" [(set_attr "type" "fmov,multi") (set_attr "mode" "DF") + (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) (define_expand "floatsidf2" @@ -4581,7 +4582,7 @@ "") (define_insn "*floatsidf2_mixed" - [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f,Y#f") + [(set (match_operand:DF 0 "register_operand" "=f,?f,Y,Y") (float:DF 
(match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))] "TARGET_SSE2 && TARGET_MIX_SSE_I387" "@ @@ -4591,6 +4592,7 @@ cvtsi2sd\t{%1, %0|%0, %1}" [(set_attr "type" "fmov,multi,sseicvt,sseicvt") (set_attr "mode" "DF") + (set_attr "unit" "*,i387,*,*") (set_attr "athlon_decode" "*,*,double,direct") (set_attr "fp_int_src" "true")]) @@ -4613,6 +4615,7 @@ #" [(set_attr "type" "fmov,multi") (set_attr "mode" "DF") + (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) (define_expand "floatdidf2" @@ -4622,7 +4625,7 @@ "") (define_insn "*floatdidf2_mixed" - [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f,Y#f") + [(set (match_operand:DF 0 "register_operand" "=f,?f,Y,Y") (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))] "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387" "@ @@ -4632,6 +4635,7 @@ cvtsi2sd{q}\t{%1, %0|%0, %1}" [(set_attr "type" "fmov,multi,sseicvt,sseicvt") (set_attr "mode" "DF") + (set_attr "unit" "*,i387,*,*") (set_attr "athlon_decode" "*,*,double,direct") (set_attr "fp_int_src" "true")]) @@ -4654,6 +4658,7 @@ #" [(set_attr "type" "fmov,multi") (set_attr "mode" "DF") + (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) (define_insn "floathixf2" @@ -4665,6 +4670,7 @@ #" [(set_attr "type" "fmov,multi") (set_attr "mode" "XF") + (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) (define_insn "floatsixf2" @@ -4676,6 +4682,7 @@ #" [(set_attr "type" "fmov,multi") (set_attr "mode" "XF") + (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) (define_insn "floatdixf2" @@ -4687,6 +4694,7 @@ #" [(set_attr "type" "fmov,multi") (set_attr "mode" "XF") + (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) ;; %%% Kill these when reload knows how to do it. 
@@ -4725,164 +4733,44 @@ ;; SSE extract/set expanders -(define_expand "vec_setv2df" - [(match_operand:V2DF 0 "register_operand" "") - (match_operand:DF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_SSE2" -{ - switch (INTVAL (operands[2])) - { - case 0: - emit_insn (gen_sse2_loadlpd (operands[0], operands[0], operands[1])); - break; - case 1: - emit_insn (gen_sse2_loadhpd (operands[0], operands[0], operands[1])); - break; - default: - abort (); - } - DONE; -}) - -(define_expand "vec_extractv2df" - [(match_operand:DF 0 "register_operand" "") - (match_operand:V2DF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_SSE2" -{ - switch (INTVAL (operands[2])) - { - case 0: - emit_insn (gen_sse2_storelpd (operands[0], operands[1])); - break; - case 1: - emit_insn (gen_sse2_storehpd (operands[0], operands[1])); - break; - default: - abort (); - } - DONE; -}) - -(define_expand "vec_initv2df" - [(match_operand:V2DF 0 "register_operand" "") - (match_operand 1 "" "")] - "TARGET_SSE2" -{ - ix86_expand_vector_init (operands[0], operands[1]); - DONE; -}) - -(define_expand "vec_setv4sf" - [(match_operand:V4SF 0 "register_operand" "") - (match_operand:SF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_SSE" -{ - switch (INTVAL (operands[2])) - { - case 0: - emit_insn (gen_sse_movss (operands[0], operands[0], - simplify_gen_subreg (V4SFmode, operands[1], - SFmode, 0))); - break; - case 1: - { - rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); - - emit_move_insn (tmp, operands[0]); - emit_insn (gen_sse_unpcklps (operands[0], operands[0], operands[0])); - emit_insn (gen_sse_movss (operands[0], operands[0], op1)); - emit_insn (gen_sse_shufps (operands[0], operands[0], tmp, - GEN_INT (1 + (0<<2) + (2<<4) + (3<<6)))); - } - break; - case 2: - { - rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); + 
+;; Add instructions - emit_move_insn (tmp, operands[0]); - emit_insn (gen_sse_movss (tmp, tmp, op1)); - emit_insn (gen_sse_shufps (operands[0], operands[0], tmp, - GEN_INT (0 + (1<<2) + (0<<4) + (3<<6)))); - } - break; - case 3: - { - rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); +;; %%% splits for addditi3 - emit_move_insn (tmp, operands[0]); - emit_insn (gen_sse_movss (tmp, tmp, op1)); - emit_insn (gen_sse_shufps (operands[0], operands[0], tmp, - GEN_INT (0 + (1<<2) + (2<<4) + (0<<6)))); - } - break; - default: - abort (); - } - DONE; -}) +(define_expand "addti3" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "ix86_expand_binary_operator (PLUS, TImode, operands); DONE;") -(define_expand "vec_extractv4sf" - [(match_operand:SF 0 "register_operand" "") - (match_operand:V4SF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_SSE" -{ - switch (INTVAL (operands[2])) - { - case 0: - emit_move_insn (operands[0], gen_lowpart (SFmode, operands[1])); - break; - case 1: - { - rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); - - emit_move_insn (tmp, operands[1]); - emit_insn (gen_sse_shufps (op0, tmp, tmp, - const1_rtx)); - } - break; - case 2: - { - rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); - - emit_move_insn (tmp, operands[1]); - emit_insn (gen_sse_unpckhps (op0, tmp, tmp)); - } - break; - case 3: - { - rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); - - emit_move_insn (tmp, operands[1]); - emit_insn (gen_sse_shufps (op0, tmp, tmp, - GEN_INT (3))); - } - break; - default: - abort (); - } - DONE; -}) +(define_insn "*addti3_1" + [(set (match_operand:TI 0 
"nonimmediate_operand" "=r,o") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0") + (match_operand:TI 2 "general_operand" "roiF,riF"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)" + "#") -(define_expand "vec_initv4sf" - [(match_operand:V4SF 0 "register_operand" "") - (match_operand 1 "" "")] - "TARGET_SSE" -{ - ix86_expand_vector_init (operands[0], operands[1]); - DONE; -}) - -;; Add instructions +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)] + UNSPEC_ADD_CARRY)) + (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (operands+0, 1, operands+0, operands+3); + split_ti (operands+1, 1, operands+1, operands+4); + split_ti (operands+2, 1, operands+2, operands+5);") ;; %%% splits for addsidi3 ; [(set (match_operand:DI 0 "nonimmediate_operand" "") @@ -5236,18 +5124,17 @@ return "lea{q}\t{%a2, %0|%0, %a2}"; case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{q}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{q}\t%0"; else - abort (); - + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } + default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ @@ -5307,18 +5194,17 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{q}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{q}\t%0"; else - abort (); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* ???? We ought to handle there the 32bit case too - do we need new constraint? */ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -5357,18 +5243,17 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{q}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{q}\t%0"; else - abort (); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* ???? We ought to handle there the 32bit case too - do we need new constraint? */ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -5413,14 +5298,14 @@ case TYPE_INCDEC: if (operands[2] == constm1_rtx) return "inc{q}\t%0"; - else if (operands[2] == const1_rtx) - return "dec{q}\t%0"; else - abort(); + { + gcc_assert (operands[2] == const1_rtx); + return "dec{q}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if ((INTVAL (operands[2]) == -128 @@ -5456,18 +5341,17 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! 
rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{q}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{q}\t%0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if (GET_CODE (operands[2]) == CONST_INT @@ -5504,18 +5388,17 @@ return "lea{l}\t{%a2, %0|%0, %a2}"; case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{l}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ @@ -5591,10 +5474,11 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{l}\t%k0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%k0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -5656,18 +5540,17 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! 
rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{l}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if (GET_CODE (operands[2]) == CONST_INT @@ -5707,10 +5590,11 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{l}\t%k0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%k0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -5746,18 +5630,17 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{l}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if (GET_CODE (operands[2]) == CONST_INT @@ -5795,10 +5678,11 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{l}\t%k0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%k0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. 
@@ -5841,14 +5725,14 @@ case TYPE_INCDEC: if (operands[2] == constm1_rtx) return "inc{l}\t%0"; - else if (operands[2] == const1_rtx) - return "dec{l}\t%0"; else - abort(); + { + gcc_assert (operands[2] == const1_rtx); + return "dec{l}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if ((INTVAL (operands[2]) == -128 @@ -5881,18 +5765,17 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) return "inc{l}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{l}\t%0"; else - abort(); + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if (GET_CODE (operands[2]) == CONST_INT @@ -5939,9 +5822,11 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{w}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -5978,9 +5863,11 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{w}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. 
@@ -6018,9 +5905,11 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{w}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -6055,9 +5944,11 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{w}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -6079,7 +5970,7 @@ (const_string "alu"))) (set_attr "mode" "HI")]) -; See comments above addsi_3_imm for details. +; See comments above addsi_4 for details. (define_insn "*addhi_4" [(set (reg FLAGS_REG) (compare (match_operand:HI 1 "nonimmediate_operand" "0") @@ -6093,14 +5984,14 @@ case TYPE_INCDEC: if (operands[2] == constm1_rtx) return "inc{w}\t%0"; - else if (operands[2] == const1_rtx) - return "dec{w}\t%0"; else - abort(); + { + gcc_assert (operands[2] == const1_rtx); + return "dec{w}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if ((INTVAL (operands[2]) == -128 @@ -6133,9 +6024,11 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx) - return "dec{w}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -6182,9 +6075,11 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; - else if (operands[2] == constm1_rtx) - return widen ? 
"dec{l}\t%k0" : "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -6228,9 +6123,11 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; - else if (operands[2] == constm1_rtx) - return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx); + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -6271,9 +6168,11 @@ case TYPE_INCDEC: if (operands[1] == const1_rtx) return "inc{b}\t%0"; - else if (operands[1] == constm1_rtx) - return "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[1] == constm1_rtx); + return "dec{b}\t%0"; + } default: /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. */ @@ -6290,6 +6189,10 @@ (if_then_else (match_operand:QI 1 "incdec_operand" "") (const_string "incdec") (const_string "alu1"))) + (set (attr "memory") + (if_then_else (match_operand 1 "memory_operand" "") + (const_string "load") + (const_string "none"))) (set_attr "mode" "QI")]) (define_insn "*addqi_2" @@ -6308,11 +6211,13 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } default: /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. 
*/ @@ -6344,11 +6249,13 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } default: /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ @@ -6367,7 +6274,7 @@ (const_string "alu"))) (set_attr "mode" "QI")]) -; See comments above addsi_3_imm for details. +; See comments above addsi_4 for details. (define_insn "*addqi_4" [(set (reg FLAGS_REG) (compare (match_operand:QI 1 "nonimmediate_operand" "0") @@ -6383,14 +6290,14 @@ || (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 255)) return "inc{b}\t%0"; - else if (operands[2] == const1_rtx) - return "dec{b}\t%0"; else - abort(); + { + gcc_assert (operands[2] == const1_rtx); + return "dec{b}\t%0"; + } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (INTVAL (operands[2]) < 0) { operands[2] = GEN_INT (-INTVAL (operands[2])); @@ -6421,11 +6328,13 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return "dec{b}\t%0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } default: /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. 
*/ @@ -6463,11 +6372,13 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%h0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return "dec{b}\t%h0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%h0"; + } default: return "add{b}\t{%2, %h0|%h0, %2}"; @@ -6497,11 +6408,13 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%h0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return "dec{b}\t%h0"; - abort(); + else + { + gcc_assert (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%h0"; + } default: return "add{b}\t{%2, %h0|%h0, %2}"; @@ -6557,6 +6470,41 @@ ;; Subtract instructions +;; %%% splits for subditi3 + +(define_expand "subti3" + [(parallel [(set (match_operand:TI 0 "nonimmediate_operand" "") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "ix86_expand_binary_operator (MINUS, TImode, operands); DONE;") + +(define_insn "*subti3_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0") + (match_operand:TI 2 "general_operand" "roiF,riF"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))]) + 
(parallel [(set (match_dup 3) + (minus:DI (match_dup 4) + (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (operands+0, 1, operands+0, operands+3); + split_ti (operands+1, 1, operands+1, operands+4); + split_ti (operands+2, 1, operands+2, operands+5);") + ;; %%% splits for subsidi3 (define_expand "subdi3" @@ -6770,7 +6718,7 @@ (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (MINUS, SImode, operands)" - "sub{q}\t{%2, %0|%0, %2}" + "sub{l}\t{%2, %1|%1, %2}" [(set_attr "type" "alu") (set_attr "mode" "DI")]) @@ -7523,8 +7471,7 @@ } else { - if (true_regnum (operands[1])) - abort(); + gcc_assert (!true_regnum (operands[1])); operands[4] = operands[1]; } }) @@ -7607,8 +7554,7 @@ } else { - if (true_regnum (operands[1])) - abort(); + gcc_assert (!true_regnum (operands[1])); operands[4] = operands[1]; } }) @@ -7954,6 +7900,9 @@ (match_operand:SI 2 "const_int_operand" "")) (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode) + && INTVAL (operands[1]) > 0 + && INTVAL (operands[2]) >= 0 + && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 && (GET_MODE (operands[0]) == SImode || (TARGET_64BIT && GET_MODE (operands[0]) == DImode) || GET_MODE (operands[0]) == HImode @@ -7969,8 +7918,8 @@ (const_int 0)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) - /* The code below cannot deal with constants outside HOST_WIDE_INT. */ - && INTVAL (operands[1]) + INTVAL (operands[2]) < HOST_BITS_PER_WIDE_INT + && INTVAL (operands[1]) > 0 + && INTVAL (operands[2]) >= 0 /* Ensure that resulting mask is zero or sign extended operand. 
*/ && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64 @@ -8025,8 +7974,11 @@ val = gen_lowpart (QImode, val); } - mask = ((HOST_WIDE_INT)1 << (pos + len)) - 1; - mask &= ~(((HOST_WIDE_INT)1 << pos) - 1); + if (len == HOST_BITS_PER_WIDE_INT) + mask = -1; + else + mask = ((HOST_WIDE_INT)1 << len) - 1; + mask <<= pos; operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode)); }) @@ -8102,14 +8054,14 @@ { enum machine_mode mode; - if (GET_CODE (operands[2]) != CONST_INT) - abort (); + gcc_assert (GET_CODE (operands[2]) == CONST_INT); if (INTVAL (operands[2]) == 0xff) mode = QImode; - else if (INTVAL (operands[2]) == 0xffff) - mode = HImode; else - abort (); + { + gcc_assert (INTVAL (operands[2]) == 0xffff); + mode = HImode; + } operands[1] = gen_lowpart (mode, operands[1]); if (mode == QImode) @@ -8119,8 +8071,7 @@ } default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); if (get_attr_mode (insn) == MODE_SI) return "and{l}\t{%k2, %k0|%k0, %k2}"; else @@ -8168,14 +8119,14 @@ { enum machine_mode mode; - if (GET_CODE (operands[2]) != CONST_INT) - abort (); + gcc_assert (GET_CODE (operands[2]) == CONST_INT); if (INTVAL (operands[2]) == 0xff) mode = QImode; - else if (INTVAL (operands[2]) == 0xffff) - mode = HImode; else - abort (); + { + gcc_assert (INTVAL (operands[2]) == 0xffff); + mode = HImode; + } operands[1] = gen_lowpart (mode, operands[1]); if (mode == QImode) @@ -8185,8 +8136,7 @@ } default: - if (! 
rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); return "and{l}\t{%2, %0|%0, %2}"; } } @@ -8288,15 +8238,12 @@ switch (get_attr_type (insn)) { case TYPE_IMOVX: - if (GET_CODE (operands[2]) != CONST_INT) - abort (); - if (INTVAL (operands[2]) == 0xff) - return "movz{bl|x}\t{%b1, %k0|%k0, %b1}"; - abort (); + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + gcc_assert (INTVAL (operands[2]) == 0xff); + return "movz{bl|x}\t{%b1, %k0|%k0, %b1}"; default: - if (! rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (rtx_equal_p (operands[0], operands[1])); return "and{w}\t{%2, %0|%0, %2}"; } @@ -9370,6 +9317,43 @@ ;; Negation instructions +(define_expand "negti2" + [(parallel [(set (match_operand:TI 0 "nonimmediate_operand" "") + (neg:TI (match_operand:TI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "ix86_expand_unary_operator (NEG, TImode, operands); DONE;") + +(define_insn "*negti2_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=ro") + (neg:TI (match_operand:TI 1 "general_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && ix86_unary_operator_ok (NEG, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (neg:TI (match_operand:TI 1 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:DI (match_dup 2)) (const_int 0))) + (set (match_dup 0) (neg:DI (match_dup 2)))]) + (parallel + [(set (match_dup 1) + (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 3)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 1) + (neg:DI (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (operands+1, 1, operands+2, operands+3); + split_ti (operands+0, 1, operands+0, operands+1);") + (define_expand "negdi2" [(parallel [(set (match_operand:DI 0 
"nonimmediate_operand" "") (neg:DI (match_operand:DI 1 "nonimmediate_operand" ""))) @@ -9561,27 +9545,27 @@ "ix86_expand_fp_absneg_operator (ABS, SFmode, operands); DONE;") (define_insn "*absnegsf2_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") + [(set (match_operand:SF 0 "nonimmediate_operand" "=x ,x,f,rm") (match_operator:SF 3 "absneg_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0 ,x#fr,0 ,0")])) - (use (match_operand:V4SF 2 "nonimmediate_operand" "xm ,0 ,X ,X")) + [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0,0 ")])) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm ,0,X,X ")) (clobber (reg:CC FLAGS_REG))] "TARGET_SSE_MATH && TARGET_MIX_SSE_I387 && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" "#") (define_insn "*absnegsf2_sse" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x#r,x#r,rm#x") + [(set (match_operand:SF 0 "nonimmediate_operand" "=x,x,rm") (match_operator:SF 3 "absneg_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0 ,x#r,0")])) - (use (match_operand:V4SF 2 "nonimmediate_operand" "xm ,0 ,X")) + [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0")])) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,X")) (clobber (reg:CC FLAGS_REG))] "TARGET_SSE_MATH && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" "#") (define_insn "*absnegsf2_i387" - [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f") + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,rm") (match_operator:SF 3 "absneg_operator" [(match_operand:SF 1 "nonimmediate_operand" "0,0")])) (use (match_operand 2 "" "")) @@ -9590,6 +9574,60 @@ && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" "#") +(define_expand "copysignsf3" + [(match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "nonmemory_operand" "") + (match_operand:SF 2 "register_operand" "")] + "TARGET_SSE_MATH" +{ + ix86_expand_copysign (operands); + DONE; +}) + 
+(define_insn_and_split "copysignsf3_const" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF + [(match_operand:V4SF 1 "vector_move_operand" "xmC") + (match_operand:SF 2 "register_operand" "0") + (match_operand:V4SF 3 "nonimmediate_operand" "xm")] + UNSPEC_COPYSIGN))] + "TARGET_SSE_MATH" + "#" + "&& reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_const (operands); + DONE; +}) + +(define_insn "copysignsf3_var" + [(set (match_operand:SF 0 "register_operand" "=x, x, x, x,x") + (unspec:SF + [(match_operand:SF 2 "register_operand" " x, 0, 0, x,x") + (match_operand:SF 3 "register_operand" " 1, 1, x, 1,x") + (match_operand:V4SF 4 "nonimmediate_operand" " X,xm,xm, 0,0") + (match_operand:V4SF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch:V4SF 1 "=x, x, x, x,x"))] + "TARGET_SSE_MATH" + "#") + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF + [(match_operand:SF 2 "register_operand" "") + (match_operand:SF 3 "register_operand" "") + (match_operand:V4SF 4 "" "") + (match_operand:V4SF 5 "" "")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch:V4SF 1 ""))] + "TARGET_SSE_MATH && reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_var (operands); + DONE; +}) + (define_expand "negdf2" [(set (match_operand:DF 0 "nonimmediate_operand" "") (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))] @@ -9603,27 +9641,27 @@ "ix86_expand_fp_absneg_operator (ABS, DFmode, operands); DONE;") (define_insn "*absnegdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf") + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,Y,f,rm") (match_operator:DF 3 "absneg_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0 ,Y#fr,0 ,0")])) - (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym ,0 ,X ,X")) + [(match_operand:DF 1 "nonimmediate_operand" "0 ,Y,0,0")])) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,X,X")) (clobber (reg:CC FLAGS_REG))] 
"TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" "#") (define_insn "*absnegdf2_sse" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#r,Y#r,rm#Y") + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,Y,rm") (match_operator:DF 3 "absneg_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0 ,Y#r,0")])) - (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym ,0 ,X")) + [(match_operand:DF 1 "nonimmediate_operand" "0 ,Y,0 ")])) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,X ")) (clobber (reg:CC FLAGS_REG))] "TARGET_SSE2 && TARGET_SSE_MATH && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" "#") (define_insn "*absnegdf2_i387" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f") + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,rm") (match_operator:DF 3 "absneg_operator" [(match_operand:DF 1 "nonimmediate_operand" "0,0")])) (use (match_operand 2 "" "")) @@ -9632,6 +9670,60 @@ && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" "#") +(define_expand "copysigndf3" + [(match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "nonmemory_operand" "") + (match_operand:DF 2 "register_operand" "")] + "TARGET_SSE2 && TARGET_SSE_MATH" +{ + ix86_expand_copysign (operands); + DONE; +}) + +(define_insn_and_split "copysigndf3_const" + [(set (match_operand:DF 0 "register_operand" "=x") + (unspec:DF + [(match_operand:V2DF 1 "vector_move_operand" "xmC") + (match_operand:DF 2 "register_operand" "0") + (match_operand:V2DF 3 "nonimmediate_operand" "xm")] + UNSPEC_COPYSIGN))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "#" + "&& reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_const (operands); + DONE; +}) + +(define_insn "copysigndf3_var" + [(set (match_operand:DF 0 "register_operand" "=x, x, x, x,x") + (unspec:DF + [(match_operand:DF 2 "register_operand" " x, 0, 0, x,x") + (match_operand:DF 3 "register_operand" " 1, 1, x, 1,x") + 
(match_operand:V2DF 4 "nonimmediate_operand" " X,xm,xm, 0,0") + (match_operand:V2DF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch:V2DF 1 "=x, x, x, x,x"))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "#") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (unspec:DF + [(match_operand:DF 2 "register_operand" "") + (match_operand:DF 3 "register_operand" "") + (match_operand:V2DF 4 "" "") + (match_operand:V2DF 5 "" "")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch:V2DF 1 ""))] + "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_var (operands); + DONE; +}) + (define_expand "negxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))] @@ -9640,12 +9732,12 @@ (define_expand "absxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") - (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))] + (abs:XF (match_operand:XF 1 "nonimmediate_operand" "")))] "TARGET_80387" "ix86_expand_fp_absneg_operator (ABS, XFmode, operands); DONE;") (define_insn "*absnegxf2_i387" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f") + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,?rm") (match_operator:XF 3 "absneg_operator" [(match_operand:XF 1 "nonimmediate_operand" "0,0")])) (use (match_operand 2 "" "")) @@ -9813,7 +9905,7 @@ (define_insn "*negsf2_1" [(set (match_operand:SF 0 "register_operand" "=f") (neg:SF (match_operand:SF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" + "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)" "fchs" [(set_attr "type" "fsgn") (set_attr "mode" "SF")]) @@ -9821,7 +9913,7 @@ (define_insn "*negdf2_1" [(set (match_operand:DF 0 "register_operand" "=f") (neg:DF (match_operand:DF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" + "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))" "fchs" [(set_attr "type" "fsgn") (set_attr 
"mode" "DF")]) @@ -9829,7 +9921,7 @@ (define_insn "*negxf2_1" [(set (match_operand:XF 0 "register_operand" "=f") (neg:XF (match_operand:XF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" + "TARGET_80387" "fchs" [(set_attr "type" "fsgn") (set_attr "mode" "XF")]) @@ -9837,7 +9929,7 @@ (define_insn "*abssf2_1" [(set (match_operand:SF 0 "register_operand" "=f") (abs:SF (match_operand:SF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" + "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)" "fabs" [(set_attr "type" "fsgn") (set_attr "mode" "SF")]) @@ -9845,7 +9937,7 @@ (define_insn "*absdf2_1" [(set (match_operand:DF 0 "register_operand" "=f") (abs:DF (match_operand:DF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" + "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))" "fabs" [(set_attr "type" "fsgn") (set_attr "mode" "DF")]) @@ -9853,7 +9945,7 @@ (define_insn "*absxf2_1" [(set (match_operand:XF 0 "register_operand" "=f") (abs:XF (match_operand:XF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" + "TARGET_80387" "fabs" [(set_attr "type" "fsgn") (set_attr "mode" "DF")]) @@ -10143,6 +10235,92 @@ ;; shift pair, instead using moves and sign extension for counts greater ;; than 31. +(define_expand "ashlti3" + [(parallel [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" +{ + if (! 
immediate_operand (operands[2], QImode)) + { + emit_insn (gen_ashlti3_1 (operands[0], operands[1], operands[2])); + DONE; + } + ix86_expand_binary_operator (ASHIFT, TImode, operands); + DONE; +}) + +(define_insn "ashlti3_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (ashift:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "register_operand" "c"))) + (clobber (match_scratch:DI 3 "=&r")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*ashlti3_2" + [(set (match_operand:TI 0 "register_operand" "=r") + (ashift:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "immediate_operand" "O"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "nonmemory_operand" "") + (match_operand:QI 2 "register_operand" ""))) + (clobber (match_scratch:DI 3 "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_ashl (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_ashl (operands, NULL_RTX, TImode); DONE;") + +(define_insn "x86_64_shld" + [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m,r*m") + (ior:DI (ashift:DI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "J,c")) + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,r") + (minus:QI (const_int 64) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "@ + shld{q}\t{%2, %1, %0|%0, %1, %2} + shld{q}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector")]) + 
+(define_expand "x86_64_shift_adj" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "") + (const_int 64)) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:DI 1 "register_operand" "") + (match_dup 0))) + (set (match_dup 1) + (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:DI 3 "register_operand" "r") + (match_dup 1)))] + "TARGET_64BIT" + "") + (define_expand "ashldi3" [(set (match_operand:DI 0 "shiftdi_operand" "") (ashift:DI (match_operand:DI 1 "ashldi_input_operand" "") @@ -10160,16 +10338,13 @@ switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - if (!rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (operands[2] == const1_rtx); + gcc_assert (rtx_equal_p (operands[0], operands[1])); return "add{q}\t{%0, %0|%0, %0}"; case TYPE_LEA: - if (GET_CODE (operands[2]) != CONST_INT - || (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 3) - abort (); + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + gcc_assert ((unsigned HOST_WIDE_INT) INTVAL (operands[2]) <= 3); operands[1] = gen_rtx_MULT (DImode, operands[1], GEN_INT (1 << INTVAL (operands[2]))); return "lea{q}\t{%a1, %0|%0, %a1}"; @@ -10226,8 +10401,43 @@ switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); + return "add{q}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) 
+ +(define_insn "*ashldi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "e")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); return "add{q}\t{%0, %0|%0, %0}"; default: @@ -10271,16 +10481,17 @@ (match_dup 3)] "!TARGET_64BIT && TARGET_CMOVE" [(const_int 0)] - "ix86_split_ashldi (operands, operands[3]); DONE;") + "ix86_split_ashl (operands, operands[3], DImode); DONE;") (define_split [(set (match_operand:DI 0 "register_operand" "") (ashift:DI (match_operand:DI 1 "nonmemory_operand" "") (match_operand:QI 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)" + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
flow2_completed : reload_completed)" [(const_int 0)] - "ix86_split_ashldi (operands, NULL_RTX); DONE;") + "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;") (define_insn "x86_shld_1" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m") @@ -10361,10 +10572,8 @@ switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); - if (!rtx_equal_p (operands[0], operands[1])) - abort (); + gcc_assert (operands[2] == const1_rtx); + gcc_assert (rtx_equal_p (operands[0], operands[1])); return "add{l}\t{%0, %0|%0, %0}"; case TYPE_LEA: @@ -10430,7 +10639,7 @@ [(const_int 0)] { rtx pat, clob; - emit_move_insn (operands[1], operands[0]); + emit_move_insn (operands[0], operands[1]); pat = gen_rtx_SET (VOIDmode, operands[0], gen_rtx_ASHIFT (GET_MODE (operands[0]), operands[0], operands[2])); @@ -10449,8 +10658,7 @@ switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); return "add{l}\t{%k0, %k0|%k0, %k0}"; case TYPE_LEA: @@ -10500,7 +10708,7 @@ [(set (reg FLAGS_REG) (compare (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashift:SI (match_dup 1) (match_dup 2)))] @@ -10510,8 +10718,7 @@ switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); return "add{l}\t{%0, %0|%0, %0}"; default: @@ -10534,11 +10741,47 @@ (const_string "ishift"))) (set_attr "mode" "SI")]) -(define_insn "*ashlsi3_cmp_zext" +(define_insn "*ashlsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, 
SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{l}\t%0"; + else + return "sal{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +(define_insn "*ashlsi3_cmp_zext" [(set (reg FLAGS_REG) (compare (ashift:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] @@ -10548,8 +10791,7 @@ switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); return "add{l}\t{%k0, %k0|%k0, %k0}"; default: @@ -10592,8 +10834,7 @@ case TYPE_LEA: return "#"; case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); return "add{w}\t{%0, %0|%0, %0}"; default: @@ -10629,8 +10870,7 @@ switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); return "add{w}\t{%0, %0|%0, %0}"; default: @@ -10660,7 +10900,7 @@ [(set (reg FLAGS_REG) (compare (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashift:HI (match_dup 1) (match_dup 2)))] @@ -10670,8 +10910,43 @@ switch (get_attr_type (insn)) { case 
TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI")]) + +(define_insn "*ashlhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); return "add{w}\t{%0, %0|%0, %0}"; default: @@ -10717,8 +10992,7 @@ case TYPE_LEA: return "#"; case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) return "add{l}\t{%k0, %k0|%k0, %k0}"; else @@ -10772,8 +11046,7 @@ switch (get_attr_type (insn)) { case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) return "add{l}\t{%k0, %k0|%k0, %k0}"; else @@ -10821,7 +11094,7 @@ [(set (reg FLAGS_REG) (compare (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashift:QI (match_dup 1) (match_dup 2)))] @@ -10831,8 +11104,43 @@ switch (get_attr_type (insn)) { 
case TYPE_ALU: - if (operands[2] != const1_rtx) - abort (); + gcc_assert (operands[2] == const1_rtx); + return "add{b}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + return "sal{b}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_size)) + return "sal{b}\t%0"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI")]) + +(define_insn "*ashlqi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); return "add{b}\t{%0, %0|%0, %0}"; default: @@ -10857,6 +11165,76 @@ ;; See comment above `ashldi3' about how this works. +(define_expand "ashrti3" + [(parallel [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" +{ + if (! 
immediate_operand (operands[2], QImode)) + { + emit_insn (gen_ashrti3_1 (operands[0], operands[1], operands[2])); + DONE; + } + ix86_expand_binary_operator (ASHIFTRT, TImode, operands); + DONE; +}) + +(define_insn "ashrti3_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "register_operand" "c"))) + (clobber (match_scratch:DI 3 "=&r")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*ashrti3_2" + [(set (match_operand:TI 0 "register_operand" "=r") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "immediate_operand" "O"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "register_operand" ""))) + (clobber (match_scratch:DI 3 "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_ashr (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_ashr (operands, NULL_RTX, TImode); DONE;") + +(define_insn "x86_64_shrd" + [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m,r*m") + (ior:DI (ashiftrt:DI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "J,c")) + (ashift:DI (match_operand:DI 1 "register_operand" "r,r") + (minus:QI (const_int 64) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "@ + shrd{q}\t{%2, %1, %0|%0, %1, %2} + shrd{q}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector")]) + 
(define_expand "ashrdi3" [(set (match_operand:DI 0 "shiftdi_operand" "") (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") @@ -10928,6 +11306,20 @@ (const_string "2") (const_string "*")))]) +(define_insn "*ashrdi3_one_bit_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. @@ -10945,6 +11337,19 @@ [(set_attr "type" "ishift") (set_attr "mode" "DI")]) +(define_insn "*ashrdi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "n")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + (define_insn "*ashrdi3_1" [(set (match_operand:DI 0 "register_operand" "=r") (ashiftrt:DI (match_operand:DI 1 "register_operand" "0") @@ -10966,16 +11371,17 @@ (match_dup 3)] "!TARGET_64BIT && TARGET_CMOVE" [(const_int 0)] - "ix86_split_ashrdi (operands, operands[3]); DONE;") + "ix86_split_ashr (operands, operands[3], DImode); DONE;") (define_split [(set (match_operand:DI 0 "register_operand" "") (ashiftrt:DI (match_operand:DI 1 "register_operand" "") (match_operand:QI 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && (flag_peephole2 ? 
flow2_completed : reload_completed)" + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? flow2_completed : reload_completed)" [(const_int 0)] - "ix86_split_ashrdi (operands, NULL_RTX); DONE;") + "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;") (define_insn "x86_shrd_1" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m") @@ -11132,6 +11538,20 @@ (const_string "2") (const_string "*")))]) +(define_insn "*ashrsi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + (define_insn "*ashrsi3_one_bit_cmp_zext" [(set (reg FLAGS_REG) (compare @@ -11154,7 +11574,7 @@ [(set (reg FLAGS_REG) (compare (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] @@ -11164,11 +11584,24 @@ [(set_attr "type" "ishift") (set_attr "mode" "SI")]) +(define_insn "*ashrsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + (define_insn "*ashrsi3_cmp_zext" [(set (reg FLAGS_REG) (compare (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) 
(const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] @@ -11233,6 +11666,20 @@ (const_string "2") (const_string "*")))]) +(define_insn "*ashrhi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. @@ -11240,7 +11687,7 @@ [(set (reg FLAGS_REG) (compare (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] @@ -11250,6 +11697,19 @@ [(set_attr "type" "ishift") (set_attr "mode" "HI")]) +(define_insn "*ashrhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + (define_expand "ashrqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "") @@ -11333,6 +11793,20 @@ (const_string "2") (const_string "*")))]) +(define_insn "*ashrqi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + 
(match_operand:QI 2 "const1_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. @@ -11340,7 +11814,7 @@ [(set (reg FLAGS_REG) (compare (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] @@ -11349,22 +11823,90 @@ "sar{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) + +(define_insn "*ashrqi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + ;; Logical shift instructions ;; See comment above `ashldi3' about how this works. -(define_expand "lshrdi3" - [(set (match_operand:DI 0 "shiftdi_operand" "") - (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") - (match_operand:QI 2 "nonmemory_operand" "")))] - "" - "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;") +(define_expand "lshrti3" + [(parallel [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" +{ + if (! 
immediate_operand (operands[2], QImode)) + { + emit_insn (gen_lshrti3_1 (operands[0], operands[1], operands[2])); + DONE; + } + ix86_expand_binary_operator (LSHIFTRT, TImode, operands); + DONE; +}) -(define_insn "*lshrdi3_1_one_bit_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") - (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const1_operand" ""))) +(define_insn "lshrti3_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "register_operand" "c"))) + (clobber (match_scratch:DI 3 "=&r")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*lshrti3_2" + [(set (match_operand:TI 0 "register_operand" "=r") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "immediate_operand" "O"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "register_operand" ""))) + (clobber (match_scratch:DI 3 "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_lshr (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_lshr (operands, NULL_RTX, TImode); DONE;") + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;") + +(define_insn "*lshrdi3_1_one_bit_rex64" + [(set (match_operand:DI 0 
"nonimmediate_operand" "=rm") + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11408,6 +11950,20 @@ (const_string "2") (const_string "*")))]) +(define_insn "*lshrdi3_cconly_one_bit_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. @@ -11425,6 +11981,19 @@ [(set_attr "type" "ishift") (set_attr "mode" "DI")]) +(define_insn "*lshrdi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "e")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + (define_insn "*lshrdi3_1" [(set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (match_operand:DI 1 "register_operand" "0") @@ -11446,16 +12015,17 @@ (match_dup 3)] "!TARGET_64BIT && TARGET_CMOVE" [(const_int 0)] - "ix86_split_lshrdi (operands, operands[3]); DONE;") + "ix86_split_lshr (operands, operands[3], DImode); DONE;") (define_split [(set (match_operand:DI 0 "register_operand" "") (lshiftrt:DI (match_operand:DI 1 "register_operand" "") (match_operand:QI 2 "nonmemory_operand" ""))) 
(clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)" + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? flow2_completed : reload_completed)" [(const_int 0)] - "ix86_split_lshrdi (operands, NULL_RTX); DONE;") + "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;") (define_expand "lshrsi3" [(set (match_operand:SI 0 "nonimmediate_operand" "") @@ -11536,6 +12106,20 @@ (const_string "2") (const_string "*")))]) +(define_insn "*lshrsi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + (define_insn "*lshrsi3_cmp_one_bit_zext" [(set (reg FLAGS_REG) (compare @@ -11558,7 +12142,7 @@ [(set (reg FLAGS_REG) (compare (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] @@ -11568,11 +12152,24 @@ [(set_attr "type" "ishift") (set_attr "mode" "SI")]) +(define_insn "*lshrsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + (define_insn "*lshrsi3_cmp_zext" [(set (reg FLAGS_REG) (compare (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" 
"I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] @@ -11637,6 +12234,20 @@ (const_string "2") (const_string "*")))]) +(define_insn "*lshrhi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. @@ -11644,7 +12255,7 @@ [(set (reg FLAGS_REG) (compare (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] @@ -11654,6 +12265,19 @@ [(set_attr "type" "ishift") (set_attr "mode" "HI")]) +(define_insn "*lshrhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + (define_expand "lshrqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "") @@ -11736,6 +12360,20 @@ (const_string "2") (const_string "*")))]) +(define_insn "*lshrqi2_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI 
(match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. @@ -11743,7 +12381,7 @@ [(set (reg FLAGS_REG) (compare (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] @@ -11752,17 +12390,66 @@ "shr{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) + +(define_insn "*lshrqi2_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) ;; Rotate instructions (define_expand "rotldi3" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "") + [(set (match_operand:DI 0 "shiftdi_operand" "") + (rotate:DI (match_operand:DI 1 "shiftdi_operand" "") (match_operand:QI 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "ix86_expand_binary_operator (ROTATE, DImode, operands); DONE;") + "" +{ + if (TARGET_64BIT) + { + ix86_expand_binary_operator (ROTATE, DImode, operands); + DONE; + } + if (!const_1_to_31_operand (operands[2], VOIDmode)) + FAIL; + emit_insn 
(gen_ix86_rotldi3 (operands[0], operands[1], operands[2])); + DONE; +}) +;; Implement rotation using two double-precision shift instructions +;; and a scratch register. +(define_insn_and_split "ix86_rotldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotate:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 3 "=&r"))] + "!TARGET_64BIT" + "" + "&& reload_completed" + [(set (match_dup 3) (match_dup 4)) + (parallel + [(set (match_dup 4) + (ior:SI (ashift:SI (match_dup 4) (match_dup 2)) + (lshiftrt:SI (match_dup 5) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 5) + (ior:SI (ashift:SI (match_dup 5) (match_dup 2)) + (lshiftrt:SI (match_dup 3) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (operands, 1, operands + 4, operands + 5);") + (define_insn "*rotlsi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0") @@ -11944,12 +12631,48 @@ (set_attr "mode" "QI")]) (define_expand "rotrdi3" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" ""))) + [(set (match_operand:DI 0 "shiftdi_operand" "") + (rotate:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT" - "ix86_expand_binary_operator (ROTATERT, DImode, operands); DONE;") + "" +{ + if (TARGET_64BIT) + { + ix86_expand_binary_operator (ROTATERT, DImode, operands); + DONE; + } + if (!const_1_to_31_operand (operands[2], VOIDmode)) + FAIL; + emit_insn (gen_ix86_rotrdi3 (operands[0], operands[1], operands[2])); + DONE; +}) + +;; Implement rotation using two double-precision shift instructions +;; and a scratch register. 
+(define_insn_and_split "ix86_rotrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotatert:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 3 "=&r"))] + "!TARGET_64BIT" + "" + "&& reload_completed" + [(set (match_dup 3) (match_dup 4)) + (parallel + [(set (match_dup 4) + (ior:SI (ashiftrt:SI (match_dup 4) (match_dup 2)) + (ashift:SI (match_dup 5) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 5) + (ior:SI (ashiftrt:SI (match_dup 5) (match_dup 2)) + (ashift:SI (match_dup 3) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (operands, 1, operands + 4, operands + 5);") (define_insn "*rotrdi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") @@ -12139,8 +12862,8 @@ (define_expand "extv" [(set (match_operand:SI 0 "register_operand" "") (sign_extract:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "immediate_operand" "") - (match_operand:SI 3 "immediate_operand" "")))] + (match_operand:SI 2 "const8_operand" "") + (match_operand:SI 3 "const8_operand" "")))] "" { /* Handle extractions from %ah et al. */ @@ -12156,8 +12879,8 @@ (define_expand "extzv" [(set (match_operand:SI 0 "register_operand" "") (zero_extract:SI (match_operand 1 "ext_register_operand" "") - (match_operand:SI 2 "immediate_operand" "") - (match_operand:SI 3 "immediate_operand" "")))] + (match_operand:SI 2 "const8_operand" "") + (match_operand:SI 3 "const8_operand" "")))] "" { /* Handle extractions from %ah et al. 
*/ @@ -12172,12 +12895,12 @@ (define_expand "insv" [(set (zero_extract (match_operand 0 "ext_register_operand" "") - (match_operand 1 "immediate_operand" "") - (match_operand 2 "immediate_operand" "")) + (match_operand 1 "const8_operand" "") + (match_operand 2 "const8_operand" "")) (match_operand 3 "register_operand" ""))] "" { - /* Handle extractions from %ah et al. */ + /* Handle insertions to %ah et al. */ if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8) FAIL; @@ -12544,17 +13267,14 @@ ;; The SSE store flag instructions saves 0 or 0xffffffff to the result. ;; subsequent logical operations are used to imitate conditional moves. ;; 0xffffffff is NaN, but not in normalized form, so we can't represent -;; it directly. Further holding this value in pseudo register might bring -;; problem in implicit normalization in spill code. -;; So we don't define FLOAT_STORE_FLAG_VALUE and create these -;; instructions after reload by splitting the conditional move patterns. +;; it directly. 
(define_insn "*sse_setccsf" [(set (match_operand:SF 0 "register_operand" "=x") (match_operator:SF 1 "sse_comparison_operator" [(match_operand:SF 2 "register_operand" "0") (match_operand:SF 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE && reload_completed" + "TARGET_SSE" "cmp%D1ss\t{%3, %0|%0, %3}" [(set_attr "type" "ssecmp") (set_attr "mode" "SF")]) @@ -12564,7 +13284,7 @@ (match_operator:DF 1 "sse_comparison_operator" [(match_operand:DF 2 "register_operand" "0") (match_operand:DF 3 "nonimmediate_operand" "Ym")]))] - "TARGET_SSE2 && reload_completed" + "TARGET_SSE2" "cmp%D1sd\t{%3, %0|%0, %3}" [(set_attr "type" "ssecmp") (set_attr "mode" "DF")]) @@ -12660,7 +13380,7 @@ (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "ix86_expand_branch (UNORDERED, operands[0]); DONE;") (define_expand "bordered" @@ -12668,7 +13388,7 @@ (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "ix86_expand_branch (ORDERED, operands[0]); DONE;") (define_expand "buneq" @@ -12676,7 +13396,7 @@ (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "ix86_expand_branch (UNEQ, operands[0]); DONE;") (define_expand "bunge" @@ -12684,7 +13404,7 @@ (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "ix86_expand_branch (UNGE, operands[0]); DONE;") (define_expand "bungt" @@ -12692,7 +13412,7 @@ (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "ix86_expand_branch (UNGT, operands[0]); DONE;") (define_expand "bunle" @@ -12700,7 +13420,7 @@ (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || 
TARGET_SSE_MATH" "ix86_expand_branch (UNLE, operands[0]); DONE;") (define_expand "bunlt" @@ -12708,7 +13428,7 @@ (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "ix86_expand_branch (UNLT, operands[0]); DONE;") (define_expand "bltgt" @@ -12716,7 +13436,7 @@ (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "ix86_expand_branch (LTGT, operands[0]); DONE;") (define_insn "*jcc_1" @@ -12808,18 +13528,17 @@ ;; during early optimization. Splitting the operation apart early makes ;; for bad code when we want to reverse the operation. -(define_insn "*fp_jcc_1" +(define_insn "*fp_jcc_1_mixed" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "f") - (match_operand 2 "register_operand" "f")]) + [(match_operand 1 "register_operand" "f,x") + (match_operand 2 "nonimmediate_operand" "f,xm")]) (label_ref (match_operand 3 "" "")) (pc))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG))] - "TARGET_CMOVE && TARGET_80387 - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1])) - && FLOAT_MODE_P (GET_MODE (operands[1])) + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -12827,44 +13546,44 @@ (define_insn "*fp_jcc_1_sse" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "f#x,x#f") - (match_operand 2 "nonimmediate_operand" "f#x,xm#f")]) + [(match_operand 1 "register_operand" "x") + (match_operand 2 "nonimmediate_operand" "xm")]) (label_ref (match_operand 3 "" "")) (pc))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG))] - "TARGET_80387 + "TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && 
ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_1_sse_only" +(define_insn "*fp_jcc_1_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "x") - (match_operand 2 "nonimmediate_operand" "xm")]) + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) (label_ref (match_operand 3 "" "")) (pc))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG))] - "SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + "TARGET_CMOVE && TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_2" +(define_insn "*fp_jcc_2_mixed" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "f") - (match_operand 2 "register_operand" "f")]) + [(match_operand 1 "register_operand" "f,x") + (match_operand 2 "nonimmediate_operand" "f,xm")]) (pc) (label_ref (match_operand 3 "" "")))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG))] - "TARGET_CMOVE && TARGET_80387 - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1])) - && FLOAT_MODE_P (GET_MODE (operands[1])) + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -12872,33 +13591,34 @@ (define_insn "*fp_jcc_2_sse" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "f#x,x#f") - (match_operand 2 "nonimmediate_operand" "f#x,xm#f")]) + [(match_operand 1 "register_operand" "x") + (match_operand 2 "nonimmediate_operand" "xm")]) (pc) (label_ref (match_operand 3 "" "")))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG))] - "TARGET_80387 + "TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && 
ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_2_sse_only" +(define_insn "*fp_jcc_2_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" - [(match_operand 1 "register_operand" "x") - (match_operand 2 "nonimmediate_operand" "xm")]) + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) (pc) (label_ref (match_operand 3 "" "")))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG))] - "SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + "TARGET_CMOVE && TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_3" +(define_insn "*fp_jcc_3_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operand 1 "register_operand" "f") @@ -12917,7 +13637,7 @@ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_4" +(define_insn "*fp_jcc_4_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operand 1 "register_operand" "f") @@ -12936,7 +13656,7 @@ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_5" +(define_insn "*fp_jcc_5_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operand 1 "register_operand" "f") @@ -12952,7 +13672,7 @@ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_6" +(define_insn "*fp_jcc_6_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operand 1 "register_operand" "f") @@ -12968,11 +13688,11 @@ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -(define_insn "*fp_jcc_7" +(define_insn "*fp_jcc_7_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operand 1 "register_operand" "f") - (match_operand 2 "const_double_operand" "C")]) + (match_operand 2 "const0_operand" "X")]) (label_ref (match_operand 3 "" "")) (pc))) 
(clobber (reg:CCFP FPSR_REG)) @@ -12980,30 +13700,30 @@ (clobber (match_scratch:HI 4 "=a"))] "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && operands[2] == CONST0_RTX (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) && !ix86_use_fcomi_compare (GET_CODE (operands[0])) && SELECT_CC_MODE (GET_CODE (operands[0]), operands[1], operands[2]) == CCFPmode && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") -;; The order of operands in *fp_jcc_8 is forced by combine in +;; The order of operands in *fp_jcc_8_387 is forced by combine in ;; simplify_comparison () function. Float operator is treated as RTX_OBJ ;; with a precedence over other operators and is always put in the first ;; place. Swap condition and operands to match ficom instruction. -(define_insn "*fp_jcc_8" +(define_insn "*fp_jcc_8_387" [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operator 1 "float_operator" - [(match_operand:SI 2 "nonimmediate_operand" "m,?r")]) + [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")]) (match_operand 3 "register_operand" "f,f")]) (label_ref (match_operand 4 "" "")) (pc))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 5 "=a,a"))] - "TARGET_80387 && TARGET_USE_FIOP + "TARGET_80387 && TARGET_USE_MODE_FIOP && FLOAT_MODE_P (GET_MODE (operands[3])) && GET_MODE (operands[1]) == GET_MODE (operands[3]) && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0]))) @@ -13050,7 +13770,7 @@ [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operator 1 "float_operator" - [(match_operand:SI 2 "memory_operand" "")]) + [(match_operand:X87MODEI12 2 "memory_operand" "")]) (match_operand 3 "register_operand" "")]) (match_operand 4 "" "") (match_operand 5 "" ""))) @@ -13072,7 +13792,7 @@ [(set (pc) (if_then_else (match_operator 0 "comparison_operator" [(match_operator 1 "float_operator" - [(match_operand:SI 2 "register_operand" "")]) + 
[(match_operand:X87MODEI12 2 "register_operand" "")]) (match_operand 3 "register_operand" "")]) (match_operand 4 "" "") (match_operand 5 "" ""))) @@ -13178,113 +13898,6 @@ [(set_attr "type" "ibr") (set_attr "length_immediate" "0")]) -;; Loop instruction -;; -;; This is all complicated by the fact that since this is a jump insn -;; we must handle our own reloads. - -(define_expand "doloop_end" - [(use (match_operand 0 "" "")) ; loop pseudo - (use (match_operand 1 "" "")) ; iterations; zero if unknown - (use (match_operand 2 "" "")) ; max iterations - (use (match_operand 3 "" "")) ; loop level - (use (match_operand 4 "" ""))] ; label - "!TARGET_64BIT && TARGET_USE_LOOP" - " -{ - /* Only use cloop on innermost loops. */ - if (INTVAL (operands[3]) > 1) - FAIL; - if (GET_MODE (operands[0]) != SImode) - FAIL; - emit_jump_insn (gen_doloop_end_internal (operands[4], operands[0], - operands[0])); - DONE; -}") - -(define_insn "doloop_end_internal" - [(set (pc) - (if_then_else (ne (match_operand:SI 1 "register_operand" "c,?*r,?*r") - (const_int 1)) - (label_ref (match_operand 0 "" "")) - (pc))) - (set (match_operand:SI 2 "nonimmediate_operand" "=1,1,*m*r") - (plus:SI (match_dup 1) - (const_int -1))) - (clobber (match_scratch:SI 3 "=X,X,r")) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && TARGET_USE_LOOP - && (reload_in_progress || reload_completed - || register_operand (operands[2], VOIDmode))" -{ - if (which_alternative != 0) - return "#"; - if (get_attr_length (insn) == 2) - return "%+loop\t%l0"; - else - return "dec{l}\t%1\;%+jne\t%l0"; -} - [(set (attr "length") - (if_then_else (and (eq_attr "alternative" "0") - (and (ge (minus (match_dup 0) (pc)) - (const_int -126)) - (lt (minus (match_dup 0) (pc)) - (const_int 128)))) - (const_int 2) - (const_int 16))) - ;; We don't know the type before shorten branches. Optimistically expect - ;; the loop instruction to match. 
- (set (attr "type") (const_string "ibr"))]) - -(define_split - [(set (pc) - (if_then_else (ne (match_operand:SI 1 "register_operand" "") - (const_int 1)) - (match_operand 0 "" "") - (pc))) - (set (match_dup 1) - (plus:SI (match_dup 1) - (const_int -1))) - (clobber (match_scratch:SI 2 "")) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && TARGET_USE_LOOP - && reload_completed - && REGNO (operands[1]) != 2" - [(parallel [(set (reg:CCZ FLAGS_REG) - (compare:CCZ (plus:SI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 1) (plus:SI (match_dup 1) (const_int -1)))]) - (set (pc) (if_then_else (ne (reg:CCZ FLAGS_REG) (const_int 0)) - (match_dup 0) - (pc)))] - "") - -(define_split - [(set (pc) - (if_then_else (ne (match_operand:SI 1 "register_operand" "") - (const_int 1)) - (match_operand 0 "" "") - (pc))) - (set (match_operand:SI 2 "nonimmediate_operand" "") - (plus:SI (match_dup 1) - (const_int -1))) - (clobber (match_scratch:SI 3 "")) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && TARGET_USE_LOOP - && reload_completed - && (! REG_P (operands[2]) - || ! rtx_equal_p (operands[1], operands[2]))" - [(set (match_dup 3) (match_dup 1)) - (parallel [(set (reg:CCZ FLAGS_REG) - (compare:CCZ (plus:SI (match_dup 3) (const_int -1)) - (const_int 0))) - (set (match_dup 3) (plus:SI (match_dup 3) (const_int -1)))]) - (set (match_dup 2) (match_dup 3)) - (set (pc) (if_then_else (ne (reg:CCZ FLAGS_REG) (const_int 0)) - (match_dup 0) - (pc)))] - "") - ;; Convert setcc + movzbl to xor + setcc if operands don't overlap. 
(define_peephole2 @@ -13301,7 +13914,7 @@ (set (strict_low_part (match_dup 5)) (match_dup 2))] { - operands[4] = gen_rtx_REG (GET_MODE (operands[0]), 17); + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); operands[5] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) @@ -13323,7 +13936,7 @@ (set (strict_low_part (match_dup 5)) (match_dup 2))] { - operands[4] = gen_rtx_REG (GET_MODE (operands[0]), 17); + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); operands[5] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) @@ -13625,7 +14238,7 @@ #else /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that. The align insn is used to avoid 3 jump instructions in the row to improve - branch prediction and the benefits hardly outweight the cost of extra 8 + branch prediction and the benefits hardly outweigh the cost of extra 8 nops on the average inserted by full alignment pseudo operation. */ #endif return ""; @@ -13642,10 +14255,28 @@ (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - { return output_set_got (operands[0]); } + { return output_set_got (operands[0], NULL_RTX); } + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "set_got_labelled" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(label_ref (match_operand 1 "" ""))] + UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + { return output_set_got (operands[0], operands[1]); } [(set_attr "type" "multi") (set_attr "length" "12")]) +(define_insn "set_got_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))] + "TARGET_64BIT" + "lea{q}\t_GLOBAL_OFFSET_TABLE_(%%rip), %0" + [(set_attr "type" "lea") + (set_attr "length" "6")]) + (define_expand "epilogue" [(const_int 1)] "" @@ -13925,13 +14556,19 @@ operands[2] = gen_reg_rtx (Pmode); emit_insn (gen_set_got (operands[2])); } + if 
(TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_32 + (operands[0], operands[1], operands[2])); + DONE; + } operands[3] = ix86_tls_get_addr (); }) (define_insn "*tls_global_dynamic_64" [(set (match_operand:DI 0 "register_operand" "=a") - (call (mem:QI (match_operand:DI 2 "call_insn_operand" "")) - (match_operand:DI 3 "" ""))) + (call:DI (mem:QI (match_operand:DI 2 "call_insn_operand" "")) + (match_operand:DI 3 "" ""))) (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] UNSPEC_TLS_GD)] "TARGET_64BIT" @@ -13941,11 +14578,17 @@ (define_expand "tls_global_dynamic_64" [(parallel [(set (match_operand:DI 0 "register_operand" "") - (call (mem:QI (match_dup 2)) (const_int 0))) + (call:DI (mem:QI (match_dup 2)) (const_int 0))) (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] UNSPEC_TLS_GD)])] "" { + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_64 + (operands[0], operands[1])); + DONE; + } operands[2] = ix86_tls_get_addr (); }) @@ -13992,13 +14635,19 @@ operands[1] = gen_reg_rtx (Pmode); emit_insn (gen_set_got (operands[1])); } - operands[2] = ix86_tls_get_addr (); + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_32 + (operands[0], ix86_tls_module_base (), operands[1])); + DONE; + } + operands[2] = ix86_tls_get_addr (); }) (define_insn "*tls_local_dynamic_base_64" [(set (match_operand:DI 0 "register_operand" "=a") - (call (mem:QI (match_operand:DI 1 "call_insn_operand" "")) - (match_operand:DI 2 "" ""))) + (call:DI (mem:QI (match_operand:DI 1 "call_insn_operand" "")) + (match_operand:DI 2 "" ""))) (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] "TARGET_64BIT" "lea{q}\t{%&@TLSLD(%%rip), %%rdi|%%rdi, %&@TLSLD[%%rip]}\;call\t%P1" @@ -14007,10 +14656,16 @@ (define_expand "tls_local_dynamic_base_64" [(parallel [(set (match_operand:DI 0 "register_operand" "") - (call (mem:QI (match_dup 1)) (const_int 0))) + (call:DI (mem:QI (match_dup 1)) (const_int 0))) (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])] "" { + if 
(TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_64 + (operands[0], ix86_tls_module_base ())); + DONE; + } operands[1] = ix86_tls_get_addr (); }) @@ -14088,6 +14743,136 @@ (set_attr "length" "7") (set_attr "memory" "load") (set_attr "imm_disp" "false")]) + +;; GNU2 TLS patterns can be split. + +(define_expand "tls_dynamic_gnu2_32" + [(set (match_dup 3) + (plus:SI (match_operand:SI 2 "register_operand" "") + (const:SI + (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC)))) + (parallel + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_dup 1) (match_dup 3) + (match_dup 2) (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "!TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) + +(define_insn "*tls_dynamic_lea_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "b") + (const:SI + (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC))))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}" + [(set_attr "type" "lea") + (set_attr "mode" "SI") + (set_attr "length" "6") + (set_attr "length_address" "4")]) + +(define_insn "*tls_dynamic_call_32" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "") + (match_operand:SI 2 "register_operand" "0") + ;; we have to make sure %ebx still points to the GOT + (match_operand:SI 3 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}" + [(set_attr "type" "call") + (set_attr "length" "2") + (set_attr "length_address" "0")]) + +(define_insn_and_split "*tls_dynamic_gnu2_combine_32" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI + 
(unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "") + (match_operand:SI 4 "" "") + (match_operand:SI 2 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC) + (const:SI (unspec:SI + [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "#" + "" + [(set (match_dup 0) (match_dup 5))] +{ + operands[5] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2])); +}) + +(define_expand "tls_dynamic_gnu2_64" + [(set (match_dup 2) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC)) + (parallel + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[2] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) + +(define_insn "*tls_dynamic_lea_64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC))] + "TARGET_64BIT && TARGET_GNU2_TLS" + "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[%%rip]}" + [(set_attr "type" "lea") + (set_attr "mode" "DI") + (set_attr "length" "7") + (set_attr "length_address" "4")]) + +(define_insn "*tls_dynamic_call_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "") + (match_operand:DI 2 "register_operand" "0") + (reg:DI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_GNU2_TLS" + "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}" + [(set_attr "type" "call") + (set_attr "length" "2") + (set_attr "length_address" "0")]) + +(define_insn_and_split "*tls_dynamic_gnu2_combine_64" + [(set (match_operand:DI 0 "register_operand" "=&a") + (plus:DI + (unspec:DI 
[(match_operand:DI 2 "tls_modbase_operand" "") + (match_operand:DI 3 "" "") + (reg:DI SP_REG)] + UNSPEC_TLSDESC) + (const:DI (unspec:DI + [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_GNU2_TLS" + "#" + "" + [(set (match_dup 0) (match_dup 4))] +{ + operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1])); +}) + +;; ;; These patterns match the binary 387 instructions for addM3, subM3, ;; mulM3 and divM3. There are three patterns for each of DFmode and @@ -14101,10 +14886,10 @@ ;; so use special patterns for add and mull. (define_insn "*fop_sf_comm_mixed" - [(set (match_operand:SF 0 "register_operand" "=f#x,x#f") + [(set (match_operand:SF 0 "register_operand" "=f,x") (match_operator:SF 3 "binary_fp_operator" [(match_operand:SF 1 "nonimmediate_operand" "%0,0") - (match_operand:SF 2 "nonimmediate_operand" "fm#x,xm#f")]))] + (match_operand:SF 2 "nonimmediate_operand" "fm,xm")]))] "TARGET_MIX_SSE_I387 && COMMUTATIVE_ARITH_P (operands[3]) && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" @@ -14153,7 +14938,7 @@ [(set (match_operand:SF 0 "register_operand" "=f,f,x") (match_operator:SF 3 "binary_fp_operator" [(match_operand:SF 1 "nonimmediate_operand" "0,fm,0") - (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm#f")]))] + (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm")]))] "TARGET_MIX_SSE_I387 && !COMMUTATIVE_ARITH_P (operands[3]) && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" @@ -14192,12 +14977,13 @@ (const_string "sseadd"))) (set_attr "mode" "SF")]) +;; This pattern is not fully shadowed by the pattern above. 
(define_insn "*fop_sf_1_i387" [(set (match_operand:SF 0 "register_operand" "=f,f") (match_operator:SF 3 "binary_fp_operator" [(match_operand:SF 1 "nonimmediate_operand" "0,fm") (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))] - "TARGET_80387 + "TARGET_80387 && !TARGET_SSE_MATH && !COMMUTATIVE_ARITH_P (operands[3]) && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" @@ -14210,14 +14996,13 @@ (const_string "fop"))) (set_attr "mode" "SF")]) - ;; ??? Add SSE splitters for these! -(define_insn "*fop_sf_2_i387" +(define_insn "*fop_sf_2_i387" [(set (match_operand:SF 0 "register_operand" "=f,f") (match_operator:SF 3 "binary_fp_operator" - [(float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) + [(float:SF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) (match_operand:SF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH" + "TARGET_80387 && TARGET_USE_MODE_FIOP && !TARGET_SSE_MATH" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:SF 3 "mult_operator" "") @@ -14227,14 +15012,14 @@ ] (const_string "fop"))) (set_attr "fp_int_src" "true") - (set_attr "mode" "SI")]) + (set_attr "mode" "")]) -(define_insn "*fop_sf_3_i387" +(define_insn "*fop_sf_3_i387" [(set (match_operand:SF 0 "register_operand" "=f,f") (match_operator:SF 3 "binary_fp_operator" [(match_operand:SF 1 "register_operand" "0,0") - (float:SF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH" + (float:SF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && TARGET_USE_MODE_FIOP && !TARGET_SSE_MATH" "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:SF 3 "mult_operator" "") @@ -14244,23 +15029,23 @@ ] (const_string "fop"))) (set_attr "fp_int_src" "true") - (set_attr "mode" "SI")]) + (set_attr "mode" "")]) (define_insn "*fop_df_comm_mixed" - [(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f") + [(set (match_operand:DF 0 "register_operand" "=f,Y") (match_operator:DF 3 "binary_fp_operator" [(match_operand:DF 1 "nonimmediate_operand" "%0,0") - (match_operand:DF 2 "nonimmediate_operand" "fm#Y,Ym#f")]))] + (match_operand:DF 2 "nonimmediate_operand" "fm,Ym")]))] "TARGET_SSE2 && TARGET_MIX_SSE_I387 && COMMUTATIVE_ARITH_P (operands[3]) && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (if_then_else (eq_attr "alternative" "1") - (if_then_else (match_operand:SF 3 "mult_operator" "") + (if_then_else (match_operand:DF 3 "mult_operator" "") (const_string "ssemul") (const_string "sseadd")) - (if_then_else (match_operand:SF 3 "mult_operator" "") + (if_then_else (match_operand:DF 3 "mult_operator" "") (const_string "fmul") (const_string "fop")))) (set_attr "mode" "DF")]) @@ -14275,7 +15060,7 @@ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (if_then_else (match_operand:SF 3 "mult_operator" "") + (if_then_else (match_operand:DF 3 "mult_operator" "") (const_string "ssemul") (const_string "sseadd"))) (set_attr "mode" "DF")]) @@ -14290,26 +15075,26 @@ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (if_then_else (match_operand:SF 3 "mult_operator" "") + (if_then_else (match_operand:DF 3 "mult_operator" "") (const_string "fmul") (const_string "fop"))) (set_attr "mode" "DF")]) (define_insn "*fop_df_1_mixed" - [(set (match_operand:DF 0 "register_operand" "=f#Y,f#Y,Y#f") 
+ [(set (match_operand:DF 0 "register_operand" "=f,f,Y") (match_operator:DF 3 "binary_fp_operator" [(match_operand:DF 1 "nonimmediate_operand" "0,fm,0") - (match_operand:DF 2 "nonimmediate_operand" "fm,0,Ym#f")]))] + (match_operand:DF 2 "nonimmediate_operand" "fm,0,Ym")]))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 && !COMMUTATIVE_ARITH_P (operands[3]) && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(and (eq_attr "alternative" "2") - (match_operand:SF 3 "mult_operator" "")) + (match_operand:DF 3 "mult_operator" "")) (const_string "ssemul") (and (eq_attr "alternative" "2") - (match_operand:SF 3 "div_operator" "")) + (match_operand:DF 3 "div_operator" "")) (const_string "ssediv") (eq_attr "alternative" "2") (const_string "sseadd") @@ -14331,19 +15116,20 @@ "* return output_387_binary_op (insn, operands);" [(set_attr "mode" "DF") (set (attr "type") - (cond [(match_operand:SF 3 "mult_operator" "") + (cond [(match_operand:DF 3 "mult_operator" "") (const_string "ssemul") - (match_operand:SF 3 "div_operator" "") + (match_operand:DF 3 "div_operator" "") (const_string "ssediv") ] (const_string "sseadd")))]) +;; This pattern is not fully shadowed by the pattern above. (define_insn "*fop_df_1_i387" [(set (match_operand:DF 0 "register_operand" "=f,f") (match_operator:DF 3 "binary_fp_operator" [(match_operand:DF 1 "nonimmediate_operand" "0,fm") (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))] - "TARGET_80387 + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) && !COMMUTATIVE_ARITH_P (operands[3]) && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" @@ -14357,12 +15143,13 @@ (set_attr "mode" "DF")]) ;; ??? Add SSE splitters for these! 
-(define_insn "*fop_df_2_i387" +(define_insn "*fop_df_2_i387" [(set (match_operand:DF 0 "register_operand" "=f,f") (match_operator:DF 3 "binary_fp_operator" - [(float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) + [(float:DF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) (match_operand:DF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "TARGET_80387 && TARGET_USE_MODE_FIOP + && !(TARGET_SSE2 && TARGET_SSE_MATH)" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:DF 3 "mult_operator" "") @@ -14372,14 +15159,15 @@ ] (const_string "fop"))) (set_attr "fp_int_src" "true") - (set_attr "mode" "SI")]) + (set_attr "mode" "")]) -(define_insn "*fop_df_3_i387" +(define_insn "*fop_df_3_i387" [(set (match_operand:DF 0 "register_operand" "=f,f") (match_operator:DF 3 "binary_fp_operator" [(match_operand:DF 1 "register_operand" "0,0") - (float:DF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)" + (float:DF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && TARGET_USE_MODE_FIOP + && !(TARGET_SSE2 && TARGET_SSE_MATH)" "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:DF 3 "mult_operator" "") @@ -14389,7 +15177,7 @@ ] (const_string "fop"))) (set_attr "fp_int_src" "true") - (set_attr "mode" "SI")]) + (set_attr "mode" "")]) (define_insn "*fop_df_4_i387" [(set (match_operand:DF 0 "register_operand" "=f,f") @@ -14474,12 +15262,12 @@ (const_string "fop"))) (set_attr "mode" "XF")]) -(define_insn "*fop_xf_2_i387" +(define_insn "*fop_xf_2_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" - [(float:XF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) + [(float:XF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) (match_operand:XF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_FIOP" + "TARGET_80387 && TARGET_USE_MODE_FIOP" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14489,14 +15277,14 @@ ] (const_string "fop"))) (set_attr "fp_int_src" "true") - (set_attr "mode" "SI")]) + (set_attr "mode" "")]) -(define_insn "*fop_xf_3_i387" +(define_insn "*fop_xf_3_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,0") - (float:XF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_FIOP" + (float:XF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && TARGET_USE_MODE_FIOP" "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14506,7 +15294,7 @@ ] (const_string "fop"))) (set_attr "fp_int_src" "true") - (set_attr "mode" "SI")]) + (set_attr "mode" "")]) (define_insn "*fop_xf_4_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") @@ -14562,7 +15350,7 @@ (define_split [(set (match_operand 0 "register_operand" "") (match_operator 3 "binary_fp_operator" - [(float (match_operand:SI 1 "register_operand" "")) + [(float (match_operand:X87MODEI12 1 "register_operand" "")) (match_operand 2 "register_operand" "")]))] "TARGET_80387 && reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))" @@ -14583,7 +15371,7 @@ [(set (match_operand 0 "register_operand" "") (match_operator 3 "binary_fp_operator" [(match_operand 1 "register_operand" "") - (float (match_operand:SI 2 "register_operand" ""))]))] + (float (match_operand:X87MODEI12 2 "register_operand" ""))]))] "TARGET_80387 && reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))" [(const_int 0)] @@ -14611,8 +15399,8 @@ }) (define_insn "*sqrtsf2_mixed" - [(set (match_operand:SF 0 "register_operand" "=f#x,x#f") - (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "0#x,xm#f")))] + [(set (match_operand:SF 0 "register_operand" "=f,x") + (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm")))] "TARGET_USE_FANCY_MATH_387 && TARGET_MIX_SSE_I387" "@ fsqrt @@ -14649,8 +15437,8 @@ }) (define_insn "*sqrtdf2_mixed" - [(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f") - (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0#Y,Ym#f")))] + [(set (match_operand:DF 0 "register_operand" "=f,Y") + (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym")))] "TARGET_USE_FANCY_MATH_387 && TARGET_SSE2 && TARGET_MIX_SSE_I387" "@ fsqrt @@ -14739,6 +15527,7 @@ (use (match_operand:SF 1 "register_operand" "")) (use (match_operand:SF 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && 
flag_unsafe_math_optimizations" { rtx label = gen_label_rtx (); @@ -14763,6 +15552,7 @@ (use (match_operand:DF 1 "register_operand" "")) (use (match_operand:DF 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx label = gen_label_rtx (); @@ -14822,6 +15612,7 @@ (use (match_operand:SF 1 "register_operand" "")) (use (match_operand:SF 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx label = gen_label_rtx (); @@ -14846,6 +15637,7 @@ (use (match_operand:DF 1 "register_operand" "")) (use (match_operand:DF 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx label = gen_label_rtx (); @@ -14888,6 +15680,7 @@ [(set (match_operand:DF 0 "register_operand" "=f") (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_SIN))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fsin" [(set_attr "type" "fpspc") @@ -14897,6 +15690,7 @@ [(set (match_operand:SF 0 "register_operand" "=f") (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_SIN))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fsin" [(set_attr "type" "fpspc") @@ -14908,6 +15702,7 @@ (match_operand:SF 1 "register_operand" "0"))] UNSPEC_SIN))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fsin" [(set_attr "type" "fpspc") @@ -14926,6 +15721,7 @@ [(set (match_operand:DF 0 "register_operand" "=f") (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_COS))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && 
flag_unsafe_math_optimizations" "fcos" [(set_attr "type" "fpspc") @@ -14935,6 +15731,7 @@ [(set (match_operand:SF 0 "register_operand" "=f") (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_COS))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fcos" [(set_attr "type" "fpspc") @@ -14946,6 +15743,7 @@ (match_operand:SF 1 "register_operand" "0"))] UNSPEC_COS))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fcos" [(set_attr "type" "fpspc") @@ -14973,6 +15771,7 @@ (set (match_operand:DF 1 "register_operand" "=u") (unspec:DF [(match_dup 2)] UNSPEC_SINCOS_SIN))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fsincos" [(set_attr "type" "fpspc") @@ -15007,6 +15806,7 @@ (set (match_operand:SF 1 "register_operand" "=u") (unspec:SF [(match_dup 2)] UNSPEC_SINCOS_SIN))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fsincos" [(set_attr "type" "fpspc") @@ -15043,6 +15843,7 @@ (unspec:DF [(float_extend:DF (match_dup 2))] UNSPEC_SINCOS_SIN))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fsincos" [(set_attr "type" "fpspc") @@ -15117,6 +15918,7 @@ (set (match_operand:DF 1 "register_operand" "=u") (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fptan" [(set_attr "type" "fpspc") @@ -15147,6 +15949,7 @@ (set (match_operand:DF 0 "register_operand" "") (unspec:DF [(match_dup 1)] UNSPEC_TAN_TAN))])] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { operands[2] = gen_reg_rtx (DFmode); @@ -15159,6 +15962,7 @@ (set 
(match_operand:SF 1 "register_operand" "=u") (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fptan" [(set_attr "type" "fpspc") @@ -15189,6 +15993,7 @@ (set (match_operand:SF 0 "register_operand" "") (unspec:SF [(match_dup 1)] UNSPEC_TAN_TAN))])] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { operands[2] = gen_reg_rtx (SFmode); @@ -15243,16 +16048,18 @@ UNSPEC_FPATAN)) (clobber (match_scratch:DF 3 "=1"))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fpatan" [(set_attr "type" "fpspc") (set_attr "mode" "DF")]) (define_expand "atan2df3" - [(use (match_operand:DF 0 "register_operand" "=f")) - (use (match_operand:DF 2 "register_operand" "0")) - (use (match_operand:DF 1 "register_operand" "u"))] + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DF 2 "register_operand" "")) + (use (match_operand:DF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx copy = gen_reg_rtx (DFmode); @@ -15268,6 +16075,7 @@ UNSPEC_FPATAN)) (clobber (match_scratch:DF 3 ""))])] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { operands[2] = gen_reg_rtx (DFmode); @@ -15281,16 +16089,18 @@ UNSPEC_FPATAN)) (clobber (match_scratch:SF 3 "=1"))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" "fpatan" [(set_attr "type" "fpspc") (set_attr "mode" "SF")]) (define_expand "atan2sf3" - [(use (match_operand:SF 0 "register_operand" "=f")) - (use (match_operand:SF 2 "register_operand" "0")) - (use (match_operand:SF 1 "register_operand" "u"))] + [(use (match_operand:SF 0 "register_operand" "")) + 
(use (match_operand:SF 2 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx copy = gen_reg_rtx (SFmode); @@ -15306,6 +16116,7 @@ UNSPEC_FPATAN)) (clobber (match_scratch:SF 3 ""))])] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { operands[2] = gen_reg_rtx (SFmode); @@ -15325,9 +16136,9 @@ (set_attr "mode" "XF")]) (define_expand "atan2xf3" - [(use (match_operand:XF 0 "register_operand" "=f")) - (use (match_operand:XF 2 "register_operand" "0")) - (use (match_operand:XF 1 "register_operand" "u"))] + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 2 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { @@ -15363,6 +16174,7 @@ (set (match_operand:DF 0 "register_operand" "") (float_truncate:DF (match_dup 7)))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { int i; @@ -15386,6 +16198,7 @@ (set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_dup 7)))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { int i; @@ -15430,6 +16243,7 @@ (set (match_operand:DF 0 "register_operand" "") (float_truncate:DF (match_dup 7)))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { int i; @@ -15453,6 +16267,7 @@ (set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_dup 7)))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { int i; @@ -15506,6 +16321,7 @@ (set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_dup 4)))] "TARGET_USE_FANCY_MATH_387 + && 
(!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx temp; @@ -15528,6 +16344,7 @@ (set (match_operand:DF 0 "register_operand" "") (float_truncate:DF (match_dup 4)))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx temp; @@ -15565,6 +16382,7 @@ (set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_dup 4)))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx temp; @@ -15587,6 +16405,7 @@ (set (match_operand:DF 0 "register_operand" "") (float_truncate:DF (match_dup 4)))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx temp; @@ -15624,6 +16443,7 @@ (set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_dup 4)))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { operands[2] = gen_reg_rtx (XFmode); @@ -15643,6 +16463,7 @@ (set (match_operand:DF 0 "register_operand" "") (float_truncate:DF (match_dup 4)))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { operands[2] = gen_reg_rtx (XFmode); @@ -15680,6 +16501,7 @@ [(use (match_operand:SF 0 "register_operand" "")) (use (match_operand:SF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); @@ -15695,6 +16517,7 @@ [(use (match_operand:DF 0 "register_operand" "")) (use (match_operand:DF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); @@ -15738,6 +16561,7 @@ (set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_dup 4)))] 
"TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { operands[2] = gen_reg_rtx (XFmode); @@ -15755,6 +16579,7 @@ (set (match_operand:DF 0 "register_operand" "") (float_truncate:DF (match_dup 4)))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { operands[2] = gen_reg_rtx (XFmode); @@ -15784,6 +16609,7 @@ (fix:SI (match_dup 3))) (clobber (reg:CC FLAGS_REG))])] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { operands[2] = gen_reg_rtx (XFmode); @@ -15831,6 +16657,7 @@ (set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_dup 10)))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx temp; @@ -15860,6 +16687,7 @@ (set (match_operand:DF 0 "register_operand" "") (float_truncate:DF (match_dup 10)))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx temp; @@ -15915,6 +16743,7 @@ (set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_dup 10)))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx temp; @@ -15944,6 +16773,7 @@ (set (match_operand:DF 0 "register_operand" "") (float_truncate:DF (match_dup 10)))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx temp; @@ -15998,6 +16828,7 @@ (set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_dup 8)))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { int i; @@ -16023,6 +16854,7 @@ (set (match_operand:DF 0 "register_operand" "") (float_truncate:DF (match_dup 8)))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && 
TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { int i; @@ -16078,6 +16910,7 @@ (set (match_operand:DF 0 "register_operand" "") (float_truncate:DF (match_dup 14)))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx temp; @@ -16114,6 +16947,7 @@ (set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_dup 14)))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx temp; @@ -16159,6 +16993,71 @@ emit_move_insn (operands[2], temp); emit_move_insn (operands[9], CONST1_RTX (XFmode)); /* fld1 */ }) + +(define_expand "ldexpdf3" + [(set (match_dup 3) + (float_extend:XF (match_operand:DF 1 "register_operand" ""))) + (set (match_dup 4) + (float:XF (match_operand:SI 2 "register_operand" ""))) + (parallel [(set (match_dup 5) + (unspec:XF [(match_dup 3) (match_dup 4)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 6) + (unspec:XF [(match_dup 3) (match_dup 4)] + UNSPEC_FSCALE_EXP))]) + (set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF (match_dup 5)))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + int i; + + for (i=3; i<7; i++) + operands[i] = gen_reg_rtx (XFmode); +}) + +(define_expand "ldexpsf3" + [(set (match_dup 3) + (float_extend:XF (match_operand:SF 1 "register_operand" ""))) + (set (match_dup 4) + (float:XF (match_operand:SI 2 "register_operand" ""))) + (parallel [(set (match_dup 5) + (unspec:XF [(match_dup 3) (match_dup 4)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 6) + (unspec:XF [(match_dup 3) (match_dup 4)] + UNSPEC_FSCALE_EXP))]) + (set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF (match_dup 5)))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + int i; + + for (i=3; i<7; i++) + operands[i] = 
gen_reg_rtx (XFmode); +}) + +(define_expand "ldexpxf3" + [(set (match_dup 3) + (float:XF (match_operand:SI 2 "register_operand" ""))) + (parallel [(set (match_operand:XF 0 " register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 3)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 4) + (unspec:XF [(match_dup 1) (match_dup 3)] + UNSPEC_FSCALE_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + for (i=3; i<5; i++) + operands[i] = gen_reg_rtx (XFmode); +}) (define_insn "frndintxf2" @@ -16175,6 +17074,7 @@ [(use (match_operand:DF 0 "register_operand" "")) (use (match_operand:DF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); @@ -16191,6 +17091,7 @@ [(use (match_operand:SF 0 "register_operand" "")) (use (match_operand:SF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); @@ -16213,276 +17114,792 @@ DONE; }) -(define_insn "frndintxf2_floor" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] - UNSPEC_FRNDINT_FLOOR)) - (use (match_operand:HI 2 "memory_operand" "m")) - (use (match_operand:HI 3 "memory_operand" "m"))] +(define_insn_and_split "*fistdi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fistdi2 (operands[0], operands[1])); + else + { + operands[2] = assign_386_stack_local (DImode, SLOT_TEMP); + emit_insn (gen_fistdi2_with_temp (operands[0], operands[1], + operands[2])); + } + DONE; 
+} + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST)) + (clobber (match_scratch:XF 2 "=&1f"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" - "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" - [(set_attr "type" "frndint") - (set_attr "i387_cw" "floor") - (set_attr "mode" "XF")]) + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) -(define_expand "floordf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] +(define_insn "fistdi2_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:XF 3 "=&1f,&1f"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" -{ - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; + "#" + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_floor (op0, op1, op2, op3)); +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) + (clobber (match_dup 3))]) + (set (match_dup 0) (match_dup 2))] + "") - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber 
(match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) + (clobber (match_dup 3))])] + "") + +(define_insn_and_split "*fist2_1" + [(set (match_operand:X87MODEI12 0 "register_operand" "=r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fist2_with_temp (operands[0], operands[1], + operands[2])); DONE; -}) +} + [(set_attr "type" "fpspc") + (set_attr "mode" "")]) -(define_expand "floorsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 "register_operand" ""))] +(define_insn "fist2" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" -{ - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fpspc") + (set_attr "mode" "")]) - emit_insn (gen_extendsfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_floor (op0, op1, op2, op3)); +(define_insn "fist2_with_temp" + [(set (match_operand:X87MODEI12 0 "register_operand" "=r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST)) + (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fpspc") + (set_attr "mode" "")]) - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); - DONE; -}) +(define_split + [(set (match_operand:X87MODEI12 
0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] + "reload_completed" + [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST)) + (set (match_dup 0) (match_dup 2))] + "") -(define_expand "floorxf2" - [(use (match_operand:XF 0 "register_operand" "")) - (use (match_operand:XF 1 "register_operand" ""))] +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST)) + (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] + "reload_completed" + [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST))] + "") + +(define_expand "lrint2" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" + "") + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+(define_insn_and_split "frndintxf2_floor" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_FLOOR)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] { - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; + ix86_optimize_mode_switching[I387_FLOOR] = 1; - emit_insn (gen_frndintxf2_floor (operands[0], operands[1], op2, op3)); + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR); + + emit_insn (gen_frndintxf2_floor_i387 (operands[0], operands[1], + operands[2], operands[3])); DONE; -}) +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "floor") + (set_attr "mode" "XF")]) -(define_insn "frndintxf2_ceil" +(define_insn "frndintxf2_floor_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] - UNSPEC_FRNDINT_CEIL)) + UNSPEC_FRNDINT_FLOOR)) (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" [(set_attr "type" "frndint") - (set_attr "i387_cw" "ceil") + (set_attr "i387_cw" "floor") (set_attr "mode" "XF")]) -(define_expand "ceildf2" +(define_expand "floorxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + emit_insn (gen_frndintxf2_floor (operands[0], operands[1])); + DONE; +}) + +(define_expand "floordf2" [(use (match_operand:DF 0 "register_operand" "")) (use (match_operand:DF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || 
TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_ceil (op0, op1, op2, op3)); + emit_insn (gen_frndintxf2_floor (op0, op1)); emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); DONE; }) -(define_expand "ceilsf2" +(define_expand "floorsf2" [(use (match_operand:SF 0 "register_operand" "")) (use (match_operand:SF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; emit_insn (gen_extendsfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_ceil (op0, op1, op2, op3)); + emit_insn (gen_frndintxf2_floor (op0, op1)); emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); DONE; }) -(define_expand "ceilxf2" - [(use (match_operand:XF 0 "register_operand" "")) - (use (match_operand:XF 1 "register_operand" ""))] +(define_insn_and_split "*fist2_floor_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_FLOOR)) + (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations" + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] { - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; + ix86_optimize_mode_switching[I387_FLOOR] = 1; - emit_insn (gen_frndintxf2_ceil (operands[0], operands[1], op2, op3)); + operands[2] = assign_386_stack_local (HImode, 
SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fist2_floor (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fist2_floor_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } DONE; -}) +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "")]) -(define_insn "frndintxf2_trunc" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] - UNSPEC_FRNDINT_TRUNC)) +(define_insn "fistdi2_floor" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_FLOOR)) (use (match_operand:HI 2 "memory_operand" "m")) - (use (match_operand:HI 3 "memory_operand" "m"))] + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" - "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" - [(set_attr "type" "frndint") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "XF")]) - -(define_expand "btruncdf2" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2_floor_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=m,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + 
(unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))])] + "") + +(define_insn "fist2_floor" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "")]) + +(define_insn "fist2_floor_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "floor") + (set_attr "mode" "")]) + 
+(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_FLOOR)) + (use (match_dup 2)) + (use (match_dup 3))])] + "") + +(define_expand "lfloor2" + [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_FLOOR)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "") + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+(define_insn_and_split "frndintxf2_ceil" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_CEIL)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_CEIL] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL); + + emit_insn (gen_frndintxf2_ceil_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_ceil_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "XF")]) + +(define_expand "ceilxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + emit_insn (gen_frndintxf2_ceil (operands[0], operands[1])); + DONE; +}) + +(define_expand "ceildf2" [(use (match_operand:DF 0 "register_operand" "")) (use (match_operand:DF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn 
(gen_frndintxf2_trunc (op0, op1, op2, op3)); + emit_insn (gen_frndintxf2_ceil (op0, op1)); emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); DONE; }) -(define_expand "btruncsf2" +(define_expand "ceilsf2" [(use (match_operand:SF 0 "register_operand" "")) (use (match_operand:SF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; emit_insn (gen_extendsfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_trunc (op0, op1, op2, op3)); + emit_insn (gen_frndintxf2_ceil (op0, op1)); emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); DONE; }) -(define_expand "btruncxf2" - [(use (match_operand:XF 0 "register_operand" "")) - (use (match_operand:XF 1 "register_operand" ""))] +(define_insn_and_split "*fist2_ceil_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_CEIL)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_CEIL] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fist2_ceil (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fist2_ceil_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "")]) + +(define_insn "fistdi2_ceil" + [(set (match_operand:DI 0 
"memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2_ceil_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=m,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use 
(match_dup 3)) + (clobber (match_dup 5))])] + "") + +(define_insn "fist2_ceil" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "")]) + +(define_insn "fist2_ceil_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "ceil") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)] + UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] + 
UNSPEC_FIST_CEIL)) + (use (match_dup 2)) + (use (match_dup 3))])] + "") + +(define_expand "lceil2" + [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST_CEIL)) + (clobber (reg:CC FLAGS_REG))])] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" + "") + +;; Rounding mode control word calculation could clobber FLAGS_REG. +(define_insn_and_split "frndintxf2_trunc" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_TRUNC)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] { - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; + ix86_optimize_mode_switching[I387_TRUNC] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); - emit_insn (gen_frndintxf2_trunc (operands[0], operands[1], op2, op3)); + emit_insn (gen_frndintxf2_trunc_i387 (operands[0], operands[1], + operands[2], operands[3])); DONE; -}) +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "XF")]) -(define_insn "frndintxf2_mask_pm" +(define_insn "frndintxf2_trunc_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] - UNSPEC_FRNDINT_MASK_PM)) + UNSPEC_FRNDINT_TRUNC)) (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" - "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" [(set_attr "type" "frndint") - (set_attr "i387_cw" "mask_pm") + (set_attr "i387_cw" "trunc") (set_attr 
"mode" "XF")]) -(define_expand "nearbyintdf2" +(define_expand "btruncxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + emit_insn (gen_frndintxf2_trunc (operands[0], operands[1])); + DONE; +}) + +(define_expand "btruncdf2" [(use (match_operand:DF 0 "register_operand" "")) (use (match_operand:DF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_mask_pm (op0, op1, op2, op3)); + emit_insn (gen_frndintxf2_trunc (op0, op1)); emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); DONE; }) -(define_expand "nearbyintsf2" +(define_expand "btruncsf2" [(use (match_operand:SF 0 "register_operand" "")) (use (match_operand:SF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; emit_insn (gen_extendsfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_mask_pm (op0, op1, op2, op3)); + emit_insn (gen_frndintxf2_trunc (op0, op1)); emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); DONE; }) +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+(define_insn_and_split "frndintxf2_mask_pm" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_MASK_PM)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_MASK_PM] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM); + + emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "mask_pm") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_mask_pm_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_MASK_PM)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "mask_pm") + (set_attr "mode" "XF")]) + (define_expand "nearbyintxf2" [(use (match_operand:XF 0 "register_operand" "")) (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { - rtx op2 = assign_386_stack_local (HImode, 1); - rtx op3 = assign_386_stack_local (HImode, 2); - - ix86_optimize_mode_switching = 1; + emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1])); + + DONE; +}) + +(define_expand "nearbyintdf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn 
(gen_extenddfxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_mask_pm (op0, op1)); + + emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); + DONE; +}) - emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1], - op2, op3)); +(define_expand "nearbyintsf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_mask_pm (op0, op1)); + + emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); DONE; }) @@ -16500,7 +17917,7 @@ (use (match_operand:BLK 1 "memory_operand" "")) (use (match_operand:SI 2 "nonmemory_operand" "")) (use (match_operand:SI 3 "const_int_operand" ""))] - "! optimize_size" + "! optimize_size || TARGET_INLINE_ALL_STRINGOPS" { if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3])) DONE; @@ -16783,25 +18200,35 @@ (set_attr "memory" "both") (set_attr "mode" "SI")]) -(define_expand "clrmemsi" +(define_expand "setmemsi" [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:SI 1 "nonmemory_operand" "")) - (use (match_operand 2 "const_int_operand" ""))] + (use (match_operand 2 "const_int_operand" "")) + (use (match_operand 3 "const_int_operand" ""))] "" { - if (ix86_expand_clrmem (operands[0], operands[1], operands[2])) + /* If value to set is not zero, use the library routine. 
*/ + if (operands[2] != const0_rtx) + FAIL; + + if (ix86_expand_clrmem (operands[0], operands[1], operands[3])) DONE; else FAIL; }) -(define_expand "clrmemdi" +(define_expand "setmemdi" [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:DI 1 "nonmemory_operand" "")) - (use (match_operand 2 "const_int_operand" ""))] + (use (match_operand 2 "const_int_operand" "")) + (use (match_operand 3 "const_int_operand" ""))] "TARGET_64BIT" { - if (ix86_expand_clrmem (operands[0], operands[1], operands[2])) + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + if (ix86_expand_clrmem (operands[0], operands[1], operands[3])) DONE; else FAIL; @@ -17033,7 +18460,7 @@ (set_attr "memory" "store") (set_attr "mode" "QI")]) -(define_expand "cmpstrsi" +(define_expand "cmpstrnsi" [(set (match_operand:SI 0 "register_operand" "") (compare:SI (match_operand:BLK 1 "general_operand" "") (match_operand:BLK 2 "general_operand" ""))) @@ -17074,8 +18501,8 @@ emit_move_insn (operands[0], const0_rtx); DONE; } - emit_insn (gen_cmpstrqi_nz_1 (addr1, addr2, countreg, align, - operands[1], operands[2])); + emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); } else { @@ -17083,8 +18510,8 @@ emit_insn (gen_cmpdi_1_rex64 (countreg, countreg)); else emit_insn (gen_cmpsi_1 (countreg, countreg)); - emit_insn (gen_cmpstrqi_1 (addr1, addr2, countreg, align, - operands[1], operands[2])); + emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); } outlow = gen_lowpart (QImode, out); @@ -17115,7 +18542,7 @@ ;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is ;; zero. Emit extra code to make sure that a zero-length compare is EQ. 
-(define_expand "cmpstrqi_nz_1" +(define_expand "cmpstrnqi_nz_1" [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_operand 4 "memory_operand" "") (match_operand 5 "memory_operand" ""))) @@ -17128,7 +18555,7 @@ "" "") -(define_insn "*cmpstrqi_nz_1" +(define_insn "*cmpstrnqi_nz_1" [(set (reg:CC FLAGS_REG) (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) (mem:BLK (match_operand:SI 5 "register_operand" "1")))) @@ -17144,7 +18571,7 @@ (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) -(define_insn "*cmpstrqi_nz_rex_1" +(define_insn "*cmpstrnqi_nz_rex_1" [(set (reg:CC FLAGS_REG) (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) (mem:BLK (match_operand:DI 5 "register_operand" "1")))) @@ -17162,7 +18589,7 @@ ;; The same, but the count is not known to not be zero. -(define_expand "cmpstrqi_1" +(define_expand "cmpstrnqi_1" [(parallel [(set (reg:CC FLAGS_REG) (if_then_else:CC (ne (match_operand 2 "register_operand" "") (const_int 0)) @@ -17178,7 +18605,7 @@ "" "") -(define_insn "*cmpstrqi_1" +(define_insn "*cmpstrnqi_1" [(set (reg:CC FLAGS_REG) (if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2") (const_int 0)) @@ -17197,7 +18624,7 @@ (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) -(define_insn "*cmpstrqi_rex_1" +(define_insn "*cmpstrnqi_rex_1" [(set (reg:CC FLAGS_REG) (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2") (const_int 0)) @@ -17280,9 +18707,9 @@ (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) -;; Peephole optimizations to clean up after cmpstr*. This should be +;; Peephole optimizations to clean up after cmpstrn*. This should be ;; handled in combine, but it is not currently up to the task. -;; When used for their truth value, the cmpstr* expanders generate +;; When used for their truth value, the cmpstrn* expanders generate ;; code like this: ;; ;; repz cmpsb @@ -17293,7 +18720,7 @@ ;; ;; The intermediate three instructions are unnecessary. -;; This one handles cmpstr*_nz_1... 
+;; This one handles cmpstrn*_nz_1... (define_peephole2 [(parallel[ (set (reg:CC FLAGS_REG) @@ -17325,7 +18752,7 @@ (clobber (match_dup 2))])] "") -;; ...and this one handles cmpstr*_1. +;; ...and this one handles cmpstrn*_1. (define_peephole2 [(parallel[ (set (reg:CC FLAGS_REG) @@ -17392,7 +18819,7 @@ (set_attr "mode" "DI") (set_attr "length_immediate" "0")]) -(define_insn "movdicc_c_rex64" +(define_insn "*movdicc_c_rex64" [(set (match_operand:DI 0 "register_operand" "=r,r") (if_then_else:DI (match_operator 1 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) @@ -17482,7 +18909,8 @@ (define_insn_and_split "*movqicc_noc" [(set (match_operand:QI 0 "register_operand" "=r,r") (if_then_else:QI (match_operator 1 "ix86_comparison_operator" - [(match_operand 4 "flags_reg_operand" "") (const_int 0)]) + [(match_operand 4 "flags_reg_operand" "") + (const_int 0)]) (match_operand:QI 2 "register_operand" "r,0") (match_operand:QI 3 "register_operand" "0,r")))] "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL" @@ -17503,16 +18931,16 @@ (if_then_else:SF (match_operand 1 "comparison_operator" "") (match_operand:SF 2 "register_operand" "") (match_operand:SF 3 "register_operand" "")))] - "TARGET_CMOVE" + "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH" "if (! 
ix86_expand_fp_movcc (operands)) FAIL; DONE;") -(define_insn "*movsfcc_1" - [(set (match_operand:SF 0 "register_operand" "=f#r,f#r,r#f,r#f") +(define_insn "*movsfcc_1_387" + [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SF 2 "nonimmediate_operand" "f#r,0,rm#f,0") - (match_operand:SF 3 "nonimmediate_operand" "0,f#r,0,rm#f")))] - "TARGET_CMOVE + (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_80387 && TARGET_CMOVE && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" "@ fcmov%F1\t{%2, %0|%0, %2} @@ -17527,16 +18955,16 @@ (if_then_else:DF (match_operand 1 "comparison_operator" "") (match_operand:DF 2 "register_operand" "") (match_operand:DF 3 "register_operand" "")))] - "TARGET_CMOVE" + "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)" "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") (define_insn "*movdfcc_1" - [(set (match_operand:DF 0 "register_operand" "=f#r,f#r,&r#f,&r#f") + [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_operand:DF 2 "nonimmediate_operand" "f#r,0,rm#f,0") - (match_operand:DF 3 "nonimmediate_operand" "0,f#r,0,rm#f")))] - "!TARGET_64BIT && TARGET_CMOVE + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" "@ fcmov%F1\t{%2, %0|%0, %2} @@ -17547,12 +18975,12 @@ (set_attr "mode" "DF")]) (define_insn "*movdfcc_1_rex64" - [(set (match_operand:DF 0 "register_operand" "=f#r,f#r,r#f,r#f") + [(set (match_operand:DF 0 "register_operand" "=f,f,r,r") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" [(reg FLAGS_REG) 
(const_int 0)]) - (match_operand:DF 2 "nonimmediate_operand" "f#r,0#r,rm#f,0#f") - (match_operand:DF 3 "nonimmediate_operand" "0#r,f#r,0#f,rm#f")))] - "TARGET_64BIT && TARGET_CMOVE + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" "@ fcmov%F1\t{%2, %0|%0, %2} @@ -17565,7 +18993,8 @@ (define_split [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(match_operand 4 "flags_reg_operand" "") (const_int 0)]) + [(match_operand 4 "flags_reg_operand" "") + (const_int 0)]) (match_operand:DF 2 "nonimmediate_operand" "") (match_operand:DF 3 "nonimmediate_operand" "")))] "!TARGET_64BIT && reload_completed" @@ -17586,7 +19015,7 @@ (if_then_else:XF (match_operand 1 "comparison_operator" "") (match_operand:XF 2 "register_operand" "") (match_operand:XF 3 "register_operand" "")))] - "TARGET_CMOVE" + "TARGET_80387 && TARGET_CMOVE" "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") (define_insn "*movxfcc_1" @@ -17595,62 +19024,125 @@ [(reg FLAGS_REG) (const_int 0)]) (match_operand:XF 2 "register_operand" "f,0") (match_operand:XF 3 "register_operand" "0,f")))] - "TARGET_CMOVE" + "TARGET_80387 && TARGET_CMOVE" "@ fcmov%F1\t{%2, %0|%0, %2} fcmov%f1\t{%3, %0|%0, %3}" [(set_attr "type" "fcmov") (set_attr "mode" "XF")]) -(define_expand "minsf3" - [(parallel [ - (set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_SSE" - "") +;; These versions of the min/max patterns are intentionally ignorant of +;; their behavior wrt -0.0 and NaN (via the commutative operand mark). 
+;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator +;; are undefined in this condition, we're certain this is correct. -(define_insn "*minsf" - [(set (match_operand:SF 0 "register_operand" "=x#f,f#x,f#x") - (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "0,0,f#x") - (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x,0")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE && TARGET_IEEE_FP" - "#") +(define_insn "sminsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (smin:SF (match_operand:SF 1 "nonimmediate_operand" "%0") + (match_operand:SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH" + "minss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) -(define_insn "*minsf_nonieee" - [(set (match_operand:SF 0 "register_operand" "=x#f,f#x") - (if_then_else:SF (lt (match_operand:SF 1 "nonimmediate_operand" "%0,0") - (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE && !TARGET_IEEE_FP - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "#") +(define_insn "smaxsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (smax:SF (match_operand:SF 1 "nonimmediate_operand" "%0") + (match_operand:SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH" + "maxss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")) - (match_operand:SF 3 "register_operand" "") - (match_operand:SF 4 "nonimmediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "SSE_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], 
operands[3])))" - [(set (match_dup 0) - (if_then_else:SF (lt (match_dup 1) - (match_dup 2)) - (match_dup 1) - (match_dup 2)))]) +(define_insn "smindf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (smin:DF (match_operand:DF 1 "nonimmediate_operand" "%0") + (match_operand:DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "minsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "smaxdf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (smax:DF (match_operand:DF 1 "nonimmediate_operand" "%0") + (match_operand:DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "maxsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +;; These versions of the min/max patterns implement exactly the operations +;; min = (op1 < op2 ? op1 : op2) +;; max = (!(op1 < op2) ? op1 : op2) +;; Their operands are not commutative, and thus they may be used in the +;; presence of -0.0 and NaN. 
+ +(define_insn "*ieee_sminsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "TARGET_SSE_MATH" + "minss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) + +(define_insn "*ieee_smaxsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "TARGET_SSE_MATH" + "maxss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) + +(define_insn "*ieee_smindf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (unspec:DF [(match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "minsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "*ieee_smaxdf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (unspec:DF [(match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "maxsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +;; Make two stack loads independent: +;; fld aa fld aa +;; fld %st(0) -> fld bb +;; fmul bb fmul %st(1), %st +;; +;; Actually we only match the last two instructions for simplicity. 
+(define_peephole2 + [(set (match_operand 0 "fp_register_operand" "") + (match_operand 1 "fp_register_operand" "")) + (set (match_dup 0) + (match_operator 2 "binary_fp_operator" + [(match_dup 0) + (match_operand 3 "memory_operand" "")]))] + "REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (match_dup 4))] + + ;; The % modifier is not operational anymore in peephole2's, so we have to + ;; swap the operands manually in the case of addition and multiplication. + "if (COMMUTATIVE_ARITH_P (operands[2])) + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + operands[0], operands[1]); + else + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + operands[1], operands[0]);") ;; Conditional addition patterns (define_expand "addqicc" @@ -17685,282 +19177,6 @@ "TARGET_64BIT" "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") -;; We can't represent the LT test directly. Do this by swapping the operands. 
- -(define_split - [(set (match_operand:SF 0 "fp_register_operand" "") - (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "register_operand" "")) - (match_operand:SF 3 "register_operand" "") - (match_operand:SF 4 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP (match_dup 2) - (match_dup 1))) - (set (match_dup 0) - (if_then_else:SF (ge (reg:CCFP FLAGS_REG) (const_int 0)) - (match_dup 1) - (match_dup 2)))]) - -(define_insn "*minsf_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE && reload_completed" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -(define_expand "mindf3" - [(parallel [ - (set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_SSE2 && TARGET_SSE_MATH" - "#") - -(define_insn "*mindf" - [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y,f#Y") - (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "0,0,f#Y") - (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y,0")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 && TARGET_IEEE_FP && TARGET_SSE_MATH" - "#") - -(define_insn "*mindf_nonieee" - [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y") - (if_then_else:DF (lt (match_operand:DF 1 "nonimmediate_operand" "%0,0") - (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y")) - (match_dup 1) - (match_dup 2))) - 
(clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "#") - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")) - (match_operand:DF 3 "register_operand" "") - (match_operand:DF 4 "nonimmediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "SSE_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (match_dup 0) - (if_then_else:DF (lt (match_dup 1) - (match_dup 2)) - (match_dup 1) - (match_dup 2)))]) - -;; We can't represent the LT test directly. Do this by swapping the operands. -(define_split - [(set (match_operand:DF 0 "fp_register_operand" "") - (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "register_operand" "")) - (match_operand:DF 3 "register_operand" "") - (match_operand:DF 4 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP (match_dup 2) - (match_dup 1))) - (set (match_dup 0) - (if_then_else:DF (ge (reg:CCFP FLAGS_REG) (const_int 0)) - (match_dup 1) - (match_dup 2)))]) - -(define_insn "*mindf_sse" - [(set (match_operand:DF 0 "register_operand" "=Y") - (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "Ym")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" - "minsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr 
"mode" "DF")]) - -(define_expand "maxsf3" - [(parallel [ - (set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_SSE" - "#") - -(define_insn "*maxsf" - [(set (match_operand:SF 0 "register_operand" "=x#f,f#x,f#x") - (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "0,0,f#x") - (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x,0")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE && TARGET_IEEE_FP" - "#") - -(define_insn "*maxsf_nonieee" - [(set (match_operand:SF 0 "register_operand" "=x#f,f#x") - (if_then_else:SF (gt (match_operand:SF 1 "nonimmediate_operand" "%0,0") - (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE && !TARGET_IEEE_FP - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "#") - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")) - (match_operand:SF 3 "register_operand" "") - (match_operand:SF 4 "nonimmediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "SSE_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (match_dup 0) - (if_then_else:SF (gt (match_dup 1) - (match_dup 2)) - (match_dup 1) - (match_dup 2)))]) - -(define_split - [(set (match_operand:SF 0 "fp_register_operand" "") - (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "register_operand" "")) - (match_operand:SF 3 "register_operand" "") - (match_operand:SF 4 "register_operand" ""))) - 
(clobber (reg:CC FLAGS_REG))] - "reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP (match_dup 1) - (match_dup 2))) - (set (match_dup 0) - (if_then_else:SF (gt (reg:CCFP FLAGS_REG) (const_int 0)) - (match_dup 1) - (match_dup 2)))]) - -(define_insn "*maxsf_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE && reload_completed" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -(define_expand "maxdf3" - [(parallel [ - (set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_SSE2 && TARGET_SSE_MATH" - "#") - -(define_insn "*maxdf" - [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y,f#Y") - (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "0,0,f#Y") - (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y,0")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_IEEE_FP" - "#") - -(define_insn "*maxdf_nonieee" - [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y") - (if_then_else:DF (gt (match_operand:DF 1 "nonimmediate_operand" "%0,0") - (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y")) - (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "#") - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (gt (match_operand:DF 1 
"register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")) - (match_operand:DF 3 "register_operand" "") - (match_operand:DF 4 "nonimmediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "SSE_REG_P (operands[0]) && reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (match_dup 0) - (if_then_else:DF (gt (match_dup 1) - (match_dup 2)) - (match_dup 1) - (match_dup 2)))]) - -(define_split - [(set (match_operand:DF 0 "fp_register_operand" "") - (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "register_operand" "")) - (match_operand:DF 3 "register_operand" "") - (match_operand:DF 4 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed - && ((operands_match_p (operands[1], operands[3]) - && operands_match_p (operands[2], operands[4])) - || (operands_match_p (operands[1], operands[4]) - && operands_match_p (operands[2], operands[3])))" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP (match_dup 1) - (match_dup 2))) - (set (match_dup 0) - (if_then_else:DF (gt (reg:CCFP FLAGS_REG) (const_int 0)) - (match_dup 1) - (match_dup 2)))]) - -(define_insn "*maxdf_sse" - [(set (match_operand:DF 0 "register_operand" "=Y") - (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "Ym")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" - "maxsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "DF")]) ;; Misc patterns (?) 
@@ -18002,7 +19218,7 @@ return "lea{l}\t{%a2, %0|%0, %a2}"; default: - abort (); + gcc_unreachable (); } } [(set (attr "type") @@ -18045,7 +19261,7 @@ return "lea{q}\t{%a2, %0|%0, %a2}"; default: - abort (); + gcc_unreachable (); } } [(set (attr "type") @@ -18076,375 +19292,17 @@ return "lea{q}\t{%a2, %0|%0, %a2}"; default: - abort (); + gcc_unreachable (); } } [(set_attr "type" "alu,lea") (set_attr "mode" "DI")]) -;; Placeholder for the conditional moves. This one is split either to SSE -;; based moves emulation or to usual cmove sequence. Little bit unfortunate -;; fact is that compares supported by the cmp??ss instructions are exactly -;; swapped of those supported by cmove sequence. -;; The EQ/NE comparisons also needs bit care, since they are not directly -;; supported by i387 comparisons and we do need to emit two conditional moves -;; in tandem. - -(define_insn "sse_movsfcc" - [(set (match_operand:SF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?f#xr,?f#xr,?r#xf,?r#xf,?r#xf,?r#xf") - (if_then_else:SF (match_operator 1 "sse_comparison_operator" - [(match_operand:SF 4 "nonimmediate_operand" "0#fx,x#fx,f#x,f#x,xm#f,xm#f,f#x,f#x,xm#f,xm#f") - (match_operand:SF 5 "nonimmediate_operand" "xm#f,xm#f,f#x,f#x,x#f,x#f,f#x,f#x,x#f,x#f")]) - (match_operand:SF 2 "nonimmediate_operand" "x#fr,0#fr,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx,0#rx") - (match_operand:SF 3 "nonimmediate_operand" "x#fr,x#fr,0#fx,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx"))) - (clobber (match_scratch:SF 6 "=2,&4,X,X,X,X,X,X,X,X")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) - /* Avoid combine from being smart and converting min/max - instruction patterns into conditional moves. 
*/ - && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT - && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE) - || !rtx_equal_p (operands[4], operands[2]) - || !rtx_equal_p (operands[5], operands[3])) - && (!TARGET_IEEE_FP - || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" - "#") - -(define_insn "sse_movsfcc_eq" - [(set (match_operand:SF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?r#xf,?r#xf") - (if_then_else:SF (eq (match_operand:SF 3 "nonimmediate_operand" "%0#fx,x#fx,f#x,xm#f,f#x,xm#f") - (match_operand:SF 4 "nonimmediate_operand" "xm#f,xm#f,f#x,x#f,f#x,x#f")) - (match_operand:SF 1 "nonimmediate_operand" "x#fr,0#fr,0#fx,0#fx,0#rx,0#rx") - (match_operand:SF 2 "nonimmediate_operand" "x#fr,x#fr,f#fx,f#fx,rm#rx,rm#rx"))) - (clobber (match_scratch:SF 5 "=1,&3,X,X,X,X")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" - "#") - -(define_insn "sse_movdfcc" - [(set (match_operand:DF 0 "register_operand" "=&Y#rf,Y#rf,?f#Yr,?f#Yr,?f#Yr,?f#Yr,?r#Yf,?r#Yf,?r#Yf,?r#Yf") - (if_then_else:DF (match_operator 1 "sse_comparison_operator" - [(match_operand:DF 4 "nonimmediate_operand" "0#fY,Y#fY,f#Y,f#Y,Ym#f,Ym#f,f#Y,f#Y,Ym#f,Ym#f") - (match_operand:DF 5 "nonimmediate_operand" "Ym#f,Ym#f,f#Y,f#Y,Y#f,Y#f,f#Y,f#Y,Y#f,Y#f")]) - (match_operand:DF 2 "nonimmediate_operand" "Y#fr,0#fr,f#fY,0#fY,f#fY,0#fY,rm#rY,0#rY,rm#rY,0#rY") - (match_operand:DF 3 "nonimmediate_operand" "Y#fr,Y#fr,0#fY,f#fY,0#fY,f#fY,0#fY,rm#rY,0#rY,rm#rY"))) - (clobber (match_scratch:DF 6 "=2,&4,X,X,X,X,X,X,X,X")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) - /* Avoid combine from being smart and converting min/max - instruction patterns into conditional moves. 
*/ - && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT - && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE) - || !rtx_equal_p (operands[4], operands[2]) - || !rtx_equal_p (operands[5], operands[3])) - && (!TARGET_IEEE_FP - || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" - "#") - -(define_insn "sse_movdfcc_eq" - [(set (match_operand:DF 0 "register_operand" "=&Y#rf,Y#rf,?f#Yr,?f#Yr,?r#Yf,?r#Yf") - (if_then_else:DF (eq (match_operand:DF 3 "nonimmediate_operand" "%0#fY,Y#fY,f#Y,Ym#f,f#Y,Ym#f") - (match_operand:DF 4 "nonimmediate_operand" "Ym#f,Ym#f,f#Y,Y#f,f#Y,Y#f")) - (match_operand:DF 1 "nonimmediate_operand" "Y#fr,0#fr,0#fY,0#fY,0#rY,0#rY") - (match_operand:DF 2 "nonimmediate_operand" "Y#fr,Y#fr,f#fY,f#fY,rm#rY,rm#rY"))) - (clobber (match_scratch:DF 5 "=1,&3,X,X,X,X")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE - && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" - "#") - -;; For non-sse moves just expand the usual cmove sequence. 
-(define_split - [(set (match_operand 0 "register_operand" "") - (if_then_else (match_operator 1 "comparison_operator" - [(match_operand 4 "nonimmediate_operand" "") - (match_operand 5 "register_operand" "")]) - (match_operand 2 "nonimmediate_operand" "") - (match_operand 3 "nonimmediate_operand" ""))) - (clobber (match_operand 6 "" "")) - (clobber (reg:CC FLAGS_REG))] - "!SSE_REG_P (operands[0]) && reload_completed - && (GET_MODE (operands[0]) == SFmode - || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))" - [(const_int 0)] +(define_expand "allocate_stack_worker" + [(match_operand:SI 0 "register_operand" "")] + "TARGET_STACK_PROBE" { - ix86_compare_op0 = operands[5]; - ix86_compare_op1 = operands[4]; - operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])), - VOIDmode, operands[5], operands[4]); - ix86_expand_fp_movcc (operands); - DONE; -}) - -;; Split SSE based conditional move into sequence: -;; cmpCC op0, op4 - set op0 to 0 or ffffffff depending on the comparison -;; and op2, op0 - zero op2 if comparison was false -;; nand op0, op3 - load op3 to op0 if comparison was false -;; or op2, op0 - get the nonzero one into the result. -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (match_operator:SF 1 "sse_comparison_operator" - [(match_operand:SF 4 "register_operand" "") - (match_operand:SF 5 "nonimmediate_operand" "")]) - (match_operand:SF 2 "register_operand" "") - (match_operand:SF 3 "register_operand" ""))) - (clobber (match_operand 6 "" "")) - (clobber (reg:CC FLAGS_REG))] - "SSE_REG_P (operands[0]) && reload_completed" - [(set (match_dup 4) (match_op_dup 1 [(match_dup 4) (match_dup 5)])) - (set (match_dup 2) (and:V4SF (match_dup 2) - (match_dup 8))) - (set (match_dup 8) (and:V4SF (not:V4SF (match_dup 8)) - (match_dup 3))) - (set (match_dup 0) (ior:V4SF (match_dup 6) - (match_dup 7)))] -{ - /* If op2 == op3, op3 would be clobbered before it is used. 
*/ - if (operands_match_p (operands[2], operands[3])) - { - emit_move_insn (operands[0], operands[2]); - DONE; - } - - PUT_MODE (operands[1], GET_MODE (operands[0])); - if (operands_match_p (operands[0], operands[4])) - operands[6] = operands[4], operands[7] = operands[2]; - else - operands[6] = operands[2], operands[7] = operands[4]; - operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); - operands[2] = simplify_gen_subreg (V4SFmode, operands[2], SFmode, 0); - operands[3] = simplify_gen_subreg (V4SFmode, operands[3], SFmode, 0); - operands[8] = simplify_gen_subreg (V4SFmode, operands[4], SFmode, 0); - operands[6] = simplify_gen_subreg (V4SFmode, operands[6], SFmode, 0); - operands[7] = simplify_gen_subreg (V4SFmode, operands[7], SFmode, 0); -}) - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (match_operator:DF 1 "sse_comparison_operator" - [(match_operand:DF 4 "register_operand" "") - (match_operand:DF 5 "nonimmediate_operand" "")]) - (match_operand:DF 2 "register_operand" "") - (match_operand:DF 3 "register_operand" ""))) - (clobber (match_operand 6 "" "")) - (clobber (reg:CC FLAGS_REG))] - "SSE_REG_P (operands[0]) && reload_completed" - [(set (match_dup 4) (match_op_dup 1 [(match_dup 4) (match_dup 5)])) - (set (match_dup 2) (and:V2DF (match_dup 2) - (match_dup 8))) - (set (match_dup 8) (and:V2DF (not:V2DF (match_dup 8)) - (match_dup 3))) - (set (match_dup 0) (ior:V2DF (match_dup 6) - (match_dup 7)))] -{ - if (TARGET_SSE_SPLIT_REGS && !optimize_size) - { - rtx op = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0); - emit_insn (gen_sse2_unpcklpd (op, op, op)); - op = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0); - emit_insn (gen_sse2_unpcklpd (op, op, op)); - } - - /* If op2 == op3, op3 would be clobbered before it is used. 
*/ - if (operands_match_p (operands[2], operands[3])) - { - emit_move_insn (operands[0], operands[2]); - DONE; - } - - PUT_MODE (operands[1], GET_MODE (operands[0])); - if (operands_match_p (operands[0], operands[4])) - operands[6] = operands[4], operands[7] = operands[2]; - else - operands[6] = operands[2], operands[7] = operands[4]; - operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); - operands[2] = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0); - operands[3] = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0); - operands[8] = simplify_gen_subreg (V2DFmode, operands[4], DFmode, 0); - operands[6] = simplify_gen_subreg (V2DFmode, operands[6], DFmode, 0); - operands[7] = simplify_gen_subreg (V2DFmode, operands[7], DFmode, 0); -}) - -;; Special case of conditional move we can handle effectively. -;; Do not brother with the integer/floating point case, since these are -;; bot considerably slower, unlike in the generic case. -(define_insn "*sse_movsfcc_const0_1" - [(set (match_operand:SF 0 "register_operand" "=&x") - (if_then_else:SF (match_operator 1 "sse_comparison_operator" - [(match_operand:SF 4 "register_operand" "0") - (match_operand:SF 5 "nonimmediate_operand" "xm")]) - (match_operand:SF 2 "register_operand" "x") - (match_operand:SF 3 "const0_operand" "X")))] - "TARGET_SSE" - "#") - -(define_insn "*sse_movsfcc_const0_2" - [(set (match_operand:SF 0 "register_operand" "=&x") - (if_then_else:SF (match_operator 1 "sse_comparison_operator" - [(match_operand:SF 4 "register_operand" "0") - (match_operand:SF 5 "nonimmediate_operand" "xm")]) - (match_operand:SF 2 "const0_operand" "X") - (match_operand:SF 3 "register_operand" "x")))] - "TARGET_SSE" - "#") - -(define_insn "*sse_movsfcc_const0_3" - [(set (match_operand:SF 0 "register_operand" "=&x") - (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" - [(match_operand:SF 4 "nonimmediate_operand" "xm") - (match_operand:SF 5 "register_operand" "0")]) - (match_operand:SF 2 
"register_operand" "x") - (match_operand:SF 3 "const0_operand" "X")))] - "TARGET_SSE" - "#") - -(define_insn "*sse_movsfcc_const0_4" - [(set (match_operand:SF 0 "register_operand" "=&x") - (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" - [(match_operand:SF 4 "nonimmediate_operand" "xm") - (match_operand:SF 5 "register_operand" "0")]) - (match_operand:SF 2 "const0_operand" "X") - (match_operand:SF 3 "register_operand" "x")))] - "TARGET_SSE" - "#") - -(define_insn "*sse_movdfcc_const0_1" - [(set (match_operand:DF 0 "register_operand" "=&Y") - (if_then_else:DF (match_operator 1 "sse_comparison_operator" - [(match_operand:DF 4 "register_operand" "0") - (match_operand:DF 5 "nonimmediate_operand" "Ym")]) - (match_operand:DF 2 "register_operand" "Y") - (match_operand:DF 3 "const0_operand" "X")))] - "TARGET_SSE2" - "#") - -(define_insn "*sse_movdfcc_const0_2" - [(set (match_operand:DF 0 "register_operand" "=&Y") - (if_then_else:DF (match_operator 1 "sse_comparison_operator" - [(match_operand:DF 4 "register_operand" "0") - (match_operand:DF 5 "nonimmediate_operand" "Ym")]) - (match_operand:DF 2 "const0_operand" "X") - (match_operand:DF 3 "register_operand" "Y")))] - "TARGET_SSE2" - "#") - -(define_insn "*sse_movdfcc_const0_3" - [(set (match_operand:DF 0 "register_operand" "=&Y") - (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(match_operand:DF 4 "nonimmediate_operand" "Ym") - (match_operand:DF 5 "register_operand" "0")]) - (match_operand:DF 2 "register_operand" "Y") - (match_operand:DF 3 "const0_operand" "X")))] - "TARGET_SSE2" - "#") - -(define_insn "*sse_movdfcc_const0_4" - [(set (match_operand:DF 0 "register_operand" "=&Y") - (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(match_operand:DF 4 "nonimmediate_operand" "Ym") - (match_operand:DF 5 "register_operand" "0")]) - (match_operand:DF 2 "const0_operand" "X") - (match_operand:DF 3 "register_operand" "Y")))] - "TARGET_SSE2" - "#") - -(define_split - [(set 
(match_operand:SF 0 "register_operand" "") - (if_then_else:SF (match_operator 1 "comparison_operator" - [(match_operand:SF 4 "nonimmediate_operand" "") - (match_operand:SF 5 "nonimmediate_operand" "")]) - (match_operand:SF 2 "nonmemory_operand" "") - (match_operand:SF 3 "nonmemory_operand" "")))] - "SSE_REG_P (operands[0]) && reload_completed - && (const0_operand (operands[2], GET_MODE (operands[0])) - || const0_operand (operands[3], GET_MODE (operands[0])))" - [(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)])) - (set (match_dup 8) (and:V4SF (match_dup 6) (match_dup 7)))] -{ - PUT_MODE (operands[1], GET_MODE (operands[0])); - if (!sse_comparison_operator (operands[1], VOIDmode) - || !rtx_equal_p (operands[0], operands[4])) - { - rtx tmp = operands[5]; - operands[5] = operands[4]; - operands[4] = tmp; - PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1]))); - } - if (!rtx_equal_p (operands[0], operands[4])) - abort (); - operands[8] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); - if (const0_operand (operands[2], GET_MODE (operands[2]))) - { - operands[7] = operands[3]; - operands[6] = gen_rtx_NOT (V4SFmode, operands[8]); - } - else - { - operands[7] = operands[2]; - operands[6] = operands[8]; - } - operands[7] = simplify_gen_subreg (V4SFmode, operands[7], SFmode, 0); -}) - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (match_operator 1 "comparison_operator" - [(match_operand:DF 4 "nonimmediate_operand" "") - (match_operand:DF 5 "nonimmediate_operand" "")]) - (match_operand:DF 2 "nonmemory_operand" "") - (match_operand:DF 3 "nonmemory_operand" "")))] - "SSE_REG_P (operands[0]) && reload_completed - && (const0_operand (operands[2], GET_MODE (operands[0])) - || const0_operand (operands[3], GET_MODE (operands[0])))" - [(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)])) - (set (match_dup 8) (and:V2DF (match_dup 6) (match_dup 7)))] -{ - if (TARGET_SSE_SPLIT_REGS && 
!optimize_size) - { - if (REG_P (operands[2])) - { - rtx op = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0); - emit_insn (gen_sse2_unpcklpd (op, op, op)); - } - if (REG_P (operands[3])) - { - rtx op = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0); - emit_insn (gen_sse2_unpcklpd (op, op, op)); - } - } - PUT_MODE (operands[1], GET_MODE (operands[0])); - if (!sse_comparison_operator (operands[1], VOIDmode) - || !rtx_equal_p (operands[0], operands[4])) - { - rtx tmp = operands[5]; - operands[5] = operands[4]; - operands[4] = tmp; - PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1]))); - } - if (!rtx_equal_p (operands[0], operands[4])) - abort (); - operands[8] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); - if (const0_operand (operands[2], GET_MODE (operands[2]))) - { - operands[7] = operands[3]; - operands[6] = gen_rtx_NOT (V2DFmode, operands[8]); - } - else - { - operands[7] = operands[2]; - operands[6] = operands[8]; - } - operands[7] = simplify_gen_subreg (V2DFmode, operands[7], DFmode, 0); -}) - -(define_expand "allocate_stack_worker" - [(match_operand:SI 0 "register_operand" "")] - "TARGET_STACK_PROBE" -{ - if (reload_completed) + if (reload_completed) { if (TARGET_64BIT) emit_insn (gen_allocate_stack_worker_rex64_postreload (operands[0])); @@ -18529,7 +19387,21 @@ [(label_ref (match_operand 0 "" ""))] "!TARGET_64BIT && flag_pic" { - emit_insn (gen_set_got (pic_offset_table_rtx)); + if (TARGET_MACHO) + { + rtx xops[3]; + rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM); + rtx label_rtx = gen_label_rtx (); + emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx)); + xops[0] = xops[1] = picreg; + xops[2] = gen_rtx_CONST (SImode, + gen_rtx_MINUS (SImode, + gen_rtx_LABEL_REF (SImode, label_rtx), + gen_rtx_SYMBOL_REF (SImode, GOT_SYMBOL_NAME))); + ix86_expand_binary_operator (MINUS, SImode, xops); + } + else + emit_insn (gen_set_got (pic_offset_table_rtx)); DONE; }) @@ -18544,8 +19416,9 @@ "! 
TARGET_PARTIAL_REG_STALL && reload_completed && ((GET_MODE (operands[0]) == HImode && ((!optimize_size && !TARGET_FAST_PREFIX) + /* ??? next two lines just !satisfies_constraint_K (...) */ || GET_CODE (operands[2]) != CONST_INT - || CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))) + || satisfies_constraint_K (operands[2]))) || (GET_MODE (operands[0]) == QImode && (TARGET_PROMOTE_QImode || optimize_size)))" [(parallel [(set (match_dup 0) @@ -18664,7 +19537,8 @@ [(set (match_operand:SI 0 "push_operand" "") (match_operand:SI 1 "memory_operand" "")) (match_scratch:SI 2 "r")] - "! optimize_size && ! TARGET_PUSH_MEMORY" + "!optimize_size && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -18673,7 +19547,8 @@ [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "memory_operand" "")) (match_scratch:DI 2 "r")] - "! optimize_size && ! TARGET_PUSH_MEMORY" + "!optimize_size && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -18684,7 +19559,8 @@ [(set (match_operand:SF 0 "push_operand" "") (match_operand:SF 1 "memory_operand" "")) (match_scratch:SF 2 "r")] - "! optimize_size && ! TARGET_PUSH_MEMORY" + "!optimize_size && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -18693,7 +19569,8 @@ [(set (match_operand:HI 0 "push_operand" "") (match_operand:HI 1 "memory_operand" "")) (match_scratch:HI 2 "r")] - "! optimize_size && ! TARGET_PUSH_MEMORY" + "!optimize_size && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -18702,7 +19579,8 @@ [(set (match_operand:QI 0 "push_operand" "") (match_operand:QI 1 "memory_operand" "")) (match_scratch:QI 2 "q")] - "! optimize_size && ! 
TARGET_PUSH_MEMORY" + "!optimize_size && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -18862,8 +19740,7 @@ (const_int 0)]))] "ix86_match_ccmode (insn, CCNOmode) && (true_regnum (operands[2]) != 0 - || (GET_CODE (operands[3]) == CONST_INT - && CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'K'))) + || satisfies_constraint_K (operands[3])) && peep2_reg_dead_p (1, operands[2])" [(parallel [(set (match_dup 0) @@ -18996,17 +19873,16 @@ ;; Attempt to always use XOR for zeroing registers. (define_peephole2 [(set (match_operand 0 "register_operand" "") - (const_int 0))] - "(GET_MODE (operands[0]) == QImode - || GET_MODE (operands[0]) == HImode - || GET_MODE (operands[0]) == SImode - || (GET_MODE (operands[0]) == DImode && TARGET_64BIT)) + (match_operand 1 "const0_operand" ""))] + "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD && (! TARGET_USE_MOV0 || optimize_size) + && GENERAL_REG_P (operands[0]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int 0)) (clobber (reg:CC FLAGS_REG))])] - "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? 
DImode : SImode, - operands[0]);") +{ + operands[0] = gen_lowpart (word_mode, operands[0]); +}) (define_peephole2 [(set (strict_low_part (match_operand 0 "register_operand" "")) @@ -19429,9 +20305,8 @@ (mult:DI (match_operand:DI 1 "memory_operand" "") (match_operand:DI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_K8 && !optimize_size - && (GET_CODE (operands[2]) != CONST_INT - || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))" + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] @@ -19443,9 +20318,8 @@ (mult:SI (match_operand:SI 1 "memory_operand" "") (match_operand:SI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_K8 && !optimize_size - && (GET_CODE (operands[2]) != CONST_INT - || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))" + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] @@ -19458,9 +20332,8 @@ (mult:SI (match_operand:SI 1 "memory_operand" "") (match_operand:SI 2 "immediate_operand" "")))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_K8 && !optimize_size - && (GET_CODE (operands[2]) != CONST_INT - || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))" + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) (clobber (reg:CC FLAGS_REG))])] @@ -19476,8 +20349,8 @@ (match_operand:DI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:DI 3 "r")] - "TARGET_K8 && !optimize_size - && CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')" + "(TARGET_K8 || TARGET_GENERIC64) && 
!optimize_size + && satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3))) (clobber (reg:CC FLAGS_REG))])] @@ -19492,8 +20365,8 @@ (match_operand:SI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:SI 3 "r")] - "TARGET_K8 && !optimize_size - && CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')" + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + && satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3))) (clobber (reg:CC FLAGS_REG))])] @@ -19508,7 +20381,7 @@ (match_operand:HI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:HI 3 "r")] - "TARGET_K8 && !optimize_size" + "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3))) (clobber (reg:CC FLAGS_REG))])] @@ -19516,6 +20389,64 @@ if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); }) + +;; After splitting up read-modify operations, array accesses with memory +;; operands might end up in form: +;; sall $2, %eax +;; movl 4(%esp), %edx +;; addl %edx, %eax +;; instead of pre-splitting: +;; sall $2, %eax +;; addl 4(%esp), %eax +;; Turn it into: +;; movl 4(%esp), %edx +;; leal (%edx,%eax,4), %eax + +(define_peephole2 + [(parallel [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_operand 3 "register_operand") + (match_operand 4 "x86_64_general_operand" "")) + (parallel [(set (match_operand 5 "register_operand" "") + (plus (match_operand 6 "register_operand" "") + (match_operand 7 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3 + /* Validate MODE for lea. 
*/ + && ((!TARGET_PARTIAL_REG_STALL + && (GET_MODE (operands[0]) == QImode + || GET_MODE (operands[0]) == HImode)) + || GET_MODE (operands[0]) == SImode + || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)) + /* We reorder load and the shift. */ + && !rtx_equal_p (operands[1], operands[3]) + && !reg_overlap_mentioned_p (operands[0], operands[4]) + /* Last PLUS must consist of operand 0 and 3. */ + && !rtx_equal_p (operands[0], operands[3]) + && (rtx_equal_p (operands[3], operands[6]) + || rtx_equal_p (operands[3], operands[7])) + && (rtx_equal_p (operands[0], operands[6]) + || rtx_equal_p (operands[0], operands[7])) + /* The intermediate operand 0 must die or be same as output. */ + && (rtx_equal_p (operands[0], operands[5]) + || peep2_reg_dead_p (3, operands[0]))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (match_dup 1))] +{ + enum machine_mode mode = GET_MODE (operands[5]) == DImode ? DImode : SImode; + int scale = 1 << INTVAL (operands[2]); + rtx index = gen_lowpart (Pmode, operands[1]); + rtx base = gen_lowpart (Pmode, operands[3]); + rtx dest = gen_lowpart (mode, operands[5]); + + operands[1] = gen_rtx_PLUS (Pmode, base, + gen_rtx_MULT (Pmode, index, GEN_INT (scale))); + if (mode != Pmode) + operands[1] = gen_rtx_SUBREG (mode, operands[1], 0); + operands[0] = dest; +}) ;; Call-value patterns last so that the wildcard operand does not ;; disrupt insn-recog's switch tables. @@ -19635,4499 +20566,307 @@ "jmp\t*%%r11" [(set_attr "type" "callv")]) +;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5. +;; That, however, is usually mapped by the OS to SIGSEGV, which is often +;; caught for use by garbage collectors and the like. Using an insn that +;; maps to SIGILL makes it more likely the program will rightfully die. +;; Keeping with tradition, "6" is in honor of #UD. 
(define_insn "trap" - [(trap_if (const_int 1) (const_int 5))] + [(trap_if (const_int 1) (const_int 6))] "" - "int\t$5") - -;;; ix86 doesn't have conditional trap instructions, but we fake them -;;; for the sake of bounds checking. By emitting bounds checks as -;;; conditional traps rather than as conditional jumps around -;;; unconditional traps we avoid introducing spurious basic-block -;;; boundaries and facilitate elimination of redundant checks. In -;;; honor of the too-inflexible-for-BPs `bound' instruction, we use -;;; interrupt 5. -;;; -;;; FIXME: Static branch prediction rules for ix86 are such that -;;; forward conditional branches predict as untaken. As implemented -;;; below, pseudo conditional traps violate that rule. We should use -;;; .pushsection/.popsection to place all of the `int 5's in a special -;;; section loaded at the end of the text segment and branch forward -;;; there on bounds-failure, and then jump back immediately (in case -;;; the system chooses to ignore bounds violations, or to report -;;; violations and continue execution). 
- -(define_expand "conditional_trap" - [(trap_if (match_operator 0 "comparison_operator" - [(match_dup 2) (const_int 0)]) - (match_operand 1 "const_int_operand" ""))] - "" -{ - emit_insn (gen_rtx_TRAP_IF (VOIDmode, - ix86_expand_compare (GET_CODE (operands[0]), - NULL, NULL), - operands[1])); - DONE; -}) + { return ASM_SHORT "0x0b0f"; } + [(set_attr "length" "2")]) -(define_insn "*conditional_trap_1" - [(trap_if (match_operator 0 "comparison_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand 1 "const_int_operand" ""))] - "" +(define_expand "sse_prologue_save" + [(parallel [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(reg:DI 21) + (reg:DI 22) + (reg:DI 23) + (reg:DI 24) + (reg:DI 25) + (reg:DI 26) + (reg:DI 27) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:DI 2 "immediate_operand" "")) + (use (label_ref:DI (match_operand 3 "" "")))])] + "TARGET_64BIT" + "") + +(define_insn "*sse_prologue_save_insn" + [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") + (match_operand:DI 4 "const_int_operand" "n"))) + (unspec:BLK [(reg:DI 21) + (reg:DI 22) + (reg:DI 23) + (reg:DI 24) + (reg:DI 25) + (reg:DI 26) + (reg:DI 27) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (use (match_operand:DI 1 "register_operand" "r")) + (use (match_operand:DI 2 "const_int_operand" "i")) + (use (label_ref:DI (match_operand 3 "" "X")))] + "TARGET_64BIT + && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128 + && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" + "* { - operands[2] = gen_label_rtx (); - output_asm_insn ("j%c0\t%l2\; int\t%1", operands); - (*targetm.asm_out.internal_label) (asm_out_file, "L", - CODE_LABEL_NUMBER (operands[2])); + int i; + operands[0] = gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, operands[0], operands[4])); + output_asm_insn (\"jmp\\t%A1\", operands); + for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) + { + operands[4] = adjust_address (operands[0], 
DImode, i*16); + operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); + PUT_MODE (operands[4], TImode); + if (GET_CODE (XEXP (operands[0], 0)) != PLUS) + output_asm_insn (\"rex\", operands); + output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands); + } + (*targetm.asm_out.internal_label) (asm_out_file, \"L\", + CODE_LABEL_NUMBER (operands[3])); RET; -}) - - ;; Pentium III SIMD instructions. - -;; Moves for SSE/MMX regs. +} + " + [(set_attr "type" "other") + (set_attr "length_immediate" "0") + (set_attr "length_address" "0") + (set_attr "length" "135") + (set_attr "memory" "store") + (set_attr "modrm" "0") + (set_attr "mode" "DI")]) -(define_expand "movv4sf" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "") - (match_operand:V4SF 1 "nonimmediate_operand" ""))] - "TARGET_SSE" +(define_expand "prefetch" + [(prefetch (match_operand 0 "address_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE || TARGET_3DNOW" { - ix86_expand_vector_move (V4SFmode, operands); - DONE; -}) + int rw = INTVAL (operands[1]); + int locality = INTVAL (operands[2]); -(define_insn "*movv4sf_internal" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") - (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))] - "TARGET_SSE" - "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + gcc_assert (rw == 0 || rw == 1); + gcc_assert (locality >= 0 && locality <= 3); + gcc_assert (GET_MODE (operands[0]) == Pmode + || GET_MODE (operands[0]) == VOIDmode); -(define_split - [(set (match_operand:V4SF 0 "register_operand" "") - (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] - "TARGET_SSE && reload_completed" - [(set (match_dup 0) - (vec_merge:V4SF - (vec_duplicate:V4SF (match_dup 1)) - (match_dup 2) - (const_int 1)))] -{ - operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0); - operands[2] = CONST0_RTX 
(V4SFmode); + /* Use 3dNOW prefetch in case we are asking for write prefetch not + supported by SSE counterpart or the SSE prefetch is not available + (K6 machines). Otherwise use SSE prefetch as it allows specifying + of locality. */ + if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw)) + operands[2] = GEN_INT (3); + else + operands[1] = const0_rtx; }) -(define_expand "movv2df" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "") - (match_operand:V2DF 1 "nonimmediate_operand" ""))] - "TARGET_SSE" +(define_insn "*prefetch_sse" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (const_int 0) + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE && !TARGET_64BIT" { - ix86_expand_vector_move (V2DFmode, operands); - DONE; -}) + static const char * const patterns[4] = { + "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" + }; -(define_insn "*movv2df_internal" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") - (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))] - "TARGET_SSE - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "xorpd\t%0, %0"; - case 1: - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movapd\t{%1, %0|%0, %1}"; - default: - abort (); - } + int locality = INTVAL (operands[1]); + gcc_assert (locality >= 0 && locality <= 3); + + return patterns[locality]; } - [(set_attr "type" "ssemov") - (set (attr "mode") - (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0)) - (const_string "V4SF") - (eq_attr "alternative" "0,1") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "V2DF")) - (eq_attr "alternative" "2") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_size") - (const_int 
0))) - (const_string "V4SF") - (const_string "V2DF"))] - (const_string "V2DF")))]) + [(set_attr "type" "sse") + (set_attr "memory" "none")]) -(define_split - [(set (match_operand:V2DF 0 "register_operand" "") - (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))] - "TARGET_SSE2 && reload_completed" - [(set (match_dup 0) - (vec_merge:V2DF - (vec_duplicate:V2DF (match_dup 1)) - (match_dup 2) - (const_int 1)))] +(define_insn "*prefetch_sse_rex" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (const_int 0) + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE && TARGET_64BIT" { - operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0); - operands[2] = CONST0_RTX (V2DFmode); -}) + static const char * const patterns[4] = { + "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" + }; -;; 16 byte integral modes handled by SSE, minus TImode, which gets -;; special-cased for TARGET_64BIT. -(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI]) + int locality = INTVAL (operands[1]); + gcc_assert (locality >= 0 && locality <= 3); -(define_expand "mov" - [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))] - "TARGET_SSE" -{ - ix86_expand_vector_move (mode, operands); - DONE; -}) + return patterns[locality]; +} + [(set_attr "type" "sse") + (set_attr "memory" "none")]) -(define_insn "*mov_internal" - [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m") - (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))] - "TARGET_SSE - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "pxor\t%0, %0"; - case 1: - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, %1}"; - default: - abort (); - } -} - [(set_attr "type" "ssemov") 
- (set (attr "mode") - (cond [(eq_attr "alternative" "0,1") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "2") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_size") - (const_int 0))) - (const_string "V4SF") - (const_string "TI"))] - (const_string "TI")))]) - -;; 8 byte integral modes handled by MMX (and by extension, SSE) -(define_mode_macro MMXMODEI [V8QI V4HI V2SI]) - -(define_expand "mov" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "") - (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (mode, operands); - DONE; -}) - -(define_insn "*mov_internal_rex64" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" - "=rm,r,*y,*y ,m ,*y,Y ,x,x ,m,r,x") - (match_operand:MMXMODEI 1 "vector_move_operand" - "Cr ,m,C ,*ym,*y,Y ,*y,C,xm,x,x,r"))] - "TARGET_64BIT && TARGET_MMX - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1}" - [(set_attr "type" "imov,imov,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DI")]) - -(define_insn "*mov_internal" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" - "=*y,*y ,m ,*y,*Y,*x,*x ,m") - (match_operand:MMXMODEI 1 "vector_move_operand" - "C ,*ym,*y,*Y,*y,C ,*xm,*x"))] - "TARGET_MMX - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1}" - [(set_attr "type" 
"mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov") - (set_attr "mode" "DI")]) - -(define_expand "movv2sf" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "") - (match_operand:V2SF 1 "nonimmediate_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (V2SFmode, operands); - DONE; -}) - -(define_insn "*movv2sf_internal_rex64" - [(set (match_operand:V2SF 0 "nonimmediate_operand" - "=rm,r,*y ,*y ,m ,*y,Y ,x,x ,m,r,x") - (match_operand:V2SF 1 "vector_move_operand" - "Cr ,m ,C ,*ym,*y,Y ,*y,C,xm,x,x,r"))] - "TARGET_64BIT && TARGET_MMX - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - xorps\t%0, %0 - movlps\t{%1, %0|%0, %1} - movlps\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1}" - [(set_attr "type" "imov,imov,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V2SF,V2SF,DI,DI")]) - -(define_insn "*movv2sf_internal" - [(set (match_operand:V2SF 0 "nonimmediate_operand" - "=*y,*y ,m,*y,*Y,*x,*x ,m") - (match_operand:V2SF 1 "vector_move_operand" - "C ,*ym,*y,*Y,*y,C ,*xm,*x"))] - "TARGET_MMX - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - xorps\t%0, %0 - movlps\t{%1, %0|%0, %1} - movlps\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov") - (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V2SF,V2SF")]) - -(define_expand "movti" - [(set (match_operand:TI 0 "nonimmediate_operand" "") - (match_operand:TI 1 "nonimmediate_operand" ""))] - "TARGET_SSE || TARGET_64BIT" -{ - if (TARGET_64BIT) - ix86_expand_move (TImode, operands); - else - ix86_expand_vector_move (TImode, operands); - DONE; -}) - -(define_insn 
"*movti_internal" - [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] - "TARGET_SSE && !TARGET_64BIT - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "pxor\t%0, %0"; - case 1: - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, %1}"; - default: - abort (); - } -} - [(set_attr "type" "ssemov,ssemov,ssemov") - (set (attr "mode") - (cond [(eq_attr "alternative" "0,1") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "2") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI"))] - (const_string "TI")))]) - -(define_insn "*movti_rex64" - [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm") - (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] - "TARGET_64BIT - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - case 1: - return "#"; - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "pxor\t%0, %0"; - case 3: - case 4: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, %1}"; - default: - abort (); - } -} - [(set_attr "type" "*,*,ssemov,ssemov,ssemov") - (set (attr "mode") - (cond [(eq_attr "alternative" "2,3") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "4") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_size") - (const_int 0))) - (const_string "V4SF") - (const_string "TI"))] - (const_string 
"DI")))]) - -(define_expand "movtf" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "nonimmediate_operand" ""))] - "TARGET_64BIT" -{ - ix86_expand_move (TFmode, operands); - DONE; -}) - -(define_insn "*movtf_internal" - [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm") - (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))] - "TARGET_64BIT - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - case 1: - return "#"; - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "pxor\t%0, %0"; - case 3: - case 4: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, %1}"; - default: - abort (); - } -} - [(set_attr "type" "*,*,ssemov,ssemov,ssemov") - (set (attr "mode") - (cond [(eq_attr "alternative" "2,3") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "4") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_size") - (const_int 0))) - (const_string "V4SF") - (const_string "TI"))] - (const_string "DI")))]) - -(define_mode_macro SSEPUSH [V16QI V8HI V4SI V2DI TI V4SF V2DF]) - -(define_insn "*push" - [(set (match_operand:SSEPUSH 0 "push_operand" "=<") - (match_operand:SSEPUSH 1 "register_operand" "x"))] - "TARGET_SSE" - "#") - -(define_mode_macro MMXPUSH [V8QI V4HI V2SI V2SF]) - -(define_insn "*push" - [(set (match_operand:MMXPUSH 0 "push_operand" "=<") - (match_operand:MMXPUSH 1 "register_operand" "xy"))] - "TARGET_MMX" - "#") - -(define_split - [(set (match_operand 0 "push_operand" "") - (match_operand 1 "register_operand" ""))] - "!TARGET_64BIT && reload_completed - && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" - [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 3))) - (set (match_dup 2) 
(match_dup 1))] - "operands[2] = change_address (operands[0], GET_MODE (operands[0]), - stack_pointer_rtx); - operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") - -(define_split - [(set (match_operand 0 "push_operand" "") - (match_operand 1 "register_operand" ""))] - "TARGET_64BIT && reload_completed - && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" - [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 3))) - (set (match_dup 2) (match_dup 1))] - "operands[2] = change_address (operands[0], GET_MODE (operands[0]), - stack_pointer_rtx); - operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") - - -(define_split - [(set (match_operand:TI 0 "nonimmediate_operand" "") - (match_operand:TI 1 "general_operand" ""))] - "reload_completed && !SSE_REG_P (operands[0]) - && !SSE_REG_P (operands[1])" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - -(define_split - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "general_operand" ""))] - "reload_completed && !SSE_REG_P (operands[0]) - && !SSE_REG_P (operands[1])" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - -;; All 16-byte vector modes handled by SSE -(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF]) - -(define_expand "movmisalign" - [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "") - (match_operand:SSEMODE 1 "nonimmediate_operand" ""))] - "TARGET_SSE" -{ - ix86_expand_vector_move_misalign (mode, operands); - DONE; -}) - -;; All 8-byte vector modes handled by MMX -(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF]) - -(define_expand "movmisalign" - [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "") - (match_operand:MMXMODE 1 "nonimmediate_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (mode, operands); - DONE; -}) - -;; These two patterns are useful for specifying exactly whether to use -;; movaps or movups -(define_expand "sse_movaps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "") - 
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] - UNSPEC_MOVA))] - "TARGET_SSE" -{ - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - { - rtx tmp = gen_reg_rtx (V4SFmode); - emit_insn (gen_sse_movaps (tmp, operands[1])); - emit_move_insn (operands[0], tmp); - DONE; - } -}) - -(define_insn "*sse_movaps_1" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVA))] - "TARGET_SSE - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov,ssemov") - (set_attr "mode" "V4SF")]) - -(define_expand "sse_movups" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "") - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] - UNSPEC_MOVU))] - "TARGET_SSE" -{ - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - { - rtx tmp = gen_reg_rtx (V4SFmode); - emit_insn (gen_sse_movups (tmp, operands[1])); - emit_move_insn (operands[0], tmp); - DONE; - } -}) - -(define_insn "*sse_movups_1" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_SSE - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movups\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt,ssecvt") - (set_attr "mode" "V4SF")]) - -;; SSE Strange Moves. 
- -(define_insn "sse_movmskps" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] - UNSPEC_MOVMSK))] - "TARGET_SSE" - "movmskps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "mmx_pmovmskb" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] - UNSPEC_MOVMSK))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - - -(define_insn "mmx_maskmovq" - [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 "register_operand" "y")] - UNSPEC_MASKMOV))] - "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_maskmovq_rex" - [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D")) - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 "register_operand" "y")] - UNSPEC_MASKMOV))] - "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "sse_movntv4sf" - [(set (match_operand:V4SF 0 "memory_operand" "=m") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "TARGET_SSE" - "movntps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_movntdi" - [(set (match_operand:DI 0 "memory_operand" "=m") - (unspec:DI [(match_operand:DI 1 "register_operand" "y")] - UNSPEC_MOVNT))] - "TARGET_SSE || TARGET_3DNOW_A" - "movntq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxmov") - (set_attr "mode" "DI")]) - -(define_insn "sse_movhlps" - [(set 
(match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 2) - (const_int 3) - (const_int 0) - (const_int 1)])) - (const_int 3)))] - "TARGET_SSE" - "movhlps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_movlhps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 2) - (const_int 3) - (const_int 0) - (const_int 1)])) - (const_int 12)))] - "TARGET_SSE" - "movlhps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_movhps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (vec_merge:V4SF - (match_operand:V4SF 1 "nonimmediate_operand" "0,0") - (match_operand:V4SF 2 "nonimmediate_operand" "m,x") - (const_int 12)))] - "TARGET_SSE - && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" - "movhps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_movlps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (vec_merge:V4SF - (match_operand:V4SF 1 "nonimmediate_operand" "0,0") - (match_operand:V4SF 2 "nonimmediate_operand" "m,x") - (const_int 3)))] - "TARGET_SSE - && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" - "movlps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_expand "sse_loadss" - [(match_operand:V4SF 0 "register_operand" "") - (match_operand:SF 1 "memory_operand" "")] - "TARGET_SSE" -{ - emit_insn (gen_sse_loadss_1 (operands[0], operands[1], - CONST0_RTX (V4SFmode))); - DONE; -}) - -(define_insn "sse_loadss_1" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (vec_duplicate:V4SF (match_operand:SF 1 
"memory_operand" "m")) - (match_operand:V4SF 2 "const0_operand" "X") - (const_int 1)))] - "TARGET_SSE" - "movss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "SF")]) - -(define_insn "sse_movss" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x") - (const_int 14)))] - "TARGET_SSE" - "movss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemov") - (set_attr "mode" "SF")]) - -(define_insn "sse_storess" - [(set (match_operand:SF 0 "memory_operand" "=m") - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)])))] - "TARGET_SSE" - "movss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "SF")]) - -(define_insn "sse_shufps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_SHUFFLE))] - "TARGET_SSE" - ;; @@@ check operand order for intel/nonintel syntax - "shufps\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - - -;; SSE arithmetic - -(define_insn "addv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "addps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) - -(define_insn "vmaddv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "addss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "subv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (minus:V4SF (match_operand:V4SF 1 
"register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "subps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) - -(define_insn "vmsubv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "subss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -;; ??? Should probably be done by generic code instead. -(define_expand "negv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "") - (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") - (match_dup 2)))] - "TARGET_SSE" -{ - rtx m0 = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode)); - rtx vm0 = gen_rtx_CONST_VECTOR (V4SFmode, gen_rtvec (4, m0, m0, m0, m0)); - operands[2] = force_reg (V4SFmode, vm0); -}) - -(define_insn "mulv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "mulps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") - (set_attr "mode" "V4SF")]) - -(define_insn "vmmulv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "mulss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") - (set_attr "mode" "SF")]) - -(define_insn "divv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (div:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "divps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") - (set_attr "mode" "V4SF")]) - -(define_insn "vmdivv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - 
(vec_merge:V4SF - (div:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "divss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") - (set_attr "mode" "SF")]) - - -;; SSE square root/reciprocal - -(define_insn "rcpv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] - "TARGET_SSE" - "rcpps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_insn "vmrcpv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] - UNSPEC_RCP) - (match_operand:V4SF 2 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE" - "rcpss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -(define_insn "rsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] - "TARGET_SSE" - "rsqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_insn "vmrsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] - UNSPEC_RSQRT) - (match_operand:V4SF 2 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE" - "rsqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -(define_insn "sqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "sqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_insn "vmsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")) - (match_operand:V4SF 2 "register_operand" "0") - 
(const_int 1)))] - "TARGET_SSE" - "sqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -;; SSE logical operations. - -;; SSE defines logical operations on floating point values. This brings -;; interesting challenge to RTL representation where logicals are only valid -;; on integral types. We deal with this by representing the floating point -;; logical as logical on arguments casted to TImode as this is what hardware -;; really does. Unfortunately hardware requires the type information to be -;; present and thus we must avoid subregs from being simplified and eliminated -;; in later compilation phases. -;; -;; We have following variants from each instruction: -;; sse_andsf3 - the operation taking V4SF vector operands -;; and doing TImode cast on them -;; *sse_andsf3_memory - the operation taking one memory operand casted to -;; TImode, since backend insist on eliminating casts -;; on memory operands -;; sse_andti3_sf_1 - the operation taking SF scalar operands. -;; We cannot accept memory operand here as instruction reads -;; whole scalar. This is generated only post reload by GCC -;; scalar float operations that expands to logicals (fabs) -;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode -;; memory operand. Eventually combine can be able -;; to synthesize these using splitter. -;; sse2_anddf3, *sse2_anddf3_memory -;; -;; -;; These are not called andti3 etc. because we really really don't want -;; the compiler to widen DImode ands to TImode ands and then try to move -;; into DImode subregs of SSE registers, and them together, and move out -;; of DImode subregs again! 
-;; SSE1 single precision floating point logical operation -(define_expand "sse_andv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (and:V4SF (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "") - -(define_insn "*sse_andv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -(define_expand "sse_nandv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "")) - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "") - -(define_insn "*sse_nandv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0")) - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "andnps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -(define_expand "sse_iorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (ior:V4SF (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "") - -(define_insn "*sse_iorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -(define_expand "sse_xorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (xor:V4SF (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - 
"TARGET_SSE" - "") - -(define_insn "*sse_xorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) - -;; SSE2 double precision floating point logical operation - -(define_expand "sse2_andv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (and:V2DF (match_operand:V2DF 1 "register_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "") - -(define_insn "*sse2_andv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_expand "sse2_nandv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "")) - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "") - -(define_insn "*sse2_nandv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0")) - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_expand "sse2_iorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (ior:V2DF (match_operand:V2DF 1 "register_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "") - -(define_insn "*sse2_iorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 
"nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_expand "sse2_xorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "") - -(define_insn "*sse2_xorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -;; SSE2 integral logicals. These patterns must always come after floating -;; point ones since we don't want compiler to use integer opcodes on floating -;; point SSE values to avoid matching of subregs in the match_operand. 
-(define_insn "*sse2_andti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - -(define_insn "sse2_andv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - -(define_insn "*sse2_nandti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - -(define_insn "sse2_nandv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (not:V2DI (match_operand:V2DI 1 "register_operand" "0")) - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - -(define_insn "*sse2_iorti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - -(define_insn "sse2_iorv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - 
"TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - -(define_insn "*sse2_xorti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - -(define_insn "sse2_xorv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (xor:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - -;; Use xor, but don't show input operands so they aren't live before -;; this insn. -(define_insn "sse_clrv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (match_operand:V4SF 1 "const0_operand" "X"))] - "TARGET_SSE" -{ - if (get_attr_mode (insn) == MODE_TI) - return "pxor\t{%0, %0|%0, %0}"; - else - return "xorps\t{%0, %0|%0, %0}"; -} - [(set_attr "type" "sselog") - (set_attr "memory" "none") - (set (attr "mode") - (if_then_else - (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") - (const_int 0)) - (ne (symbol_ref "TARGET_SSE2") - (const_int 0))) - (eq (symbol_ref "optimize_size") - (const_int 0))) - (const_string "TI") - (const_string "V4SF")))]) - -;; Use xor, but don't show input operands so they aren't live before -;; this insn. 
-(define_insn "sse_clrv2df" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(const_int 0)] UNSPEC_NOP))] - "TARGET_SSE2" - "xorpd\t{%0, %0|%0, %0}" - [(set_attr "type" "sselog") - (set_attr "memory" "none") - (set_attr "mode" "V4SF")]) - -;; SSE mask-generating compares - -(define_insn "maskcmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")]))] - "TARGET_SSE" - "cmp%D3ps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "V4SF")]) - -(define_insn "maskncmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (not:V4SI - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")])))] - "TARGET_SSE" -{ - if (GET_CODE (operands[3]) == UNORDERED) - return "cmpordps\t{%2, %0|%0, %2}"; - else - return "cmpn%D3ps\t{%2, %0|%0, %2}"; -} - [(set_attr "type" "ssecmp") - (set_attr "mode" "V4SF")]) - -(define_insn "vmmaskcmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")]) - (subreg:V4SI (match_dup 1) 0) - (const_int 1)))] - "TARGET_SSE" - "cmp%D3ss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "SF")]) - -(define_insn "vmmaskncmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (not:V4SI - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")])) - (subreg:V4SI (match_dup 1) 0) - (const_int 1)))] - "TARGET_SSE" -{ - if (GET_CODE (operands[3]) == UNORDERED) - return "cmpordss\t{%2, %0|%0, %2}"; - else - return "cmpn%D3ss\t{%2, %0|%0, %2}"; -} - [(set_attr "type" 
"ssecmp") - (set_attr "mode" "SF")]) - -(define_insn "sse_comi" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP (vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE" - "comiss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecomi") - (set_attr "mode" "SF")]) - -(define_insn "sse_ucomi" - [(set (reg:CCFPU FLAGS_REG) - (compare:CCFPU (vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE" - "ucomiss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecomi") - (set_attr "mode" "SF")]) - - -;; SSE unpack - -(define_insn "sse_unpckhps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] - "TARGET_SSE" - "unpckhps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_unpcklps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] - "TARGET_SSE" - "unpcklps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - - -;; SSE min/max - -(define_insn "smaxv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 
"nonimmediate_operand" "xm")))] - "TARGET_SSE" - "maxps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_insn "vmsmaxv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -(define_insn "sminv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "minps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_insn "vmsminv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -;; SSE <-> integer/MMX conversions - -(define_insn "cvtpi2ps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (vec_duplicate:V4SF - (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) - (const_int 12)))] - "TARGET_SSE" - "cvtpi2ps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "cvtps2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) - (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "cvtps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "cvttps2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" 
"xm")] - UNSPEC_FIX) - (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "cvttps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "SF")]) - -(define_insn "cvtsi2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0,0") - (vec_duplicate:V4SF - (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,rm"))) - (const_int 14)))] - "TARGET_SSE" - "cvtsi2ss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "vector,double") - (set_attr "mode" "SF")]) - -(define_insn "cvtsi2ssq" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0,0") - (vec_duplicate:V4SF - (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) - (const_int 14)))] - "TARGET_SSE && TARGET_64BIT" - "cvtsi2ssq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "vector,double") - (set_attr "mode" "SF")]) - -(define_insn "cvtss2si" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (vec_select:SI - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) - (parallel [(const_int 0)])))] - "TARGET_SSE" - "cvtss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "mode" "SI")]) - -(define_insn "cvtss2siq" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (vec_select:DI - (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) - (parallel [(const_int 0)])))] - "TARGET_SSE" - "cvtss2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "mode" "DI")]) - -(define_insn "cvttss2si" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (vec_select:SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] - UNSPEC_FIX) - (parallel [(const_int 0)])))] - "TARGET_SSE" - "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr 
"mode" "SF") - (set_attr "athlon_decode" "double,vector")]) - -(define_insn "cvttss2siq" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (vec_select:DI - (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] - UNSPEC_FIX) - (parallel [(const_int 0)])))] - "TARGET_SSE && TARGET_64BIT" - "cvttss2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "double,vector")]) - - -;; MMX insns - -;; MMX arithmetic - -(define_insn "addv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "addv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "addv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mmx_adddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(plus:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "paddq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "ssaddv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - 
-(define_insn "ssaddv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "usaddv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "usaddv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "subv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "subv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "subv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (minus:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mmx_subdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(minus:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" 
"ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psubq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "sssubv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "sssubv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "ussubv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "ussubv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mulv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (mult:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pmullw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) - -(define_insn "smulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (mult:V4SI (sign_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_int 16))))] - "TARGET_MMX" - "pmulhw\t{%2, %0|%0, %2}" - [(set_attr "type" 
"mmxmul") - (set_attr "mode" "DI")]) - -(define_insn "umulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (mult:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_int 16))))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmulhuw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) - -(define_insn "mmx_pmaddwd" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI - (mult:V2SI - (sign_extend:V2SI - (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 2)]))) - (sign_extend:V2SI - (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0) (const_int 2)])))) - (mult:V2SI - (sign_extend:V2SI (vec_select:V2HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2SI (vec_select:V2HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3)]))))))] - "TARGET_MMX" - "pmaddwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) - - -;; MMX logical operations -;; Note we don't want to declare these as regular iordi3 insns to prevent -;; normal code that also wants to use the FPU from getting broken. -;; The UNSPECs are there to prevent the combiner from getting overly clever. 
-(define_insn "mmx_iordi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ior:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mmx_xordi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(xor:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI") - (set_attr "memory" "none")]) - -;; Same as pxor, but don't show input operands so that we don't think -;; they are live. -(define_insn "mmx_clrdi" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI [(const_int 0)] UNSPEC_NOP))] - "TARGET_MMX" - "pxor\t{%0, %0|%0, %0}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI") - (set_attr "memory" "none")]) - -(define_insn "mmx_anddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(and:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mmx_nanddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0")) - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - - -;; MMX unsigned averages/sum of absolute differences - -(define_insn "mmx_uavgv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (ashiftrt:V8QI - (plus:V8QI (plus:V8QI - (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")) - (const_vector:V8QI [(const_int 1) - (const_int 1) - 
(const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1)])) - (const_int 1)))] - "TARGET_SSE || TARGET_3DNOW_A" - "pavgb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_uavgv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashiftrt:V4HI - (plus:V4HI (plus:V4HI - (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")) - (const_vector:V4HI [(const_int 1) - (const_int 1) - (const_int 1) - (const_int 1)])) - (const_int 1)))] - "TARGET_SSE || TARGET_3DNOW_A" - "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_psadbw" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] - UNSPEC_PSADBW))] - "TARGET_SSE || TARGET_3DNOW_A" - "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - - -;; MMX insert/extract/shuffle - -(define_expand "mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "") - (vec_merge:V4HI - (match_operand:V4HI 1 "register_operand" "") - (vec_duplicate:V4HI - (match_operand:SI 2 "nonimmediate_operand" "")) - (match_operand:SI 3 "const_0_to_3_operand" "")))] - "TARGET_SSE || TARGET_3DNOW_A" -{ - operands[2] = gen_lowpart (HImode, operands[2]); - operands[3] = GEN_INT (1 << INTVAL (operands[3])); -}) - -(define_insn "*mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (match_operand:V4HI 1 "register_operand" "0") - (vec_duplicate:V4HI - (match_operand:HI 2 "nonimmediate_operand" "rm")) - (match_operand:SI 3 "const_pow2_1_to_8_operand" "N")))] - "TARGET_SSE || TARGET_3DNOW_A" -{ - operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); - return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; -} - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_pextrw" - [(set 
(match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") - (parallel - [(match_operand:SI 2 "const_0_to_3_operand" "N")]))))] - "TARGET_SSE || TARGET_3DNOW_A" - "pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_pshufw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (unspec:V4HI [(match_operand:V4HI 1 "nonimmediate_operand" "ym") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_SHUFFLE))] - "TARGET_SSE || TARGET_3DNOW_A" - "pshufw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - - -;; MMX mask-generating comparisons - -(define_insn "eqv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (eq:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "eqv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (eq:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "eqv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (eq:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "gtv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (gt:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "gtv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (gt:V4HI (match_operand:V4HI 1 
"register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "gtv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (gt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - - -;; MMX max/min insns - -(define_insn "umaxv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (umax:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmaxub\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "smaxv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (smax:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmaxsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "uminv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (umin:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pminub\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "sminv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (smin:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pminsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - - -;; MMX shifts - -(define_insn "ashrv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - 
"psraw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "ashrv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrad\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "lshrv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrlw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "lshrv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -;; See logical MMX insns. -(define_insn "mmx_lshrdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "ashlv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psllw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "ashlv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "pslld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -;; See logical MMX insns. 
-(define_insn "mmx_ashldi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - - -;; MMX pack/unpack insns. - -(define_insn "mmx_packsswb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_concat:V8QI - (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) - (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packsswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_packssdw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_concat:V4HI - (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0")) - (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packssdw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_packuswb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_concat:V8QI - (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) - (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packuswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckhbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_merge:V8QI - (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (const_int 85)))] - "TARGET_MMX" - "punpckhbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") 
- (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckhwd" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] - "TARGET_MMX" - "punpckhwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckhdq" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_merge:V2SI - (match_operand:V2SI 1 "register_operand" "0") - (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 1)))] - "TARGET_MMX" - "punpckhdq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpcklbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_merge:V8QI - (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (const_int 85)))] - "TARGET_MMX" - "punpcklbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpcklwd" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] - "TARGET_MMX" - 
"punpcklwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckldq" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_merge:V2SI - (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 1) - (const_int 0)])) - (match_operand:V2SI 2 "register_operand" "y") - (const_int 1)))] - "TARGET_MMX" - "punpckldq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - - -;; Miscellaneous stuff - -(define_insn "emms" - [(unspec_volatile [(const_int 0)] UNSPECV_EMMS) - (clobber (reg:XF 8)) - (clobber (reg:XF 9)) - (clobber (reg:XF 10)) - (clobber (reg:XF 11)) - (clobber (reg:XF 12)) - (clobber (reg:XF 13)) - (clobber (reg:XF 14)) - (clobber (reg:XF 15)) - (clobber (reg:DI 29)) - (clobber (reg:DI 30)) - (clobber (reg:DI 31)) - (clobber (reg:DI 32)) - (clobber (reg:DI 33)) - (clobber (reg:DI 34)) - (clobber (reg:DI 35)) - (clobber (reg:DI 36))] - "TARGET_MMX" - "emms" - [(set_attr "type" "mmx") - (set_attr "memory" "unknown")]) - -(define_insn "ldmxcsr" - [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] - UNSPECV_LDMXCSR)] - "TARGET_SSE" - "ldmxcsr\t%0" - [(set_attr "type" "sse") - (set_attr "memory" "load")]) - -(define_insn "stmxcsr" - [(set (match_operand:SI 0 "memory_operand" "=m") - (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] - "TARGET_SSE" - "stmxcsr\t%0" - [(set_attr "type" "sse") - (set_attr "memory" "store")]) - -(define_expand "sfence" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] - "TARGET_SSE || TARGET_3DNOW_A" -{ - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); - MEM_VOLATILE_P (operands[0]) = 1; -}) - -(define_insn "*sfence_insn" - [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] - "TARGET_SSE || TARGET_3DNOW_A" - "sfence" - [(set_attr "type" "sse") - (set_attr "memory" "unknown")]) - -(define_expand "sse_prologue_save" - [(parallel [(set (match_operand:BLK 0 "" 
"") - (unspec:BLK [(reg:DI 21) - (reg:DI 22) - (reg:DI 23) - (reg:DI 24) - (reg:DI 25) - (reg:DI 26) - (reg:DI 27) - (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) - (use (match_operand:DI 1 "register_operand" "")) - (use (match_operand:DI 2 "immediate_operand" "")) - (use (label_ref:DI (match_operand 3 "" "")))])] - "TARGET_64BIT" - "") - -(define_insn "*sse_prologue_save_insn" - [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") - (match_operand:DI 4 "const_int_operand" "n"))) - (unspec:BLK [(reg:DI 21) - (reg:DI 22) - (reg:DI 23) - (reg:DI 24) - (reg:DI 25) - (reg:DI 26) - (reg:DI 27) - (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) - (use (match_operand:DI 1 "register_operand" "r")) - (use (match_operand:DI 2 "const_int_operand" "i")) - (use (label_ref:DI (match_operand 3 "" "X")))] - "TARGET_64BIT - && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128 - && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" - "* -{ - int i; - operands[0] = gen_rtx_MEM (Pmode, - gen_rtx_PLUS (Pmode, operands[0], operands[4])); - output_asm_insn (\"jmp\\t%A1\", operands); - for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) - { - operands[4] = adjust_address (operands[0], DImode, i*16); - operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); - PUT_MODE (operands[4], TImode); - if (GET_CODE (XEXP (operands[0], 0)) != PLUS) - output_asm_insn (\"rex\", operands); - output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands); - } - (*targetm.asm_out.internal_label) (asm_out_file, \"L\", - CODE_LABEL_NUMBER (operands[3])); - RET; -} - " - [(set_attr "type" "other") - (set_attr "length_immediate" "0") - (set_attr "length_address" "0") - (set_attr "length" "135") - (set_attr "memory" "store") - (set_attr "modrm" "0") - (set_attr "mode" "DI")]) - -;; 3Dnow! 
instructions - -(define_insn "addv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (plus:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfadd\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "subv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (minus:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfsub\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "subrv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym") - (match_operand:V2SF 1 "register_operand" "0")))] - "TARGET_3DNOW" - "pfsubr\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "gtv2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (gt:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpgt\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "V2SF")]) - -(define_insn "gev2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ge:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpge\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "V2SF")]) - -(define_insn "eqv2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (eq:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpeq\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "V2SF")]) - -(define_insn "pfmaxv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (smax:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 
"nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmax\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pfminv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (smin:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmin\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "mulv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (mult:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmul\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "V2SF")]) - -(define_insn "femms" - [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS) - (clobber (reg:XF 8)) - (clobber (reg:XF 9)) - (clobber (reg:XF 10)) - (clobber (reg:XF 11)) - (clobber (reg:XF 12)) - (clobber (reg:XF 13)) - (clobber (reg:XF 14)) - (clobber (reg:XF 15)) - (clobber (reg:DI 29)) - (clobber (reg:DI 30)) - (clobber (reg:DI 31)) - (clobber (reg:DI 32)) - (clobber (reg:DI 33)) - (clobber (reg:DI 34)) - (clobber (reg:DI 35)) - (clobber (reg:DI 36))] - "TARGET_3DNOW" - "femms" - [(set_attr "type" "mmx") - (set_attr "memory" "none")]) - -(define_insn "pf2id" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pf2id\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -(define_insn "pf2iw" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (sign_extend:V2SI - (ss_truncate:V2HI - (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))] - "TARGET_3DNOW_A" - "pf2iw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -(define_insn "pfacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (plus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - 
(parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (plus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW" - "pfacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pfnacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (minus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (minus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW_A" - "pfnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pfpnacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (minus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (plus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW_A" - "pfpnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pi2fw" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (float:V2SF - (vec_concat:V2SI - (sign_extend:SI - (truncate:HI - (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])))) - (sign_extend:SI - (truncate:HI - (vec_select:SI (match_dup 1) - (parallel [(const_int 1)])))))))] - "TARGET_3DNOW_A" - "pi2fw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -(define_insn "floatv2si2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - 
(float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pi2fd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -;; This insn is identical to pavgb in operation, but the opcode is -;; different. To avoid accidentally matching pavgb, use an unspec. - -(define_insn "pavgusb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (unspec:V8QI - [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] - UNSPEC_PAVGUSB))] - "TARGET_3DNOW" - "pavgusb\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "TI")]) - -;; 3DNow reciprocal and sqrt - -(define_insn "pfrcpv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] - UNSPEC_PFRCP))] - "TARGET_3DNOW" - "pfrcp\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrcpit1v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] - UNSPEC_PFRCPIT1))] - "TARGET_3DNOW" - "pfrcpit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrcpit2v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] - UNSPEC_PFRCPIT2))] - "TARGET_3DNOW" - "pfrcpit2\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrsqrtv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] - UNSPEC_PFRSQRT))] - "TARGET_3DNOW" - "pfrsqrt\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrsqit1v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 
"register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] - UNSPEC_PFRSQIT1))] - "TARGET_3DNOW" - "pfrsqit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pmulhrwv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_vector:V4SI [(const_int 32768) - (const_int 32768) - (const_int 32768) - (const_int 32768)])) - (const_int 16))))] - "TARGET_3DNOW" - "pmulhrw\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "TI")]) - -(define_insn "pswapdv2si2" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") - (parallel [(const_int 1) (const_int 0)])))] - "TARGET_3DNOW_A" - "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "TI")]) - -(define_insn "pswapdv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym") - (parallel [(const_int 1) (const_int 0)])))] - "TARGET_3DNOW_A" - "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "TI")]) - -(define_expand "prefetch" - [(prefetch (match_operand 0 "address_operand" "") - (match_operand:SI 1 "const_int_operand" "") - (match_operand:SI 2 "const_int_operand" ""))] - "TARGET_PREFETCH_SSE || TARGET_3DNOW" -{ - int rw = INTVAL (operands[1]); - int locality = INTVAL (operands[2]); - - if (rw != 0 && rw != 1) - abort (); - if (locality < 0 || locality > 3) - abort (); - if (GET_MODE (operands[0]) != Pmode && GET_MODE (operands[0]) != VOIDmode) - abort (); - - /* Use 3dNOW prefetch in case we are asking for write prefetch not - suported by SSE counterpart or the SSE prefetch is not available - (K6 machines). 
Otherwise use SSE prefetch as it allows specifying - of locality. */ - if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw)) - operands[2] = GEN_INT (3); - else - operands[1] = const0_rtx; -}) - -(define_insn "*prefetch_sse" - [(prefetch (match_operand:SI 0 "address_operand" "p") - (const_int 0) - (match_operand:SI 1 "const_int_operand" ""))] - "TARGET_PREFETCH_SSE && !TARGET_64BIT" -{ - static const char * const patterns[4] = { - "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" - }; - - int locality = INTVAL (operands[1]); - if (locality < 0 || locality > 3) - abort (); - - return patterns[locality]; -} - [(set_attr "type" "sse") - (set_attr "memory" "none")]) - -(define_insn "*prefetch_sse_rex" - [(prefetch (match_operand:DI 0 "address_operand" "p") - (const_int 0) - (match_operand:SI 1 "const_int_operand" ""))] - "TARGET_PREFETCH_SSE && TARGET_64BIT" -{ - static const char * const patterns[4] = { - "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" - }; - - int locality = INTVAL (operands[1]); - if (locality < 0 || locality > 3) - abort (); - - return patterns[locality]; -} - [(set_attr "type" "sse") - (set_attr "memory" "none")]) - -(define_insn "*prefetch_3dnow" - [(prefetch (match_operand:SI 0 "address_operand" "p") - (match_operand:SI 1 "const_int_operand" "n") - (const_int 3))] - "TARGET_3DNOW && !TARGET_64BIT" -{ - if (INTVAL (operands[1]) == 0) - return "prefetch\t%a0"; - else - return "prefetchw\t%a0"; -} - [(set_attr "type" "mmx") - (set_attr "memory" "none")]) - -(define_insn "*prefetch_3dnow_rex" - [(prefetch (match_operand:DI 0 "address_operand" "p") - (match_operand:SI 1 "const_int_operand" "n") - (const_int 3))] - "TARGET_3DNOW && TARGET_64BIT" +(define_insn "*prefetch_3dnow" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 3))] + "TARGET_3DNOW && !TARGET_64BIT" { if (INTVAL (operands[1]) == 0) return "prefetch\t%a0"; else - return 
"prefetchw\t%a0"; -} - [(set_attr "type" "mmx") - (set_attr "memory" "none")]) - -;; SSE2 support - -(define_insn "addv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (plus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "addpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "vmaddv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (plus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "addsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_insn "subv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "subpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "vmsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "subsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_insn "mulv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (mult:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "mulpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") - (set_attr "mode" "V2DF")]) - -(define_insn "vmmulv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (mult:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "mulsd\t{%2, %0|%0, %2}" - [(set_attr 
"type" "ssemul") - (set_attr "mode" "DF")]) - -(define_insn "divv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (div:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "divpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") - (set_attr "mode" "V2DF")]) - -(define_insn "vmdivv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (div:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "divsd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") - (set_attr "mode" "DF")]) - -;; SSE min/max - -(define_insn "smaxv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "maxpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "vmsmaxv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "maxsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_insn "sminv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "minpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "vmsminv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "minsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" 
"DF")]) -;; SSE2 square root. There doesn't appear to be an extension for the -;; reciprocal/rsqrt instructions if the Intel manual is to be believed. - -(define_insn "sqrtv2df2" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")))] - "TARGET_SSE2" - "sqrtpd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V2DF")]) - -(define_insn "vmsqrtv2df2" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")) - (match_operand:V2DF 2 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE2" - "sqrtsd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -;; SSE mask-generating compares - -(define_insn "maskcmpv2df3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (match_operator:V2DI 3 "sse_comparison_operator" - [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "x")]))] - "TARGET_SSE2" - "cmp%D3pd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "V2DF")]) - -(define_insn "maskncmpv2df3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (not:V2DI - (match_operator:V2DI 3 "sse_comparison_operator" - [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "x")])))] - "TARGET_SSE2" -{ - if (GET_CODE (operands[3]) == UNORDERED) - return "cmpordps\t{%2, %0|%0, %2}"; - else - return "cmpn%D3pd\t{%2, %0|%0, %2}"; -} - [(set_attr "type" "ssecmp") - (set_attr "mode" "V2DF")]) - -(define_insn "vmmaskcmpv2df3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_merge:V2DI - (match_operator:V2DI 3 "sse_comparison_operator" - [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "x")]) - (subreg:V2DI (match_dup 1) 0) - (const_int 1)))] - "TARGET_SSE2" - "cmp%D3sd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "DF")]) - -(define_insn 
"vmmaskncmpv2df3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_merge:V2DI - (not:V2DI - (match_operator:V2DI 3 "sse_comparison_operator" - [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "x")])) - (subreg:V2DI (match_dup 1) 0) - (const_int 1)))] - "TARGET_SSE2" -{ - if (GET_CODE (operands[3]) == UNORDERED) - return "cmpordsd\t{%2, %0|%0, %2}"; - else - return "cmpn%D3sd\t{%2, %0|%0, %2}"; -} - [(set_attr "type" "ssecmp") - (set_attr "mode" "DF")]) - -(define_insn "sse2_comi" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP (vec_select:DF - (match_operand:V2DF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE2" - "comisd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecomi") - (set_attr "mode" "DF")]) - -(define_insn "sse2_ucomi" - [(set (reg:CCFPU FLAGS_REG) - (compare:CCFPU (vec_select:DF - (match_operand:V2DF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE2" - "ucomisd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecomi") - (set_attr "mode" "DF")]) - -;; SSE Strange Moves. 
- -(define_insn "sse2_movmskpd" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")] - UNSPEC_MOVMSK))] - "TARGET_SSE2" - "movmskpd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_pmovmskb" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] - UNSPEC_MOVMSK))] - "TARGET_SSE2" - "pmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_maskmovdqu" - [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D")) - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") - (match_operand:V16QI 2 "register_operand" "x")] - UNSPEC_MASKMOV))] - "TARGET_SSE2" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovdqu\t{%2, %1|%1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_maskmovdqu_rex64" - [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") - (match_operand:V16QI 2 "register_operand" "x")] - UNSPEC_MASKMOV))] - "TARGET_SSE2" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovdqu\t{%2, %1|%1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_movntv2df" - [(set (match_operand:V2DF 0 "memory_operand" "=m") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "TARGET_SSE2" - "movntpd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_movntv2di" - [(set (match_operand:V2DI 0 "memory_operand" "=m") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "TARGET_SSE2" - "movntdq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_movntsi" - [(set (match_operand:SI 0 "memory_operand" "=m") - (unspec:SI [(match_operand:SI 1 
"register_operand" "r")] - UNSPEC_MOVNT))] - "TARGET_SSE2" - "movnti\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -;; SSE <-> integer/MMX conversions - -;; Conversions between SI and SF - -(define_insn "cvtdq2ps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "cvtdq2ps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "cvtps2dq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "cvtps2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "cvttps2dq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] - UNSPEC_FIX))] - "TARGET_SSE2" - "cvttps2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -;; Conversions between SI and DF - -(define_insn "cvtdq2pd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (float:V2DF (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm") - (parallel - [(const_int 0) - (const_int 1)]))))] - "TARGET_SSE2" - "cvtdq2pd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "cvtpd2dq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_concat:V4SI - (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")) - (const_vector:V2SI [(const_int 0) (const_int 0)])))] - "TARGET_SSE2" - "cvtpd2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "cvttpd2dq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_concat:V4SI - (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] - UNSPEC_FIX) - (const_vector:V2SI [(const_int 0) (const_int 0)])))] - "TARGET_SSE2" - "cvttpd2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - 
(set_attr "mode" "TI")]) - -(define_insn "cvtpd2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "cvtpd2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "cvttpd2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] - UNSPEC_FIX))] - "TARGET_SSE2" - "cvttpd2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "cvtpi2pd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] - "TARGET_SSE2" - "cvtpi2pd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -;; Conversions between SI and DF - -(define_insn "cvtsd2si" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m") - (parallel [(const_int 0)]))))] - "TARGET_SSE2" - "cvtsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "mode" "SI")]) - -(define_insn "cvtsd2siq" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m") - (parallel [(const_int 0)]))))] - "TARGET_SSE2 && TARGET_64BIT" - "cvtsd2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "mode" "DI")]) - -(define_insn "cvttsd2si" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm") - (parallel [(const_int 0)]))] UNSPEC_FIX))] - "TARGET_SSE2" - "cvttsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SI") - (set_attr "athlon_decode" "double,vector")]) - -(define_insn "cvttsd2siq" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (unspec:DI [(vec_select:DF 
(match_operand:V2DF 1 "register_operand" "x,xm") - (parallel [(const_int 0)]))] UNSPEC_FIX))] - "TARGET_SSE2 && TARGET_64BIT" - "cvttsd2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DI") - (set_attr "athlon_decode" "double,vector")]) - -(define_insn "cvtsi2sd" - [(set (match_operand:V2DF 0 "register_operand" "=x,x") - (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") - (vec_duplicate:V2DF - (float:DF - (match_operand:SI 2 "nonimmediate_operand" "r,rm"))) - (const_int 2)))] - "TARGET_SSE2" - "cvtsi2sd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,direct")]) - -(define_insn "cvtsi2sdq" - [(set (match_operand:V2DF 0 "register_operand" "=x,x") - (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") - (vec_duplicate:V2DF - (float:DF - (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) - (const_int 2)))] - "TARGET_SSE2 && TARGET_64BIT" - "cvtsi2sdq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,direct")]) - -;; Conversions between SF and DF - -(define_insn "cvtsd2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0,0") - (vec_duplicate:V4SF - (float_truncate:V2SF - (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))) - (const_int 14)))] - "TARGET_SSE2" - "cvtsd2ss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "athlon_decode" "vector,double") - (set_attr "mode" "SF")]) - -(define_insn "cvtss2sd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") - (float_extend:V2DF - (vec_select:V2SF - (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1)]))) - (const_int 2)))] - "TARGET_SSE2" - "cvtss2sd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) - -(define_insn "cvtpd2ps" - [(set 
(match_operand:V4SF 0 "register_operand" "=x") - (subreg:V4SF - (vec_concat:V4SI - (subreg:V2SI (float_truncate:V2SF - (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 0) - (const_vector:V2SI [(const_int 0) (const_int 0)])) 0))] - "TARGET_SSE2" - "cvtpd2ps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "cvtps2pd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (float_extend:V2DF - (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1)]))))] - "TARGET_SSE2" - "cvtps2pd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -;; SSE2 variants of MMX insns - -;; MMX arithmetic - -(define_insn "addv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "addv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "addv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "addv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "ssaddv16qi3" - [(set (match_operand:V16QI 0 "register_operand" 
"=x") - (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddsb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "ssaddv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "usaddv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddusb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "usaddv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddusw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "subv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (minus:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "subv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (minus:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "subv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (minus:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubd\t{%2, %0|%0, %2}" - 
[(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "subv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (minus:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "sssubv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (ss_minus:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubsb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "sssubv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ss_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "ussubv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (us_minus:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubusb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "ussubv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubusw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "mulv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (mult:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pmullw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "mode" "TI")]) - -(define_insn "smulv8hi3_highpart" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (truncate:V8HI - 
(lshiftrt:V8SI - (mult:V8SI (sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "0")) - (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) - (const_int 16))))] - "TARGET_SSE2" - "pmulhw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "mode" "TI")]) - -(define_insn "umulv8hi3_highpart" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (truncate:V8HI - (lshiftrt:V8SI - (mult:V8SI (zero_extend:V8SI (match_operand:V8HI 1 "register_operand" "0")) - (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) - (const_int 16))))] - "TARGET_SSE2" - "pmulhuw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "mode" "TI")]) - -(define_insn "sse2_umulsidi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (mult:DI (zero_extend:DI (vec_select:SI - (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 0)]))) - (zero_extend:DI (vec_select:SI - (match_operand:V2SI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])))))] - "TARGET_SSE2" - "pmuludq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) - -(define_insn "sse2_umulv2siv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (mult:V2DI (zero_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 2)]))) - (zero_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) (const_int 2)])))))] - "TARGET_SSE2" - "pmuludq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "mode" "TI")]) - -(define_insn "sse2_pmaddwd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 
2) - (const_int 4) - (const_int 6)])))) - (mult:V4SI - (sign_extend:V4SI (vec_select:V4HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) - (sign_extend:V4SI (vec_select:V4HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))))))] - "TARGET_SSE2" - "pmaddwd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -;; Same as pxor, but don't show input operands so that we don't think -;; they are live. -(define_insn "sse2_clrti" - [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))] - "TARGET_SSE2" -{ - if (get_attr_mode (insn) == MODE_TI) - return "pxor\t%0, %0"; - else - return "xorps\t%0, %0"; + return "prefetchw\t%a0"; } - [(set_attr "type" "ssemov") - (set_attr "memory" "none") - (set (attr "mode") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")))]) - -;; MMX unsigned averages/sum of absolute differences - -(define_insn "sse2_uavgv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (ashiftrt:V16QI - (plus:V16QI (plus:V16QI - (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")) - (const_vector:V16QI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1)))] - "TARGET_SSE2" - "pavgb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "sse2_uavgv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ashiftrt:V8HI - (plus:V8HI (plus:V8HI - (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 
1)])) - (const_int 1)))] - "TARGET_SSE2" - "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -;; @@@ this isn't the right representation. -(define_insn "sse2_psadbw" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")] - UNSPEC_PSADBW))] - "TARGET_SSE2" - "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - - -;; MMX insert/extract/shuffle - -(define_expand "sse2_pinsrw" - [(set (match_operand:V8HI 0 "register_operand" "") - (vec_merge:V8HI - (match_operand:V8HI 1 "register_operand" "") - (vec_duplicate:V8HI - (match_operand:SI 2 "nonimmediate_operand" "")) - (match_operand:SI 3 "const_0_to_7_operand" "")))] - "TARGET_SSE2" -{ - operands[2] = gen_lowpart (HImode, operands[2]); - operands[3] = GEN_INT (1 << INTVAL (operands[3])); -}) + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) -(define_insn "*sse2_pinsrw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_merge:V8HI - (match_operand:V8HI 1 "register_operand" "0") - (vec_duplicate:V8HI - (match_operand:HI 2 "nonimmediate_operand" "rm")) - (match_operand:SI 3 "const_pow2_1_to_128_operand" "N")))] - "TARGET_SSE2" +(define_insn "*prefetch_3dnow_rex" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 3))] + "TARGET_3DNOW && TARGET_64BIT" { - operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); - return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; + if (INTVAL (operands[1]) == 0) + return "prefetch\t%a0"; + else + return "prefetchw\t%a0"; } - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_pextrw" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI - (vec_select:HI (match_operand:V8HI 1 "register_operand" "x") - (parallel - [(match_operand:SI 2 "const_0_to_7_operand" "N")]))))] - "TARGET_SSE2" - 
"pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_pshufd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (unspec:V4SI [(match_operand:V4SI 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_SHUFFLE))] - "TARGET_SSE2" - "pshufd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_pshuflw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_PSHUFLW))] - "TARGET_SSE2" - "pshuflw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_pshufhw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_PSHUFHW))] - "TARGET_SSE2" - "pshufhw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -;; MMX mask-generating comparisons - -(define_insn "eqv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (eq:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpeqb\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - -(define_insn "eqv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (eq:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpeqw\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - -(define_insn "eqv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (eq:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpeqd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr 
"mode" "TI")]) - -(define_insn "gtv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (gt:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpgtb\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - -(define_insn "gtv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (gt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpgtw\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - -(define_insn "gtv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (gt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpgtd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - - -;; MMX max/min insns - -(define_insn "umaxv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (umax:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pmaxub\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "smaxv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (smax:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pmaxsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "uminv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (umin:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pminub\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "sminv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (smin:V8HI (match_operand:V8HI 1 "register_operand" "0") - 
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pminsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - - -;; MMX shifts - -(define_insn "ashrv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psraw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashrv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psrad\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psrlw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psrld\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psllw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv4si3" - 
[(set (match_operand:V4SI 0 "register_operand" "=x") - (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "pslld\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashrv8hi3_ti" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psraw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashrv4si3_ti" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psrad\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv8hi3_ti" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psrlw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv4si3_ti" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psrld\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv2di3_ti" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") - (subreg:SI 
(match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv8hi3_ti" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psllw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv4si3_ti" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "pslld\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv2di3_ti" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -;; See logical MMX insns for the reason for the unspec. Strictly speaking -;; we wouldn't need here it since we never generate TImode arithmetic. - -;; There has to be some kind of prize for the weirdest new instruction... 
-(define_insn "sse2_ashlti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (unspec:TI - [(ashift:TI (match_operand:TI 1 "register_operand" "0") - (mult:SI (match_operand:SI 2 "immediate_operand" "i") - (const_int 8)))] UNSPEC_NOP))] - "TARGET_SSE2" - "pslldq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "sse2_lshrti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (unspec:TI - [(lshiftrt:TI (match_operand:TI 1 "register_operand" "0") - (mult:SI (match_operand:SI 2 "immediate_operand" "i") - (const_int 8)))] UNSPEC_NOP))] - "TARGET_SSE2" - "psrldq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -;; SSE unpack - -(define_insn "sse2_unpckhpd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_concat:V2DF - (vec_select:DF (match_operand:V2DF 1 "register_operand" "0") - (parallel [(const_int 1)])) - (vec_select:DF (match_operand:V2DF 2 "register_operand" "x") - (parallel [(const_int 1)]))))] - "TARGET_SSE2" - "unpckhpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_unpcklpd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_concat:V2DF - (vec_select:DF (match_operand:V2DF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:DF (match_operand:V2DF 2 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE2" - "unpcklpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -;; MMX pack/unpack insns. 
- -(define_insn "sse2_packsswb" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (vec_concat:V16QI - (ss_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0")) - (ss_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))] - "TARGET_SSE2" - "packsswb\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_packssdw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_concat:V8HI - (ss_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0")) - (ss_truncate:V4HI (match_operand:V4SI 2 "register_operand" "x"))))] - "TARGET_SSE2" - "packssdw\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_packuswb" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (vec_concat:V16QI - (us_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0")) - (us_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))] - "TARGET_SSE2" - "packuswb\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_punpckhbw" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (vec_merge:V16QI - (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0") - (parallel [(const_int 8) (const_int 0) - (const_int 9) (const_int 1) - (const_int 10) (const_int 2) - (const_int 11) (const_int 3) - (const_int 12) (const_int 4) - (const_int 13) (const_int 5) - (const_int 14) (const_int 6) - (const_int 15) (const_int 7)])) - (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x") - (parallel [(const_int 0) (const_int 8) - (const_int 1) (const_int 9) - (const_int 2) (const_int 10) - (const_int 3) (const_int 11) - (const_int 4) (const_int 12) - (const_int 5) (const_int 13) - (const_int 6) (const_int 14) - (const_int 7) (const_int 15)])) - (const_int 21845)))] - "TARGET_SSE2" - "punpckhbw\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_punpckhwd" - [(set (match_operand:V8HI 0 
"register_operand" "=x") - (vec_merge:V8HI - (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0") - (parallel [(const_int 4) (const_int 0) - (const_int 5) (const_int 1) - (const_int 6) (const_int 2) - (const_int 7) (const_int 3)])) - (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x") - (parallel [(const_int 0) (const_int 4) - (const_int 1) (const_int 5) - (const_int 2) (const_int 6) - (const_int 3) (const_int 7)])) - (const_int 85)))] - "TARGET_SSE2" - "punpckhwd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_punpckhdq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0") - (parallel [(const_int 2) (const_int 0) - (const_int 3) (const_int 1)])) - (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x") - (parallel [(const_int 0) (const_int 2) - (const_int 1) (const_int 3)])) - (const_int 5)))] - "TARGET_SSE2" - "punpckhdq\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_punpcklbw" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (vec_merge:V16QI - (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 8) - (const_int 1) (const_int 9) - (const_int 2) (const_int 10) - (const_int 3) (const_int 11) - (const_int 4) (const_int 12) - (const_int 5) (const_int 13) - (const_int 6) (const_int 14) - (const_int 7) (const_int 15)])) - (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x") - (parallel [(const_int 8) (const_int 0) - (const_int 9) (const_int 1) - (const_int 10) (const_int 2) - (const_int 11) (const_int 3) - (const_int 12) (const_int 4) - (const_int 13) (const_int 5) - (const_int 14) (const_int 6) - (const_int 15) (const_int 7)])) - (const_int 21845)))] - "TARGET_SSE2" - "punpcklbw\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_punpcklwd" - 
[(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_merge:V8HI - (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 4) - (const_int 1) (const_int 5) - (const_int 2) (const_int 6) - (const_int 3) (const_int 7)])) - (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x") - (parallel [(const_int 4) (const_int 0) - (const_int 5) (const_int 1) - (const_int 6) (const_int 2) - (const_int 7) (const_int 3)])) - (const_int 85)))] - "TARGET_SSE2" - "punpcklwd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_punpckldq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 2) - (const_int 1) (const_int 3)])) - (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x") - (parallel [(const_int 2) (const_int 0) - (const_int 3) (const_int 1)])) - (const_int 5)))] - "TARGET_SSE2" - "punpckldq\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_punpcklqdq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_merge:V2DI - (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x") - (parallel [(const_int 1) - (const_int 0)])) - (match_operand:V2DI 1 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE2" - "punpcklqdq\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_punpckhqdq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_merge:V2DI - (match_operand:V2DI 1 "register_operand" "0") - (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 1)))] - "TARGET_SSE2" - "punpckhqdq\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -;; SSE2 moves - -(define_insn "sse2_movapd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - 
(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVA))] - "TARGET_SSE2 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movapd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_movupd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_SSE2 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movupd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_movdqa" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVA))] - "TARGET_SSE2 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movdqa\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "TI")]) - -(define_insn "sse2_movdqu" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_SSE2 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movdqu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_movdq2q" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y") - (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x") - (parallel [(const_int 0)])))] - "TARGET_SSE2 && !TARGET_64BIT" - "@ - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_movdq2q_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r") - (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x") - (parallel [(const_int 0)])))] - "TARGET_SSE2 && TARGET_64BIT" - "@ - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr 
"mode" "TI")]) - -(define_insn "sse2_movq2dq" - [(set (match_operand:V2DI 0 "register_operand" "=x,?x") - (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y") - (const_int 0)))] - "TARGET_SSE2 && !TARGET_64BIT" - "@ - movq\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt,ssemov") - (set_attr "mode" "TI")]) - -(define_insn "sse2_movq2dq_rex64" - [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x") - (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r") - (const_int 0)))] - "TARGET_SSE2 && TARGET_64BIT" - "@ - movq\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt,ssemov,ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_movq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_concat:V2DI (vec_select:DI - (match_operand:V2DI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (const_int 0)))] - "TARGET_SSE2" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "TI")]) - -(define_insn "sse2_loadd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr")) - (const_vector:V4SI [(const_int 0) - (const_int 0) - (const_int 0) - (const_int 0)]) - (const_int 1)))] - "TARGET_SSE2" - "movd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "TI")]) - -(define_insn "sse2_stored" - [(set (match_operand:SI 0 "nonimmediate_operand" "=mr") - (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x") - (parallel [(const_int 0)])))] - "TARGET_SSE2" - "movd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "TI")]) - -;; Store the high double of the source vector into the double destination. 
-(define_insn "sse2_storehpd" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,Y,Y") - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " Y,0,o") - (parallel [(const_int 1)])))] - "TARGET_SSE2" - "@ - movhpd\t{%1, %0|%0, %1} - unpckhpd\t%0, %0 - #" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (vec_select:DF - (match_operand:V2DF 1 "memory_operand" "") - (parallel [(const_int 1)])))] - "TARGET_SSE2 && reload_completed" - [(const_int 0)] +(define_expand "stack_protect_set" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")] + "" { - emit_move_insn (operands[0], adjust_address (operands[1], DFmode, 8)); +#ifdef TARGET_THREAD_SSP_OFFSET + if (TARGET_64BIT) + emit_insn (gen_stack_tls_protect_set_di (operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); + else + emit_insn (gen_stack_tls_protect_set_si (operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); +#else + if (TARGET_64BIT) + emit_insn (gen_stack_protect_set_di (operands[0], operands[1])); + else + emit_insn (gen_stack_protect_set_si (operands[0], operands[1])); +#endif DONE; }) -;; Load the high double of the target vector from the source scalar. 
-(define_insn "sse2_loadhpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,o") - (vec_concat:V2DF - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") - (parallel [(const_int 0)])) - (match_operand:DF 2 "nonimmediate_operand" " m,Y,Y")))] - "TARGET_SSE2" - "@ - movhpd\t{%2, %0|%0, %2} - unpcklpd\t{%2, %0|%0, %2} - #" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) +(define_insn "stack_protect_set_si" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{l}\t{%1, %2|%2, %1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + [(set_attr "type" "multi")]) -(define_split - [(set (match_operand:V2DF 0 "memory_operand" "") - (vec_concat:V2DF - (vec_select:DF (match_dup 0) (parallel [(const_int 0)])) - (match_operand:DF 1 "register_operand" "")))] - "TARGET_SSE2 && reload_completed" - [(const_int 0)] -{ - emit_move_insn (adjust_address (operands[0], DFmode, 8), operands[1]); - DONE; -}) +(define_insn "stack_protect_set_di" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "mov{q}\t{%1, %2|%2, %1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2" + [(set_attr "type" "multi")]) -;; Store the low double of the source vector into the double destination. 
-(define_expand "sse2_storelpd" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" "") - (parallel [(const_int 1)])))] - "TARGET_SSE2" -{ - operands[1] = gen_lowpart (DFmode, operands[1]); - emit_move_insn (operands[0], operands[1]); - DONE; -}) +(define_insn "stack_tls_protect_set_si" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR %%gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + [(set_attr "type" "multi")]) -;; Load the load double of the target vector from the source scalar. -(define_insn "sse2_loadlpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m") - (vec_concat:V2DF - (match_operand:DF 2 "nonimmediate_operand" " m,Y,Y") - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") - (parallel [(const_int 1)]))))] - "TARGET_SSE2" - "@ - movlpd\t{%2, %0|%0, %2} - movsd\t{%2, %0|%0, %2} - movlpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -;; Merge the low part of the source vector into the low part of the target. 
-(define_insn "sse2_movsd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m") - (vec_merge:V2DF - (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0") - (match_operand:V2DF 2 "nonimmediate_operand" "x,m,Y") - (const_int 2)))] - "TARGET_SSE2" - "@movsd\t{%2, %0|%0, %2} - movlpd\t{%2, %0|%0, %2} - movlpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF,V2DF,V2DF")]) +(define_insn "stack_tls_protect_set_di" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + { + /* The kernel uses a different segment register for performance reasons; a + system call would not have to trash the userspace segment register, + which would be expensive */ + if (ix86_cmodel != CM_KERNEL) + return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR %%fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + else + return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR %%gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + } + [(set_attr "type" "multi")]) -(define_expand "sse2_loadsd" - [(match_operand:V2DF 0 "register_operand" "") - (match_operand:DF 1 "memory_operand" "")] - "TARGET_SSE2" +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")] + "" { - emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1], - CONST0_RTX (V2DFmode))); - DONE; -}) - -(define_insn "sse2_loadsd_1" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")) - (match_operand:V2DF 2 "const0_operand" "X") - (const_int 1)))] - "TARGET_SSE2" - "movsd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) - -(define_insn "sse2_storesd" - [(set (match_operand:DF 0 "memory_operand" "=m") - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") - 
(parallel [(const_int 0)])))] - "TARGET_SSE2" - "movsd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) - -(define_insn "sse2_shufpd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_SHUFFLE))] - "TARGET_SSE2" - ;; @@@ check operand order for intel/nonintel syntax - "shufpd\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_clflush" - [(unspec_volatile [(match_operand 0 "address_operand" "p")] - UNSPECV_CLFLUSH)] - "TARGET_SSE2" - "clflush\t%a0" - [(set_attr "type" "sse") - (set_attr "memory" "unknown")]) + rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + ix86_compare_emitted = flags; -(define_expand "sse2_mfence" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] - "TARGET_SSE2" -{ - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); - MEM_VOLATILE_P (operands[0]) = 1; +#ifdef TARGET_THREAD_SSP_OFFSET + if (TARGET_64BIT) + emit_insn (gen_stack_tls_protect_test_di (flags, operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); + else + emit_insn (gen_stack_tls_protect_test_si (flags, operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); +#else + if (TARGET_64BIT) + emit_insn (gen_stack_protect_test_di (flags, operands[0], operands[1])); + else + emit_insn (gen_stack_protect_test_si (flags, operands[0], operands[1])); +#endif + emit_jump_insn (gen_beq (operands[2])); + DONE; }) -(define_insn "*mfence_insn" - [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] - "TARGET_SSE2" - "mfence" - [(set_attr "type" "sse") - (set_attr "memory" "unknown")]) +(define_insn "stack_protect_test_si" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m") + 
(match_operand:SI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:SI 3 "=&r"))] + "" + "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%2, %3|%3, %2}" + [(set_attr "type" "multi")]) -(define_expand "sse2_lfence" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] - "TARGET_SSE2" -{ - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); - MEM_VOLATILE_P (operands[0]) = 1; -}) +(define_insn "stack_protect_test_di" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:DI 3 "=&r"))] + "TARGET_64BIT" + "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%2, %3|%3, %2}" + [(set_attr "type" "multi")]) -(define_insn "*lfence_insn" - [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] - "TARGET_SSE2" - "lfence" - [(set_attr "type" "sse") - (set_attr "memory" "unknown")]) - -;; SSE3 - -(define_insn "mwait" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") - (match_operand:SI 1 "register_operand" "c")] - UNSPECV_MWAIT)] - "TARGET_SSE3" - "mwait\t%0, %1" - [(set_attr "length" "3")]) - -(define_insn "monitor" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") - (match_operand:SI 1 "register_operand" "c") - (match_operand:SI 2 "register_operand" "d")] - UNSPECV_MONITOR)] - "TARGET_SSE3" - "monitor\t%0, %1, %2" - [(set_attr "length" "3")]) - -;; SSE3 arithmetic - -(define_insn "addsubv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")] - UNSPEC_ADDSUB))] - "TARGET_SSE3" - "addsubps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) - -(define_insn "addsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 
"nonimmediate_operand" "xm")] - UNSPEC_ADDSUB))] - "TARGET_SSE3" - "addsubpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "haddv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")] - UNSPEC_HADD))] - "TARGET_SSE3" - "haddps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) - -(define_insn "haddv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")] - UNSPEC_HADD))] - "TARGET_SSE3" - "haddpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "hsubv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")] - UNSPEC_HSUB))] - "TARGET_SSE3" - "hsubps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) - -(define_insn "hsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")] - UNSPEC_HSUB))] - "TARGET_SSE3" - "hsubpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "movshdup" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))] - "TARGET_SSE3" - "movshdup\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_insn "movsldup" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))] - "TARGET_SSE3" - "movsldup\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_insn "lddqu" 
- [(set (match_operand:V16QI 0 "register_operand" "=x") - (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] - UNSPEC_LDQQU))] - "TARGET_SSE3" - "lddqu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "stack_tls_protect_test_si" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "const_int_operand" "i")] + UNSPEC_SP_TLS_TEST)) + (clobber (match_scratch:SI 3 "=r"))] + "" + "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR %%gs:%P2}" + [(set_attr "type" "multi")]) -(define_insn "loadddup" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))] - "TARGET_SSE3" - "movddup\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) +(define_insn "stack_tls_protect_test_di" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "const_int_operand" "i")] + UNSPEC_SP_TLS_TEST)) + (clobber (match_scratch:DI 3 "=r"))] + "TARGET_64BIT" + { + /* The kernel uses a different segment register for performance reasons; a + system call would not have to trash the userspace segment register, + which would be expensive */ + if (ix86_cmodel != CM_KERNEL) + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}"; + else + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR %%gs:%P2}"; + } + [(set_attr "type" "multi")]) -(define_insn "movddup" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_duplicate:V2DF - (vec_select:DF (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE3" - "movddup\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) +(include "sse.md") +(include "mmx.md") +(include "sync.md")