X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fi386.md;h=d9b625cb2956866ab4c5e66d573448dd760b89f0;hp=9593d8f9b8aecfbe4d26c49dc9ac640ef8243118;hb=9b980db1c91f8dfb2b916d4df8d542b26b0a9078;hpb=b7940b13fb6fd2fd0b873d873f3f849371b9cb48 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9593d8f9b8a..d9b625cb295 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1,6 +1,6 @@ ;; GCC machine description for IA-32 and x86-64. ;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -;; 2001, 2002, 2003, 2004, 2005, 2006, 2007 +;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka @@ -9,7 +9,7 @@ ;; ;; GCC is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) +;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; ;; GCC is distributed in the hope that it will be useful, @@ -18,18 +18,14 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License -;; along with GCC; see the file COPYING. If not, write to -;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. */ +;; along with GCC; see the file COPYING3. If not see +;; . */ ;; ;; The original PO technology requires these to be ordered by speed, ;; so that assigner will pick the fastest. ;; ;; See file "rtl.def" for documentation on define_insn, match_*, et. al. ;; -;; Macro REG_CLASS_FROM_LETTER in file i386.h defines the register -;; constraint letters. -;; ;; The special asm out single letter directives following a '%' are: ;; 'z' mov%z1 would be movl, movw, or movb depending on the mode of ;; operands[1]. 
@@ -61,6 +57,7 @@ (UNSPEC_DTPOFF 6) (UNSPEC_GOTNTPOFF 7) (UNSPEC_INDNTPOFF 8) + (UNSPEC_PLTOFF 9) ; Prologue support (UNSPEC_STACK_ALLOC 11) @@ -68,36 +65,38 @@ (UNSPEC_SSE_PROLOGUE_SAVE 13) (UNSPEC_REG_SAVE 14) (UNSPEC_DEF_CFA 15) + (UNSPEC_SET_RIP 16) + (UNSPEC_SET_GOT_OFFSET 17) ; TLS support - (UNSPEC_TP 16) - (UNSPEC_TLS_GD 17) - (UNSPEC_TLS_LD_BASE 18) - (UNSPEC_TLSDESC 19) + (UNSPEC_TP 18) + (UNSPEC_TLS_GD 19) + (UNSPEC_TLS_LD_BASE 20) + (UNSPEC_TLSDESC 21) ; Other random patterns - (UNSPEC_SCAS 20) - (UNSPEC_FNSTSW 21) - (UNSPEC_SAHF 22) - (UNSPEC_FSTCW 23) - (UNSPEC_ADD_CARRY 24) - (UNSPEC_FLDCW 25) - (UNSPEC_REP 26) - (UNSPEC_EH_RETURN 27) - (UNSPEC_LD_MPIC 28) ; load_macho_picbase - (UNSPEC_TRUNC_NOOP 29) + (UNSPEC_SCAS 30) + (UNSPEC_FNSTSW 31) + (UNSPEC_SAHF 32) + (UNSPEC_FSTCW 33) + (UNSPEC_ADD_CARRY 34) + (UNSPEC_FLDCW 35) + (UNSPEC_REP 36) + (UNSPEC_EH_RETURN 37) + (UNSPEC_LD_MPIC 38) ; load_macho_picbase + (UNSPEC_TRUNC_NOOP 39) ; For SSE/MMX support: - (UNSPEC_FIX_NOTRUNC 30) - (UNSPEC_MASKMOV 31) - (UNSPEC_MOVMSK 32) - (UNSPEC_MOVNT 33) - (UNSPEC_MOVU 34) - (UNSPEC_RCP 35) - (UNSPEC_RSQRT 36) - (UNSPEC_SFENCE 37) - (UNSPEC_NOP 38) ; prevents combiner cleverness - (UNSPEC_PFRCP 39) + (UNSPEC_FIX_NOTRUNC 40) + (UNSPEC_MASKMOV 41) + (UNSPEC_MOVMSK 42) + (UNSPEC_MOVNT 43) + (UNSPEC_MOVU 44) + (UNSPEC_RCP 45) + (UNSPEC_RSQRT 46) + (UNSPEC_SFENCE 47) + (UNSPEC_NOP 48) ; prevents combiner cleverness + (UNSPEC_PFRCP 49) (UNSPEC_PFRCPIT1 40) (UNSPEC_PFRCPIT2 41) (UNSPEC_PFRSQRT 42) @@ -144,6 +143,8 @@ (UNSPEC_FPREM1_F 90) (UNSPEC_FPREM1_U 91) + (UNSPEC_C2_FLAG 95) + ; SSP patterns (UNSPEC_SP_SET 100) (UNSPEC_SP_TEST 101) @@ -157,9 +158,35 @@ ; For SSE4A support (UNSPEC_EXTRQI 130) - (UNSPEC_EXTRQ 131) + (UNSPEC_EXTRQ 131) (UNSPEC_INSERTQI 132) (UNSPEC_INSERTQ 133) + + ; For SSE4.1 support + (UNSPEC_BLENDV 134) + (UNSPEC_INSERTPS 135) + (UNSPEC_DP 136) + (UNSPEC_MOVNTDQA 137) + (UNSPEC_MPSADBW 138) + (UNSPEC_PHMINPOSUW 139) + (UNSPEC_PTEST 140) + 
(UNSPEC_ROUND 141) + + ; For SSE4.2 support + (UNSPEC_CRC32 143) + (UNSPEC_PCMPESTR 144) + (UNSPEC_PCMPISTR 145) + + ;; For SSE5 + (UNSPEC_SSE5_INTRINSIC 150) + (UNSPEC_SSE5_UNSIGNED_CMP 151) + (UNSPEC_SSE5_TRUEFALSE 152) + (UNSPEC_SSE5_PERMUTE 153) + (UNSPEC_SSE5_ASHIFT 154) + (UNSPEC_SSE5_LSHIFT 155) + (UNSPEC_FRCZ 156) + (UNSPEC_CVTPH2PS 157) + (UNSPEC_CVTPS2PH 158) ]) (define_constants @@ -177,11 +204,27 @@ (UNSPECV_CMPXCHG_2 11) (UNSPECV_XCHG 12) (UNSPECV_LOCK 13) + (UNSPECV_PROLOGUE_USE 14) + ]) + +;; Constants to represent pcomtrue/pcomfalse variants +(define_constants + [(PCOM_FALSE 0) + (PCOM_TRUE 1) + (COM_FALSE_S 2) + (COM_FALSE_P 3) + (COM_TRUE_S 4) + (COM_TRUE_P 5) ]) ;; Registers by name. (define_constants - [(BP_REG 6) + [(AX_REG 0) + (DX_REG 1) + (CX_REG 2) + (SI_REG 4) + (DI_REG 5) + (BP_REG 6) (SP_REG 7) (FLAGS_REG 17) (FPSR_REG 18) @@ -215,8 +258,9 @@ push,pop,call,callv,leave, str,bitmanip, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, - sselog,sselog1,sseiadd,sseishft,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins, + sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, + ssemuladd,sse4arg, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) @@ -229,8 +273,9 @@ (define_attr "unit" "integer,i387,sse,mmx,unknown" (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") (const_string "i387") - (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins") + (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt, + ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg") (const_string "sse") (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") (const_string "mmx") @@ -315,6 +360,9 @@ ] (const_int 0))) +;; There are also additional 
prefixes in SSSE3. +(define_attr "prefix_extra" "" (const_int 0)) + ;; Set when modrm byte is used. (define_attr "modrm" "" (cond [(eq_attr "type" "str,leave") @@ -364,7 +412,8 @@ (plus (plus (attr "modrm") (plus (attr "prefix_0f") (plus (attr "prefix_rex") - (const_int 1)))) + (plus (attr "prefix_extra") + (const_int 1))))) (plus (attr "prefix_rep") (plus (attr "prefix_data16") (plus (attr "length_immediate") @@ -426,11 +475,11 @@ "!alu1,negnot,ishift1, imov,imovx,icmp,test,bitmanip, fmov,fcmp,fsgn, - sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1, - mmx,mmxmov,mmxcmp,mmxcvt") + sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1, + sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt") (match_operand 2 "memory_operand" "")) (const_string "load") - (and (eq_attr "type" "icmov") + (and (eq_attr "type" "icmov,ssemuladd,sse4arg") (match_operand 3 "memory_operand" "")) (const_string "load") ] @@ -467,23 +516,47 @@ [(set_attr "length" "128") (set_attr "type" "multi")]) -;; All x87 floating point modes -(define_mode_macro X87MODEF [SF DF XF]) +(define_code_iterator plusminus [plus minus]) + +;; Base name for define_insn and insn mnemonic. +(define_code_attr addsub [(plus "add") (minus "sub")]) + +;; Mark commutative operators as such in constraints. +(define_code_attr comm [(plus "%") (minus "")]) + +;; All single word integer modes. +(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")]) + +;; Instruction suffix for integer modes. +(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")]) + +;; Register class for integer modes. +(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")]) + +;; Immediate operand constraint for integer modes. +(define_mode_attr i [(QI "i") (HI "i") (SI "i") (DI "e")]) + +;; General operand predicate for integer modes. 
+(define_mode_attr general_operand + [(QI "general_operand") + (HI "general_operand") + (SI "general_operand") + (DI "x86_64_general_operand")]) + +;; SSE and x87 SFmode and DFmode floating point modes +(define_mode_iterator MODEF [SF DF]) -;; x87 SFmode and DFMode floating point modes -(define_mode_macro X87MODEF12 [SF DF]) +;; All x87 floating point modes +(define_mode_iterator X87MODEF [SF DF XF]) ;; All integer modes handled by x87 fisttp operator. -(define_mode_macro X87MODEI [HI SI DI]) +(define_mode_iterator X87MODEI [HI SI DI]) ;; All integer modes handled by integer x87 operators. -(define_mode_macro X87MODEI12 [HI SI]) - -;; All SSE floating point modes -(define_mode_macro SSEMODEF [SF DF]) +(define_mode_iterator X87MODEI12 [HI SI]) ;; All integer modes handled by SSE cvtts?2si* operators. -(define_mode_macro SSEMODEI24 [SI DI]) +(define_mode_iterator SSEMODEI24 [SI DI]) ;; SSE asm suffix for floating point modes (define_mode_attr ssemodefsuffix [(SF "s") (DF "d")]) @@ -641,8 +714,8 @@ (define_expand "cmpsi_1" [(set (reg:CC FLAGS_REG) - (compare:CC (match_operand:SI 0 "nonimmediate_operand" "rm,r") - (match_operand:SI 1 "general_operand" "ri,mr")))] + (compare:CC (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" "")))] "" "") @@ -837,22 +910,11 @@ DONE; }) -(define_expand "cmpdf" - [(set (reg:CC FLAGS_REG) - (compare:CC (match_operand:DF 0 "cmp_fp_expander_operand" "") - (match_operand:DF 1 "cmp_fp_expander_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" -{ - ix86_compare_op0 = operands[0]; - ix86_compare_op1 = operands[1]; - DONE; -}) - -(define_expand "cmpsf" +(define_expand "cmp" [(set (reg:CC FLAGS_REG) - (compare:CC (match_operand:SF 0 "cmp_fp_expander_operand" "") - (match_operand:SF 1 "cmp_fp_expander_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" + (compare:CC (match_operand:MODEF 0 "cmp_fp_expander_operand" "") + (match_operand:MODEF 1 "cmp_fp_expander_operand" "")))] + "TARGET_80387 
|| (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" { ix86_compare_op0 = operands[0]; ix86_compare_op1 = operands[1]; @@ -875,8 +937,7 @@ (match_operand 1 "register_operand" "f") (match_operand 2 "const0_operand" "X"))] UNSPEC_FNSTSW))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2])" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") @@ -889,44 +950,101 @@ ] (const_string "XF")))]) -(define_insn "*cmpfp_sf" - [(set (match_operand:HI 0 "register_operand" "=a") +(define_insn_and_split "*cmpfp_0_cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" "X"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:HI - [(compare:CCFP - (match_operand:SF 1 "register_operand" "f") - (match_operand:SF 2 "nonimmediate_operand" "fm"))] - UNSPEC_FNSTSW))] - "TARGET_80387" - "* return output_fp_compare (insn, operands, 0, 0);" + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" [(set_attr "type" "multi") (set_attr "unit" "i387") - (set_attr "mode" "SF")]) + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) -(define_insn "*cmpfp_df" +(define_insn "*cmpfp_xf" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP - (match_operand:DF 1 "register_operand" "f") - (match_operand:DF 2 "nonimmediate_operand" "fm"))] + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))] UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, 
operands, 0, 0);" [(set_attr "type" "multi") (set_attr "unit" "i387") - (set_attr "mode" "DF")]) + (set_attr "mode" "XF")]) -(define_insn "*cmpfp_xf" +(define_insn_and_split "*cmpfp_xf_cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 + && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "XF")]) + +(define_insn "*cmpfp_" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP - (match_operand:XF 1 "register_operand" "f") - (match_operand:XF 2 "register_operand" "f"))] + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))] UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") (set_attr "unit" "i387") - (set_attr "mode" "XF")]) + (set_attr "mode" "")]) + +(define_insn_and_split "*cmpfp__cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 + && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) (define_insn "*cmpfp_u" [(set (match_operand:HI 0 "register_operand" "=a") @@ -935,8 +1053,7 @@ (match_operand 1 "register_operand" "f") (match_operand 2 "register_operand" "f"))] UNSPEC_FNSTSW))] - 
"TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2])" "* return output_fp_compare (insn, operands, 0, 1);" [(set_attr "type" "multi") @@ -949,6 +1066,34 @@ ] (const_string "XF")))]) +(define_insn_and_split "*cmpfp_u_cc" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU + (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFPU (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + (define_insn "*cmpfp_" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI @@ -957,8 +1102,8 @@ (match_operator 3 "float_operator" [(match_operand:X87MODEI12 2 "memory_operand" "m")]))] UNSPEC_FNSTSW))] - "TARGET_80387 && TARGET_USE_MODE_FIOP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_USE_MODE_FIOP && (GET_MODE (operands [3]) == GET_MODE (operands[1]))" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") @@ -966,6 +1111,33 @@ (set_attr "fp_int_src" "true") (set_attr "mode" "")]) +(define_insn_and_split "*cmpfp__cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operator 3 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "m")]))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && 
!TARGET_CMOVE + && TARGET_USE_MODE_FIOP + && (GET_MODE (operands [3]) == GET_MODE (operands[1]))" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP + (match_dup 1) + (match_op_dup 3 [(match_dup 2)]))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + ;; FP compares, step 2 ;; Move the fpsw to ax. @@ -983,16 +1155,23 @@ (define_insn "x86_sahf_1" [(set (reg:CC FLAGS_REG) - (unspec:CC [(match_operand:HI 0 "register_operand" "a")] UNSPEC_SAHF))] - "!TARGET_64BIT" - "sahf" + (unspec:CC [(match_operand:HI 0 "register_operand" "a")] + UNSPEC_SAHF))] + "TARGET_SAHF" +{ +#ifdef HAVE_AS_IX86_SAHF + return "sahf"; +#else + return ".byte\t0x9e"; +#endif +} [(set_attr "length" "1") (set_attr "athlon_decode" "vector") (set_attr "amdfam10_decode" "direct") (set_attr "mode" "SI")]) ;; Pentium Pro can do steps 1 through 3 in one go. 
-;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes) +;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes) (define_insn "*cmpfp_i_mixed" [(set (reg:CCFP FLAGS_REG) (compare:CCFP (match_operand 0 "register_operand" "f,x") @@ -1029,9 +1208,9 @@ [(set (reg:CCFP FLAGS_REG) (compare:CCFP (match_operand 0 "register_operand" "f") (match_operand 1 "register_operand" "f")))] - "TARGET_80387 && TARGET_CMOVE - && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))) - && FLOAT_MODE_P (GET_MODE (operands[0])) + "X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 0);" [(set_attr "type" "fcmp") @@ -1081,9 +1260,9 @@ [(set (reg:CCFPU FLAGS_REG) (compare:CCFPU (match_operand 0 "register_operand" "f") (match_operand 1 "register_operand" "f")))] - "TARGET_80387 && TARGET_CMOVE - && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))) - && FLOAT_MODE_P (GET_MODE (operands[0])) + "X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" [(set_attr "type" "fcmp") @@ -1179,7 +1358,7 @@ (clobber (reg:CC FLAGS_REG))] "reload_completed && operands[1] == constm1_rtx - && (TARGET_PENTIUM || optimize_size)" + && (TARGET_MOVE_M1_VIA_OR || optimize_size)" { operands[1] = constm1_rtx; return "or{l}\t{%1, %0|%0, %1}"; @@ -1302,7 +1481,7 @@ (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "double")]) + (set_attr "amdfam10_decode" "double")]) (define_expand "movhi" [(set (match_operand:HI 0 "nonimmediate_operand" "") @@ -1420,7 +1599,7 @@ (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr 
"athlon_decode" "vector") - (set_attr "amdfam10_decode" "double")]) + (set_attr "amdfam10_decode" "double")]) ;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 (define_insn "*swaphi_2" @@ -1596,7 +1775,7 @@ (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector")]) ;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 (define_insn "*swapqi_2" @@ -1915,7 +2094,7 @@ [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "immediate_operand" ""))] "TARGET_64BIT && ((optimize > 0 && flag_peephole2) - ? flow2_completed : reload_completed) + ? epilogue_completed : reload_completed) && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 0) (match_dup 1)) @@ -1971,7 +2150,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (match_operand:DI 1 "const_int_operand" "i")) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && (TARGET_PENTIUM || optimize_size) + "TARGET_64BIT && (TARGET_MOVE_M1_VIA_OR || optimize_size) && reload_completed && operands[1] == constm1_rtx" { @@ -2025,9 +2204,9 @@ (define_insn "*movdi_1_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r,r ,r,m ,!m,*y,*y,?r ,m ,?*Ym,*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym") + "=r,r ,r,m ,!m,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym") (match_operand:DI 1 "general_operand" - "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r ,m ,C ,*x,*Yi,*x,r ,m ,*Ym,*x"))] + "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r ,m ,C ,*x,*Yi,*x,r ,m ,*Ym,*x"))] "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2151,7 +2330,7 @@ [(set (match_operand:DI 0 "memory_operand" "") (match_operand:DI 1 "immediate_operand" ""))] "TARGET_64BIT && ((optimize > 0 && flag_peephole2) - ? flow2_completed : reload_completed) + ? 
epilogue_completed : reload_completed) && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 2) (match_dup 3)) @@ -2169,7 +2348,7 @@ (set_attr "mode" "DI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "double")]) + (set_attr "amdfam10_decode" "double")]) (define_expand "movti" [(set (match_operand:TI 0 "nonimmediate_operand" "") @@ -2178,6 +2357,8 @@ { if (TARGET_64BIT) ix86_expand_move (TImode, operands); + else if (push_operand (operands[0], TImode)) + ix86_expand_push (TImode, operands[1]); else ix86_expand_vector_move (TImode, operands); DONE; @@ -2269,6 +2450,22 @@ [(const_int 0)] "ix86_split_long_move (operands); DONE;") +;; This expands to what emit_move_complex would generate if we didn't +;; have a movti pattern. Having this avoids problems with reload on +;; 32-bit targets when SSE is present, but doesn't seem to be harmful +;; to have around all the time. +(define_expand "movcdi" + [(set (match_operand:CDI 0 "nonimmediate_operand" "") + (match_operand:CDI 1 "general_operand" ""))] + "" +{ + if (push_operand (operands[0], CDImode)) + emit_move_complex_push (CDImode, operands[0], operands[1]); + else + emit_move_complex_parts (operands[0], operands[1]); + DONE; +}) + (define_expand "movsf" [(set (match_operand:SF 0 "nonimmediate_operand" "") (match_operand:SF 1 "general_operand" ""))] @@ -2289,7 +2486,7 @@ (set_attr "mode" "SF,SI,SF")]) (define_insn "*pushsf_rex64" - [(set (match_operand:SF 0 "push_operand" "=<,<,<") + [(set (match_operand:SF 0 "push_operand" "=X,X,X") (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] "TARGET_64BIT" { @@ -2306,10 +2503,9 @@ (match_operand:SF 1 "memory_operand" ""))] "reload_completed && MEM_P (operands[1]) - && constant_pool_reference_p (operands[1])" + && (operands[2] = find_constant_src (insn))" [(set (match_dup 0) - (match_dup 1))] - "operands[1] = avoid_constant_pool_reference (operands[1]);") + 
(match_dup 2))]) ;; %%% Kill this when call knows how to work this out. @@ -2329,9 +2525,9 @@ (define_insn "*movsf_1" [(set (match_operand:SF 0 "nonimmediate_operand" - "=f,m,f,r ,m ,x,x,x ,m,*y,m ,*y,Yi,r ,*Ym,r ") + "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r") (match_operand:SF 1 "general_operand" - "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y,r ,Yi,r ,*Ym"))] + "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y ,r ,Yi,r ,*Ym"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2343,13 +2539,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -2499,26 +2690,25 @@ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ") (match_operand:DF 1 "general_operand" - "fm,f,G,*roF,F*r,C ,Y2*x,mY2*x,Y2*x"))] + "fm,f,G,*roF,*Fr,C ,Y2*x,mY2*x,Y2*x"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_size + && !memory_operand (operands[0], DFmode) && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], DFmode))" + || ((optimize_size + || !TARGET_MEMORY_MISMATCH_STALL + || reload_in_progress || reload_completed) + && memory_operand (operands[0], DFmode)))" { switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return 
standard_80387_constant_opcode (operands[1]); @@ -2633,13 +2823,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -2760,13 +2945,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -2972,15 +3152,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\;fld%z0\t%y0"; - else - return "fstp%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -3007,15 +3180,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\;fld%z0\t%y0"; - else - return "fstp%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -3030,6 +3196,59 @@ [(set_attr "type" "fmov,fmov,fmov,multi,multi") (set_attr "mode" "XF,XF,XF,SI,SI")]) +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_64BIT" +{ + ix86_expand_move (TFmode, operands); + DONE; +}) + +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o") + (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] + "TARGET_64BIT + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + case 1: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "ssemov,ssemov,sselog1,*,*") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,2") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + (define_split [(set (match_operand 0 "nonimmediate_operand" "") (match_operand 1 "general_operand" ""))] @@ -3050,13 +3269,14 @@ (match_operand 1 "memory_operand" ""))] "reload_completed && MEM_P (operands[1]) - && (GET_MODE (operands[0]) == XFmode + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode) - 
&& constant_pool_reference_p (operands[1])" - [(set (match_dup 0) (match_dup 1))] + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))] { - rtx c = avoid_constant_pool_reference (operands[1]); + rtx c = operands[2]; rtx r = operands[0]; if (GET_CODE (r) == SUBREG) @@ -3074,8 +3294,6 @@ } else if (MMX_REG_P (r)) FAIL; - - operands[1] = c; }) (define_split @@ -3083,13 +3301,14 @@ (float_extend (match_operand 1 "memory_operand" "")))] "reload_completed && MEM_P (operands[1]) - && (GET_MODE (operands[0]) == XFmode + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode) - && constant_pool_reference_p (operands[1])" - [(set (match_dup 0) (match_dup 1))] + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))] { - rtx c = avoid_constant_pool_reference (SET_SRC (PATTERN (curr_insn))); + rtx c = operands[2]; rtx r = operands[0]; if (GET_CODE (r) == SUBREG) @@ -3107,8 +3326,6 @@ } else if (MMX_REG_P (r)) FAIL; - - operands[1] = c; }) (define_insn "swapxf" @@ -3146,79 +3363,26 @@ operands[1] = CONST1_RTX (mode); }) -(define_expand "movtf" +(define_split [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "nonimmediate_operand" ""))] - "TARGET_64BIT" -{ - ix86_expand_move (TFmode, operands); - DONE; -}) + (match_operand:TF 1 "general_operand" ""))] + "reload_completed + && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; Zero extension instructions -(define_insn "*movtf_internal" - [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm") - (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))] - "TARGET_64BIT - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + "" { - 
switch (which_alternative) + if (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) { - case 0: - case 1: - return "#"; - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "pxor\t%0, %0"; - case 3: - case 4: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, %1}"; - default: - gcc_unreachable (); - } -} - [(set_attr "type" "*,*,sselog1,ssemov,ssemov") - (set (attr "mode") - (cond [(eq_attr "alternative" "2,3") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "4") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_size") - (const_int 0))) - (const_string "V4SF") - (const_string "TI"))] - (const_string "DI")))]) - -(define_split - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "general_operand" ""))] - "reload_completed && !SSE_REG_P (operands[0]) - && !SSE_REG_P (operands[1])" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - -;; Zero extension instructions - -(define_expand "zero_extendhisi2" - [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] - "" -{ - if (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) - { - operands[1] = force_reg (HImode, operands[1]); - emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1])); - DONE; + operands[1] = force_reg (HImode, operands[1]); + emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1])); + DONE; } }) @@ -3279,7 +3443,7 @@ [(set (match_operand:HI 0 "register_operand" "=r") (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed" - "movz{bl|x}\t{%1, %k0|%k0, %k1}" + "movz{bl|x}\t{%1, %k0|%k0, %1}" [(set_attr "type" "imovx") (set_attr "mode" "SI")]) @@ -3392,8 +3556,8 @@ ;; %%% 
Kill me once multi-word ops are sane. (define_expand "zero_extendsidi2" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))] + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))] "" { if (!TARGET_64BIT) @@ -3404,9 +3568,9 @@ }) (define_insn "zero_extendsidi2_32" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,*y,?*Yi,*Y2") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Y2") (zero_extend:DI - (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r ,m ,r ,m"))) + (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r ,m ,r ,m"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" "@ @@ -3421,9 +3585,9 @@ (set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")]) (define_insn "zero_extendsidi2_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,*y,?*Yi,*Y2") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2") (zero_extend:DI - (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))] + (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))] "TARGET_64BIT" "@ mov\t{%k1, %k0|%k0, %k1} @@ -3558,9 +3722,9 @@ emit_move_insn (operands[3], operands[1]); /* Generate a cltd if possible and doing so it profitable. */ - if (true_regnum (operands[1]) == 0 - && true_regnum (operands[2]) == 1 - && (optimize_size || TARGET_USE_CLTD)) + if ((optimize_size || TARGET_USE_CLTD) + && true_regnum (operands[1]) == AX_REG + && true_regnum (operands[2]) == DX_REG) { emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31))); } @@ -3589,8 +3753,8 @@ emit_move_insn (operands[3], operands[1]); /* Generate a cltd if possible and doing so it profitable. 
*/ - if (true_regnum (operands[3]) == 0 - && (optimize_size || TARGET_USE_CLTD)) + if ((optimize_size || TARGET_USE_CLTD) + && true_regnum (operands[3]) == AX_REG) { emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31))); DONE; @@ -3707,7 +3871,7 @@ ;; %%% Kill these when call knows how to work out a DFmode push earlier. (define_insn "*dummy_extendsfdf2" [(set (match_operand:DF 0 "push_operand" "=<") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY")))] + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY2")))] "0" "#") @@ -3784,6 +3948,49 @@ } }) +/* For converting SF(xmm2) to DF(xmm1), use the following code instead of + cvtss2sd: + unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs + cvtps2pd xmm2,xmm1 + We do the conversion post reload to avoid producing of 128bit spills + that might lead to ICE on 32bit target. The sequence unlikely combine + anyway. */ +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (float_extend:DF + (match_operand:SF 1 "nonimmediate_operand" "")))] + "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 2) + (float_extend:V2DF + (vec_select:V2SF + (match_dup 3) + (parallel [(const_int 0) (const_int 1)]))))] +{ + operands[2] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + operands[3] = simplify_gen_subreg (V4SFmode, operands[0], DFmode, 0); + /* Use movss for loading from memory, unpcklps reg, reg for registers. + Try to avoid move when unpacking can be done in source. */ + if (REG_P (operands[1])) + { + /* If it is unsafe to overwrite upper half of source, we need + to move to destination and unpack there. 
*/ + if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER + || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4) + && true_regnum (operands[0]) != true_regnum (operands[1])) + { + rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0])); + emit_move_insn (tmp, operands[1]); + } + else + operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); + emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3])); + } + else + emit_insn (gen_vec_setv4sf_0 (operands[3], + CONST0_RTX (V4SFmode), operands[1])); +}) + (define_insn "*extendsfdf2_mixed" [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x") (float_extend:DF @@ -3793,13 +4000,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return "cvtss2sd\t{%1, %0|%0, %1}"; @@ -3823,73 +4025,13 @@ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m") (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] "TARGET_80387" -{ - switch (which_alternative) - { - case 0: - return output_387_reg_move (insn, operands); - - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "fmov") - (set_attr "mode" "SF,XF")]) - -(define_expand "extendsfxf2" - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:SF 1 "general_operand" "")))] - "TARGET_80387" -{ - /* ??? Needed for compress_float_constant since all fp constants - are LEGITIMATE_CONSTANT_P. 
*/ - if (GET_CODE (operands[1]) == CONST_DOUBLE) - { - if (standard_80387_constant_p (operands[1]) > 0) - { - operands[1] = simplify_const_unary_operation - (FLOAT_EXTEND, XFmode, operands[1], SFmode); - emit_move_insn_1 (operands[0], operands[1]); - DONE; - } - operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); - } -}) - -(define_insn "*extendsfxf2_i387" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") - (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] - "TARGET_80387" -{ - switch (which_alternative) - { - case 0: - return output_387_reg_move (insn, operands); - - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - - default: - gcc_unreachable (); - } -} + "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") (set_attr "mode" "SF,XF")]) -(define_expand "extenddfxf2" +(define_expand "extendxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:DF 1 "general_operand" "")))] + (float_extend:XF (match_operand:MODEF 1 "general_operand" "")))] "TARGET_80387" { /* ??? 
Needed for compress_float_constant since all fp constants @@ -3899,38 +4041,22 @@ if (standard_80387_constant_p (operands[1]) > 0) { operands[1] = simplify_const_unary_operation - (FLOAT_EXTEND, XFmode, operands[1], DFmode); + (FLOAT_EXTEND, XFmode, operands[1], mode); emit_move_insn_1 (operands[0], operands[1]); DONE; } - operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); + operands[1] = validize_mem (force_const_mem (mode, operands[1])); } }) -(define_insn "*extenddfxf2_i387" +(define_insn "*extendxf2_i387" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") - (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] + (float_extend:XF + (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))] "TARGET_80387" -{ - switch (which_alternative) - { - case 0: - return output_387_reg_move (insn, operands); - - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - else - return "fstp%z0\t%y0"; - - default: - gcc_unreachable (); - } -} + "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") - (set_attr "mode" "DF,XF")]) + (set_attr "mode" ",XF")]) ;; %%% This seems bad bad news. ;; This cannot output into an f-reg because there is no way to be sure @@ -3952,12 +4078,58 @@ ; else { - rtx temp = assign_386_stack_local (SFmode, SLOT_TEMP); + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + rtx temp = assign_386_stack_local (SFmode, slot); emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp)); DONE; } }) +/* For converting DF(xmm2) to SF(xmm1), use the following code instead of + cvtsd2ss: + unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs + cvtpd2ps xmm2,xmm1 + We do the conversion post reload to avoid producing of 128bit spills + that might lead to ICE on 32bit target. 
The sequence unlikely combine + anyway. */ +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "")))] + "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 2) + (vec_concat:V4SF + (float_truncate:V2SF + (match_dup 4)) + (match_dup 3)))] +{ + operands[2] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + operands[3] = CONST0_RTX (V2SFmode); + operands[4] = simplify_gen_subreg (V2DFmode, operands[0], SFmode, 0); + /* Use movsd for loading from memory, unpcklpd for registers. + Try to avoid move when unpacking can be done in source, or SSE3 + movddup is available. */ + if (REG_P (operands[1])) + { + if (!TARGET_SSE3 + && true_regnum (operands[0]) != true_regnum (operands[1]) + && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER + || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8)) + { + rtx tmp = simplify_gen_subreg (DFmode, operands[0], SFmode, 0); + emit_move_insn (tmp, operands[1]); + operands[1] = tmp; + } + else if (!TARGET_SSE3) + operands[4] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + emit_insn (gen_vec_dupv2df (operands[4], operands[1])); + } + else + emit_insn (gen_sse2_loadlpd (operands[4], + CONST0_RTX (V2DFmode), operands[1])); +}) + (define_expand "truncdfsf2_with_temp" [(parallel [(set (match_operand:SF 0 "" "") (float_truncate:SF (match_operand:DF 1 "" ""))) @@ -3973,10 +4145,6 @@ switch (which_alternative) { case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; case 1: return output_387_reg_move (insn, operands); case 2: @@ -4018,10 +4186,8 @@ switch (which_alternative) { case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); + case 1: return "#"; case 2: @@ -4044,10 
+4210,8 @@ switch (which_alternative) { case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); + case 1: return "#"; default: @@ -4065,12 +4229,7 @@ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) && !TARGET_MIX_SSE_I387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} + "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") (set_attr "mode" "SF")]) @@ -4086,162 +4245,93 @@ operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1])); }) -;; Conversion from XFmode to SFmode. +;; Conversion from XFmode to {SF,DF}mode -(define_expand "truncxfsf2" - [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") - (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) +(define_expand "truncxf2" + [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) (clobber (match_dup 2))])] "TARGET_80387" { if (flag_unsafe_math_optimizations) { - rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SFmode); - emit_insn (gen_truncxfsf2_i387_noop (reg, operands[1])); + rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (mode); + emit_insn (gen_truncxf2_i387_noop (reg, operands[1])); if (reg != operands[0]) emit_move_insn (operands[0], reg); DONE; } else - operands[2] = assign_386_stack_local (SFmode, SLOT_TEMP); + { + int slot = virtuals_instantiated ? 
SLOT_TEMP : SLOT_VIRTUAL; + operands[2] = assign_386_stack_local (mode, slot); + } }) (define_insn "*truncxfsf2_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r,?x") + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?fx*r") (float_truncate:SF - (match_operand:XF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] + (match_operand:XF 1 "register_operand" "f,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m"))] "TARGET_80387" { gcc_assert (!which_alternative); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "unit" "*,i387,i387,i387") - (set_attr "mode" "SF")]) - -(define_insn "truncxfsf2_i387_noop" - [(set (match_operand:SF 0 "register_operand" "=f") - (float_truncate:SF (match_operand:XF 1 "register_operand" "f")))] - "TARGET_80387 && flag_unsafe_math_optimizations" - "* return output_387_reg_move (insn, operands);" - [(set_attr "type" "fmov") - (set_attr "mode" "SF")]) - -(define_insn "*truncxfsf2_i387" - [(set (match_operand:SF 0 "memory_operand" "=m") - (float_truncate:SF - (match_operand:XF 1 "register_operand" "f")))] - "TARGET_80387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); } - [(set_attr "type" "fmov") + [(set_attr "type" "fmov,multi") + (set_attr "unit" "*,i387") (set_attr "mode" "SF")]) -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:SF (match_dup 1))) - (set (match_dup 0) (match_dup 2))] - "") - -(define_split - [(set (match_operand:SF 0 "memory_operand" "") - (float_truncate:SF - (match_operand:XF 1 
"register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") - -;; Conversion from XFmode to DFmode. - -(define_expand "truncxfdf2" - [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") - (float_truncate:DF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" -{ - if (flag_unsafe_math_optimizations) - { - rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DFmode); - emit_insn (gen_truncxfdf2_i387_noop (reg, operands[1])); - if (reg != operands[0]) - emit_move_insn (operands[0], reg); - DONE; - } - else - operands[2] = assign_386_stack_local (DFmode, SLOT_TEMP); -}) - (define_insn "*truncxfdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r,?Y2*x") + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?fY2*r") (float_truncate:DF - (match_operand:XF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] + (match_operand:XF 1 "register_operand" "f,f"))) + (clobber (match_operand:DF 2 "memory_operand" "=X,m"))] "TARGET_80387" { gcc_assert (!which_alternative); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); } - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "unit" "*,i387,i387,i387") + [(set_attr "type" "fmov,multi") + (set_attr "unit" "*,i387") (set_attr "mode" "DF")]) -(define_insn "truncxfdf2_i387_noop" - [(set (match_operand:DF 0 "register_operand" "=f") - (float_truncate:DF (match_operand:XF 1 "register_operand" "f")))] +(define_insn "truncxf2_i387_noop" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" "f")))] "TARGET_80387 && flag_unsafe_math_optimizations" "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") - (set_attr "mode" 
"DF")]) + (set_attr "mode" "")]) -(define_insn "*truncxfdf2_i387" - [(set (match_operand:DF 0 "memory_operand" "=m") - (float_truncate:DF +(define_insn "*truncxf2_i387" + [(set (match_operand:MODEF 0 "memory_operand" "=m") + (float_truncate:MODEF (match_operand:XF 1 "register_operand" "f")))] "TARGET_80387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} + "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) (define_split - [(set (match_operand:DF 0 "register_operand" "") - (float_truncate:DF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" ""))] + [(set (match_operand:MODEF 0 "register_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:MODEF 2 "memory_operand" ""))] "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:DF (match_dup 1))) + [(set (match_dup 2) (float_truncate:MODEF (match_dup 1))) (set (match_dup 0) (match_dup 2))] "") (define_split - [(set (match_operand:DF 0 "memory_operand" "") - (float_truncate:DF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" ""))] + [(set (match_operand:MODEF 0 "memory_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:MODEF 2 "memory_operand" ""))] "TARGET_80387" - [(set (match_dup 0) (float_truncate:DF (match_dup 1)))] + [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))] "") ;; Signed conversion to DImode. 
@@ -4261,7 +4351,7 @@ (define_expand "fix_truncdi2" [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:SSEMODEF 1 "register_operand" ""))) + (fix:DI (match_operand:MODEF 1 "register_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (mode))" { @@ -4298,7 +4388,7 @@ (define_expand "fix_truncsi2" [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:SSEMODEF 1 "register_operand" ""))) + (fix:SI (match_operand:MODEF 1 "register_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "TARGET_80387 || SSE_FLOAT_MODE_P (mode)" { @@ -4340,7 +4430,7 @@ [(parallel [(set (match_operand:SI 0 "register_operand" "") (unsigned_fix:SI - (match_operand:SSEMODEF 1 "nonimmediate_operand" ""))) + (match_operand:MODEF 1 "nonimmediate_operand" ""))) (use (match_dup 2)) (clobber (match_scratch: 3 "")) (clobber (match_scratch: 4 ""))])] @@ -4360,7 +4450,7 @@ (define_insn_and_split "*fixuns_trunc_1" [(set (match_operand:SI 0 "register_operand" "=&x,&x") (unsigned_fix:SI - (match_operand:SSEMODEF 3 "nonimmediate_operand" "xm,xm"))) + (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm"))) (use (match_operand: 4 "nonimmediate_operand" "m,x")) (clobber (match_scratch: 1 "=x,&x")) (clobber (match_scratch: 2 "=x,x"))] @@ -4377,90 +4467,54 @@ ;; Without these patterns, we'll try the unsigned SI conversion which ;; is complex for SSE, rather than the signed SI conversion, which isn't. 
-(define_expand "fixuns_truncsfhi2" +(define_expand "fixuns_trunchi2" [(set (match_dup 2) - (fix:SI (match_operand:SF 1 "nonimmediate_operand" ""))) + (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" ""))) (set (match_operand:HI 0 "nonimmediate_operand" "") (subreg:HI (match_dup 2) 0))] - "TARGET_SSE_MATH" - "operands[2] = gen_reg_rtx (SImode);") - -(define_expand "fixuns_truncdfhi2" - [(set (match_dup 2) - (fix:SI (match_operand:DF 1 "nonimmediate_operand" ""))) - (set (match_operand:HI 0 "nonimmediate_operand" "") - (subreg:HI (match_dup 2) 0))] - "TARGET_SSE_MATH && TARGET_SSE2" + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" "operands[2] = gen_reg_rtx (SImode);") ;; When SSE is available, it is always faster to use it! -(define_insn "fix_truncsfdi_sse" +(define_insn "fix_truncdi_sse" [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_64BIT && TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)" - "cvttss2si{q}\t{%1, %0|%0, %1}" + (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] + "TARGET_64BIT && SSE_FLOAT_MODE_P (mode) + && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvtts2si{q}\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double")]) - -(define_insn "fix_truncdfdi_sse" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (match_operand:DF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_64BIT && TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)" - "cvttsd2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double")]) - -(define_insn "fix_truncsfsi_sse" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)" - "cvttss2si\t{%1, 
%0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") + (set_attr "mode" "") (set_attr "athlon_decode" "double,vector") (set_attr "amdfam10_decode" "double,double")]) -(define_insn "fix_truncdfsi_sse" +(define_insn "fix_truncsi_sse" [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI (match_operand:DF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)" - "cvttsd2si\t{%1, %0|%0, %1}" + (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] + "SSE_FLOAT_MODE_P (mode) + && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvtts2si\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") + (set_attr "mode" "") (set_attr "athlon_decode" "double,vector") (set_attr "amdfam10_decode" "double,double")]) ;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns. (define_peephole2 - [(set (match_operand:DF 0 "register_operand" "") - (match_operand:DF 1 "memory_operand" "")) + [(set (match_operand:MODEF 0 "register_operand" "") + (match_operand:MODEF 1 "memory_operand" "")) (set (match_operand:SSEMODEI24 2 "register_operand" "") (fix:SSEMODEI24 (match_dup 0)))] - "!TARGET_K8 - && peep2_reg_dead_p (2, operands[0])" - [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))] - "") - -(define_peephole2 - [(set (match_operand:SF 0 "register_operand" "") - (match_operand:SF 1 "memory_operand" "")) - (set (match_operand:SSEMODEI24 2 "register_operand" "") - (fix:SSEMODEI24 (match_dup 0)))] - "!TARGET_K8 + "TARGET_SHORTEN_X87_SSE && peep2_reg_dead_p (2, operands[0])" [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))] "") ;; Avoid vector decoded forms of the instruction. 
(define_peephole2 - [(match_scratch:DF 2 "Y") + [(match_scratch:DF 2 "Y2") (set (match_operand:SSEMODEI24 0 "register_operand" "") (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))] - "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size" + "TARGET_AVOID_VECTOR_DECODE && !optimize_size" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] "") @@ -4469,16 +4523,16 @@ [(match_scratch:SF 2 "x") (set (match_operand:SSEMODEI24 0 "register_operand" "") (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))] - "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size" + "TARGET_AVOID_VECTOR_DECODE && !optimize_size" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] "") (define_insn_and_split "fix_trunc_fisttp_i387_1" - [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") - (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))] - "TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" "")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && (TARGET_64BIT || mode != DImode)) && TARGET_SSE_MATH) @@ -4505,8 +4559,8 @@ [(set (match_operand:X87MODEI 0 "memory_operand" "=m") (fix:X87MODEI (match_operand 1 "register_operand" "f"))) (clobber (match_scratch:XF 2 "=&1f"))] - "TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && (TARGET_64BIT || mode != DImode)) && TARGET_SSE_MATH)" @@ -4519,8 +4573,8 @@ (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) (clobber (match_operand:X87MODEI 2 "memory_operand" "=m,m")) (clobber (match_scratch:XF 3 "=&1f,&1f"))] - "TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP && 
!((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && (TARGET_64BIT || mode != DImode)) && TARGET_SSE_MATH)" @@ -4555,11 +4609,11 @@ ;; clobbering insns can be used. Look at emit_i387_cw_initialization () ;; function in i386.c. (define_insn_and_split "*fix_trunc_i387_1" - [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") - (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && !TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && (TARGET_64BIT || mode != DImode)) && !(reload_completed || reload_in_progress)" @@ -4593,8 +4647,8 @@ (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m")) (clobber (match_scratch:XF 4 "=&1f"))] - "TARGET_80387 && !TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" "* return output_fix_trunc (insn, operands, 0);" [(set_attr "type" "fistp") @@ -4608,8 +4662,8 @@ (use (match_operand:HI 3 "memory_operand" "m,m")) (clobber (match_operand:DI 4 "memory_operand" "=m,m")) (clobber (match_scratch:XF 5 "=&1f,&1f"))] - "TARGET_80387 && !TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" "#" [(set_attr "type" "fistp") @@ -4650,8 +4704,8 @@ (fix:X87MODEI12 (match_operand 1 "register_operand" "f"))) (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m"))] - "TARGET_80387 && !TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && 
!SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" "* return output_fix_trunc (insn, operands, 0);" [(set_attr "type" "fistp") @@ -4664,8 +4718,8 @@ (use (match_operand:HI 2 "memory_operand" "m,m")) (use (match_operand:HI 3 "memory_operand" "m,m")) (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))] - "TARGET_80387 && !TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" "#" [(set_attr "type" "fistp") @@ -4715,48 +4769,122 @@ (set_attr "mode" "HI") (set_attr "unit" "i387") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector")]) ;; Conversion between fixed point and floating point. ;; Even though we only accept memory inputs, the backend _really_ ;; wants to be able to do this between registers. -(define_expand "floathisf2" - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:HI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" +(define_expand "floathi2" + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" { - if (TARGET_SSE_MATH) + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) { - emit_insn (gen_floatsisf2 (operands[0], - convert_to_mode (SImode, operands[1], 0))); + emit_insn + (gen_floatsi2 (operands[0], + convert_to_mode (SImode, operands[1], 0))); DONE; } }) -(define_insn "*floathisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (float:SF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" +(define_insn "*floathi2_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (float:MODEF + (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || 
TARGET_MIX_SSE_I387)" "@ fild%z1\t%1 #" [(set_attr "type" "fmov,multi") - (set_attr "mode" "SF") + (set_attr "mode" "") (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) -(define_expand "floatsisf2" - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" - "") +(define_expand "floatsi2" + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + " + /* When we use vector converts, we can't have input in memory. */ + if (GET_MODE (operands[0]) == DFmode + && TARGET_USE_VECTOR_CONVERTS && !optimize_size && TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (DFmode)) + operands[1] = force_reg (SImode, operands[1]); + else if (GET_MODE (operands[0]) == SFmode + && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (SFmode)) + { + /* When !flag_trapping_math, we handle SImode->SFmode vector + conversions same way as SImode->DFmode. + + For flat_trapping_math we can't safely use vector conversion without + clearing upper half, otherwise precision exception might occur. + However we can still generate the common sequence converting value + from general register to XMM register as: + + mov reg32, mem32 + movd mem32, xmm + cvtdq2pd xmm,xmm + + because we know that movd clears the upper half. + + Sadly in this case we can't rely on reload moving the value to XMM + register, since we need to know if upper half is OK, so we need + to do reloading by hand. We force operand to memory unless target + supports inter unit moves. */ + if (!flag_trapping_math) + operands[1] = force_reg (SImode, operands[1]); + else if (!MEM_P (operands[1])) + { + int slot = virtuals_instantiated ? 
SLOT_TEMP : SLOT_VIRTUAL; + rtx tmp = assign_386_stack_local (SImode, slot); + emit_move_insn (tmp, operands[1]); + operands[1] = tmp; + } + } + /* Offload operand of cvtsi2ss and cvtsi2sd into memory for + !TARGET_INTER_UNIT_CONVERSIONS + It is necessary for the patterns to not accept nonmemory operands + as we would optimize out later. */ + else if (!TARGET_INTER_UNIT_CONVERSIONS + && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && !optimize_size + && !MEM_P (operands[1])) + { + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), slot); + emit_move_insn (tmp, operands[1]); + operands[1] = tmp; + } + ") + +(define_insn "*floatsisf2_mixed_vector" + [(set (match_operand:SF 0 "register_operand" "=x,f,?f") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))] + "TARGET_MIX_SSE_I387 && !flag_trapping_math + && TARGET_USE_VECTOR_CONVERTS && !optimize_size" + "@ + cvtdq2ps\t{%1, %0|%0, %1} + fild%z1\t%1 + #" + [(set_attr "type" "sseicvt,fmov,multi") + (set_attr "mode" "SF") + (set_attr "unit" "*,i387,*") + (set_attr "athlon_decode" "double,*,*") + (set_attr "amdfam10_decode" "double,*,*") + (set_attr "fp_int_src" "false,true,true")]) (define_insn "*floatsisf2_mixed" [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))] - "TARGET_MIX_SSE_I387" + (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m")))] + "TARGET_MIX_SSE_I387 + && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) + || optimize_size)" "@ fild%z1\t%1 # @@ -4769,142 +4897,295 @@ (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatsisf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x,x") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))] - "TARGET_SSE_MATH" - "cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - 
(set_attr "athlon_decode" "vector,double") - (set_attr "amdfam10_decode" "vector,double") - (set_attr "fp_int_src" "true")]) - -(define_insn "*floatsisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" +(define_insn "*floatsisf2_mixed_memory" + [(set (match_operand:SF 0 "register_operand" "=f,x") + (float:SF (match_operand:SI 1 "memory_operand" "m,m")))] + "TARGET_MIX_SSE_I387 + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" "@ fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") + cvtsi2ss\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,sseicvt") (set_attr "mode" "SF") - (set_attr "unit" "*,i387") + (set_attr "athlon_decode" "*,double") + (set_attr "amdfam10_decode" "*,double") (set_attr "fp_int_src" "true")]) -(define_expand "floatdisf2" +(define_insn "*floatsisf2_sse_vector_nointernunit" + [(set (match_operand:SF 0 "register_operand" "=x") + (float:SF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_SSE_MATH && flag_trapping_math + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && !TARGET_INTER_UNIT_MOVES" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*floatsisf2_sse_vector_internunit" + [(set (match_operand:SF 0 "register_operand" "=x,x") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "rm,x")))] + "TARGET_SSE_MATH && flag_trapping_math + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && TARGET_INTER_UNIT_MOVES" + "#" + [(set_attr "type" "multi")]) + +(define_split [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)" - "") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "flag_trapping_math + && TARGET_USE_VECTOR_CONVERTS && reload_completed + && (TARGET_INTER_UNIT_MOVES || MEM_P (operands[1])) + && !SSE_REG_P (operands[1]) && SSE_REG_P (operands[0])" + [(set (match_dup 0) + (float:V4SF (match_dup 2)))] +{ + 
operands[2] = simplify_gen_subreg (V4SImode, operands[0], SFmode, 0); + operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), operands[1])); +}) -(define_insn "*floatdisf2_mixed" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))] - "TARGET_64BIT && TARGET_MIX_SSE_I387" - "@ - fild%z1\t%1 - # - cvtsi2ss{q}\t{%1, %0|%0, %1} - cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sseicvt,sseicvt") +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:SI 1 "register_operand" "")))] + "flag_trapping_math + && TARGET_USE_VECTOR_CONVERTS && reload_completed + && SSE_REG_P (operands[1]) && SSE_REG_P (operands[0])" + [(set (match_dup 2) (vec_duplicate:V4SI (match_dup 1))) + (set (match_dup 0) + (float:V4SF (match_dup 2)))] +{ + operands[2] = simplify_gen_subreg (V4SImode, operands[0], SFmode, 0); + operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); +}) + +(define_insn "*floatsisf2_sse_vector" + [(set (match_operand:SF 0 "register_operand" "=x") + (float:SF (match_operand:SI 1 "register_operand" "x")))] + "TARGET_SSE_MATH && !flag_trapping_math + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && !TARGET_INTER_UNIT_MOVES" + "cvtdq2ps\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") (set_attr "mode" "SF") - (set_attr "unit" "*,i387,*,*") - (set_attr "athlon_decode" "*,*,vector,double") - (set_attr "amdfam10_decode" "*,*,vector,double") + (set_attr "athlon_decode" "double") + (set_attr "amdfam10_decode" "double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdisf2_sse" +(define_insn "*floatsisf2_sse" [(set (match_operand:SF 0 "register_operand" "=x,x") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))] - "TARGET_64BIT && TARGET_SSE_MATH" - "cvtsi2ss{q}\t{%1, %0|%0, %1}" + (float:SF (match_operand:SI 1 "nonimmediate_operand" 
"r,m")))] + "TARGET_SSE_MATH + && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) + || optimize_size)" + "cvtsi2ss\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "SF") (set_attr "athlon_decode" "vector,double") (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" +(define_insn "*floatsisf2_sse_memory" + [(set (match_operand:SF 0 "register_operand" "=x") + (float:SF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_SSE_MATH + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "cvtsi2ss\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double") + (set_attr "amdfam10_decode" "double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsidf2_mixed_vector" + [(set (match_operand:DF 0 "register_operand" "=x,f,f") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && TARGET_USE_VECTOR_CONVERTS && !optimize_size" "@ + cvtdq2pd\t{%1, %0|%0, %1} fild%z1\t%1 #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "SF") - (set_attr "unit" "*,i387") + [(set_attr "type" "sseicvt,fmov,multi") + (set_attr "mode" "V2DF,DF,DF") + (set_attr "unit" "*,*,i387") + (set_attr "athlon_decode" "double,*,*") + (set_attr "amdfam10_decode" "double,*,*") + (set_attr "fp_int_src" "false,true,true")]) + +(define_insn "*floatsidf2_mixed" + [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x,!x") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m,x")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) + || optimize_size)" + "@ + fild%z1\t%1 + # + cvtsi2sd\t{%1, %0|%0, %1} + cvtsi2sd\t{%1, %0|%0, %1} + cvtdq2pd\t{%1, %0|%0, %1}" + [(set_attr "type" 
"fmov,multi,sseicvt,sseicvt,sseicvt") + (set_attr "mode" "DF,DF,DF,DF,V2DF") + (set_attr "unit" "*,i387,*,*,*") + (set_attr "athlon_decode" "*,*,double,direct,double") + (set_attr "amdfam10_decode" "*,*,vector,double,double") + (set_attr "fp_int_src" "true,true,true,true,false")]) + +(define_insn "*floatsidf2_mixed_memory" + [(set (match_operand:DF 0 "register_operand" "=f,x") + (float:DF (match_operand:SI 1 "memory_operand" "m,m")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "@ + fild%z1\t%1 + cvtsi2sd\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") (set_attr "fp_int_src" "true")]) -(define_expand "floathidf2" +(define_insn "*floatsidf2_sse_vector" + [(set (match_operand:DF 0 "register_operand" "=x") + (float:DF (match_operand:SI 1 "register_operand" "x")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size" + "cvtdq2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "V2DF") + (set_attr "athlon_decode" "double") + (set_attr "amdfam10_decode" "double") + (set_attr "fp_int_src" "true")]) + +(define_split [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:HI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" -{ - if (TARGET_SSE2 && TARGET_SSE_MATH) - { - emit_insn (gen_floatsidf2 (operands[0], - convert_to_mode (SImode, operands[1], 0))); - DONE; - } + (float:DF (match_operand:SI 1 "memory_operand" "")))] + "TARGET_USE_VECTOR_CONVERTS && reload_completed + && SSE_REG_P (operands[0])" + [(set (match_dup 0) + (float:V2DF + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 0) (const_int 1)]))))] +{ + operands[2] = simplify_gen_subreg (V4SImode, operands[0], DFmode, 0); + operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + emit_insn (gen_sse2_loadld (operands[2], 
CONST0_RTX (V4SImode), operands[1])); }) -(define_insn "*floathidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387 && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)" +(define_insn "*floatsidf2_sse" + [(set (match_operand:DF 0 "register_operand" "=x,x,!x") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,m,x")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) + || optimize_size)" + "@ + cvtsi2sd\t{%1, %0|%0, %1} + cvtsi2sd\t{%1, %0|%0, %1} + cvtdq2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF,DF,V2DF") + (set_attr "athlon_decode" "double,direct,double") + (set_attr "amdfam10_decode" "vector,double,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsidf2_memory" + [(set (match_operand:DF 0 "register_operand" "=x") + (float:DF (match_operand:SI 1 "memory_operand" "x")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) + || optimize_size)" + "cvtsi2sd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsi2_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387 + && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))" "@ fild%z1\t%1 #" [(set_attr "type" "fmov,multi") - (set_attr "mode" "DF") + (set_attr "mode" "") (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) -(define_expand "floatsidf2" - [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "") +(define_expand "floatdisf2" + [(set (match_operand:SF 0 
"register_operand" "") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)" +{ + if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT + && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (SFmode) + && !optimize_size + && !MEM_P (operands[1])) + { + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), slot); + emit_move_insn (tmp, operands[1]); + operands[1] = tmp; + } +}) -(define_insn "*floatsidf2_mixed" - [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))] - "TARGET_SSE2 && TARGET_MIX_SSE_I387" +(define_insn "*floatdisf2_mixed" + [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))] + "TARGET_64BIT && TARGET_MIX_SSE_I387 + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" "@ fild%z1\t%1 # - cvtsi2sd\t{%1, %0|%0, %1} - cvtsi2sd\t{%1, %0|%0, %1}" + cvtsi2ss{q}\t{%1, %0|%0, %1} + cvtsi2ss{q}\t{%1, %0|%0, %1}" [(set_attr "type" "fmov,multi,sseicvt,sseicvt") - (set_attr "mode" "DF") + (set_attr "mode" "SF") (set_attr "unit" "*,i387,*,*") - (set_attr "athlon_decode" "*,*,double,direct") + (set_attr "athlon_decode" "*,*,vector,double") (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatsidf2_sse" - [(set (match_operand:DF 0 "register_operand" "=x,x") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "cvtsi2sd\t{%1, %0|%0, %1}" +(define_insn "*floatdisf2_mixed" + [(set (match_operand:SF 0 "register_operand" "=f,x") + (float:SF (match_operand:DI 1 "memory_operand" "m,m")))] + "TARGET_64BIT && TARGET_MIX_SSE_I387 + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "@ + fild%z1\t%1 + cvtsi2ss{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" 
"*,double") + (set_attr "amdfam10_decode" "*,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatdisf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x,x") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,m")))] + "TARGET_64BIT && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" + "cvtsi2ss{q}\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,direct") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "vector,double") (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatsidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "DF") - (set_attr "unit" "*,i387") +(define_insn "*floatdisf2_memory" + [(set (match_operand:SF 0 "register_operand" "=x") + (float:SF (match_operand:DI 1 "memory_operand" "m")))] + "TARGET_64BIT && TARGET_SSE_MATH + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "cvtsi2ss{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double") + (set_attr "amdfam10_decode" "double") (set_attr "fp_int_src" "true")]) (define_expand "floatdidf2" @@ -4917,12 +5198,23 @@ ix86_expand_convert_sign_didf_sse (operands[0], operands[1]); DONE; } + if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT + && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (DFmode) + && !optimize_size + && !MEM_P (operands[1])) + { + int slot = virtuals_instantiated ? 
SLOT_TEMP : SLOT_VIRTUAL; + rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), slot); + emit_move_insn (tmp, operands[1]); + operands[1] = tmp; + } }) (define_insn "*floatdidf2_mixed" [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387" + (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))] + "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387 + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" "@ fild%z1\t%1 # @@ -4935,10 +5227,25 @@ (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) +(define_insn "*floatdidf2_mixed_memory" + [(set (match_operand:DF 0 "register_operand" "=f,x") + (float:DF (match_operand:DI 1 "memory_operand" "m,m")))] + "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387 + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "@ + fild%z1\t%1 + cvtsi2sd{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") + (set_attr "fp_int_src" "true")]) + (define_insn "*floatdidf2_sse" [(set (match_operand:DF 0 "register_operand" "=x,x") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH" + (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,m")))] + "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" "cvtsi2sd{q}\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "DF") @@ -4946,45 +5253,36 @@ (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") +(define_insn "*floatdidf2_sse_memory" + 
[(set (match_operand:DF 0 "register_operand" "=x") + (float:DF (match_operand:DI 1 "memory_operand" "m")))] + "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "cvtsi2sd{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") (set_attr "mode" "DF") - (set_attr "unit" "*,i387") - (set_attr "fp_int_src" "true")]) - -(define_insn "floathixf2" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") - (set_attr "unit" "*,i387") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") (set_attr "fp_int_src" "true")]) -(define_insn "floatsixf2" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" +(define_insn "*floatdi2_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (float:MODEF + (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] + "TARGET_80387 + && (!TARGET_SSE_MATH || !TARGET_64BIT + || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))" "@ fild%z1\t%1 #" [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") + (set_attr "mode" "") (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) -(define_insn "floatdixf2" +(define_insn "floatxf2" [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] + (float:XF (match_operand:X87MODEI 1 "nonimmediate_operand" "m,?r")))] "TARGET_80387" "@ fild%z1\t%1 @@ -4999,8 +5297,7 @@ [(set (match_operand 0 "fp_register_operand" "") (float (match_operand 1 "register_operand" "")))] "reload_completed - && TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[0]))" + && X87_FLOAT_MODE_P (GET_MODE (operands[0]))" [(const_int 0)] { operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); @@ -5010,24 +5307,15 @@ DONE; }) 
-(define_expand "floatunssisf2" - [(use (match_operand:SF 0 "register_operand" "")) +(define_expand "floatunssi2" + [(use (match_operand:MODEF 0 "register_operand" "")) (use (match_operand:SI 1 "nonimmediate_operand" ""))] - "!TARGET_64BIT" + "!TARGET_64BIT && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" { - if (TARGET_SSE_MATH && TARGET_SSE2) - ix86_expand_convert_uns_sisf_sse (operands[0], operands[1]); - else - x86_emit_floatuns (operands); + ix86_expand_convert_uns_si_sse (operands[0], operands[1]); DONE; }) -(define_expand "floatunssidf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:SI 1 "nonimmediate_operand" ""))] - "!TARGET_64BIT && TARGET_SSE_MATH && TARGET_SSE2" - "ix86_expand_convert_uns_sidf_sse (operands[0], operands[1]); DONE;") - (define_expand "floatunsdisf2" [(use (match_operand:SF 0 "register_operand" "")) (use (match_operand:DI 1 "nonimmediate_operand" ""))] @@ -5037,8 +5325,8 @@ (define_expand "floatunsdidf2" [(use (match_operand:DF 0 "register_operand" "")) (use (match_operand:DI 1 "nonimmediate_operand" ""))] - "TARGET_SSE_MATH && TARGET_SSE2 - && (TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)" + "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK) + && TARGET_SSE2 && TARGET_SSE_MATH" { if (TARGET_64BIT) x86_emit_floatuns (operands); @@ -5047,9 +5335,6 @@ DONE; }) -;; SSE extract/set expanders - - ;; Add instructions ;; %%% splits for addditi3 @@ -5065,7 +5350,7 @@ (define_insn "*addti3_1" [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0") - (match_operand:TI 2 "general_operand" "roiF,riF"))) + (match_operand:TI 2 "x86_64_general_operand" "roe,re"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)" "#") @@ -5073,7 +5358,7 @@ (define_split [(set (match_operand:TI 0 "nonimmediate_operand" "") (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") - (match_operand:TI 2 "general_operand" ""))) + 
(match_operand:TI 2 "x86_64_general_operand" ""))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && reload_completed" [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)] @@ -5151,6 +5436,56 @@ [(set_attr "type" "alu") (set_attr "mode" "DI")]) +(define_insn "*3_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plusminus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operand:SWI 2 "" ",m")) + (match_dup 1))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (plusminus:SWI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (, mode, operands)" + "{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*add3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI (match_operand:SWI 1 "nonimmediate_operand" "%0") + (match_operand:SWI 2 "" "m")) + (match_dup 1))) + (clobber (match_scratch:SWI 0 "="))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "add{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*sub3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "m,") + (match_operand:SWI 1 "" ",m")) + (match_dup 0)))] + "" + "cmp{}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "")]) + +(define_insn "*si3_zext_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plusminus:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "g")) + (match_dup 1))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" + "{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + (define_insn "addqi3_carry" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "") @@ -5192,7 +5527,7 @@ 
(zero_extend:DI (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") (match_operand:SI 1 "nonimmediate_operand" "%0")) - (match_operand:SI 2 "general_operand" "rim")))) + (match_operand:SI 2 "general_operand" "g")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" "adc{l}\t{%2, %k0|%k0, %2}" @@ -6770,18 +7105,11 @@ "TARGET_80387" "") -(define_expand "adddf3" - [(set (match_operand:DF 0 "register_operand" "") - (plus:DF (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "") - -(define_expand "addsf3" - [(set (match_operand:SF 0 "register_operand" "") - (plus:SF (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" +(define_expand "add3" + [(set (match_operand:MODEF 0 "register_operand" "") + (plus:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "") ;; Subtract instructions @@ -6799,7 +7127,7 @@ (define_insn "*subti3_1" [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") (minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0") - (match_operand:TI 2 "general_operand" "roiF,riF"))) + (match_operand:TI 2 "x86_64_general_operand" "roe,re"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)" "#") @@ -6807,7 +7135,7 @@ (define_split [(set (match_operand:TI 0 "nonimmediate_operand" "") (minus:TI (match_operand:TI 1 "nonimmediate_operand" "") - (match_operand:TI 2 "general_operand" ""))) + (match_operand:TI 2 "x86_64_general_operand" ""))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && reload_completed" [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) @@ -6941,11 +7269,11 @@ (set_attr "mode" "SI")]) (define_insn "subsi3_carry_zext" - 
[(set (match_operand:DI 0 "register_operand" "=rm,r") + [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (minus:SI (match_operand:SI 1 "register_operand" "0,0") + (minus:SI (match_operand:SI 1 "register_operand" "0") (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") - (match_operand:SI 2 "general_operand" "ri,rm"))))) + (match_operand:SI 2 "general_operand" "g"))))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" "sbb{l}\t{%2, %k0|%k0, %2}" @@ -6975,7 +7303,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "general_operand" "rim")))) + (match_operand:SI 2 "general_operand" "g")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" "sub{l}\t{%2, %k0|%k0, %2}" @@ -7000,7 +7328,7 @@ [(set (reg FLAGS_REG) (compare (minus:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI @@ -7027,7 +7355,7 @@ (define_insn "*subsi_3_zext" [(set (reg FLAGS_REG) (compare (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "general_operand" "rim"))) + (match_operand:SI 2 "general_operand" "g"))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (minus:SI (match_dup 1) @@ -7146,18 +7474,11 @@ "TARGET_80387" "") -(define_expand "subdf3" - [(set (match_operand:DF 0 "register_operand" "") - (minus:DF (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "") - -(define_expand "subsf3" - [(set (match_operand:SF 0 "register_operand" "") - (minus:SF (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" 
+(define_expand "sub3" + [(set (match_operand:MODEF 0 "register_operand" "") + (minus:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "") ;; Multiply instructions @@ -7170,11 +7491,11 @@ "TARGET_64BIT" "") -;; On AMDFAM10 +;; On AMDFAM10 ;; IMUL reg64, reg64, imm8 Direct ;; IMUL reg64, mem64, imm8 VectorPath ;; IMUL reg64, reg64, imm32 Direct -;; IMUL reg64, mem64, imm32 VectorPath +;; IMUL reg64, mem64, imm32 VectorPath ;; IMUL reg64, reg64 Direct ;; IMUL reg64, mem64 Direct @@ -7204,7 +7525,7 @@ (cond [(and (eq_attr "alternative" "0,1") (match_operand 1 "memory_operand" "")) (const_string "vector")] - (const_string "direct"))) + (const_string "direct"))) (set_attr "mode" "DI")]) (define_expand "mulsi3" @@ -7215,7 +7536,7 @@ "" "") -;; On AMDFAM10 +;; On AMDFAM10 ;; IMUL reg32, reg32, imm8 Direct ;; IMUL reg32, mem32, imm8 VectorPath ;; IMUL reg32, reg32, imm32 Direct @@ -7248,7 +7569,7 @@ (cond [(and (eq_attr "alternative" "0,1") (match_operand 1 "memory_operand" "")) (const_string "vector")] - (const_string "direct"))) + (const_string "direct"))) (set_attr "mode" "SI")]) (define_insn "*mulsi3_1_zext" @@ -7278,7 +7599,7 @@ (cond [(and (eq_attr "alternative" "0,1") (match_operand 1 "memory_operand" "")) (const_string "vector")] - (const_string "direct"))) + (const_string "direct"))) (set_attr "mode" "SI")]) (define_expand "mulhi3" @@ -7346,7 +7667,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "direct"))) - (set_attr "amdfam10_decode" "direct") + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "umulqihi3" @@ -7373,7 +7694,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "direct"))) - (set_attr "amdfam10_decode" "direct") + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "mulqihi3" @@ -7398,7 +7719,7 @@ (if_then_else 
(eq_attr "cpu" "athlon") (const_string "vector") (const_string "direct"))) - (set_attr "amdfam10_decode" "direct") + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "umulditi3" @@ -7425,7 +7746,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) - (set_attr "amdfam10_decode" "double") + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) ;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers @@ -7453,7 +7774,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) - (set_attr "amdfam10_decode" "double") + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_expand "mulditi3" @@ -7507,7 +7828,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) - (set_attr "amdfam10_decode" "double") + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_expand "umuldi3_highpart" @@ -7544,7 +7865,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) - (set_attr "amdfam10_decode" "double") + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) (define_expand "umulsi3_highpart" @@ -7607,7 +7928,7 @@ (set_attr "mode" "SI")]) (define_expand "smuldi3_highpart" - [(parallel [(set (match_operand:DI 0 "register_operand" "=d") + [(parallel [(set (match_operand:DI 0 "register_operand" "") (truncate:DI (lshiftrt:TI (mult:TI (sign_extend:TI @@ -7708,19 +8029,15 @@ "TARGET_80387" "") -(define_expand "muldf3" - [(set (match_operand:DF 0 "register_operand" "") - (mult:DF (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +(define_expand "mul3" + [(set (match_operand:MODEF 0 "register_operand" "") + (mult:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + 
"TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "") -(define_expand "mulsf3" - [(set (match_operand:SF 0 "register_operand" "") - (mult:SF (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" - "") +;; SSE5 scalar multiply/add instructions are defined in sse.md. + ;; Divide instructions @@ -7765,7 +8082,16 @@ (div:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] "TARGET_80387 || TARGET_SSE_MATH" - "") +{ + if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swdivsf (operands[0], operands[1], + operands[2], SFmode); + DONE; + } +}) ;; Remainder instructions. @@ -8561,7 +8887,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")))) + (match_operand:SI 2 "general_operand" "g")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)" "and{l}\t{%2, %k0|%k0, %2}" @@ -8571,7 +8897,7 @@ (define_insn "*andsi_2" [(set (reg FLAGS_REG) (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rim,ri")) + (match_operand:SI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") (and:SI (match_dup 1) (match_dup 2)))] @@ -8585,7 +8911,7 @@ (define_insn "*andsi_2_zext" [(set (reg FLAGS_REG) (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] @@ -8630,7 +8956,7 @@ (define_insn "*andhi_2" [(set (reg FLAGS_REG) (compare (and:HI (match_operand:HI 1 
"nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rim,ri")) + (match_operand:HI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (and:HI (match_dup 1) (match_dup 2)))] @@ -8939,7 +9265,7 @@ (define_insn "*iorsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "ri,rmi"))) + (match_operand:SI 2 "general_operand" "ri,g"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (IOR, SImode, operands)" "or{l}\t{%2, %0|%0, %2}" @@ -8948,10 +9274,10 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*iorsi_1_zext" - [(set (match_operand:DI 0 "register_operand" "=rm") + [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")))) + (match_operand:SI 2 "general_operand" "g")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (IOR, SImode, operands)" "or{l}\t{%2, %k0|%k0, %2}" @@ -8959,7 +9285,7 @@ (set_attr "mode" "SI")]) (define_insn "*iorsi_1_zext_imm" - [(set (match_operand:DI 0 "register_operand" "=rm") + [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) (clobber (reg:CC FLAGS_REG))] @@ -8971,7 +9297,7 @@ (define_insn "*iorsi_2" [(set (reg FLAGS_REG) (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rim,ri")) + (match_operand:SI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") (ior:SI (match_dup 1) (match_dup 2)))] @@ -8986,7 +9312,7 @@ (define_insn "*iorsi_2_zext" [(set (reg FLAGS_REG) (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 
"general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ior:SI (match_dup 1) (match_dup 2))))] @@ -9012,7 +9338,7 @@ (define_insn "*iorsi_3" [(set (reg FLAGS_REG) (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) @@ -9032,7 +9358,7 @@ (define_insn "*iorhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rmi,ri"))) + (match_operand:HI 2 "general_operand" "g,ri"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (IOR, HImode, operands)" "or{w}\t{%2, %0|%0, %2}" @@ -9042,7 +9368,7 @@ (define_insn "*iorhi_2" [(set (reg FLAGS_REG) (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rim,ri")) + (match_operand:HI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (ior:HI (match_dup 1) (match_dup 2)))] @@ -9055,7 +9381,7 @@ (define_insn "*iorhi_3" [(set (reg FLAGS_REG) (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0") - (match_operand:HI 2 "general_operand" "rim")) + (match_operand:HI 2 "general_operand" "g")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) @@ -9328,7 +9654,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")))) + (match_operand:SI 2 "general_operand" "g")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)" "xor{l}\t{%2, %k0|%k0, %2}" @@ -9348,7 +9674,7 @@ (define_insn "*xorsi_2" [(set (reg FLAGS_REG) (compare (xor:SI 
(match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rim,ri")) + (match_operand:SI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") (xor:SI (match_dup 1) (match_dup 2)))] @@ -9363,7 +9689,7 @@ (define_insn "*xorsi_2_zext" [(set (reg FLAGS_REG) (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (xor:SI (match_dup 1) (match_dup 2))))] @@ -9389,7 +9715,7 @@ (define_insn "*xorsi_3" [(set (reg FLAGS_REG) (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) @@ -9409,7 +9735,7 @@ (define_insn "*xorhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rmi,ri"))) + (match_operand:HI 2 "general_operand" "g,ri"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (XOR, HImode, operands)" "xor{w}\t{%2, %0|%0, %2}" @@ -9419,7 +9745,7 @@ (define_insn "*xorhi_2" [(set (reg FLAGS_REG) (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rim,ri")) + (match_operand:HI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (xor:HI (match_dup 1) (match_dup 2)))] @@ -9432,7 +9758,7 @@ (define_insn "*xorhi_3" [(set (reg FLAGS_REG) (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0") - (match_operand:HI 2 "general_operand" "rim")) + (match_operand:HI 2 "general_operand" "g")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) @@ -9701,7 +10027,7 @@ 
(define_insn "*negti2_1" [(set (match_operand:TI 0 "nonimmediate_operand" "=ro") - (neg:TI (match_operand:TI 1 "general_operand" "0"))) + (neg:TI (match_operand:TI 1 "nonimmediate_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_unary_operator_ok (NEG, TImode, operands)" @@ -9709,7 +10035,7 @@ (define_split [(set (match_operand:TI 0 "nonimmediate_operand" "") - (neg:TI (match_operand:TI 1 "general_operand" ""))) + (neg:TI (match_operand:TI 1 "nonimmediate_operand" ""))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && reload_completed" [(parallel @@ -9907,218 +10233,64 @@ ;; Changing of sign for FP values is doable using integer unit too. -(define_expand "negsf2" - [(set (match_operand:SF 0 "nonimmediate_operand" "") - (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_fp_absneg_operator (NEG, SFmode, operands); DONE;") - -(define_expand "abssf2" - [(set (match_operand:SF 0 "nonimmediate_operand" "") - (abs:SF (match_operand:SF 1 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_fp_absneg_operator (ABS, SFmode, operands); DONE;") - -(define_insn "*absnegsf2_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x ,x,f,rm") - (match_operator:SF 3 "absneg_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0,0 ")])) - (use (match_operand:V4SF 2 "nonimmediate_operand" "xm ,0,X,X ")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE_MATH && TARGET_MIX_SSE_I387 - && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" - "#") - -(define_insn "*absnegsf2_sse" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x,x,rm") - (match_operator:SF 3 "absneg_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0")])) - (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,X")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE_MATH - && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" - "#") - -(define_insn "*absnegsf2_i387" - 
[(set (match_operand:SF 0 "nonimmediate_operand" "=f,rm") - (match_operator:SF 3 "absneg_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0,0")])) - (use (match_operand 2 "" "")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && !TARGET_SSE_MATH - && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" - "#") - -(define_expand "copysignsf3" - [(match_operand:SF 0 "register_operand" "") - (match_operand:SF 1 "nonmemory_operand" "") - (match_operand:SF 2 "register_operand" "")] - "TARGET_SSE_MATH" -{ - ix86_expand_copysign (operands); - DONE; -}) - -(define_insn_and_split "copysignsf3_const" - [(set (match_operand:SF 0 "register_operand" "=x") - (unspec:SF - [(match_operand:V4SF 1 "vector_move_operand" "xmC") - (match_operand:SF 2 "register_operand" "0") - (match_operand:V4SF 3 "nonimmediate_operand" "xm")] - UNSPEC_COPYSIGN))] - "TARGET_SSE_MATH" - "#" - "&& reload_completed" - [(const_int 0)] -{ - ix86_split_copysign_const (operands); - DONE; -}) - -(define_insn "copysignsf3_var" - [(set (match_operand:SF 0 "register_operand" "=x, x, x, x,x") - (unspec:SF - [(match_operand:SF 2 "register_operand" " x, 0, 0, x,x") - (match_operand:SF 3 "register_operand" " 1, 1, x, 1,x") - (match_operand:V4SF 4 "nonimmediate_operand" " X,xm,xm, 0,0") - (match_operand:V4SF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")] - UNSPEC_COPYSIGN)) - (clobber (match_scratch:V4SF 1 "=x, x, x, x,x"))] - "TARGET_SSE_MATH" - "#") - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (unspec:SF - [(match_operand:SF 2 "register_operand" "") - (match_operand:SF 3 "register_operand" "") - (match_operand:V4SF 4 "" "") - (match_operand:V4SF 5 "" "")] - UNSPEC_COPYSIGN)) - (clobber (match_scratch:V4SF 1 ""))] - "TARGET_SSE_MATH && reload_completed" - [(const_int 0)] -{ - ix86_split_copysign_var (operands); - DONE; -}) - -(define_expand "negdf2" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))] - 
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "ix86_expand_fp_absneg_operator (NEG, DFmode, operands); DONE;") +(define_expand "neg<mode>2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (neg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" + "ix86_expand_fp_absneg_operator (NEG, <MODE>mode, operands); DONE;") -(define_expand "absdf2" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "ix86_expand_fp_absneg_operator (ABS, DFmode, operands); DONE;") +(define_expand "abs<mode>2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (abs:X87MODEF (match_operand:X87MODEF 1 "register_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" + "ix86_expand_fp_absneg_operator (ABS, <MODE>mode, operands); DONE;") -(define_insn "*absnegdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=x,x,f,rm") - (match_operator:DF 3 "absneg_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0 ,x,0,0")])) - (use (match_operand:V2DF 2 "nonimmediate_operand" "xm,0,X,X")) +(define_insn "*absneg<mode>2_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r") + (match_operator:MODEF 3 "absneg_operator" + [(match_operand:MODEF 1 "register_operand" "0,x,0,0")])) + (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X")) (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 - && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" + "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (<MODE>mode)" "#") -(define_insn "*absnegdf2_sse" - [(set (match_operand:DF 0 "nonimmediate_operand" "=x,x,rm") - (match_operator:DF 3 "absneg_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0 ,x,0 ")])) - (use (match_operand:V2DF 2 "nonimmediate_operand" "xm,0,X ")) +(define_insn 
"*absneg<mode>2_sse" + [(set (match_operand:MODEF 0 
"register_operand" "=x,x,!r") + (match_operator:MODEF 3 "absneg_operator" + [(match_operand:MODEF 1 "register_operand" "0 ,x,0")])) + (use (match_operand:<ssevecmode> 2 "register_operand" "xm,0,X")) (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 && TARGET_SSE_MATH - && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" "#") -(define_insn "*absnegdf2_i387" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,rm") - (match_operator:DF 3 "absneg_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0,0")])) +(define_insn "*absneg<mode>2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r") + (match_operator:X87MODEF 3 "absneg_operator" + [(match_operand:X87MODEF 1 "register_operand" "0,0")])) (use (match_operand 2 "" "")) (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) - && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" - "#") - -(define_expand "copysigndf3" - [(match_operand:DF 0 "register_operand" "") - (match_operand:DF 1 "nonmemory_operand" "") - (match_operand:DF 2 "register_operand" "")] - "TARGET_SSE2 && TARGET_SSE_MATH" -{ - ix86_expand_copysign (operands); - DONE; -}) - -(define_insn_and_split "copysigndf3_const" - [(set (match_operand:DF 0 "register_operand" "=x") - (unspec:DF - [(match_operand:V2DF 1 "vector_move_operand" "xmC") - (match_operand:DF 2 "register_operand" "0") - (match_operand:V2DF 3 "nonimmediate_operand" "xm")] - UNSPEC_COPYSIGN))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "#" - "&& reload_completed" - [(const_int 0)] -{ - ix86_split_copysign_const (operands); - DONE; -}) - -(define_insn "copysigndf3_var" - [(set (match_operand:DF 0 "register_operand" "=x, x, x, x,x") - (unspec:DF - [(match_operand:DF 2 "register_operand" " x, 0, 0, x,x") - (match_operand:DF 3 "register_operand" " 1, 1, x, 1,x") - (match_operand:V2DF 4 "nonimmediate_operand" " X,xm,xm, 0,0") - (match_operand:V2DF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")] - 
UNSPEC_COPYSIGN)) - (clobber (match_scratch:V2DF 1 "=x, x, x, x,x"))] - "TARGET_SSE2 && TARGET_SSE_MATH" + "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "#") -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (unspec:DF - [(match_operand:DF 2 "register_operand" "") - (match_operand:DF 3 "register_operand" "") - (match_operand:V2DF 4 "" "") - (match_operand:V2DF 5 "" "")] - UNSPEC_COPYSIGN)) - (clobber (match_scratch:V2DF 1 ""))] - "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" - [(const_int 0)] -{ - ix86_split_copysign_var (operands); - DONE; -}) - -(define_expand "negxf2" - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))] - "TARGET_80387" - "ix86_expand_fp_absneg_operator (NEG, XFmode, operands); DONE;") +(define_expand "negtf2" + [(set (match_operand:TF 0 "register_operand" "") + (neg:TF (match_operand:TF 1 "register_operand" "")))] + "TARGET_64BIT" + "ix86_expand_fp_absneg_operator (NEG, TFmode, operands); DONE;") -(define_expand "absxf2" - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (abs:XF (match_operand:XF 1 "nonimmediate_operand" "")))] - "TARGET_80387" - "ix86_expand_fp_absneg_operator (ABS, XFmode, operands); DONE;") +(define_expand "abstf2" + [(set (match_operand:TF 0 "register_operand" "") + (abs:TF (match_operand:TF 1 "register_operand" "")))] + "TARGET_64BIT" + "ix86_expand_fp_absneg_operator (ABS, TFmode, operands); DONE;") -(define_insn "*absnegxf2_i387" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,?rm") - (match_operator:XF 3 "absneg_operator" - [(match_operand:XF 1 "nonimmediate_operand" "0,0")])) - (use (match_operand 2 "" "")) +(define_insn "*absnegtf2_sse" + [(set (match_operand:TF 0 "register_operand" "=x,x") + (match_operator:TF 3 "absneg_operator" + [(match_operand:TF 1 "register_operand" "0,x")])) + (use (match_operand:TF 2 "nonimmediate_operand" "xm,0")) (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 - && 
ix86_unary_operator_ok (GET_CODE (operands[3]), XFmode, operands)" + "TARGET_64BIT" "#") ;; Splitters for fp abs and neg. @@ -10241,89 +10413,32 @@ } else { - tmp = GEN_INT (0x8000); - tmp = gen_rtx_XOR (SImode, operands[0], tmp); - } - operands[1] = tmp; -}) - -(define_split - [(set (match_operand 0 "memory_operand" "") - (match_operator 1 "absneg_operator" [(match_dup 0)])) - (use (match_operand 2 "" "")) - (clobber (reg:CC FLAGS_REG))] - "reload_completed" - [(parallel [(set (match_dup 0) (match_dup 1)) - (clobber (reg:CC FLAGS_REG))])] -{ - enum machine_mode mode = GET_MODE (operands[0]); - int size = mode == XFmode ? 10 : GET_MODE_SIZE (mode); - rtx tmp; - - operands[0] = adjust_address (operands[0], QImode, size - 1); - if (GET_CODE (operands[1]) == ABS) - { - tmp = gen_int_mode (0x7f, QImode); - tmp = gen_rtx_AND (QImode, operands[0], tmp); - } - else - { - tmp = gen_int_mode (0x80, QImode); - tmp = gen_rtx_XOR (QImode, operands[0], tmp); - } - operands[1] = tmp; -}) - -;; Conditionalize these after reload. If they match before reload, we -;; lose the clobber and ability to use integer instructions. 
- -(define_insn "*negsf2_1" - [(set (match_operand:SF 0 "register_operand" "=f") - (neg:SF (match_operand:SF 1 "register_operand" "0")))] - "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "SF")]) - -(define_insn "*negdf2_1" - [(set (match_operand:DF 0 "register_operand" "=f") - (neg:DF (match_operand:DF 1 "register_operand" "0")))] - "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) - -(define_insn "*negxf2_1" - [(set (match_operand:XF 0 "register_operand" "=f") - (neg:XF (match_operand:XF 1 "register_operand" "0")))] - "TARGET_80387" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) + tmp = GEN_INT (0x8000); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + operands[1] = tmp; +}) -(define_insn "*abssf2_1" - [(set (match_operand:SF 0 "register_operand" "=f") - (abs:SF (match_operand:SF 1 "register_operand" "0")))] - "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "SF")]) +;; Conditionalize these after reload. If they match before reload, we +;; lose the clobber and ability to use integer instructions. 
-(define_insn "*absdf2_1" - [(set (match_operand:DF 0 "register_operand" "=f") - (abs:DF (match_operand:DF 1 "register_operand" "0")))] - "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))" - "fabs" +(define_insn "*neg<mode>2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (neg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))] + "TARGET_80387 + && (reload_completed || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))" + "fchs" [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) + (set_attr "mode" "<MODE>")]) -(define_insn "*absxf2_1" - [(set (match_operand:XF 0 "register_operand" "=f") - (abs:XF (match_operand:XF 1 "register_operand" "0")))] - "TARGET_80387" +(define_insn "*abs<mode>2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (abs:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))] + "TARGET_80387 + && (reload_completed || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))" "fabs" [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) + (set_attr "mode" "<MODE>")]) (define_insn "*negextendsfdf2" [(set (match_operand:DF 0 "register_operand" "=f") @@ -10378,6 +10493,70 @@ "fabs" [(set_attr "type" "fsgn") (set_attr "mode" "XF")]) + +;; Copysign instructions + +(define_mode_iterator CSGNMODE [SF DF TF]) +(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")]) + +(define_expand "copysign<mode>3" + [(match_operand:CSGNMODE 0 "register_operand" "") + (match_operand:CSGNMODE 1 "nonmemory_operand" "") + (match_operand:CSGNMODE 2 "register_operand" "")] + "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || (TARGET_64BIT && (<MODE>mode == TFmode))" +{ + ix86_expand_copysign (operands); + DONE; +}) + +(define_insn_and_split "copysign<mode>3_const" + [(set (match_operand:CSGNMODE 0 "register_operand" "=x") + (unspec:CSGNMODE + [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC") + (match_operand:CSGNMODE 2 "register_operand" "0") + (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")] + UNSPEC_COPYSIGN))] + "(SSE_FLOAT_MODE_P (<MODE>mode) && 
TARGET_SSE_MATH) + || (TARGET_64BIT && (<MODE>mode == TFmode))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_const (operands); + DONE; +}) + +(define_insn "copysign<mode>3_var" + [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x") + (unspec:CSGNMODE + [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x") + (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x") + (match_operand:<CSGNVMODE> 4 "nonimmediate_operand" "X,xm,xm,0,0") + (match_operand:<CSGNVMODE> 5 "nonimmediate_operand" "0,xm,1,xm,1")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))] + "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || (TARGET_64BIT && (<MODE>mode == TFmode))" + "#") + +(define_split + [(set (match_operand:CSGNMODE 0 "register_operand" "") + (unspec:CSGNMODE + [(match_operand:CSGNMODE 2 "register_operand" "") + (match_operand:CSGNMODE 3 "register_operand" "") + (match_operand:<CSGNVMODE> 4 "" "") + (match_operand:<CSGNVMODE> 5 "" "")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch:<CSGNVMODE> 1 ""))] + "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || (TARGET_64BIT && (<MODE>mode == TFmode))) + && reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_var (operands); + DONE; +}) ;; One complement instructions @@ -10636,6 +10815,22 @@ "#" [(set_attr "type" "multi")]) +;; This pattern must be defined before *ashlti3_2 to prevent +;; combine pass from converting sse2_ashlti3 to *ashlti3_2. 
+ +(define_insn "sse2_ashlti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (ashift:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "pslldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + (define_insn "*ashlti3_2" [(set (match_operand:TI 0 "register_operand" "=r") (ashift:TI (match_operand:TI 1 "register_operand" "0") @@ -10679,7 +10874,7 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "DI") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector")]) (define_expand "x86_64_shift_adj" [(set (reg:CCZ FLAGS_REG) @@ -10771,13 +10966,14 @@ (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashift:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, DImode, operands) + "TARGET_64BIT && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && (TARGET_SHIFT1 - || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands)" { switch (get_attr_type (insn)) { @@ -10812,13 +11008,14 @@ (match_operand:QI 2 "immediate_operand" "e")) (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, DImode, operands) + "TARGET_64BIT && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && (TARGET_SHIFT1 - || TARGET_DOUBLE_WITH_ADD)))" + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands)" { switch (get_attr_type (insn)) { @@ -10875,7 +11072,7 @@ (match_operand:QI 2 
"nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) - ? flow2_completed : reload_completed)" + ? epilogue_completed : reload_completed)" [(const_int 0)] "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;") @@ -10895,7 +11092,7 @@ (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector")]) (define_expand "x86_shift_adj_1" [(set (reg:CCZ FLAGS_REG) @@ -11099,13 +11296,13 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashift:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, SImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { @@ -11140,13 +11337,13 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, SImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || TARGET_DOUBLE_WITH_ADD)))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { @@ -11182,13 +11379,14 @@ (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] - 
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, SImode, operands) + "TARGET_64BIT && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && (TARGET_SHIFT1 - || TARGET_DOUBLE_WITH_ADD)))" + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { @@ -11306,13 +11504,13 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashift:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" { switch (get_attr_type (insn)) { @@ -11347,13 +11545,13 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || TARGET_DOUBLE_WITH_ADD)))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" { switch (get_attr_type (insn)) { @@ -11510,13 +11708,13 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashift:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, QImode, operands) - && (optimize_size 
- || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" { switch (get_attr_type (insn)) { @@ -11551,13 +11749,13 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, QImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || TARGET_DOUBLE_WITH_ADD)))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" { switch (get_attr_type (insn)) { @@ -11656,7 +11854,7 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "DI") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector")]) (define_expand "ashrdi3" [(set (match_operand:DI 0 "shiftdi_operand" "") @@ -11687,8 +11885,9 @@ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11719,8 +11918,9 @@ (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT && 
(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t%0" [(set_attr "type" "ishift") @@ -11736,8 +11936,9 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT && (TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t%0" [(set_attr "type" "ishift") @@ -11754,10 +11955,10 @@ (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) @@ -11769,10 +11970,10 @@ (match_operand:QI 2 "const_int_operand" "n")) (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) @@ -11806,7 +12007,7 @@ (match_operand:QI 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) - ? flow2_completed : reload_completed)" + ? 
epilogue_completed : reload_completed)" [(const_int 0)] "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;") @@ -11900,8 +12101,8 @@ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11914,8 +12115,9 @@ (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") (match_operand:QI 2 "const1_operand" "")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%k0" [(set_attr "type" "ishift") (set_attr "length" "2")]) @@ -11955,8 +12157,8 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%0" [(set_attr "type" "ishift") @@ -11972,8 +12174,8 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%0" [(set_attr "type" "ishift") @@ -11987,8 +12189,9 @@ (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) + "TARGET_64BIT && (TARGET_SHIFT1 || 
optimize_size) + && ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%k0" [(set_attr "type" "ishift") @@ -12005,10 +12208,9 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -12020,10 +12222,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -12036,10 +12237,10 @@ (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -12057,8 +12258,8 @@ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, HImode, operands) - && 
(TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12089,8 +12290,8 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t%0" [(set_attr "type" "ishift") @@ -12106,8 +12307,8 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t%0" [(set_attr "type" "ishift") @@ -12124,10 +12325,9 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "HI")]) @@ -12139,10 +12339,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "HI")]) @@ 
-12160,8 +12359,8 @@ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12174,9 +12373,9 @@ (ashiftrt:QI (match_dup 0) (match_operand:QI 1 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - && (! TARGET_PARTIAL_REG_STALL || optimize_size) - && (TARGET_SHIFT1 || optimize_size)" + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift1") (set (attr "length") @@ -12220,8 +12419,8 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift") @@ -12237,8 +12436,8 @@ (match_operand:QI 2 "const1_operand" "I")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift") @@ -12255,10 +12454,9 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + 
&& ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) @@ -12270,10 +12468,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) @@ -12309,6 +12506,22 @@ "#" [(set_attr "type" "multi")]) +;; This pattern must be defined before *lshrti3_2 to prevent +;; combine pass from converting sse2_lshrti3 to *lshrti3_2. + +(define_insn "sse2_lshrti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "psrldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + (define_insn "*lshrti3_2" [(set (match_operand:TI 0 "register_operand" "=r") (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") @@ -12349,8 +12562,9 @@ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12381,8 +12595,9 @@ (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_dup 1) (match_dup 2)))] - 
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT && (TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t%0" [(set_attr "type" "ishift") @@ -12398,8 +12613,9 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT && (TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t%0" [(set_attr "type" "ishift") @@ -12416,10 +12632,10 @@ (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) @@ -12431,10 +12647,10 @@ (match_operand:QI 2 "const_int_operand" "e")) (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) @@ -12468,7 +12684,7 @@ (match_operand:QI 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) - ? flow2_completed : reload_completed)" + ? 
epilogue_completed : reload_completed)" [(const_int 0)] "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;") @@ -12485,8 +12701,8 @@ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12499,8 +12715,9 @@ (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0")) (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%k0" [(set_attr "type" "ishift") (set_attr "length" "2")]) @@ -12541,8 +12758,8 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%0" [(set_attr "type" "ishift") @@ -12558,8 +12775,8 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%0" [(set_attr "type" "ishift") @@ -12573,8 +12790,9 @@ (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT && (TARGET_SHIFT1 || 
optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%k0" [(set_attr "type" "ishift") @@ -12591,10 +12809,9 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -12606,10 +12823,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -12622,10 +12838,10 @@ (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -12643,8 +12859,8 @@ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && 
(TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12675,8 +12891,8 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t%0" [(set_attr "type" "ishift") @@ -12692,8 +12908,8 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t%0" [(set_attr "type" "ishift") @@ -12710,10 +12926,9 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "HI")]) @@ -12725,10 +12940,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "HI")]) @@ 
-12746,8 +12960,8 @@ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, QImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12805,8 +13019,8 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t%0" [(set_attr "type" "ishift") @@ -12822,8 +13036,8 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t%0" [(set_attr "type" "ishift") @@ -12840,10 +13054,9 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, QImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) @@ -12855,10 +13068,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, QImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || 
!TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) @@ -12914,8 +13126,9 @@ (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATE, DImode, operands)" "rol{q}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -12948,8 +13161,8 @@ (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATE, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATE, SImode, operands)" "rol{l}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -12963,8 +13176,9 @@ (rotate:SI (match_operand:SI 1 "register_operand" "0") (match_operand:QI 2 "const1_operand" "")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATE, SImode, operands)" "rol{l}\t%k0" [(set_attr "type" "rotate") (set_attr "length" "2")]) @@ -13007,8 +13221,8 @@ (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATE, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATE, HImode, operands)" "rol{w}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13028,6 +13242,16 @@ [(set_attr "type" "rotate") (set_attr "mode" "HI")]) +(define_split + [(set 
(match_operand:HI 0 "register_operand" "") + (rotate:HI (match_dup 0) (const_int 8))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (strict_low_part (match_dup 0)) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))])] + "") + (define_expand "rotlqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "") @@ -13055,8 +13279,8 @@ (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATE, QImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATE, QImode, operands)" "rol{b}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13138,8 +13362,9 @@ (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATERT, DImode, operands)" "ror{q}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13172,8 +13397,8 @@ (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATERT, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATERT, SImode, operands)" "ror{l}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13187,8 +13412,9 @@ (rotatert:SI (match_operand:SI 1 "register_operand" "0") (match_operand:QI 2 "const1_operand" "")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && 
ix86_binary_operator_ok (ROTATERT, SImode, operands)" "ror{l}\t%k0" [(set_attr "type" "rotate") (set (attr "length") @@ -13234,8 +13460,8 @@ (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATERT, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATERT, HImode, operands)" "ror{w}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13243,7 +13469,7 @@ (const_string "2") (const_string "*")))]) -(define_insn "*rotrhi3" +(define_insn "*rotrhi3_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 "nonmemory_operand" "I,c"))) @@ -13255,6 +13481,16 @@ [(set_attr "type" "rotate") (set_attr "mode" "HI")]) +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (rotatert:HI (match_dup 0) (const_int 8))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (strict_low_part (match_dup 0)) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))])] + "") + (define_expand "rotrqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "") @@ -13268,8 +13504,8 @@ (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATERT, QImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATERT, QImode, operands)" "ror{b}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13728,25 +13964,26 @@ ;; 0xffffffff is NaN, but not in normalized form, so we can't represent ;; it directly. 
-(define_insn "*sse_setccsf" - [(set (match_operand:SF 0 "register_operand" "=x") - (match_operator:SF 1 "sse_comparison_operator" - [(match_operand:SF 2 "register_operand" "0") - (match_operand:SF 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE" - "cmp%D1ss\t{%3, %0|%0, %3}" +(define_insn "*sse_setcc" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 1 "sse_comparison_operator" + [(match_operand:MODEF 2 "register_operand" "0") + (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && !TARGET_SSE5" + "cmp%D1s\t{%3, %0|%0, %3}" [(set_attr "type" "ssecmp") - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) + +(define_insn "*sse5_setcc" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 1 "sse5_comparison_float_operator" + [(match_operand:MODEF 2 "register_operand" "x") + (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] + "TARGET_SSE5" + "com%Y1s\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "")]) -(define_insn "*sse_setccdf" - [(set (match_operand:DF 0 "register_operand" "=x") - (match_operator:DF 1 "sse_comparison_operator" - [(match_operand:DF 2 "register_operand" "0") - (match_operand:DF 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE2" - "cmp%D1sd\t{%3, %0|%0, %3}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "DF")]) ;; Basic conditional jump instructions. ;; We ignore the overflow flag for signed branch instructions. 
@@ -14026,8 +14263,8 @@ (pc))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG))] - "TARGET_CMOVE && TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_CMOVE && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -14071,8 +14308,8 @@ (label_ref (match_operand 3 "" "")))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG))] - "TARGET_CMOVE && TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_CMOVE && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -14125,8 +14362,7 @@ (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 4 "=a"))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -14141,8 +14377,7 @@ (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 4 "=a"))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -14157,8 +14392,7 @@ (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 4 "=a"))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && !ix86_use_fcomi_compare (GET_CODE (operands[0])) && SELECT_CC_MODE (GET_CODE (operands[0]), @@ -14182,8 +14416,8 @@ (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 5 "=a,a"))] - "TARGET_80387 && TARGET_USE_MODE_FIOP - && FLOAT_MODE_P (GET_MODE (operands[3])) + "X87_FLOAT_MODE_P 
(GET_MODE (operands[3])) + && TARGET_USE_MODE_FIOP && GET_MODE (operands[1]) == GET_MODE (operands[3]) && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0]))) && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode @@ -14287,7 +14521,7 @@ (set_attr "modrm" "0")]) (define_expand "indirect_jump" - [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))] + [(set (pc) (match_operand 0 "nonimmediate_operand" ""))] "" "") @@ -14306,7 +14540,7 @@ (set_attr "length_immediate" "0")]) (define_expand "tablejump" - [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "rm")) + [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "")) (use (label_ref (match_operand 1 "" "")))])] "" { @@ -14317,7 +14551,9 @@ rtx op0, op1; enum rtx_code code; - if (TARGET_64BIT) + /* We can't use @GOTOFF for text labels on VxWorks; + see gotoff_operand. */ + if (TARGET_64BIT || TARGET_VXWORKS_RTP) { code = PLUS; op0 = operands[0]; @@ -14512,7 +14748,8 @@ (define_insn "*call_1_rex64" [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) (match_operand 1 "" ""))] - "!SIBLING_CALL_P (insn) && TARGET_64BIT" + "!SIBLING_CALL_P (insn) && TARGET_64BIT + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" { if (constant_call_address_operand (operands[0], Pmode)) return "call\t%P0"; @@ -14520,6 +14757,13 @@ } [(set_attr "type" "call")]) +(define_insn "*call_1_rex64_large" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm")) + (match_operand 1 "" ""))] + "!SIBLING_CALL_P (insn) && TARGET_64BIT" + "call\t%A0" + [(set_attr "type" "call")]) + (define_insn "*sibcall_1_rex64" [(call (mem:QI (match_operand:DI 0 "constant_call_address_operand" "")) (match_operand 1 "" ""))] @@ -14531,7 +14775,7 @@ [(call (mem:QI (reg:DI R11_REG)) (match_operand 0 "" ""))] "SIBLING_CALL_P (insn) && TARGET_64BIT" - "jmp\t*%%r11" + "jmp\t{*%%}r11" [(set_attr "type" "call")]) @@ -14606,7 +14850,7 @@ registers we stored in the result block. 
We avoid problems by claiming that all hard registers are used and clobbered at this point. */ - emit_insn (gen_blockage (const0_rtx)); + emit_insn (gen_blockage ()); DONE; }) @@ -14617,7 +14861,15 @@ ;; all of memory. This blocks insns from being moved across this point. (define_insn "blockage" - [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_BLOCKAGE)] + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +;; As USE insns aren't meaningful after reload, this is used instead +;; to prevent deleting instructions setting registers for PIC code +(define_insn "prologue_use" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_PROLOGUE_USE)] "" "" [(set_attr "length" "0")]) @@ -14653,7 +14905,7 @@ [(return) (unspec [(const_int 0)] UNSPEC_REP)] "reload_completed" - "rep {;} ret" + "rep\;ret" [(set_attr "length" "1") (set_attr "length_immediate" "0") (set_attr "prefix_rep" "1") @@ -14705,7 +14957,7 @@ [(set_attr "length" "16")]) (define_expand "prologue" - [(const_int 1)] + [(const_int 0)] "" "ix86_expand_prologue (); DONE;") @@ -14732,17 +14984,33 @@ [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))] "TARGET_64BIT" - "lea{q}\t_GLOBAL_OFFSET_TABLE_(%%rip), %0" + "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}" + [(set_attr "type" "lea") + (set_attr "length" "6")]) + +(define_insn "set_rip_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "" "")] UNSPEC_SET_RIP))] + "TARGET_64BIT" + "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}" [(set_attr "type" "lea") (set_attr "length" "6")]) +(define_insn "set_got_offset_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "" "")] UNSPEC_SET_GOT_OFFSET))] + "TARGET_64BIT" + "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}" + [(set_attr "type" "imov") + (set_attr "length" "11")]) + (define_expand 
"epilogue" - [(const_int 1)] + [(const_int 0)] "" "ix86_expand_epilogue (1); DONE;") (define_expand "sibcall_epilogue" - [(const_int 1)] + [(const_int 0)] "" "ix86_expand_epilogue (0); DONE;") @@ -14775,7 +15043,7 @@ "!TARGET_64BIT" "#" "reload_completed" - [(const_int 1)] + [(const_int 0)] "ix86_expand_epilogue (2); DONE;") (define_insn_and_split "eh_return_di" @@ -14785,7 +15053,7 @@ "TARGET_64BIT" "#" "reload_completed" - [(const_int 1)] + [(const_int 0)] "ix86_expand_epilogue (2); DONE;") (define_insn "leave" @@ -14811,36 +15079,40 @@ (clobber (match_scratch:SI 2 "")) (clobber (reg:CC FLAGS_REG))])] "" - "") +{ + if (TARGET_CMOVE) + { + emit_insn (gen_ffs_cmove (operands[0], operands[1])); + DONE; + } +}) -(define_insn_and_split "*ffs_cmove" - [(set (match_operand:SI 0 "register_operand" "=r") - (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) - (clobber (match_scratch:SI 2 "=&r")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_CMOVE" - "#" - "&& reload_completed" +(define_expand "ffs_cmove" [(set (match_dup 2) (const_int -1)) - (parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0))) - (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SI 1 "register_operand" "") + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "") + (ctz:SI (match_dup 1)))]) (set (match_dup 0) (if_then_else:SI (eq (reg:CCZ FLAGS_REG) (const_int 0)) (match_dup 2) (match_dup 0))) (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) (clobber (reg:CC FLAGS_REG))])] - "") + "TARGET_CMOVE" + "operands[2] = gen_reg_rtx (SImode);") (define_insn_and_split "*ffs_no_cmove" [(set (match_operand:SI 0 "nonimmediate_operand" "=r") (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) (clobber (match_scratch:SI 2 "=&q")) (clobber (reg:CC FLAGS_REG))] - "" + "!TARGET_CMOVE" "#" - "reload_completed" - [(parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0))) + 
"&& reload_completed" + [(parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_dup 1) (const_int 0))) (set (match_dup 0) (ctz:SI (match_dup 1)))]) (set (strict_low_part (match_dup 3)) (eq:QI (reg:CCZ FLAGS_REG) (const_int 0))) @@ -14866,33 +15138,20 @@ [(set_attr "prefix_0f" "1")]) (define_expand "ffsdi2" - [(parallel - [(set (match_operand:DI 0 "register_operand" "") - (ffs:DI (match_operand:DI 1 "nonimmediate_operand" ""))) - (clobber (match_scratch:DI 2 "")) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_64BIT && TARGET_CMOVE" - "") - -(define_insn_and_split "*ffs_rex64" - [(set (match_operand:DI 0 "register_operand" "=r") - (ffs:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))) - (clobber (match_scratch:DI 2 "=&r")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && TARGET_CMOVE" - "#" - "&& reload_completed" [(set (match_dup 2) (const_int -1)) (parallel [(set (reg:CCZ FLAGS_REG) - (compare:CCZ (match_dup 1) (const_int 0))) - (set (match_dup 0) (ctz:DI (match_dup 1)))]) + (compare:CCZ (match_operand:DI 1 "register_operand" "") + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "") + (ctz:DI (match_dup 1)))]) (set (match_dup 0) (if_then_else:DI (eq (reg:CCZ FLAGS_REG) (const_int 0)) (match_dup 2) (match_dup 0))) (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) (clobber (reg:CC FLAGS_REG))])] - "") + "TARGET_64BIT" + "operands[2] = gen_reg_rtx (DImode);") (define_insn "*ffsdi_1" [(set (reg:CCZ FLAGS_REG) @@ -15019,16 +15278,25 @@ [(set_attr "prefix_0f" "1") (set_attr "length" "2")]) +(define_insn "*bswaphi_lowpart_1" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r")) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_XCHGB || optimize_size" + "@ + xchg{b}\t{%h0, %b0|%b0, %h0} + rol{w}\t{$8, %0|%0, 8}" + [(set_attr "length" "2,4") + (set_attr "mode" "QI,HI")]) + (define_insn "bswaphi_lowpart" - [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q")) - (bswap:HI 
(match_dup 0)))] + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))] "" - "xchg{b}\t%h0, %b0" - [(set_attr "type" "alu1") - (set_attr "mode" "QI") - (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "double")]) + "rol{w}\t{$8, %0|%0, 8}" + [(set_attr "length" "4") + (set_attr "mode" "HI")]) (define_insn "bswapdi2" [(set (match_operand:DI 0 "register_operand" "=r") @@ -15353,7 +15621,7 @@ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] UNSPEC_TLS_GD)] "TARGET_64BIT" - ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|%%rdi, %a1@TLSGD[%%rip]}\;.word\t0x6666\;rex64\;call\t%P2" + ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|rdi, %a1@TLSGD[rip]}\;.word\t0x6666\;rex64\;call\t%P2" [(set_attr "type" "multi") (set_attr "length" "16")]) @@ -15431,7 +15699,7 @@ (match_operand:DI 2 "" ""))) (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] "TARGET_64BIT" - "lea{q}\t{%&@TLSLD(%%rip), %%rdi|%%rdi, %&@TLSLD[%%rip]}\;call\t%P1" + "lea{q}\t{%&@TLSLD(%%rip), %%rdi|rdi, %&@TLSLD[rip]}\;call\t%P1" [(set_attr "type" "multi") (set_attr "length" "12")]) @@ -15481,7 +15749,7 @@ [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(const_int 0)] UNSPEC_TP))] "!TARGET_64BIT" - "mov{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}" + "mov{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}" [(set_attr "type" "imov") (set_attr "modrm" "0") (set_attr "length" "7") @@ -15494,7 +15762,7 @@ (match_operand:SI 1 "register_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - "add{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}" + "add{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}" [(set_attr "type" "alu") (set_attr "modrm" "0") (set_attr "length" "7") @@ -15505,7 +15773,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(const_int 0)] UNSPEC_TP))] "TARGET_64BIT" - "mov{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}" + "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}" [(set_attr "type" "imov") 
(set_attr "modrm" "0") (set_attr "length" "7") @@ -15518,7 +15786,7 @@ (match_operand:DI 1 "register_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" - "add{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}" + "add{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}" [(set_attr "type" "alu") (set_attr "modrm" "0") (set_attr "length" "7") @@ -15541,7 +15809,7 @@ (clobber (reg:CC FLAGS_REG))])] "!TARGET_64BIT && TARGET_GNU2_TLS" { - operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); ix86_tls_descriptor_calls_expanded_in_cfun = true; }) @@ -15590,7 +15858,7 @@ "" [(set (match_dup 0) (match_dup 5))] { - operands[5] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + operands[5] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2])); }) @@ -15605,7 +15873,7 @@ (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && TARGET_GNU2_TLS" { - operands[2] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); ix86_tls_descriptor_calls_expanded_in_cfun = true; }) @@ -15614,7 +15882,7 @@ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] UNSPEC_TLSDESC))] "TARGET_64BIT && TARGET_GNU2_TLS" - "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[%%rip]}" + "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[rip]}" [(set_attr "type" "lea") (set_attr "mode" "DI") (set_attr "length" "7") @@ -15649,7 +15917,7 @@ "" [(set (match_dup 0) (match_dup 4))] { - operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + operands[4] = !can_create_pseudo_p () ? 
operands[0] : gen_reg_rtx (Pmode); emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1])); }) @@ -15741,6 +16009,15 @@ (const_string "fop"))) (set_attr "mode" "SF")]) +(define_insn "*rcpsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RCP))] + "TARGET_SSE_MATH" + "rcpss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + (define_insn "*fop_sf_1_sse" [(set (match_operand:SF 0 "register_operand" "=x") (match_operator:SF 3 "binary_fp_operator" @@ -16081,7 +16358,7 @@ [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(float_extend:XF - (match_operand:X87MODEF12 1 "nonimmediate_operand" "fm,0")) + (match_operand:MODEF 1 "nonimmediate_operand" "fm,0")) (match_operand:XF 2 "register_operand" "0,f")]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" @@ -16099,7 +16376,7 @@ (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,f") (float_extend:XF - (match_operand:X87MODEF12 2 "nonimmediate_operand" "fm,0"))]))] + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -16115,9 +16392,9 @@ [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0,f")) + (match_operand:MODEF 1 "register_operand" "0,f")) (float_extend:XF - (match_operand:X87MODEF12 2 "nonimmediate_operand" "fm,0"))]))] + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -16134,8 +16411,8 @@ (match_operator 3 "binary_fp_operator" [(float (match_operand:X87MODEI12 1 "register_operand" "")) (match_operand 2 "register_operand" "")]))] - "TARGET_80387 && reload_completed - && FLOAT_MODE_P (GET_MODE (operands[0]))" + 
"reload_completed + && X87_FLOAT_MODE_P (GET_MODE (operands[0]))" [(const_int 0)] { operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); @@ -16154,8 +16431,8 @@ (match_operator 3 "binary_fp_operator" [(match_operand 1 "register_operand" "") (float (match_operand:X87MODEI12 2 "register_operand" ""))]))] - "TARGET_80387 && reload_completed - && FLOAT_MODE_P (GET_MODE (operands[0]))" + "reload_completed + && X87_FLOAT_MODE_P (GET_MODE (operands[0]))" [(const_int 0)] { operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); @@ -16175,8 +16452,8 @@ ;; all fancy i386 XFmode math functions. (define_insn "truncxf2_i387_noop_unspec" - [(set (match_operand:X87MODEF12 0 "register_operand" "=f") - (unspec:X87MODEF12 [(match_operand:XF 1 "register_operand" "f")] + [(set (match_operand:MODEF 0 "register_operand" "=f") + (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")] UNSPEC_TRUNC_NOOP))] "TARGET_USE_FANCY_MATH_387" "* return output_387_reg_move (insn, operands);" @@ -16197,18 +16474,37 @@ [(set (match_operand:XF 0 "register_operand" "=f") (sqrt:XF (float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0"))))] + (match_operand:MODEF 1 "register_operand" "0"))))] "TARGET_USE_FANCY_MATH_387" "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct") + (set_attr "athlon_decode" "direct") (set_attr "amdfam10_decode" "direct")]) +(define_insn "*rsqrtsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RSQRT))] + "TARGET_SSE_MATH" + "rsqrtss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +(define_expand "rsqrtsf2" + [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "")] + UNSPEC_RSQRT))] + "TARGET_SSE_MATH" +{ + ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1); + DONE; +}) + (define_insn "*sqrt2_sse" - [(set 
(match_operand:SSEMODEF 0 "register_operand" "=x") - (sqrt:SSEMODEF - (match_operand:SSEMODEF 1 "nonimmediate_operand" "xm")))] + [(set (match_operand:MODEF 0 "register_operand" "=x") + (sqrt:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "xm")))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" "sqrts\t{%1, %0|%0, %1}" [(set_attr "type" "sse") @@ -16217,12 +16513,21 @@ (set_attr "amdfam10_decode" "*")]) (define_expand "sqrt2" - [(set (match_operand:X87MODEF12 0 "register_operand" "") - (sqrt:X87MODEF12 - (match_operand:X87MODEF12 1 "nonimmediate_operand" "")))] + [(set (match_operand:MODEF 0 "register_operand" "") + (sqrt:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "")))] "TARGET_USE_FANCY_MATH_387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" { + if (mode == SFmode + && TARGET_SSE_MATH && TARGET_RECIP && !optimize_size + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0); + DONE; + } + if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) { rtx op0 = gen_reg_rtx (XFmode); @@ -16243,7 +16548,8 @@ (unspec:XF [(match_dup 2) (match_dup 3)] UNSPEC_FPREM_U)) (set (reg:CCFP FPSR_REG) - (unspec:CCFP [(const_int 0)] UNSPEC_NOP))] + (unspec:CCFP [(match_dup 2) (match_dup 3)] + UNSPEC_C2_FLAG))] "TARGET_USE_FANCY_MATH_387" "fprem" [(set_attr "type" "fpspc") @@ -16257,20 +16563,29 @@ { rtx label = gen_label_rtx (); - emit_label (label); + rtx op2; + + if (rtx_equal_p (operands[1], operands[2])) + { + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, operands[2]); + } + else + op2 = operands[2]; - emit_insn (gen_fpremxf4_i387 (operands[1], operands[2], - operands[1], operands[2])); + emit_label (label); + emit_insn (gen_fpremxf4_i387 (operands[1], op2, operands[1], op2)); ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; emit_move_insn (operands[0], operands[1]); DONE; }) (define_expand "fmod3" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) 
- (use (match_operand:X87MODEF12 1 "general_operand" "")) - (use (match_operand:X87MODEF12 2 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387" { rtx label = gen_label_rtx (); @@ -16284,6 +16599,7 @@ emit_label (label); emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2)); ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; /* Truncate the result properly for strict SSE math. */ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH @@ -16304,7 +16620,8 @@ (unspec:XF [(match_dup 2) (match_dup 3)] UNSPEC_FPREM1_U)) (set (reg:CCFP FPSR_REG) - (unspec:CCFP [(const_int 0)] UNSPEC_NOP))] + (unspec:CCFP [(match_dup 2) (match_dup 3)] + UNSPEC_C2_FLAG))] "TARGET_USE_FANCY_MATH_387" "fprem1" [(set_attr "type" "fpspc") @@ -16318,20 +16635,29 @@ { rtx label = gen_label_rtx (); - emit_label (label); + rtx op2; + + if (rtx_equal_p (operands[1], operands[2])) + { + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, operands[2]); + } + else + op2 = operands[2]; - emit_insn (gen_fprem1xf4_i387 (operands[1], operands[2], - operands[1], operands[2])); + emit_label (label); + emit_insn (gen_fprem1xf4_i387 (operands[1], op2, operands[1], op2)); ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; emit_move_insn (operands[0], operands[1]); DONE; }) (define_expand "remainder3" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" "")) - (use (match_operand:X87MODEF12 2 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387" { rtx label = gen_label_rtx (); @@ -16346,6 +16672,7 @@ emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2)); ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; /* 
Truncate the result properly for strict SSE math. */ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH @@ -16369,7 +16696,7 @@ (define_insn "*sin_extendxf2_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0"))] + (match_operand:MODEF 1 "register_operand" "0"))] UNSPEC_SIN))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) @@ -16391,7 +16718,7 @@ (define_insn "*cos_extendxf2_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0"))] + (match_operand:MODEF 1 "register_operand" "0"))] UNSPEC_COS))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) @@ -16426,7 +16753,7 @@ (set (match_operand:XF 1 "register_operand" "") (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) - && !reload_completed && !reload_in_progress" + && !(reload_completed || reload_in_progress)" [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))] "") @@ -16437,14 +16764,14 @@ (set (match_operand:XF 1 "register_operand" "") (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) - && !reload_completed && !reload_in_progress" + && !(reload_completed || reload_in_progress)" [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))] "") (define_insn "sincos_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" "0"))] + (match_operand:MODEF 2 "register_operand" "0"))] UNSPEC_SINCOS_COS)) (set (match_operand:XF 1 "register_operand" "=u") (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] @@ -16459,31 +16786,31 @@ (define_split [(set (match_operand:XF 0 "register_operand" "") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" ""))] + 
(match_operand:MODEF 2 "register_operand" ""))] UNSPEC_SINCOS_COS)) (set (match_operand:XF 1 "register_operand" "") (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) - && !reload_completed && !reload_in_progress" + && !(reload_completed || reload_in_progress)" [(set (match_dup 1) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))] "") (define_split [(set (match_operand:XF 0 "register_operand" "") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" ""))] + (match_operand:MODEF 2 "register_operand" ""))] UNSPEC_SINCOS_COS)) (set (match_operand:XF 1 "register_operand" "") (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) - && !reload_completed && !reload_in_progress" + && !(reload_completed || reload_in_progress)" [(set (match_dup 0) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))] "") (define_expand "sincos3" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" "")) - (use (match_operand:X87MODEF12 2 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16512,11 +16839,11 @@ (set_attr "mode" "XF")]) (define_insn "fptan_extendxf4_i387" - [(set (match_operand:X87MODEF12 0 "register_operand" "=f") - (match_operand:X87MODEF12 3 "const_double_operand" "F")) + [(set (match_operand:MODEF 0 "register_operand" "=f") + (match_operand:MODEF 3 "const_double_operand" "F")) (set (match_operand:XF 1 "register_operand" "=u") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" "0"))] + (match_operand:MODEF 2 "register_operand" "0"))] UNSPEC_TAN))] "TARGET_USE_FANCY_MATH_387 && 
(!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) @@ -16541,8 +16868,8 @@ }) (define_expand "tan2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16574,9 +16901,9 @@ (define_insn "fpatan_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0")) + (match_operand:MODEF 1 "register_operand" "0")) (float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" "u"))] + (match_operand:MODEF 2 "register_operand" "u"))] UNSPEC_FPATAN)) (clobber (match_scratch:XF 3 "=2"))] "TARGET_USE_FANCY_MATH_387 @@ -16598,9 +16925,9 @@ "") (define_expand "atan23" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" "")) - (use (match_operand:X87MODEF12 2 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16627,8 +16954,8 @@ }) (define_expand "atan2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16666,8 +16993,8 @@ }) (define_expand "asin2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use 
(match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16704,8 +17031,8 @@ }) (define_expand "acos2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16735,7 +17062,7 @@ (define_insn "fyl2x_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0")) + (match_operand:MODEF 1 "register_operand" "0")) (match_operand:XF 2 "register_operand" "u")] UNSPEC_FYL2X)) (clobber (match_scratch:XF 3 "=2"))] @@ -16760,8 +17087,8 @@ }) (define_expand "log2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16790,8 +17117,8 @@ }) (define_expand "log102" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16820,8 +17147,8 @@ }) (define_expand "log22" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || 
TARGET_MIX_SSE_I387) @@ -16852,7 +17179,7 @@ (define_insn "fyl2xp1_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0")) + (match_operand:MODEF 1 "register_operand" "0")) (match_operand:XF 2 "register_operand" "u")] UNSPEC_FYL2XP1)) (clobber (match_scratch:XF 3 "=2"))] @@ -16875,8 +17202,8 @@ }) (define_expand "log1p2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16906,7 +17233,7 @@ (define_insn "fxtract_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" "0"))] + (match_operand:MODEF 2 "register_operand" "0"))] UNSPEC_XTRACT_FRACT)) (set (match_operand:XF 1 "register_operand" "=u") (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))] @@ -16931,8 +17258,8 @@ }) (define_expand "logb2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16962,7 +17289,7 @@ (define_expand "ilogb2" [(use (match_operand:SI 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17038,8 +17365,8 @@ }) (define_expand "exp2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] 
+ [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17068,8 +17395,8 @@ }) (define_expand "exp102" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17098,8 +17425,8 @@ }) (define_expand "exp22" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17119,11 +17446,12 @@ (match_dup 2))) (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) + (set (match_dup 9) (float_extend:XF (match_dup 13))) (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) (parallel [(set (match_dup 7) (unspec:XF [(match_dup 6) (match_dup 4)] UNSPEC_FSCALE_FRACT)) - (set (match_dup 8) + (set (match_dup 8) (unspec:XF [(match_dup 6) (match_dup 4)] UNSPEC_FSCALE_EXP))]) (parallel [(set (match_dup 10) @@ -17132,7 +17460,8 @@ (set (match_dup 11) (unspec:XF [(match_dup 9) (match_dup 8)] UNSPEC_FSCALE_EXP))]) - (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9))) + (set (match_dup 12) (minus:XF (match_dup 10) + (float_extend:XF (match_dup 13)))) (set (match_operand:XF 0 "register_operand" "") (plus:XF (match_dup 12) (match_dup 7)))] "TARGET_USE_FANCY_MATH_387 @@ -17142,14 +17471,16 @@ for (i = 2; i < 13; i++) operands[i] = gen_reg_rtx (XFmode); - + + operands[13] + = validize_mem (force_const_mem (SFmode, 
CONST1_RTX (SFmode))); /* fld1 */ + emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */ - emit_move_insn (operands[9], CONST1_RTX (XFmode)); /* fld1 */ }) (define_expand "expm12" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17182,8 +17513,8 @@ }) (define_expand "ldexp3" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" "")) + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) (use (match_operand:SI 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) @@ -17198,101 +17529,105 @@ emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); DONE; }) + +(define_expand "scalbxf3" + [(parallel [(set (match_operand:XF 0 " register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 3) + (unspec:XF [(match_dup 1) (match_dup 2)] + UNSPEC_FSCALE_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations && !optimize_size" +{ + operands[3] = gen_reg_rtx (XFmode); +}) + +(define_expand "scalb3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations && !optimize_size" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 
(op2, operands[2])); + emit_insn (gen_scalbxf3 (op0, op1, op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) -(define_insn "frndintxf2" +(define_insn "sse4_1_round2" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_15_operand" "n")] + UNSPEC_ROUND))] + "TARGET_ROUND" + "rounds\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "mode" "")]) + +(define_insn "rintxf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] - UNSPEC_FRNDINT))] + UNSPEC_FRNDINT))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "frndint" [(set_attr "type" "fpspc") (set_attr "mode" "XF")]) -(define_expand "rintdf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] +(define_expand "rint2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "(TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math - && !optimize_size)" + && (TARGET_ROUND || !optimize_size))" { - if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math - && !optimize_size) - ix86_expand_rint (operand0, operand1); - else + && (TARGET_ROUND || !optimize_size)) { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2 (op0, op1)); - - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], 
operands[1], GEN_INT (0x04))); + else + ix86_expand_rint (operand0, operand1); } - DONE; -}) - -(define_expand "rintsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 "register_operand" ""))] - "(TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math - && !optimize_size)" -{ - if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math - && !optimize_size) - ix86_expand_rint (operand0, operand1); else { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extendsfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2 (op0, op1)); + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_rintxf2 (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); } DONE; }) -(define_expand "rintxf2" - [(use (match_operand:XF 0 "register_operand" "")) - (use (match_operand:XF 1 "register_operand" ""))] - "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" -{ - emit_insn (gen_frndintxf2 (operands[0], operands[1])); - DONE; -}) - -(define_expand "roundsf2" - [(match_operand:SF 0 "register_operand" "") - (match_operand:SF 1 "nonimmediate_operand" "")] - "SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math && !flag_rounding_math - && !optimize_size" -{ - ix86_expand_round (operand0, operand1); - DONE; -}) - -(define_expand "rounddf2" - [(match_operand:DF 0 "register_operand" "") - (match_operand:DF 1 "nonimmediate_operand" "")] - "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH +(define_expand "round2" + [(match_operand:MODEF 0 "register_operand" "") + (match_operand:MODEF 1 "nonimmediate_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math && !flag_rounding_math && !optimize_size" { - if (TARGET_64BIT) + if (TARGET_64BIT || (mode 
!= DFmode)) ix86_expand_round (operand0, operand1); else ix86_expand_rounddf_32 (operand0, operand1); @@ -17300,9 +17635,9 @@ }) (define_insn_and_split "*fistdi2_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") - (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] - UNSPEC_FIST))] + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] "TARGET_USE_FANCY_MATH_387 && !(reload_completed || reload_in_progress)" "#" @@ -17325,7 +17660,7 @@ (define_insn "fistdi2" [(set (match_operand:DI 0 "memory_operand" "=m") (unspec:DI [(match_operand:XF 1 "register_operand" "f")] - UNSPEC_FIST)) + UNSPEC_FIST)) (clobber (match_scratch:XF 2 "=&1f"))] "TARGET_USE_FANCY_MATH_387" "* return output_fix_trunc (insn, operands, 0);" @@ -17335,7 +17670,7 @@ (define_insn "fistdi2_with_temp" [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] - UNSPEC_FIST)) + UNSPEC_FIST)) (clobber (match_operand:DI 2 "memory_operand" "=m,m")) (clobber (match_scratch:XF 3 "=&1f,&1f"))] "TARGET_USE_FANCY_MATH_387" @@ -17346,7 +17681,7 @@ (define_split [(set (match_operand:DI 0 "register_operand" "") (unspec:DI [(match_operand:XF 1 "register_operand" "")] - UNSPEC_FIST)) + UNSPEC_FIST)) (clobber (match_operand:DI 2 "memory_operand" "")) (clobber (match_scratch 3 ""))] "reload_completed" @@ -17358,7 +17693,7 @@ (define_split [(set (match_operand:DI 0 "memory_operand" "") (unspec:DI [(match_operand:XF 1 "register_operand" "")] - UNSPEC_FIST)) + UNSPEC_FIST)) (clobber (match_operand:DI 2 "memory_operand" "")) (clobber (match_scratch 3 ""))] "reload_completed" @@ -17367,9 +17702,9 @@ "") (define_insn_and_split "*fist2_1" - [(set (match_operand:X87MODEI12 0 "register_operand" "=r") - (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] - UNSPEC_FIST))] + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 
[(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] "TARGET_USE_FANCY_MATH_387 && !(reload_completed || reload_in_progress)" "#" @@ -17387,7 +17722,7 @@ (define_insn "fist2" [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] - UNSPEC_FIST))] + UNSPEC_FIST))] "TARGET_USE_FANCY_MATH_387" "* return output_fix_trunc (insn, operands, 0);" [(set_attr "type" "fpspc") @@ -17396,7 +17731,7 @@ (define_insn "fist2_with_temp" [(set (match_operand:X87MODEI12 0 "register_operand" "=r") (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] - UNSPEC_FIST)) + UNSPEC_FIST)) (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))] "TARGET_USE_FANCY_MATH_387" "#" @@ -17406,60 +17741,42 @@ (define_split [(set (match_operand:X87MODEI12 0 "register_operand" "") (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] - UNSPEC_FIST)) + UNSPEC_FIST)) (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] "reload_completed" - [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] - UNSPEC_FIST)) + [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST)) (set (match_dup 0) (match_dup 2))] "") (define_split [(set (match_operand:X87MODEI12 0 "memory_operand" "") (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] - UNSPEC_FIST)) + UNSPEC_FIST)) (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))] "reload_completed" - [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] - UNSPEC_FIST))] - "") - -(define_expand "lrintxf2" - [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") - (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] - UNSPEC_FIST))] - "TARGET_USE_FANCY_MATH_387" - "") - -(define_expand "lrintdi2" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (unspec:DI [(match_operand:SSEMODEF 1 "register_operand" "")] - UNSPEC_FIX_NOTRUNC))] - "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && TARGET_64BIT" - "") - -(define_expand 
"lrintsi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (unspec:SI [(match_operand:SSEMODEF 1 "register_operand" "")] - UNSPEC_FIX_NOTRUNC))] - "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))] "") -(define_expand "lrounddi2" - [(match_operand:DI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] - "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && TARGET_64BIT - && !flag_trapping_math && !flag_rounding_math - && !optimize_size" -{ - ix86_expand_lround (operand0, operand1); - DONE; -}) +(define_expand "lrintxf2" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387" + "") -(define_expand "lroundsi2" - [(match_operand:SI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] - "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH +(define_expand "lrint2" + [(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "") + (unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")] + UNSPEC_FIX_NOTRUNC))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode != DImode) || TARGET_64BIT)" + "") + +(define_expand "lround2" + [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "") + (match_operand:MODEF 1 "register_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode != DImode) || TARGET_64BIT) && !flag_trapping_math && !flag_rounding_math && !optimize_size" { @@ -17469,8 +17786,8 @@ ;; Rounding mode control word calculation could clobber FLAGS_REG. 
(define_insn_and_split "frndintxf2_floor" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] UNSPEC_FRNDINT_FLOOR)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -17516,20 +17833,25 @@ DONE; }) -(define_expand "floordf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" +(define_expand "floor2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations && !optimize_size) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size))" { - if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math) + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size)) { - if (TARGET_64BIT) + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x01))); + else if (TARGET_64BIT || (mode != DFmode)) ix86_expand_floorceil (operand0, operand1, true); else ix86_expand_floorceildf_32 (operand0, operand1, true); @@ -17539,43 +17861,17 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_floor (op0, op1)); - - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - } - DONE; -}) - -(define_expand "floorsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 
"register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" -{ - if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math) - ix86_expand_floorceil (operand0, operand1, true); - else - { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_floor (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); } DONE; }) (define_insn_and_split "*fist2_floor_1" - [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") - (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")] + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] UNSPEC_FIST_FLOOR)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -17733,7 +18029,7 @@ (define_expand "lfloordi2" [(match_operand:DI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] + (match_operand:MODEF 1 "register_operand" "")] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && TARGET_64BIT && !flag_trapping_math && !optimize_size" @@ -17744,7 +18040,7 @@ (define_expand "lfloorsi2" [(match_operand:SI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] + (match_operand:MODEF 1 "register_operand" "")] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math && (!optimize_size || !TARGET_64BIT)" @@ -17755,8 +18051,8 @@ ;; Rounding mode control word calculation could clobber FLAGS_REG. 
(define_insn_and_split "frndintxf2_ceil" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] UNSPEC_FRNDINT_CEIL)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -17802,20 +18098,25 @@ DONE; }) -(define_expand "ceildf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" +(define_expand "ceil2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations && !optimize_size) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size))" { - if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math) + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size)) { - if (TARGET_64BIT) + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x02))); + else if (TARGET_64BIT || (mode != DFmode)) ix86_expand_floorceil (operand0, operand1, false); else ix86_expand_floorceildf_32 (operand0, operand1, false); @@ -17825,43 +18126,17 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_ceil (op0, op1)); - - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - } - DONE; -}) - -(define_expand "ceilsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 
"register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" -{ - if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math) - ix86_expand_floorceil (operand0, operand1, false); - else - { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_ceil (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); } DONE; }) (define_insn_and_split "*fist2_ceil_1" - [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") - (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")] + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] UNSPEC_FIST_CEIL)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -18019,7 +18294,7 @@ (define_expand "lceildi2" [(match_operand:DI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] + (match_operand:MODEF 1 "register_operand" "")] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && TARGET_64BIT && !flag_trapping_math" { @@ -18029,7 +18304,7 @@ (define_expand "lceilsi2" [(match_operand:SI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] + (match_operand:MODEF 1 "register_operand" "")] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math" { @@ -18039,8 +18314,8 @@ ;; Rounding mode control word calculation could clobber FLAGS_REG. 
(define_insn_and_split "frndintxf2_trunc" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] UNSPEC_FRNDINT_TRUNC)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -18086,20 +18361,25 @@ DONE; }) -(define_expand "btruncdf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" +(define_expand "btrunc2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations && !optimize_size) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size))" { - if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math) + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size)) { - if (TARGET_64BIT) + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x03))); + else if (TARGET_64BIT || (mode != DFmode)) ix86_expand_trunc (operand0, operand1); else ix86_expand_truncdf_32 (operand0, operand1); @@ -18109,44 +18389,18 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_trunc (op0, op1)); - - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - } - DONE; -}) - -(define_expand "btruncsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 "register_operand" ""))] 
- "((TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" -{ - if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math) - ix86_expand_trunc (operand0, operand1); - else - { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_trunc (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); } DONE; }) ;; Rounding mode control word calculation could clobber FLAGS_REG. (define_insn_and_split "frndintxf2_mask_pm" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] UNSPEC_FRNDINT_MASK_PM)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -18193,37 +18447,21 @@ DONE; }) -(define_expand "nearbyintdf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" -{ - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_mask_pm (op0, op1)); - - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - DONE; -}) - -(define_expand "nearbyintsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 "register_operand" ""))] +(define_expand "nearbyint2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + 
&& (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_mask_pm (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); DONE; }) @@ -18242,8 +18480,8 @@ [(use (match_operand:SI 0 "register_operand" "")) (use (match_operand:X87MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387)" + && TARGET_C99_FUNCTIONS + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" { rtx mask = GEN_INT (0x45); rtx val = GEN_INT (0x05); @@ -18264,6 +18502,20 @@ DONE; }) +(define_expand "signbit2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:X87MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + rtx mask = GEN_INT (0x0200); + + rtx scratch = gen_reg_rtx (HImode); + + emit_insn (gen_fxam2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), mask)); + DONE; +}) ;; Block operation instructions @@ -18318,7 +18570,9 @@ operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust); operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust); - if (TARGET_SINGLE_STRINGOP || optimize_size) + /* Can't use this if the user has appropriated esi or edi. 
*/ + if ((TARGET_SINGLE_STRINGOP || optimize_size) + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])) { emit_insn (gen_strmov_singleop (operands[0], operands[1], operands[2], operands[3], @@ -18469,7 +18723,7 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "TARGET_64BIT" - "{rep\;movsq|rep movsq}" + "rep movsq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -18488,7 +18742,7 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!TARGET_64BIT" - "{rep\;movsl|rep movsd}" + "rep movs{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -18507,7 +18761,7 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "TARGET_64BIT" - "{rep\;movsl|rep movsd}" + "rep movs{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -18524,7 +18778,7 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!TARGET_64BIT" - "{rep\;movsb|rep movsb}" + "rep movsb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -18541,7 +18795,7 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "TARGET_64BIT" - "{rep\;movsb|rep movsb}" + "rep movsb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -18721,7 +18975,7 @@ (use (match_operand:DI 2 "register_operand" "a")) (use (match_dup 4))] "TARGET_64BIT" - "{rep\;stosq|rep stosq}" + "rep stosq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -18738,7 +18992,7 @@ (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] "!TARGET_64BIT" - "{rep\;stosl|rep stosd}" + "rep stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -18755,7 +19009,7 @@ (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] "TARGET_64BIT" - "{rep\;stosl|rep stosd}" + "rep stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -18771,7 +19025,7 @@ (use (match_operand:QI 2 "register_operand" 
"a")) (use (match_dup 4))] "!TARGET_64BIT" - "{rep\;stosb|rep stosb}" + "rep stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -18787,7 +19041,7 @@ (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4))] "TARGET_64BIT" - "{rep\;stosb|rep stosb}" + "rep stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -18804,7 +19058,7 @@ rtx addr1, addr2, out, outlow, count, countreg, align; /* Can't use this if the user has appropriated esi or edi. */ - if (global_regs[4] || global_regs[5]) + if (fixed_regs[SI_REG] || fixed_regs[DI_REG]) FAIL; out = operands[0]; @@ -18896,7 +19150,7 @@ (clobber (match_operand:SI 1 "register_operand" "=D")) (clobber (match_operand:SI 2 "register_operand" "=c"))] "!TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -18911,7 +19165,7 @@ (clobber (match_operand:DI 1 "register_operand" "=D")) (clobber (match_operand:DI 2 "register_operand" "=c"))] "TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -18946,7 +19200,7 @@ (clobber (match_operand:SI 1 "register_operand" "=D")) (clobber (match_operand:SI 2 "register_operand" "=c"))] "!TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -18964,7 +19218,7 @@ (clobber (match_operand:DI 1 "register_operand" "=D")) (clobber (match_operand:DI 2 "register_operand" "=c"))] "TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -19011,7 +19265,7 @@ (clobber (match_operand:SI 1 "register_operand" "=D")) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - "repnz{\;| }scasb" + "repnz scasb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -19025,7 +19279,7 @@ (clobber (match_operand:DI 1 
"register_operand" "=D")) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" - "repnz{\;| }scasb" + "repnz scasb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -19119,7 +19373,7 @@ (match_operand:DI 2 "general_operand" "") (match_operand:DI 3 "general_operand" "")))] "TARGET_64BIT" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn "x86_movdicc_0_m1_rex64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -19138,6 +19392,21 @@ (set_attr "mode" "DI") (set_attr "length_immediate" "0")]) +(define_insn "*x86_movdicc_0_m1_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extract:DI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int 1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{q}\t%0, %0" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "DI") + (set_attr "length_immediate" "0")]) + (define_insn "*movdicc_c_rex64" [(set (match_operand:DI 0 "register_operand" "=r,r") (if_then_else:DI (match_operator 1 "ix86_comparison_operator" @@ -19158,7 +19427,7 @@ (match_operand:SI 2 "general_operand" "") (match_operand:SI 3 "general_operand" "")))] "" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing ;; the register first winds up with `sbbl $0,reg', which is also weird. 
@@ -19181,6 +19450,21 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "0")]) +(define_insn "*x86_movsicc_0_m1_se" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int 1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{l}\t%0, %0" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + (define_insn "*movsicc_noc" [(set (match_operand:SI 0 "register_operand" "=r,r") (if_then_else:SI (match_operator 1 "ix86_comparison_operator" @@ -19201,7 +19485,7 @@ (match_operand:HI 2 "general_operand" "") (match_operand:HI 3 "general_operand" "")))] "TARGET_HIMODE_MATH" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn "*movhicc_noc" [(set (match_operand:HI 0 "register_operand" "=r,r") @@ -19223,7 +19507,7 @@ (match_operand:QI 2 "general_operand" "") (match_operand:QI 3 "general_operand" "")))] "TARGET_QIMODE_MATH" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn_and_split "*movqicc_noc" [(set (match_operand:QI 0 "register_operand" "=r,r") @@ -19245,13 +19529,15 @@ [(set_attr "type" "icmov") (set_attr "mode" "SI")]) -(define_expand "movsfcc" - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (match_operand 1 "comparison_operator" "") - (match_operand:SF 2 "register_operand" "") - (match_operand:SF 3 "register_operand" "")))] - "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH" - "if (! 
ix86_expand_fp_movcc (operands)) FAIL; DONE;") +(define_expand "movcc" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (if_then_else:X87MODEF + (match_operand 1 "comparison_operator" "") + (match_operand:X87MODEF 2 "register_operand" "") + (match_operand:X87MODEF 3 "register_operand" "")))] + "(TARGET_80387 && TARGET_CMOVE) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") (define_insn "*movsfcc_1_387" [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") @@ -19269,14 +19555,6 @@ [(set_attr "type" "fcmov,fcmov,icmov,icmov") (set_attr "mode" "SF,SF,SI,SI")]) -(define_expand "movdfcc" - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (match_operand 1 "comparison_operator" "") - (match_operand:DF 2 "register_operand" "") - (match_operand:DF 3 "register_operand" "")))] - "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)" - "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") - (define_insn "*movdfcc_1" [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" @@ -19329,14 +19607,6 @@ split_di (operands+3, 1, operands+7, operands+8); split_di (operands, 1, operands+2, operands+3);") -(define_expand "movxfcc" - [(set (match_operand:XF 0 "register_operand" "") - (if_then_else:XF (match_operand 1 "comparison_operator" "") - (match_operand:XF 2 "register_operand" "") - (match_operand:XF 3 "register_operand" "")))] - "TARGET_80387 && TARGET_CMOVE" - "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") - (define_insn "*movxfcc_1" [(set (match_operand:XF 0 "register_operand" "=f,f") (if_then_else:XF (match_operator 1 "fcmov_comparison_operator" @@ -19350,46 +19620,44 @@ [(set_attr "type" "fcmov") (set_attr "mode" "XF")]) +;; All moves in SSE5 pcmov instructions are 128 bits and hence we restrict +;; the scalar versions to have only XMM registers as operands. 
+ +;; SSE5 conditional move +(define_insn "*sse5_pcmov_" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (if_then_else:MODEF + (match_operand:MODEF 1 "register_operand" "x,0") + (match_operand:MODEF 2 "register_operand" "0,x") + (match_operand:MODEF 3 "register_operand" "x,x")))] + "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" + "pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" + [(set_attr "type" "sse4arg")]) + ;; These versions of the min/max patterns are intentionally ignorant of ;; their behavior wrt -0.0 and NaN (via the commutative operand mark). ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator ;; are undefined in this condition, we're certain this is correct. -(define_insn "sminsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (smin:SF (match_operand:SF 1 "nonimmediate_operand" "%0") - (match_operand:SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE_MATH" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "smaxsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (smax:SF (match_operand:SF 1 "nonimmediate_operand" "%0") - (match_operand:SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE_MATH" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "smindf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (smin:DF (match_operand:DF 1 "nonimmediate_operand" "%0") - (match_operand:DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "minsd\t{%2, %0|%0, %2}" +(define_insn "smin3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smin:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "mins\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) -(define_insn "smaxdf3" - [(set (match_operand:DF 0 "register_operand" "=x") 
- (smax:DF (match_operand:DF 1 "nonimmediate_operand" "%0") - (match_operand:DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "maxsd\t{%2, %0|%0, %2}" +(define_insn "smax3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smax:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "maxs\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) ;; These versions of the min/max patterns implement exactly the operations ;; min = (op1 < op2 ? op1 : op2) @@ -19397,45 +19665,27 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "*ieee_sminsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (unspec:SF [(match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MIN))] - "TARGET_SSE_MATH" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "*ieee_smaxsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (unspec:SF [(match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MAX))] - "TARGET_SSE_MATH" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "*ieee_smindf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (unspec:DF [(match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MIN))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "minsd\t{%2, %0|%0, %2}" +(define_insn "*ieee_smin3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "mins\t{%2, %0|%0, %2}" [(set_attr "type" 
"sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) -(define_insn "*ieee_smaxdf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (unspec:DF [(match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MAX))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "maxsd\t{%2, %0|%0, %2}" +(define_insn "*ieee_smax3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "maxs\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) ;; Make two stack loads independent: ;; fld aa fld aa @@ -19470,7 +19720,7 @@ (match_operand:QI 2 "register_operand" "") (match_operand:QI 3 "const_int_operand" "")] "" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") (define_expand "addhicc" [(match_operand:HI 0 "register_operand" "") @@ -19478,7 +19728,7 @@ (match_operand:HI 2 "register_operand" "") (match_operand:HI 3 "const_int_operand" "")] "" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") (define_expand "addsicc" [(match_operand:SI 0 "register_operand" "") @@ -19486,7 +19736,7 @@ (match_operand:SI 2 "register_operand" "") (match_operand:SI 3 "const_int_operand" "")] "" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") (define_expand "adddicc" [(match_operand:DI 0 "register_operand" "") @@ -19494,7 +19744,7 @@ (match_operand:DI 2 "register_operand" "") (match_operand:DI 3 "const_int_operand" "")] "TARGET_64BIT" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") ;; Misc patterns (?) 
@@ -19617,86 +19867,56 @@ [(set_attr "type" "alu,lea") (set_attr "mode" "DI")]) -(define_expand "allocate_stack_worker" - [(match_operand:SI 0 "register_operand" "")] - "TARGET_STACK_PROBE" -{ - if (reload_completed) - { - if (TARGET_64BIT) - emit_insn (gen_allocate_stack_worker_rex64_postreload (operands[0])); - else - emit_insn (gen_allocate_stack_worker_postreload (operands[0])); - } - else - { - if (TARGET_64BIT) - emit_insn (gen_allocate_stack_worker_rex64 (operands[0])); - else - emit_insn (gen_allocate_stack_worker_1 (operands[0])); - } - DONE; -}) - -(define_insn "allocate_stack_worker_1" - [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")] - UNSPECV_STACK_PROBE) +(define_insn "allocate_stack_worker_32" + [(set (match_operand:SI 0 "register_operand" "+a") + (unspec_volatile:SI [(match_dup 0)] UNSPECV_STACK_PROBE)) (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0))) - (clobber (match_scratch:SI 1 "=0")) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && TARGET_STACK_PROBE" - "call\t__alloca" + "call\t___chkstk" [(set_attr "type" "multi") (set_attr "length" "5")]) -(define_expand "allocate_stack_worker_postreload" - [(parallel [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")] - UNSPECV_STACK_PROBE) - (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0))) - (clobber (match_dup 0)) - (clobber (reg:CC FLAGS_REG))])] - "" - "") - -(define_insn "allocate_stack_worker_rex64" - [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "a")] - UNSPECV_STACK_PROBE) +(define_insn "allocate_stack_worker_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec_volatile:DI [(match_dup 0)] UNSPECV_STACK_PROBE)) (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 0))) - (clobber (match_scratch:DI 1 "=0")) + (clobber (reg:DI R10_REG)) + (clobber (reg:DI R11_REG)) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && TARGET_STACK_PROBE" - "call\t__alloca" + "call\t___chkstk" [(set_attr "type" "multi") (set_attr 
"length" "5")]) -(define_expand "allocate_stack_worker_rex64_postreload" - [(parallel [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "a")] - UNSPECV_STACK_PROBE) - (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 0))) - (clobber (match_dup 0)) - (clobber (reg:CC FLAGS_REG))])] - "" - "") - (define_expand "allocate_stack" - [(parallel [(set (match_operand:SI 0 "register_operand" "=r") - (minus:SI (reg:SI SP_REG) - (match_operand:SI 1 "general_operand" ""))) - (clobber (reg:CC FLAGS_REG))]) - (parallel [(set (reg:SI SP_REG) - (minus:SI (reg:SI SP_REG) (match_dup 1))) - (clobber (reg:CC FLAGS_REG))])] + [(match_operand 0 "register_operand" "") + (match_operand 1 "general_operand" "")] "TARGET_STACK_PROBE" { -#ifdef CHECK_STACK_LIMIT - if (CONST_INT_P (operands[1]) + rtx x; + +#ifndef CHECK_STACK_LIMIT +#define CHECK_STACK_LIMIT 0 +#endif + + if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1]) && INTVAL (operands[1]) < CHECK_STACK_LIMIT) - emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, - operands[1])); + { + x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, operands[1], + stack_pointer_rtx, 0, OPTAB_DIRECT); + if (x != stack_pointer_rtx) + emit_move_insn (stack_pointer_rtx, x); + } else -#endif - emit_insn (gen_allocate_stack_worker (copy_to_mode_reg (SImode, - operands[1]))); + { + x = copy_to_mode_reg (Pmode, operands[1]); + if (TARGET_64BIT) + x = gen_allocate_stack_worker_64 (x); + else + x = gen_allocate_stack_worker_32 (x); + emit_insn (x); + } emit_move_insn (operands[0], virtual_stack_dynamic_rtx); DONE; @@ -19762,11 +19982,11 @@ (set (match_operand 1 "register_operand" "") (and (match_dup 3) (match_dup 4)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - /* Ensure that the operand will remain sign-extended immediate. */ - && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode) && ! optimize_size && ((GET_MODE (operands[1]) == HImode && ! 
TARGET_FAST_PREFIX) - || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))" + || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode)) + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)" [(parallel [(set (match_dup 0) (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4)) (const_int 0)])) @@ -19791,10 +20011,10 @@ (match_operand:HI 3 "const_int_operand" "")) (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && reload_completed - /* Ensure that the operand will remain sign-extended immediate. */ - && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode) && ! TARGET_FAST_PREFIX - && ! optimize_size" + && ! optimize_size + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)" [(set (match_dup 0) (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) (const_int 0)]))] @@ -19953,8 +20173,8 @@ (set (match_operand:SI 0 "memory_operand" "") (match_operand:SI 1 "immediate_operand" ""))] "! optimize_size - && get_attr_length (insn) >= ix86_cost->large_insn - && TARGET_SPLIT_LONG_MOVES" + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -19963,8 +20183,9 @@ [(match_scratch:HI 2 "r") (set (match_operand:HI 0 "memory_operand" "") (match_operand:HI 1 "immediate_operand" ""))] - "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn - && TARGET_SPLIT_LONG_MOVES" + "! optimize_size + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -19973,8 +20194,9 @@ [(match_scratch:QI 2 "q") (set (match_operand:QI 0 "memory_operand" "") (match_operand:QI 1 "immediate_operand" ""))] - "! 
optimize_size && get_attr_length (insn) >= ix86_cost->large_insn - && TARGET_SPLIT_LONG_MOVES" + "! optimize_size + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -19986,7 +20208,7 @@ [(match_operand:SI 2 "memory_operand" "") (const_int 0)])) (match_scratch:SI 3 "r")] - "ix86_match_ccmode (insn, CCNOmode) && ! optimize_size" + " ! optimize_size && ix86_match_ccmode (insn, CCNOmode)" [(set (match_dup 3) (match_dup 2)) (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))] "") @@ -20006,11 +20228,11 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "") (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] "!optimize_size - && peep2_regno_dead_p (0, FLAGS_REG) - && ((TARGET_PENTIUM + && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], SImode))) - || (TARGET_K6 && long_memory_operand (operands[0], SImode)))" + || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode))) + && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (xor:SI (match_dup 1) (const_int -1))) (clobber (reg:CC FLAGS_REG))])] @@ -20020,11 +20242,11 @@ [(set (match_operand:HI 0 "nonimmediate_operand" "") (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] "!optimize_size - && peep2_regno_dead_p (0, FLAGS_REG) - && ((TARGET_PENTIUM + && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], HImode))) - || (TARGET_K6 && long_memory_operand (operands[0], HImode)))" + || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode))) + && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (xor:HI (match_dup 1) (const_int -1))) (clobber (reg:CC FLAGS_REG))])] @@ -20034,11 +20256,11 @@ [(set (match_operand:QI 0 "nonimmediate_operand" "") (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] "!optimize_size - && peep2_regno_dead_p (0, 
FLAGS_REG) - && ((TARGET_PENTIUM + && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], QImode))) - || (TARGET_K6 && long_memory_operand (operands[0], QImode)))" + || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode))) + && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (xor:QI (match_dup 1) (const_int -1))) (clobber (reg:CC FLAGS_REG))])] @@ -20058,7 +20280,7 @@ (match_operand:SI 3 "immediate_operand" "")) (const_int 0)]))] "ix86_match_ccmode (insn, CCNOmode) - && (true_regnum (operands[2]) != 0 + && (true_regnum (operands[2]) != AX_REG || satisfies_constraint_K (operands[3])) && peep2_reg_dead_p (1, operands[2])" [(parallel @@ -20080,7 +20302,7 @@ (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[2]) != 0 + && true_regnum (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" [(parallel [(set (match_dup 0) @@ -20102,7 +20324,7 @@ (const_int 0)]))] "! 
TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[2]) != 0 + && true_regnum (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" [(parallel [(set (match_dup 0) (match_op_dup 1 @@ -20220,7 +20442,7 @@ "(GET_MODE (operands[0]) == HImode || GET_MODE (operands[0]) == SImode || (GET_MODE (operands[0]) == DImode && TARGET_64BIT)) - && (optimize_size || TARGET_PENTIUM) + && (optimize_size || TARGET_MOVE_M1_VIA_OR) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int -1)) (clobber (reg:CC FLAGS_REG))])] @@ -20624,7 +20846,7 @@ (mult:DI (match_operand:DI 1 "memory_operand" "") (match_operand:DI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size + "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2))) @@ -20637,7 +20859,7 @@ (mult:SI (match_operand:SI 1 "memory_operand" "") (match_operand:SI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size + "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2))) @@ -20651,7 +20873,7 @@ (mult:SI (match_operand:SI 1 "memory_operand" "") (match_operand:SI 2 "immediate_operand" "")))) (clobber (reg:CC FLAGS_REG))])] - "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size + "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) @@ -20668,7 +20890,7 @@ (match_operand:DI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:DI 3 "r")] - "(TARGET_K8 || 
TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size + "TARGET_SLOW_IMUL_IMM8 && !optimize_size && satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3))) @@ -20684,7 +20906,7 @@ (match_operand:SI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:SI 3 "r")] - "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size + "TARGET_SLOW_IMUL_IMM8 && !optimize_size && satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3))) @@ -20700,7 +20922,7 @@ (match_operand:HI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:HI 3 "r")] - "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size" + "TARGET_SLOW_IMUL_IMM8 && !optimize_size" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3))) (clobber (reg:CC FLAGS_REG))])] @@ -20861,7 +21083,8 @@ [(set (match_operand 0 "" "") (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) (match_operand:DI 2 "" "")))] - "!SIBLING_CALL_P (insn) && TARGET_64BIT" + "!SIBLING_CALL_P (insn) && TARGET_64BIT + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" { if (constant_call_address_operand (operands[1], Pmode)) return "call\t%P1"; @@ -20869,6 +21092,14 @@ } [(set_attr "type" "callv")]) +(define_insn "*call_value_1_rex64_large" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm")) + (match_operand:DI 2 "" "")))] + "!SIBLING_CALL_P (insn) && TARGET_64BIT" + "call\t%A1" + [(set_attr "type" "callv")]) + (define_insn "*sibcall_value_1_rex64" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) @@ -20882,7 +21113,7 @@ (call (mem:QI (reg:DI R11_REG)) (match_operand:DI 1 "" "")))] "SIBLING_CALL_P (insn) && TARGET_64BIT" - "jmp\t*%%r11" + "jmp\t{*%%}r11" 
[(set_attr "type" "callv")]) ;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5. @@ -21090,7 +21321,7 @@ (set (match_scratch:SI 2 "=&r") (const_int 0)) (clobber (reg:CC FLAGS_REG))] "" - "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR %%gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" [(set_attr "type" "multi")]) (define_insn "stack_tls_protect_set_di" @@ -21104,9 +21335,9 @@ system call would not have to trash the userspace segment register, which would be expensive */ if (ix86_cmodel != CM_KERNEL) - return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR %%fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; else - return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR %%gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; } [(set_attr "type" "multi")]) @@ -21165,7 +21396,7 @@ UNSPEC_SP_TLS_TEST)) (clobber (match_scratch:SI 3 "=r"))] "" - "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR %%gs:%P2}" + "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR gs:%P2}" [(set_attr "type" "multi")]) (define_insn "stack_tls_protect_test_di" @@ -21180,12 +21411,42 @@ system call would not have to trash the userspace segment register, which would be expensive */ if (ix86_cmodel != CM_KERNEL) - return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}"; + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR fs:%P2}"; else - return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR %%gs:%P2}"; + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR gs:%P2}"; } [(set_attr "type" "multi")]) +(define_mode_iterator CRC32MODE [QI HI SI]) +(define_mode_attr crc32modesuffix [(QI "b") (HI "w") (SI "l")]) +(define_mode_attr 
crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")]) + +(define_insn "sse4_2_crc32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "0") + (match_operand:CRC32MODE 2 "nonimmediate_operand" "")] + UNSPEC_CRC32))] + "TARGET_SSE4_2" + "crc32\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "SI")]) + +(define_insn "sse4_2_crc32di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm")] + UNSPEC_CRC32))] + "TARGET_SSE4_2 && TARGET_64BIT" + "crc32q\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "DI")]) + (include "mmx.md") (include "sse.md") (include "sync.md")