X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fi386.md;h=c245a49fd3f4ee40bb09818e58f0ab310202ef59;hp=849ca830ebafd0b3a0bda2b084714e851a0f9384;hb=6c24dca6125225d09ddbb06bbf31a49e4ff5c88c;hpb=79959616093d981c7c71a6ab16493a0b3c707257 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 849ca830eba..c245a49fd3f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1,6 +1,6 @@ ;; GCC machine description for IA-32 and x86-64. ;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -;; 2001, 2002, 2003, 2004, 2005, 2006, 2007 +;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka @@ -9,7 +9,7 @@ ;; ;; GCC is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) +;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; ;; GCC is distributed in the hope that it will be useful, @@ -18,9 +18,8 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License -;; along with GCC; see the file COPYING. If not, write to -;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. */ +;; along with GCC; see the file COPYING3. If not see +;; . */ ;; ;; The original PO technology requires these to be ordered by speed, ;; so that assigner will pick the fastest. 
@@ -96,7 +95,6 @@ (UNSPEC_RCP 45) (UNSPEC_RSQRT 46) (UNSPEC_SFENCE 47) - (UNSPEC_NOP 48) ; prevents combiner cleverness (UNSPEC_PFRCP 49) (UNSPEC_PFRCPIT1 40) (UNSPEC_PFRCPIT2 41) @@ -159,9 +157,44 @@ ; For SSE4A support (UNSPEC_EXTRQI 130) - (UNSPEC_EXTRQ 131) + (UNSPEC_EXTRQ 131) (UNSPEC_INSERTQI 132) (UNSPEC_INSERTQ 133) + + ; For SSE4.1 support + (UNSPEC_BLENDV 134) + (UNSPEC_INSERTPS 135) + (UNSPEC_DP 136) + (UNSPEC_MOVNTDQA 137) + (UNSPEC_MPSADBW 138) + (UNSPEC_PHMINPOSUW 139) + (UNSPEC_PTEST 140) + (UNSPEC_ROUND 141) + + ; For SSE4.2 support + (UNSPEC_CRC32 143) + (UNSPEC_PCMPESTR 144) + (UNSPEC_PCMPISTR 145) + + ;; For SSE5 + (UNSPEC_SSE5_INTRINSIC 150) + (UNSPEC_SSE5_UNSIGNED_CMP 151) + (UNSPEC_SSE5_TRUEFALSE 152) + (UNSPEC_SSE5_PERMUTE 153) + (UNSPEC_FRCZ 154) + (UNSPEC_CVTPH2PS 155) + (UNSPEC_CVTPS2PH 156) + + ; For AES support + (UNSPEC_AESENC 159) + (UNSPEC_AESENCLAST 160) + (UNSPEC_AESDEC 161) + (UNSPEC_AESDECLAST 162) + (UNSPEC_AESIMC 163) + (UNSPEC_AESKEYGENASSIST 164) + + ; For PCLMUL support + (UNSPEC_PCLMUL 165) ]) (define_constants @@ -179,11 +212,42 @@ (UNSPECV_CMPXCHG_2 11) (UNSPECV_XCHG 12) (UNSPECV_LOCK 13) + (UNSPECV_PROLOGUE_USE 14) + (UNSPECV_CLD 15) + ]) + +;; Constants to represent pcomtrue/pcomfalse variants +(define_constants + [(PCOM_FALSE 0) + (PCOM_TRUE 1) + (COM_FALSE_S 2) + (COM_FALSE_P 3) + (COM_TRUE_S 4) + (COM_TRUE_P 5) ]) +;; Constants used in the SSE5 pperm instruction +(define_constants + [(PPERM_SRC 0x00) /* copy source */ + (PPERM_INVERT 0x20) /* invert source */ + (PPERM_REVERSE 0x40) /* bit reverse source */ + (PPERM_REV_INV 0x60) /* bit reverse & invert src */ + (PPERM_ZERO 0x80) /* all 0's */ + (PPERM_ONES 0xa0) /* all 1's */ + (PPERM_SIGN 0xc0) /* propagate sign bit */ + (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */ + (PPERM_SRC1 0x00) /* use first source byte */ + (PPERM_SRC2 0x10) /* use second source byte */ + ]) + ;; Registers by name. 
(define_constants - [(BP_REG 6) + [(AX_REG 0) + (DX_REG 1) + (CX_REG 2) + (SI_REG 4) + (DI_REG 5) + (BP_REG 6) (SP_REG 7) (FLAGS_REG 17) (FPSR_REG 18) @@ -217,8 +281,9 @@ push,pop,call,callv,leave, str,bitmanip, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, - sselog,sselog1,sseiadd,sseishft,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins, + sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, + ssemuladd,sse4arg, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) @@ -231,8 +296,9 @@ (define_attr "unit" "integer,i387,sse,mmx,unknown" (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") (const_string "i387") - (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins") + (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt, + ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg") (const_string "sse") (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") (const_string "mmx") @@ -317,6 +383,9 @@ ] (const_int 0))) +;; There are also additional prefixes in SSSE3. +(define_attr "prefix_extra" "" (const_int 0)) + ;; Set when modrm byte is used. 
(define_attr "modrm" "" (cond [(eq_attr "type" "str,leave") @@ -366,7 +435,8 @@ (plus (plus (attr "modrm") (plus (attr "prefix_0f") (plus (attr "prefix_rex") - (const_int 1)))) + (plus (attr "prefix_extra") + (const_int 1))))) (plus (attr "prefix_rep") (plus (attr "prefix_data16") (plus (attr "length_immediate") @@ -428,11 +498,11 @@ "!alu1,negnot,ishift1, imov,imovx,icmp,test,bitmanip, fmov,fcmp,fsgn, - sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1, - mmx,mmxmov,mmxcmp,mmxcvt") + sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1, + sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt") (match_operand 2 "memory_operand" "")) (const_string "load") - (and (eq_attr "type" "icmov") + (and (eq_attr "type" "icmov,ssemuladd,sse4arg") (match_operand 3 "memory_operand" "")) (const_string "load") ] @@ -469,23 +539,87 @@ [(set_attr "length" "128") (set_attr "type" "multi")]) -;; All x87 floating point modes -(define_mode_macro X87MODEF [SF DF XF]) +;; All integer comparison codes. +(define_code_iterator int_cond [ne eq ge gt le lt geu gtu leu ltu ]) + +;; All floating-point comparison codes. +(define_code_iterator fp_cond [unordered ordered + uneq unge ungt unle unlt ltgt ]) + +(define_code_iterator plusminus [plus minus]) + +(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus]) + +;; Base name for define_insn +(define_code_attr plusminus_insn + [(plus "add") (ss_plus "ssadd") (us_plus "usadd") + (minus "sub") (ss_minus "sssub") (us_minus "ussub")]) + +;; Base name for insn mnemonic. +(define_code_attr plusminus_mnemonic + [(plus "add") (ss_plus "adds") (us_plus "addus") + (minus "sub") (ss_minus "subs") (us_minus "subus")]) + +;; Mark commutative operators as such in constraints. 
+(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%") + (minus "") (ss_minus "") (us_minus "")]) + +;; Mapping of signed max and min +(define_code_iterator smaxmin [smax smin]) + +;; Mapping of unsigned max and min +(define_code_iterator umaxmin [umax umin]) -;; x87 SFmode and DFMode floating point modes -(define_mode_macro X87MODEF12 [SF DF]) +;; Base name for integer and FP insn mnemonic +(define_code_attr maxminiprefix [(smax "maxs") (smin "mins") + (umax "maxu") (umin "minu")]) +(define_code_attr maxminfprefix [(smax "max") (smin "min")]) + +;; Mapping of parallel logic operators +(define_code_iterator plogic [and ior xor]) + +;; Base name for insn mnemonic. +(define_code_attr plogicprefix [(and "and") (ior "or") (xor "xor")]) + +;; Mapping of abs neg operators +(define_code_iterator absneg [abs neg]) + +;; Base name for x87 insn mnemonic. +(define_code_attr absnegprefix [(abs "abs") (neg "chs")]) + +;; All single word integer modes. +(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")]) + +;; Instruction suffix for integer modes. +(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")]) + +;; Register class for integer modes. +(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")]) + +;; Immediate operand constraint for integer modes. +(define_mode_attr i [(QI "i") (HI "i") (SI "i") (DI "e")]) + +;; General operand predicate for integer modes. +(define_mode_attr general_operand + [(QI "general_operand") + (HI "general_operand") + (SI "general_operand") + (DI "x86_64_general_operand")]) + +;; SSE and x87 SFmode and DFmode floating point modes +(define_mode_iterator MODEF [SF DF]) + +;; All x87 floating point modes +(define_mode_iterator X87MODEF [SF DF XF]) ;; All integer modes handled by x87 fisttp operator. -(define_mode_macro X87MODEI [HI SI DI]) +(define_mode_iterator X87MODEI [HI SI DI]) ;; All integer modes handled by integer x87 operators. 
-(define_mode_macro X87MODEI12 [HI SI]) - -;; All SSE floating point modes -(define_mode_macro SSEMODEF [SF DF]) +(define_mode_iterator X87MODEI12 [HI SI]) ;; All integer modes handled by SSE cvtts?2si* operators. -(define_mode_macro SSEMODEI24 [SI DI]) +(define_mode_iterator SSEMODEI24 [SI DI]) ;; SSE asm suffix for floating point modes (define_mode_attr ssemodefsuffix [(SF "s") (DF "d")]) @@ -493,6 +627,9 @@ ;; SSE vector mode corresponding to a scalar mode (define_mode_attr ssevecmode [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")]) + +;; Instruction suffix for REX 64bit operators. +(define_mode_attr rex64suffix [(SI "") (DI "{q}")]) ;; Scheduling descriptions @@ -643,8 +780,8 @@ (define_expand "cmpsi_1" [(set (reg:CC FLAGS_REG) - (compare:CC (match_operand:SI 0 "nonimmediate_operand" "rm,r") - (match_operand:SI 1 "general_operand" "ri,mr")))] + (compare:CC (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" "")))] "" "") @@ -841,8 +978,8 @@ (define_expand "cmp" [(set (reg:CC FLAGS_REG) - (compare:CC (match_operand:SSEMODEF 0 "cmp_fp_expander_operand" "") - (match_operand:SSEMODEF 1 "cmp_fp_expander_operand" "")))] + (compare:CC (match_operand:MODEF 0 "cmp_fp_expander_operand" "") + (match_operand:MODEF 1 "cmp_fp_expander_operand" "")))] "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" { ix86_compare_op0 = operands[0]; @@ -866,8 +1003,7 @@ (match_operand 1 "register_operand" "f") (match_operand 2 "const0_operand" "X"))] UNSPEC_FNSTSW))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2])" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") @@ -880,6 +1016,34 @@ ] (const_string "XF")))]) +(define_insn_and_split "*cmpfp_0_cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" "X"))) + 
(clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + (define_insn "*cmpfp_xf" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI @@ -893,12 +1057,33 @@ (set_attr "unit" "i387") (set_attr "mode" "XF")]) +(define_insn_and_split "*cmpfp_xf_cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 + && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "XF")]) + (define_insn "*cmpfp_" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP - (match_operand:X87MODEF12 1 "register_operand" "f") - (match_operand:X87MODEF12 2 "nonimmediate_operand" "fm"))] + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))] UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, 0, 0);" @@ -906,6 +1091,27 @@ (set_attr "unit" "i387") (set_attr "mode" "")]) +(define_insn_and_split "*cmpfp__cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 
"nonimmediate_operand" "fm"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 + && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) + (define_insn "*cmpfp_u" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI @@ -913,8 +1119,7 @@ (match_operand 1 "register_operand" "f") (match_operand 2 "register_operand" "f"))] UNSPEC_FNSTSW))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2])" "* return output_fp_compare (insn, operands, 0, 1);" [(set_attr "type" "multi") @@ -927,6 +1132,34 @@ ] (const_string "XF")))]) +(define_insn_and_split "*cmpfp_u_cc" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU + (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFPU (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + (define_insn "*cmpfp_" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI @@ -935,8 +1168,8 @@ (match_operator 3 "float_operator" [(match_operand:X87MODEI12 2 "memory_operand" "m")]))] UNSPEC_FNSTSW))] - "TARGET_80387 && TARGET_USE_MODE_FIOP - && FLOAT_MODE_P (GET_MODE 
(operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_USE_MODE_FIOP && (GET_MODE (operands [3]) == GET_MODE (operands[1]))" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "multi") @@ -944,6 +1177,33 @@ (set_attr "fp_int_src" "true") (set_attr "mode" "")]) +(define_insn_and_split "*cmpfp__cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operator 3 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "m")]))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && TARGET_USE_MODE_FIOP + && (GET_MODE (operands [3]) == GET_MODE (operands[1]))" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP + (match_dup 1) + (match_op_dup 3 [(match_dup 2)]))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + ;; FP compares, step 2 ;; Move the fpsw to ax. @@ -977,7 +1237,7 @@ (set_attr "mode" "SI")]) ;; Pentium Pro can do steps 1 through 3 in one go. 
-;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes) +;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes) (define_insn "*cmpfp_i_mixed" [(set (reg:CCFP FLAGS_REG) (compare:CCFP (match_operand 0 "register_operand" "f,x") @@ -1014,9 +1274,9 @@ [(set (reg:CCFP FLAGS_REG) (compare:CCFP (match_operand 0 "register_operand" "f") (match_operand 1 "register_operand" "f")))] - "TARGET_80387 && TARGET_CMOVE - && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))) - && FLOAT_MODE_P (GET_MODE (operands[0])) + "X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 0);" [(set_attr "type" "fcmp") @@ -1066,9 +1326,9 @@ [(set (reg:CCFPU FLAGS_REG) (compare:CCFPU (match_operand 0 "register_operand" "f") (match_operand 1 "register_operand" "f")))] - "TARGET_80387 && TARGET_CMOVE - && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))) - && FLOAT_MODE_P (GET_MODE (operands[0])) + "X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" [(set_attr "type" "fcmp") @@ -1287,7 +1547,7 @@ (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "double")]) + (set_attr "amdfam10_decode" "double")]) (define_expand "movhi" [(set (match_operand:HI 0 "nonimmediate_operand" "") @@ -1405,7 +1665,7 @@ (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "double")]) + (set_attr "amdfam10_decode" "double")]) ;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 (define_insn "*swaphi_2" @@ -1551,25 +1811,6 @@ ] 
(const_string "QI")))]) -(define_expand "reload_outqi" - [(parallel [(match_operand:QI 0 "" "=m") - (match_operand:QI 1 "register_operand" "r") - (match_operand:QI 2 "register_operand" "=&q")])] - "" -{ - rtx op0, op1, op2; - op0 = operands[0]; op1 = operands[1]; op2 = operands[2]; - - gcc_assert (!reg_overlap_mentioned_p (op2, op0)); - if (! q_regs_operand (op1, QImode)) - { - emit_insn (gen_movqi (op2, op1)); - op1 = op2; - } - emit_insn (gen_movqi (op0, op1)); - DONE; -}) - (define_insn "*swapqi_1" [(set (match_operand:QI 0 "register_operand" "+r") (match_operand:QI 1 "register_operand" "+r")) @@ -1581,7 +1822,7 @@ (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector")]) ;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 (define_insn "*swapqi_2" @@ -1890,7 +2131,7 @@ && !x86_64_immediate_operand (operands[1], DImode) && 1" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))] - "split_di (operands + 1, 1, operands + 2, operands + 3); + "split_di (&operands[1], 1, &operands[2], &operands[3]); operands[1] = gen_lowpart (DImode, operands[2]); operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, GEN_INT (4))); @@ -1900,12 +2141,12 @@ [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "immediate_operand" ""))] "TARGET_64BIT && ((optimize > 0 && flag_peephole2) - ? flow2_completed : reload_completed) + ? 
epilogue_completed : reload_completed) && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))] - "split_di (operands + 1, 1, operands + 2, operands + 3); + "split_di (&operands[1], 1, &operands[2], &operands[3]); operands[1] = gen_lowpart (DImode, operands[2]); operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, GEN_INT (4))); @@ -2130,18 +2371,18 @@ && !x86_64_immediate_operand (operands[1], DImode) && 1" [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] - "split_di (operands, 2, operands + 2, operands + 4);") + "split_di (&operands[0], 2, &operands[2], &operands[4]);") (define_split [(set (match_operand:DI 0 "memory_operand" "") (match_operand:DI 1 "immediate_operand" ""))] "TARGET_64BIT && ((optimize > 0 && flag_peephole2) - ? flow2_completed : reload_completed) + ? epilogue_completed : reload_completed) && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] - "split_di (operands, 2, operands + 2, operands + 4);") + "split_di (&operands[0], 2, &operands[2], &operands[4]);") (define_insn "*swapdi_rex64" [(set (match_operand:DI 0 "register_operand" "+r") @@ -2154,7 +2395,7 @@ (set_attr "mode" "DI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "double")]) + (set_attr "amdfam10_decode" "double")]) (define_expand "movti" [(set (match_operand:TI 0 "nonimmediate_operand" "") @@ -2163,6 +2404,8 @@ { if (TARGET_64BIT) ix86_expand_move (TImode, operands); + else if (push_operand (operands[0], TImode)) + ix86_expand_push (TImode, operands[1]); else ix86_expand_vector_move (TImode, operands); DONE; @@ -2183,10 +2426,23 @@ return "pxor\t%0, %0"; case 1: case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; + /* TDmode values are passed as 
TImode on the stack. Moving them + to stack may result in unaligned memory access. */ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "movups\t{%1, %0|%0, %1}"; + else + return "movdqu\t{%1, %0|%0, %1}"; + } else - return "movdqa\t{%1, %0|%0, %1}"; + { + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + } default: gcc_unreachable (); } @@ -2220,10 +2476,23 @@ return "pxor\t%0, %0"; case 3: case 4: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; + /* TDmode values are passed as TImode on the stack. Moving them + to stack may result in unaligned memory access. */ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "movups\t{%1, %0|%0, %1}"; + else + return "movdqu\t{%1, %0|%0, %1}"; + } else - return "movdqa\t{%1, %0|%0, %1}"; + { + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + } default: gcc_unreachable (); } @@ -2290,7 +2559,7 @@ (set_attr "mode" "SF,SI,SF")]) (define_insn "*pushsf_rex64" - [(set (match_operand:SF 0 "push_operand" "=<,<,<") + [(set (match_operand:SF 0 "push_operand" "=X,X,X") (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] "TARGET_64BIT" { @@ -2343,13 +2612,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -2499,26 +2763,25 @@ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ") (match_operand:DF 1 "general_operand" - "fm,f,G,*roF,F*r,C ,Y2*x,mY2*x,Y2*x"))] + 
"fm,f,G,*roF,*Fr,C ,Y2*x,mY2*x,Y2*x"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_size + && !memory_operand (operands[0], DFmode) && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], DFmode))" + || ((optimize_size + || !TARGET_MEMORY_MISMATCH_STALL + || reload_in_progress || reload_completed) + && memory_operand (operands[0], DFmode)))" { switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -2633,13 +2896,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -2760,13 +3018,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -2972,15 +3225,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\;fld%z0\t%y0"; - else - return "fstp%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -3007,15 +3253,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\;fld%z0\t%y0"; - else - return "fstp%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return standard_80387_constant_opcode (operands[1]); @@ -3030,6 +3269,59 @@ [(set_attr "type" "fmov,fmov,fmov,multi,multi") (set_attr "mode" "XF,XF,XF,SI,SI")]) +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_64BIT" +{ + ix86_expand_move (TFmode, operands); + DONE; +}) + +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o") + (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] + "TARGET_64BIT + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + case 1: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "ssemov,ssemov,sselog1,*,*") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,2") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + 
(const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + (define_split [(set (match_operand 0 "nonimmediate_operand" "") (match_operand 1 "general_operand" ""))] @@ -3050,7 +3342,8 @@ (match_operand 1 "memory_operand" ""))] "reload_completed && MEM_P (operands[1]) - && (GET_MODE (operands[0]) == XFmode + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode) && (operands[2] = find_constant_src (insn))" @@ -3081,7 +3374,8 @@ (float_extend (match_operand 1 "memory_operand" "")))] "reload_completed && MEM_P (operands[1]) - && (GET_MODE (operands[0]) == XFmode + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode) && (operands[2] = find_constant_src (insn))" @@ -3142,64 +3436,11 @@ operands[1] = CONST1_RTX (mode); }) -(define_expand "movtf" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "nonimmediate_operand" ""))] - "TARGET_64BIT" -{ - ix86_expand_move (TFmode, operands); - DONE; -}) - -(define_insn "*movtf_internal" - [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm") - (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))] - "TARGET_64BIT - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" -{ - switch (which_alternative) - { - case 0: - case 1: - return "#"; - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "pxor\t%0, %0"; - case 3: - case 4: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, %1}"; - default: - gcc_unreachable (); - } -} - [(set_attr "type" "*,*,sselog1,ssemov,ssemov") - (set (attr "mode") - (cond [(eq_attr "alternative" "2,3") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "4") - (if_then_else - (ior (ne 
(symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_size") - (const_int 0))) - (const_string "V4SF") - (const_string "TI"))] - (const_string "DI")))]) - (define_split [(set (match_operand:TF 0 "nonimmediate_operand" "") (match_operand:TF 1 "general_operand" ""))] - "reload_completed && !SSE_REG_P (operands[0]) - && !SSE_REG_P (operands[1])" + "reload_completed + && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))" [(const_int 0)] "ix86_split_long_move (operands); DONE;") @@ -3275,7 +3516,7 @@ [(set (match_operand:HI 0 "register_operand" "=r") (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed" - "movz{bl|x}\t{%1, %k0|%k0, %k1}" + "movz{bl|x}\t{%1, %k0|%k0, %1}" [(set_attr "type" "imovx") (set_attr "mode" "SI")]) @@ -3388,8 +3629,8 @@ ;; %%% Kill me once multi-word ops are sane. (define_expand "zero_extendsidi2" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))] + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))] "" { if (!TARGET_64BIT) @@ -3554,9 +3795,9 @@ emit_move_insn (operands[3], operands[1]); /* Generate a cltd if possible and doing so it profitable. */ - if (true_regnum (operands[1]) == 0 - && true_regnum (operands[2]) == 1 - && (optimize_size || TARGET_USE_CLTD)) + if ((optimize_size || TARGET_USE_CLTD) + && true_regnum (operands[1]) == AX_REG + && true_regnum (operands[2]) == DX_REG) { emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31))); } @@ -3585,8 +3826,8 @@ emit_move_insn (operands[3], operands[1]); /* Generate a cltd if possible and doing so it profitable. 
*/ - if (true_regnum (operands[3]) == 0 - && (optimize_size || TARGET_USE_CLTD)) + if ((optimize_size || TARGET_USE_CLTD) + && true_regnum (operands[3]) == AX_REG) { emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31))); DONE; @@ -3703,7 +3944,7 @@ ;; %%% Kill these when call knows how to work out a DFmode push earlier. (define_insn "*dummy_extendsfdf2" [(set (match_operand:DF 0 "push_operand" "=<") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY")))] + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY2")))] "0" "#") @@ -3780,7 +4021,50 @@ } }) -(define_insn "*extendsfdf2_mixed" +/* For converting SF(xmm2) to DF(xmm1), use the following code instead of + cvtss2sd: + unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs + cvtps2pd xmm2,xmm1 + We do the conversion post reload to avoid producing of 128bit spills + that might lead to ICE on 32bit target. The sequence unlikely combine + anyway. */ +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (float_extend:DF + (match_operand:SF 1 "nonimmediate_operand" "")))] + "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 2) + (float_extend:V2DF + (vec_select:V2SF + (match_dup 3) + (parallel [(const_int 0) (const_int 1)]))))] +{ + operands[2] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + operands[3] = simplify_gen_subreg (V4SFmode, operands[0], DFmode, 0); + /* Use movss for loading from memory, unpcklps reg, reg for registers. + Try to avoid move when unpacking can be done in source. */ + if (REG_P (operands[1])) + { + /* If it is unsafe to overwrite upper half of source, we need + to move to destination and unpack there. 
*/ + if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER + || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4) + && true_regnum (operands[0]) != true_regnum (operands[1])) + { + rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0])); + emit_move_insn (tmp, operands[1]); + } + else + operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); + emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3])); + } + else + emit_insn (gen_vec_setv4sf_0 (operands[3], + CONST0_RTX (V4SFmode), operands[1])); +}) + +(define_insn "*extendsfdf2_mixed" [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x") (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))] @@ -3789,13 +4073,8 @@ switch (which_alternative) { case 0: - return output_387_reg_move (insn, operands); - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); case 2: return "cvtss2sd\t{%1, %0|%0, %1}"; @@ -3819,73 +4098,13 @@ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m") (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] "TARGET_80387" -{ - switch (which_alternative) - { - case 0: - return output_387_reg_move (insn, operands); - - case 1: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "fmov") - (set_attr "mode" "SF,XF")]) - -(define_expand "extendsfxf2" - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:SF 1 "general_operand" "")))] - "TARGET_80387" -{ - /* ??? Needed for compress_float_constant since all fp constants - are LEGITIMATE_CONSTANT_P. 
*/ - if (GET_CODE (operands[1]) == CONST_DOUBLE) - { - if (standard_80387_constant_p (operands[1]) > 0) - { - operands[1] = simplify_const_unary_operation - (FLOAT_EXTEND, XFmode, operands[1], SFmode); - emit_move_insn_1 (operands[0], operands[1]); - DONE; - } - operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); - } -}) - -(define_insn "*extendsfxf2_i387" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") - (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] - "TARGET_80387" -{ - switch (which_alternative) - { - case 0: - return output_387_reg_move (insn, operands); - - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - - default: - gcc_unreachable (); - } -} + "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") (set_attr "mode" "SF,XF")]) -(define_expand "extenddfxf2" +(define_expand "extendxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:DF 1 "general_operand" "")))] + (float_extend:XF (match_operand:MODEF 1 "general_operand" "")))] "TARGET_80387" { /* ??? 
Needed for compress_float_constant since all fp constants @@ -3895,38 +4114,22 @@ if (standard_80387_constant_p (operands[1]) > 0) { operands[1] = simplify_const_unary_operation - (FLOAT_EXTEND, XFmode, operands[1], DFmode); + (FLOAT_EXTEND, XFmode, operands[1], mode); emit_move_insn_1 (operands[0], operands[1]); DONE; } - operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); + operands[1] = validize_mem (force_const_mem (mode, operands[1])); } }) -(define_insn "*extenddfxf2_i387" +(define_insn "*extendxf2_i387" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") - (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] + (float_extend:XF + (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))] "TARGET_80387" -{ - switch (which_alternative) - { - case 0: - return output_387_reg_move (insn, operands); - - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - else - return "fstp%z0\t%y0"; - - default: - gcc_unreachable (); - } -} + "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") - (set_attr "mode" "DF,XF")]) + (set_attr "mode" ",XF")]) ;; %%% This seems bad bad news. ;; This cannot output into an f-reg because there is no way to be sure @@ -3948,12 +4151,58 @@ ; else { - rtx temp = assign_386_stack_local (SFmode, SLOT_TEMP); + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + rtx temp = assign_386_stack_local (SFmode, slot); emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp)); DONE; } }) +/* For converting DF(xmm2) to SF(xmm1), use the following code instead of + cvtsd2ss: + unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs + cvtpd2ps xmm2,xmm1 + We do the conversion post reload to avoid producing of 128bit spills + that might lead to ICE on 32bit target. 
The sequence unlikely combine + anyway. */ +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "")))] + "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 2) + (vec_concat:V4SF + (float_truncate:V2SF + (match_dup 4)) + (match_dup 3)))] +{ + operands[2] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + operands[3] = CONST0_RTX (V2SFmode); + operands[4] = simplify_gen_subreg (V2DFmode, operands[0], SFmode, 0); + /* Use movsd for loading from memory, unpcklpd for registers. + Try to avoid move when unpacking can be done in source, or SSE3 + movddup is available. */ + if (REG_P (operands[1])) + { + if (!TARGET_SSE3 + && true_regnum (operands[0]) != true_regnum (operands[1]) + && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER + || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8)) + { + rtx tmp = simplify_gen_subreg (DFmode, operands[0], SFmode, 0); + emit_move_insn (tmp, operands[1]); + operands[1] = tmp; + } + else if (!TARGET_SSE3) + operands[4] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + emit_insn (gen_vec_dupv2df (operands[4], operands[1])); + } + else + emit_insn (gen_sse2_loadlpd (operands[4], + CONST0_RTX (V2DFmode), operands[1])); +}) + (define_expand "truncdfsf2_with_temp" [(parallel [(set (match_operand:SF 0 "" "") (float_truncate:SF (match_operand:DF 1 "" ""))) @@ -3961,27 +4210,22 @@ "") (define_insn "*truncdfsf_fast_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,f,x") + [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,x") (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "f ,f,xm")))] + (match_operand:DF 1 "nonimmediate_operand" "f ,xm")))] "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations" { switch (which_alternative) { case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return 
"fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - case 1: return output_387_reg_move (insn, operands); - case 2: + case 1: return "cvtsd2ss\t{%1, %0|%0, %1}"; default: gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,ssecvt") + [(set_attr "type" "fmov,ssecvt") (set_attr "mode" "SF")]) ;; Yes, this one doesn't depend on flag_unsafe_math_optimizations, @@ -4014,10 +4258,8 @@ switch (which_alternative) { case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); + case 1: return "#"; case 2: @@ -4040,10 +4282,8 @@ switch (which_alternative) { case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); + case 1: return "#"; default: @@ -4061,12 +4301,7 @@ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) && !TARGET_MIX_SSE_I387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} + "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") (set_attr "mode" "SF")]) @@ -4082,162 +4317,93 @@ operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1])); }) -;; Conversion from XFmode to SFmode. +;; Conversion from XFmode to {SF,DF}mode -(define_expand "truncxfsf2" - [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") - (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) +(define_expand "truncxf2" + [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) (clobber (match_dup 2))])] "TARGET_80387" { if (flag_unsafe_math_optimizations) { - rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SFmode); - emit_insn (gen_truncxfsf2_i387_noop (reg, operands[1])); + rtx reg = REG_P (operands[0]) ? 
operands[0] : gen_reg_rtx (mode); + emit_insn (gen_truncxf2_i387_noop (reg, operands[1])); if (reg != operands[0]) emit_move_insn (operands[0], reg); DONE; } else - operands[2] = assign_386_stack_local (SFmode, SLOT_TEMP); + { + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + operands[2] = assign_386_stack_local (mode, slot); + } }) (define_insn "*truncxfsf2_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r,?x") + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?fx*r") (float_truncate:SF - (match_operand:XF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] + (match_operand:XF 1 "register_operand" "f,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m"))] "TARGET_80387" { gcc_assert (!which_alternative); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); } - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "unit" "*,i387,i387,i387") - (set_attr "mode" "SF")]) - -(define_insn "truncxfsf2_i387_noop" - [(set (match_operand:SF 0 "register_operand" "=f") - (float_truncate:SF (match_operand:XF 1 "register_operand" "f")))] - "TARGET_80387 && flag_unsafe_math_optimizations" - "* return output_387_reg_move (insn, operands);" - [(set_attr "type" "fmov") - (set_attr "mode" "SF")]) - -(define_insn "*truncxfsf2_i387" - [(set (match_operand:SF 0 "memory_operand" "=m") - (float_truncate:SF - (match_operand:XF 1 "register_operand" "f")))] - "TARGET_80387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} - [(set_attr "type" "fmov") + [(set_attr "type" "fmov,multi") + (set_attr "unit" "*,i387") (set_attr "mode" "SF")]) -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] 
- "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:SF (match_dup 1))) - (set (match_dup 0) (match_dup 2))] - "") - -(define_split - [(set (match_operand:SF 0 "memory_operand" "") - (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") - -;; Conversion from XFmode to DFmode. - -(define_expand "truncxfdf2" - [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") - (float_truncate:DF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" -{ - if (flag_unsafe_math_optimizations) - { - rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DFmode); - emit_insn (gen_truncxfdf2_i387_noop (reg, operands[1])); - if (reg != operands[0]) - emit_move_insn (operands[0], reg); - DONE; - } - else - operands[2] = assign_386_stack_local (DFmode, SLOT_TEMP); -}) - (define_insn "*truncxfdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r,?Y2*x") + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?fY2*r") (float_truncate:DF - (match_operand:XF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] + (match_operand:XF 1 "register_operand" "f,f"))) + (clobber (match_operand:DF 2 "memory_operand" "=X,m"))] "TARGET_80387" { gcc_assert (!which_alternative); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; + return output_387_reg_move (insn, operands); } - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "unit" "*,i387,i387,i387") + [(set_attr "type" "fmov,multi") + (set_attr "unit" "*,i387") (set_attr "mode" "DF")]) -(define_insn "truncxfdf2_i387_noop" - [(set (match_operand:DF 0 "register_operand" "=f") - (float_truncate:DF (match_operand:XF 1 "register_operand" "f")))] +(define_insn "truncxf2_i387_noop" + [(set (match_operand:MODEF 
0 "register_operand" "=f") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" "f")))] "TARGET_80387 && flag_unsafe_math_optimizations" "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) -(define_insn "*truncxfdf2_i387" - [(set (match_operand:DF 0 "memory_operand" "=m") - (float_truncate:DF +(define_insn "*truncxf2_i387" + [(set (match_operand:MODEF 0 "memory_operand" "=m") + (float_truncate:MODEF (match_operand:XF 1 "register_operand" "f")))] "TARGET_80387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} + "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) (define_split - [(set (match_operand:DF 0 "register_operand" "") - (float_truncate:DF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" ""))] + [(set (match_operand:MODEF 0 "register_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:MODEF 2 "memory_operand" ""))] "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:DF (match_dup 1))) + [(set (match_dup 2) (float_truncate:MODEF (match_dup 1))) (set (match_dup 0) (match_dup 2))] "") (define_split - [(set (match_operand:DF 0 "memory_operand" "") - (float_truncate:DF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" ""))] + [(set (match_operand:MODEF 0 "memory_operand" "") + (float_truncate:MODEF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:MODEF 2 "memory_operand" ""))] "TARGET_80387" - [(set (match_dup 0) (float_truncate:DF (match_dup 1)))] + [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))] "") ;; Signed conversion to DImode. 
@@ -4257,7 +4423,7 @@ (define_expand "fix_truncdi2" [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:SSEMODEF 1 "register_operand" ""))) + (fix:DI (match_operand:MODEF 1 "register_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (mode))" { @@ -4294,7 +4460,7 @@ (define_expand "fix_truncsi2" [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:SSEMODEF 1 "register_operand" ""))) + (fix:SI (match_operand:MODEF 1 "register_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "TARGET_80387 || SSE_FLOAT_MODE_P (mode)" { @@ -4336,7 +4502,7 @@ [(parallel [(set (match_operand:SI 0 "register_operand" "") (unsigned_fix:SI - (match_operand:SSEMODEF 1 "nonimmediate_operand" ""))) + (match_operand:MODEF 1 "nonimmediate_operand" ""))) (use (match_dup 2)) (clobber (match_scratch: 3 "")) (clobber (match_scratch: 4 ""))])] @@ -4356,7 +4522,7 @@ (define_insn_and_split "*fixuns_trunc_1" [(set (match_operand:SI 0 "register_operand" "=&x,&x") (unsigned_fix:SI - (match_operand:SSEMODEF 3 "nonimmediate_operand" "xm,xm"))) + (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm"))) (use (match_operand: 4 "nonimmediate_operand" "m,x")) (clobber (match_scratch: 1 "=x,&x")) (clobber (match_scratch: 2 "=x,x"))] @@ -4375,7 +4541,7 @@ (define_expand "fixuns_trunchi2" [(set (match_dup 2) - (fix:SI (match_operand:SSEMODEF 1 "nonimmediate_operand" ""))) + (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" ""))) (set (match_operand:HI 0 "nonimmediate_operand" "") (subreg:HI (match_dup 2) 0))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" @@ -4384,7 +4550,7 @@ ;; When SSE is available, it is always faster to use it! 
(define_insn "fix_truncdi_sse" [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (match_operand:SSEMODEF 1 "nonimmediate_operand" "x,xm")))] + (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] "TARGET_64BIT && SSE_FLOAT_MODE_P (mode) && (!TARGET_FISTTP || TARGET_SSE_MATH)" "cvtts2si{q}\t{%1, %0|%0, %1}" @@ -4395,7 +4561,7 @@ (define_insn "fix_truncsi_sse" [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI (match_operand:SSEMODEF 1 "nonimmediate_operand" "x,xm")))] + (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] "SSE_FLOAT_MODE_P (mode) && (!TARGET_FISTTP || TARGET_SSE_MATH)" "cvtts2si\t{%1, %0|%0, %1}" @@ -4406,8 +4572,8 @@ ;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns. (define_peephole2 - [(set (match_operand:SSEMODEF 0 "register_operand" "") - (match_operand:SSEMODEF 1 "memory_operand" "")) + [(set (match_operand:MODEF 0 "register_operand" "") + (match_operand:MODEF 1 "memory_operand" "")) (set (match_operand:SSEMODEI24 2 "register_operand" "") (fix:SSEMODEI24 (match_dup 0)))] "TARGET_SHORTEN_X87_SSE @@ -4435,10 +4601,10 @@ "") (define_insn_and_split "fix_trunc_fisttp_i387_1" - [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") - (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))] - "TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" "")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && (TARGET_64BIT || mode != DImode)) && TARGET_SSE_MATH) @@ -4465,8 +4631,8 @@ [(set (match_operand:X87MODEI 0 "memory_operand" "=m") (fix:X87MODEI (match_operand 1 "register_operand" "f"))) (clobber (match_scratch:XF 2 "=&1f"))] - "TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && 
(TARGET_64BIT || mode != DImode)) && TARGET_SSE_MATH)" @@ -4477,10 +4643,10 @@ (define_insn "fix_trunc_i387_fisttp_with_temp" [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) - (clobber (match_operand:X87MODEI 2 "memory_operand" "=m,m")) + (clobber (match_operand:X87MODEI 2 "memory_operand" "=X,m")) (clobber (match_scratch:XF 3 "=&1f,&1f"))] - "TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && (TARGET_64BIT || mode != DImode)) && TARGET_SSE_MATH)" @@ -4515,11 +4681,11 @@ ;; clobbering insns can be used. Look at emit_i387_cw_initialization () ;; function in i386.c. (define_insn_and_split "*fix_trunc_i387_1" - [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") - (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && !TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && (TARGET_64BIT || mode != DImode)) && !(reload_completed || reload_in_progress)" @@ -4553,8 +4719,8 @@ (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m")) (clobber (match_scratch:XF 4 "=&1f"))] - "TARGET_80387 && !TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" "* return output_fix_trunc (insn, operands, 0);" [(set_attr "type" "fistp") @@ -4566,10 +4732,10 @@ (fix:DI (match_operand 1 "register_operand" "f,f"))) (use (match_operand:HI 2 "memory_operand" "m,m")) (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber 
(match_operand:DI 4 "memory_operand" "=m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=X,m")) (clobber (match_scratch:XF 5 "=&1f,&1f"))] - "TARGET_80387 && !TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" "#" [(set_attr "type" "fistp") @@ -4610,8 +4776,8 @@ (fix:X87MODEI12 (match_operand 1 "register_operand" "f"))) (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m"))] - "TARGET_80387 && !TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" "* return output_fix_trunc (insn, operands, 0);" [(set_attr "type" "fistp") @@ -4623,9 +4789,9 @@ (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f"))) (use (match_operand:HI 2 "memory_operand" "m,m")) (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))] - "TARGET_80387 && !TARGET_FISTTP - && FLOAT_MODE_P (GET_MODE (operands[1])) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" "#" [(set_attr "type" "fistp") @@ -4675,318 +4841,634 @@ (set_attr "mode" "HI") (set_attr "unit" "i387") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector")]) ;; Conversion between fixed point and floating point. ;; Even though we only accept memory inputs, the backend _really_ ;; wants to be able to do this between registers. 
-(define_expand "floathisf2" - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:HI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" +(define_expand "floathi2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "") + +;; Pre-reload splitter to add memory clobber to the pattern. +(define_insn_and_split "*floathi2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "register_operand" "")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (float:X87MODEF (match_dup 1))) + (clobber (match_dup 2))])] + "operands[2] = assign_386_stack_local (HImode, SLOT_TEMP);") + +(define_insn "*floathi2_i387_with_temp" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_operand:HI 2 "memory_operand" "=m,m"))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "#" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floathi2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF (match_operand:HI 1 "memory_operand" "m")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "fild%z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "register_operand" ""))) + (clobber (match_operand:HI 2 "memory_operand" ""))] + "TARGET_80387 + && 
(!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] + "") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "memory_operand" ""))) + (clobber (match_operand:HI 2 "memory_operand" ""))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && reload_completed" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") + +(define_expand "float2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))] + "TARGET_80387 + || ((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "") + +;; Pre-reload splitter to add memory clobber to the pattern. +(define_insn_and_split "*float2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))] + "((TARGET_80387 + && (!((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)) + || ((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode == SImode + && TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && flag_trapping_math) + || !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)))) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1))) + (clobber (match_dup 2))])] { - if (TARGET_SSE_MATH) + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + + /* Avoid store forwarding (partial memory) stall penalty + by passing DImode value through XMM registers. 
*/ + if (mode == DImode && !TARGET_64BIT + && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && !optimize_size) { - emit_insn (gen_floatsisf2 (operands[0], - convert_to_mode (SImode, operands[1], 0))); + emit_insn (gen_floatdi2_i387_with_xmm (operands[0], + operands[1], + operands[2])); DONE; } }) -(define_insn "*floathisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (float:SF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" +(define_insn "*floatsi2_vector_mixed_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "m,?r,r,m,!x"))) + (clobber (match_operand:SI 2 "memory_operand" "=X,m,m,X,m"))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && TARGET_USE_VECTOR_CONVERTS && !optimize_size" + "#" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt") + (set_attr "mode" ",,,,") + (set_attr "unit" "*,i387,*,*,*") + (set_attr "athlon_decode" "*,*,double,direct,double") + (set_attr "amdfam10_decode" "*,*,vector,double,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsi2_vector_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (float:MODEF (match_operand:SI 1 "memory_operand" "m,m")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && TARGET_USE_VECTOR_CONVERTS && !optimize_size" "@ fild%z1\t%1 #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "SF") - (set_attr "unit" "*,i387") + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" ",") + (set_attr "unit" "i387,*") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") (set_attr "fp_int_src" "true")]) -(define_expand "floatsisf2" - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" - "") - -(define_insn "*floatsisf2_mixed" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") - (float:SF 
(match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))] - "TARGET_MIX_SSE_I387" - "@ - fild%z1\t%1 - # - cvtsi2ss\t{%1, %0|%0, %1} - cvtsi2ss\t{%1, %0|%0, %1}" +(define_insn "*float2_mixed_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r,r,m"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m,m,X"))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387" + "#" [(set_attr "type" "fmov,multi,sseicvt,sseicvt") - (set_attr "mode" "SF") + (set_attr "mode" "") (set_attr "unit" "*,i387,*,*") - (set_attr "athlon_decode" "*,*,vector,double") + (set_attr "athlon_decode" "*,*,double,direct") (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatsisf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x,x") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))] - "TARGET_SSE_MATH" - "cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "vector,double") - (set_attr "amdfam10_decode" "vector,double") - (set_attr "fp_int_src" "true")]) +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && TARGET_INTER_UNIT_CONVERSIONS + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") -(define_insn "*floatsisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 
"register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:MODEF (match_dup 2)))] + "") + +(define_insn "*float2_mixed_interunit" + [(set (match_operand:MODEF 0 "register_operand" "=f,x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,r,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" "@ fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "SF") - (set_attr "unit" "*,i387") + cvtsi2s\t{%1, %0|%0, %1} + cvtsi2s\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,sseicvt,sseicvt") + (set_attr "mode" "") + (set_attr "unit" "i387,*,*") + (set_attr "athlon_decode" "*,double,direct") + (set_attr "amdfam10_decode" "*,vector,double") (set_attr "fp_int_src" "true")]) -(define_expand "floatdisf2" - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)" - "") - -(define_insn "*floatdisf2_mixed" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))] - "TARGET_64BIT && TARGET_MIX_SSE_I387" +(define_insn "*float2_mixed_nointerunit" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" "@ fild%z1\t%1 - # - cvtsi2ss{q}\t{%1, %0|%0, %1} - cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" 
"fmov,multi,sseicvt,sseicvt") - (set_attr "mode" "SF") - (set_attr "unit" "*,i387,*,*") - (set_attr "athlon_decode" "*,*,vector,double") - (set_attr "amdfam10_decode" "*,*,vector,double") + cvtsi2s\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdisf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x,x") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))] - "TARGET_64BIT && TARGET_SSE_MATH" - "cvtsi2ss{q}\t{%1, %0|%0, %1}" +(define_insn "*floatsi2_vector_sse_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,x") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "r,m,!x"))) + (clobber (match_operand:SI 2 "memory_operand" "=m,X,m"))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size" + "#" [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "vector,double") - (set_attr "amdfam10_decode" "vector,double") + (set_attr "mode" ",,") + (set_attr "athlon_decode" "double,direct,double") + (set_attr "amdfam10_decode" "vector,double,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "SF") - (set_attr "unit" "*,i387") +(define_insn "*floatsi2_vector_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (float:MODEF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") (set_attr "fp_int_src" "true")]) -(define_expand "floathidf2" - [(set (match_operand:DF 
0 "register_operand" "") - (float:DF (match_operand:HI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:SI 2 "memory_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] { - if (TARGET_SSE2 && TARGET_SSE_MATH) + rtx op1 = operands[1]; + + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + + if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) { - emit_insn (gen_floatsidf2 (operands[0], - convert_to_mode (SImode, operands[1], 0))); - DONE; + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); } + /* We can ignore possible trapping value in the + high part of SSE register for non-trapping math. 
*/ + else if (SSE_REG_P (op1) && !flag_trapping_math) + operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0); + else + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[2])); + } + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; }) -(define_insn "*floathidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387 && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "DF") - (set_attr "unit" "*,i387") - (set_attr "fp_int_src" "true")]) - -(define_expand "floatsidf2" - [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "") - -(define_insn "*floatsidf2_mixed" - [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))] - "TARGET_SSE2 && TARGET_MIX_SSE_I387" - "@ - fild%z1\t%1 - # - cvtsi2sd\t{%1, %0|%0, %1} - cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sseicvt,sseicvt") - (set_attr "mode" "DF") - (set_attr "unit" "*,i387,*,*") - (set_attr "athlon_decode" "*,*,double,direct") - (set_attr "amdfam10_decode" "*,*,vector,double") - (set_attr "fp_int_src" "true")]) - -(define_insn "*floatsidf2_sse" - [(set (match_operand:DF 0 "register_operand" "=x,x") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,direct") - (set_attr "amdfam10_decode" "vector,double") - (set_attr "fp_int_src" "true")]) - -(define_insn "*floatsidf2_i387" - [(set (match_operand:DF 0 
"register_operand" "=f,f") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "DF") - (set_attr "unit" "*,i387") - (set_attr "fp_int_src" "true")]) +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:SI 2 "memory_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); -(define_expand "floatdidf2" - [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "register_operand" "")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] { - if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH) + rtx op1 = operands[1]; + + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + + if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) { - ix86_expand_convert_sign_didf_sse (operands[0], operands[1]); - DONE; + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), 
operands[1])); } + /* We can ignore possible trapping value in the + high part of SSE register for non-trapping math. */ + else if (SSE_REG_P (op1) && !flag_trapping_math) + operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0); + else + gcc_unreachable (); }) -(define_insn "*floatdidf2_mixed" - [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387" - "@ - fild%z1\t%1 - # - cvtsi2sd{q}\t{%1, %0|%0, %1} - cvtsi2sd{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sseicvt,sseicvt") - (set_attr "mode" "DF") - (set_attr "unit" "*,i387,*,*") - (set_attr "athlon_decode" "*,*,double,direct") - (set_attr "amdfam10_decode" "*,*,vector,double") +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "memory_operand" "")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_insn "*float2_sse_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,X"))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdidf2_sse" - [(set 
(match_operand:DF 0 "register_operand" "=x,x") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH" - "cvtsi2sd{q}\t{%1, %0|%0, %1}" +(define_insn "*float2_sse_interunit" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" + "cvtsi2s\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") + (set_attr "mode" "") (set_attr "athlon_decode" "double,direct") (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "DF") - (set_attr "unit" "*,i387") +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "nonimmediate_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") + +(define_insn "*float2_sse_nointerunit" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (float:MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" + "cvtsi2s\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "direct") + (set_attr 
"amdfam10_decode" "double") (set_attr "fp_int_src" "true")]) -(define_insn "floathixf2" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:MODEF (match_dup 2)))] + "") + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "memory_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") + +(define_insn "*float2_i387_with_temp" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m"))] "TARGET_80387" "@ fild%z1\t%1 #" [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") + (set_attr "mode" "") (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) -(define_insn "floatsixf2" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] +(define_insn "*float2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m")))] "TARGET_80387" - "@ - fild%z1\t%1 - #" - 
[(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") - (set_attr "unit" "*,i387") + "fild%z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "") (set_attr "fp_int_src" "true")]) -(define_insn "floatdixf2" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") - (set_attr "unit" "*,i387") +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "TARGET_80387 + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] + "") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "TARGET_80387 + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") + +;; Avoid store forwarding (partial memory) stall penalty +;; by passing DImode value through XMM registers. */ + +(define_insn "floatdi2_i387_with_xmm" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF + (match_operand:DI 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_scratch:V4SI 3 "=X,x")) + (clobber (match_scratch:V4SI 4 "=X,x")) + (clobber (match_operand:DI 2 "memory_operand" "=X,m"))] + "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && !TARGET_64BIT && !optimize_size" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "") + (set_attr "unit" "i387") (set_attr "fp_int_src" "true")]) -;; %%% Kill these when reload knows how to do it. 
(define_split - [(set (match_operand 0 "fp_register_operand" "") - (float (match_operand 1 "register_operand" "")))] - "reload_completed - && TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[0]))" - [(const_int 0)] + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:DI 1 "register_operand" ""))) + (clobber (match_scratch:V4SI 3 "")) + (clobber (match_scratch:V4SI 4 "")) + (clobber (match_operand:DI 2 "memory_operand" ""))] + "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && !TARGET_64BIT && !optimize_size + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] { - operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); - operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]); - emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2])); - ix86_free_from_memory (GET_MODE (operands[1])); - DONE; + /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). + Assemble the 64-bit DImode value in an xmm register. 
*/ + emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, operands[1], 0))); + emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, operands[1], 4))); + emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4])); + + operands[3] = gen_rtx_REG (DImode, REGNO (operands[3])); }) -(define_expand "floatunssisf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SI 1 "nonimmediate_operand" ""))] - "!TARGET_64BIT" +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:DI 1 "memory_operand" ""))) + (clobber (match_scratch:V4SI 3 "")) + (clobber (match_scratch:V4SI 4 "")) + (clobber (match_operand:DI 2 "memory_operand" ""))] + "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && !TARGET_64BIT && !optimize_size + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") + +;; Avoid store forwarding (partial memory) stall penalty by extending +;; SImode value to DImode through XMM register instead of pushing two +;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES +;; targets benefit from this optimization. Also note that fild +;; loads from memory only. 
+ +(define_insn "*floatunssi2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" "x,m"))) + (clobber (match_operand:DI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:SI 3 "=X,x"))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] + "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] { - if (TARGET_SSE_MATH && TARGET_SSE2) - ix86_expand_convert_uns_sisf_sse (operands[0], operands[1]); - else - x86_emit_floatuns (operands); - DONE; + emit_move_insn (operands[3], operands[1]); + operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0); }) -(define_expand "floatunssidf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:SI 1 "nonimmediate_operand" ""))] - "!TARGET_64BIT && TARGET_SSE_MATH && TARGET_SSE2" - "ix86_expand_convert_uns_sidf_sse (operands[0], operands[1]); DONE;") +(define_expand "floatunssi2" + [(parallel + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_dup 2)) + 
(clobber (match_scratch:SI 3 ""))])] + "!TARGET_64BIT + && ((TARGET_80387 && TARGET_SSE) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + { + ix86_expand_convert_uns_si_sse (operands[0], operands[1]); + DONE; + } + else + { + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + operands[2] = assign_386_stack_local (DImode, slot); + } +}) (define_expand "floatunsdisf2" [(use (match_operand:SF 0 "register_operand" "")) @@ -4997,8 +5479,8 @@ (define_expand "floatunsdidf2" [(use (match_operand:DF 0 "register_operand" "")) (use (match_operand:DI 1 "nonimmediate_operand" ""))] - "TARGET_SSE_MATH && TARGET_SSE2 - && (TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)" + "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK) + && TARGET_SSE2 && TARGET_SSE_MATH" { if (TARGET_64BIT) x86_emit_floatuns (operands); @@ -5007,9 +5489,6 @@ DONE; }) -;; SSE extract/set expanders - - ;; Add instructions ;; %%% splits for addditi3 @@ -5044,9 +5523,7 @@ (match_dup 4)) (match_dup 5))) (clobber (reg:CC FLAGS_REG))])] - "split_ti (operands+0, 1, operands+0, operands+3); - split_ti (operands+1, 1, operands+1, operands+4); - split_ti (operands+2, 1, operands+2, operands+5);") + "split_ti (&operands[0], 3, &operands[0], &operands[3]);") ;; %%% splits for addsidi3 ; [(set (match_operand:DI 0 "nonimmediate_operand" "") @@ -5083,9 +5560,7 @@ (match_dup 4)) (match_dup 5))) (clobber (reg:CC FLAGS_REG))])] - "split_di (operands+0, 1, operands+0, operands+3); - split_di (operands+1, 1, operands+1, operands+4); - split_di (operands+2, 1, operands+2, operands+5);") + "split_di (&operands[0], 3, &operands[0], &operands[3]);") (define_insn "adddi3_carry_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") @@ -5111,6 +5586,56 @@ [(set_attr "type" "alu") (set_attr "mode" "DI")]) +(define_insn "*3_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plusminus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + 
(match_operand:SWI 2 "" ",m")) + (match_dup 1))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (plusminus:SWI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (, mode, operands)" + "{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*add3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI (match_operand:SWI 1 "nonimmediate_operand" "%0") + (match_operand:SWI 2 "" "m")) + (match_dup 1))) + (clobber (match_scratch:SWI 0 "="))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "add{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*sub3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "m,") + (match_operand:SWI 1 "" ",m")) + (match_dup 0)))] + "" + "cmp{}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "")]) + +(define_insn "*si3_zext_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plusminus:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "g")) + (match_dup 1))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" + "{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + (define_insn "addqi3_carry" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "") @@ -5152,7 +5677,7 @@ (zero_extend:DI (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") (match_operand:SI 1 "nonimmediate_operand" "%0")) - (match_operand:SI 2 "general_operand" "rim")))) + (match_operand:SI 2 "general_operand" "g")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" "adc{l}\t{%2, %k0|%k0, %2}" @@ -6730,18 +7255,11 @@ "TARGET_80387" "") -(define_expand 
"adddf3" - [(set (match_operand:DF 0 "register_operand" "") - (plus:DF (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "") - -(define_expand "addsf3" - [(set (match_operand:SF 0 "register_operand" "") - (plus:SF (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" +(define_expand "add3" + [(set (match_operand:MODEF 0 "register_operand" "") + (plus:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "") ;; Subtract instructions @@ -6777,9 +7295,7 @@ (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) (match_dup 5)))) (clobber (reg:CC FLAGS_REG))])] - "split_ti (operands+0, 1, operands+0, operands+3); - split_ti (operands+1, 1, operands+1, operands+4); - split_ti (operands+2, 1, operands+2, operands+5);") + "split_ti (&operands[0], 3, &operands[0], &operands[3]);") ;; %%% splits for subsidi3 @@ -6812,9 +7328,7 @@ (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0)) (match_dup 5)))) (clobber (reg:CC FLAGS_REG))])] - "split_di (operands+0, 1, operands+0, operands+3); - split_di (operands+1, 1, operands+1, operands+4); - split_di (operands+2, 1, operands+2, operands+5);") + "split_di (&operands[0], 3, &operands[0], &operands[3]);") (define_insn "subdi3_carry_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") @@ -6901,11 +7415,11 @@ (set_attr "mode" "SI")]) (define_insn "subsi3_carry_zext" - [(set (match_operand:DI 0 "register_operand" "=rm,r") + [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (minus:SI (match_operand:SI 1 "register_operand" "0,0") + (minus:SI (match_operand:SI 1 "register_operand" "0") (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") - (match_operand:SI 2 "general_operand" "ri,rm"))))) + (match_operand:SI 2 
"general_operand" "g"))))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" "sbb{l}\t{%2, %k0|%k0, %2}" @@ -6935,7 +7449,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "general_operand" "rim")))) + (match_operand:SI 2 "general_operand" "g")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" "sub{l}\t{%2, %k0|%k0, %2}" @@ -6960,7 +7474,7 @@ [(set (reg FLAGS_REG) (compare (minus:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI @@ -6987,7 +7501,7 @@ (define_insn "*subsi_3_zext" [(set (reg FLAGS_REG) (compare (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "general_operand" "rim"))) + (match_operand:SI 2 "general_operand" "g"))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (minus:SI (match_dup 1) @@ -7106,18 +7620,11 @@ "TARGET_80387" "") -(define_expand "subdf3" - [(set (match_operand:DF 0 "register_operand" "") - (minus:DF (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "") - -(define_expand "subsf3" - [(set (match_operand:SF 0 "register_operand" "") - (minus:SF (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" +(define_expand "sub3" + [(set (match_operand:MODEF 0 "register_operand" "") + (minus:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "") ;; Multiply instructions @@ -7130,11 +7637,11 @@ "TARGET_64BIT" "") -;; On AMDFAM10 +;; On AMDFAM10 ;; IMUL reg64, 
reg64, imm8 Direct ;; IMUL reg64, mem64, imm8 VectorPath ;; IMUL reg64, reg64, imm32 Direct -;; IMUL reg64, mem64, imm32 VectorPath +;; IMUL reg64, mem64, imm32 VectorPath ;; IMUL reg64, reg64 Direct ;; IMUL reg64, mem64 Direct @@ -7164,7 +7671,7 @@ (cond [(and (eq_attr "alternative" "0,1") (match_operand 1 "memory_operand" "")) (const_string "vector")] - (const_string "direct"))) + (const_string "direct"))) (set_attr "mode" "DI")]) (define_expand "mulsi3" @@ -7175,7 +7682,7 @@ "" "") -;; On AMDFAM10 +;; On AMDFAM10 ;; IMUL reg32, reg32, imm8 Direct ;; IMUL reg32, mem32, imm8 VectorPath ;; IMUL reg32, reg32, imm32 Direct @@ -7208,7 +7715,7 @@ (cond [(and (eq_attr "alternative" "0,1") (match_operand 1 "memory_operand" "")) (const_string "vector")] - (const_string "direct"))) + (const_string "direct"))) (set_attr "mode" "SI")]) (define_insn "*mulsi3_1_zext" @@ -7238,7 +7745,7 @@ (cond [(and (eq_attr "alternative" "0,1") (match_operand 1 "memory_operand" "")) (const_string "vector")] - (const_string "direct"))) + (const_string "direct"))) (set_attr "mode" "SI")]) (define_expand "mulhi3" @@ -7306,7 +7813,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "direct"))) - (set_attr "amdfam10_decode" "direct") + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "umulqihi3" @@ -7333,7 +7840,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "direct"))) - (set_attr "amdfam10_decode" "direct") + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "mulqihi3" @@ -7358,7 +7865,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "direct"))) - (set_attr "amdfam10_decode" "direct") + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "umulditi3" @@ -7385,7 +7892,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) - (set_attr "amdfam10_decode" "double") + (set_attr 
"amdfam10_decode" "double") (set_attr "mode" "DI")]) ;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers @@ -7413,7 +7920,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) - (set_attr "amdfam10_decode" "double") + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_expand "mulditi3" @@ -7467,7 +7974,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) - (set_attr "amdfam10_decode" "double") + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_expand "umuldi3_highpart" @@ -7504,7 +8011,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) - (set_attr "amdfam10_decode" "double") + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) (define_expand "umulsi3_highpart" @@ -7567,7 +8074,7 @@ (set_attr "mode" "SI")]) (define_expand "smuldi3_highpart" - [(parallel [(set (match_operand:DI 0 "register_operand" "=d") + [(parallel [(set (match_operand:DI 0 "register_operand" "") (truncate:DI (lshiftrt:TI (mult:TI (sign_extend:TI @@ -7668,19 +8175,15 @@ "TARGET_80387" "") -(define_expand "muldf3" - [(set (match_operand:DF 0 "register_operand" "") - (mult:DF (match_operand:DF 1 "register_operand" "") - (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +(define_expand "mul3" + [(set (match_operand:MODEF 0 "register_operand" "") + (mult:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "") -(define_expand "mulsf3" - [(set (match_operand:SF 0 "register_operand" "") - (mult:SF (match_operand:SF 1 "register_operand" "") - (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" - "") +;; SSE5 scalar multiply/add instructions are defined in sse.md. 
+ ;; Divide instructions @@ -7725,7 +8228,16 @@ (div:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] "TARGET_80387 || TARGET_SSE_MATH" - "") +{ + if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swdivsf (operands[0], operands[1], + operands[2], SFmode); + DONE; + } +}) ;; Remainder instructions. @@ -8521,7 +9033,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")))) + (match_operand:SI 2 "general_operand" "g")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)" "and{l}\t{%2, %k0|%k0, %2}" @@ -8531,7 +9043,7 @@ (define_insn "*andsi_2" [(set (reg FLAGS_REG) (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rim,ri")) + (match_operand:SI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") (and:SI (match_dup 1) (match_dup 2)))] @@ -8545,7 +9057,7 @@ (define_insn "*andsi_2_zext" [(set (reg FLAGS_REG) (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] @@ -8590,7 +9102,7 @@ (define_insn "*andhi_2" [(set (reg FLAGS_REG) (compare (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rim,ri")) + (match_operand:HI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (and:HI (match_dup 1) (match_dup 2)))] @@ -8899,7 +9411,7 @@ (define_insn "*iorsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (ior:SI 
(match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "ri,rmi"))) + (match_operand:SI 2 "general_operand" "ri,g"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (IOR, SImode, operands)" "or{l}\t{%2, %0|%0, %2}" @@ -8908,10 +9420,10 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*iorsi_1_zext" - [(set (match_operand:DI 0 "register_operand" "=rm") + [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")))) + (match_operand:SI 2 "general_operand" "g")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (IOR, SImode, operands)" "or{l}\t{%2, %k0|%k0, %2}" @@ -8919,7 +9431,7 @@ (set_attr "mode" "SI")]) (define_insn "*iorsi_1_zext_imm" - [(set (match_operand:DI 0 "register_operand" "=rm") + [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) (clobber (reg:CC FLAGS_REG))] @@ -8931,7 +9443,7 @@ (define_insn "*iorsi_2" [(set (reg FLAGS_REG) (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rim,ri")) + (match_operand:SI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") (ior:SI (match_dup 1) (match_dup 2)))] @@ -8946,7 +9458,7 @@ (define_insn "*iorsi_2_zext" [(set (reg FLAGS_REG) (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ior:SI (match_dup 1) (match_dup 2))))] @@ -8972,7 +9484,7 @@ (define_insn "*iorsi_3" [(set (reg FLAGS_REG) (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 
"general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) @@ -8992,7 +9504,7 @@ (define_insn "*iorhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rmi,ri"))) + (match_operand:HI 2 "general_operand" "g,ri"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (IOR, HImode, operands)" "or{w}\t{%2, %0|%0, %2}" @@ -9002,7 +9514,7 @@ (define_insn "*iorhi_2" [(set (reg FLAGS_REG) (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rim,ri")) + (match_operand:HI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (ior:HI (match_dup 1) (match_dup 2)))] @@ -9015,7 +9527,7 @@ (define_insn "*iorhi_3" [(set (reg FLAGS_REG) (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0") - (match_operand:HI 2 "general_operand" "rim")) + (match_operand:HI 2 "general_operand" "g")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) @@ -9229,11 +9741,9 @@ (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (XOR, DImode, operands)" - "@ - xor{q}\t{%2, %0|%0, %2} - xor{q}\t{%2, %0|%0, %2}" + "xor{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") - (set_attr "mode" "DI,DI")]) + (set_attr "mode" "DI")]) (define_insn "*xordi_2_rex64" [(set (reg FLAGS_REG) @@ -9245,11 +9755,9 @@ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) && ix86_binary_operator_ok (XOR, DImode, operands)" - "@ - xor{q}\t{%2, %0|%0, %2} - xor{q}\t{%2, %0|%0, %2}" + "xor{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") - (set_attr "mode" "DI,DI")]) + (set_attr "mode" "DI")]) (define_insn "*xordi_3_rex64" [(set (reg FLAGS_REG) @@ -9288,7 +9796,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (xor:SI (match_operand:SI 1 
"nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")))) + (match_operand:SI 2 "general_operand" "g")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)" "xor{l}\t{%2, %k0|%k0, %2}" @@ -9308,7 +9816,7 @@ (define_insn "*xorsi_2" [(set (reg FLAGS_REG) (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rim,ri")) + (match_operand:SI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") (xor:SI (match_dup 1) (match_dup 2)))] @@ -9323,7 +9831,7 @@ (define_insn "*xorsi_2_zext" [(set (reg FLAGS_REG) (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (xor:SI (match_dup 1) (match_dup 2))))] @@ -9349,7 +9857,7 @@ (define_insn "*xorsi_3" [(set (reg FLAGS_REG) (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "general_operand" "rim")) + (match_operand:SI 2 "general_operand" "g")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) @@ -9369,7 +9877,7 @@ (define_insn "*xorhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rmi,ri"))) + (match_operand:HI 2 "general_operand" "g,ri"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (XOR, HImode, operands)" "xor{w}\t{%2, %0|%0, %2}" @@ -9379,7 +9887,7 @@ (define_insn "*xorhi_2" [(set (reg FLAGS_REG) (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rim,ri")) + (match_operand:HI 2 "general_operand" "g,ri")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (xor:HI (match_dup 1) (match_dup 2)))] @@ -9392,7 
+9900,7 @@ (define_insn "*xorhi_3" [(set (reg FLAGS_REG) (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0") - (match_operand:HI 2 "general_operand" "rim")) + (match_operand:HI 2 "general_operand" "g")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] "ix86_match_ccmode (insn, CCNOmode) @@ -9674,20 +10182,19 @@ "TARGET_64BIT && reload_completed" [(parallel [(set (reg:CCZ FLAGS_REG) - (compare:CCZ (neg:DI (match_dup 2)) (const_int 0))) - (set (match_dup 0) (neg:DI (match_dup 2)))]) + (compare:CCZ (neg:DI (match_dup 1)) (const_int 0))) + (set (match_dup 0) (neg:DI (match_dup 1)))]) (parallel - [(set (match_dup 1) + [(set (match_dup 2) (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) (match_dup 3)) (const_int 0))) (clobber (reg:CC FLAGS_REG))]) (parallel - [(set (match_dup 1) - (neg:DI (match_dup 1))) + [(set (match_dup 2) + (neg:DI (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "split_ti (operands+1, 1, operands+2, operands+3); - split_ti (operands+0, 1, operands+0, operands+1);") + "split_ti (&operands[0], 2, &operands[0], &operands[2]);") (define_expand "negdi2" [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") @@ -9711,20 +10218,19 @@ "!TARGET_64BIT && reload_completed" [(parallel [(set (reg:CCZ FLAGS_REG) - (compare:CCZ (neg:SI (match_dup 2)) (const_int 0))) - (set (match_dup 0) (neg:SI (match_dup 2)))]) + (compare:CCZ (neg:SI (match_dup 1)) (const_int 0))) + (set (match_dup 0) (neg:SI (match_dup 1)))]) (parallel - [(set (match_dup 1) + [(set (match_dup 2) (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0)) (match_dup 3)) (const_int 0))) (clobber (reg:CC FLAGS_REG))]) (parallel - [(set (match_dup 1) - (neg:SI (match_dup 1))) + [(set (match_dup 2) + (neg:SI (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "split_di (operands+1, 1, operands+2, operands+3); - split_di (operands+0, 1, operands+0, operands+1);") + "split_di (&operands[0], 2, &operands[0], &operands[2]);"); (define_insn "*negdi2_1_rex64" [(set 
(match_operand:DI 0 "nonimmediate_operand" "=rm") @@ -9852,233 +10358,67 @@ "ix86_unary_operator_ok (NEG, QImode, operands)" "neg{b}\t%0" [(set_attr "type" "negnot") - (set_attr "mode" "QI")]) - -(define_insn "*negqi2_cmpz" - [(set (reg:CCZ FLAGS_REG) - (compare:CCZ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0")) - (const_int 0))) - (set (match_operand:QI 0 "nonimmediate_operand" "=qm") - (neg:QI (match_dup 1)))] - "ix86_unary_operator_ok (NEG, QImode, operands)" - "neg{b}\t%0" - [(set_attr "type" "negnot") - (set_attr "mode" "QI")]) - -;; Changing of sign for FP values is doable using integer unit too. - -(define_expand "negsf2" - [(set (match_operand:SF 0 "nonimmediate_operand" "") - (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_fp_absneg_operator (NEG, SFmode, operands); DONE;") - -(define_expand "abssf2" - [(set (match_operand:SF 0 "nonimmediate_operand" "") - (abs:SF (match_operand:SF 1 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_fp_absneg_operator (ABS, SFmode, operands); DONE;") - -(define_insn "*absnegsf2_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x ,x,f,rm") - (match_operator:SF 3 "absneg_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0,0 ")])) - (use (match_operand:V4SF 2 "nonimmediate_operand" "xm ,0,X,X ")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE_MATH && TARGET_MIX_SSE_I387 - && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" - "#") - -(define_insn "*absnegsf2_sse" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x,x,rm") - (match_operator:SF 3 "absneg_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0")])) - (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,X")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE_MATH - && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" - "#") - -(define_insn "*absnegsf2_i387" - [(set (match_operand:SF 0 
"nonimmediate_operand" "=f,rm") - (match_operator:SF 3 "absneg_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0,0")])) - (use (match_operand 2 "" "")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && !TARGET_SSE_MATH - && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)" - "#") - -(define_expand "copysignsf3" - [(match_operand:SF 0 "register_operand" "") - (match_operand:SF 1 "nonmemory_operand" "") - (match_operand:SF 2 "register_operand" "")] - "TARGET_SSE_MATH" -{ - ix86_expand_copysign (operands); - DONE; -}) - -(define_insn_and_split "copysignsf3_const" - [(set (match_operand:SF 0 "register_operand" "=x") - (unspec:SF - [(match_operand:V4SF 1 "vector_move_operand" "xmC") - (match_operand:SF 2 "register_operand" "0") - (match_operand:V4SF 3 "nonimmediate_operand" "xm")] - UNSPEC_COPYSIGN))] - "TARGET_SSE_MATH" - "#" - "&& reload_completed" - [(const_int 0)] -{ - ix86_split_copysign_const (operands); - DONE; -}) - -(define_insn "copysignsf3_var" - [(set (match_operand:SF 0 "register_operand" "=x, x, x, x,x") - (unspec:SF - [(match_operand:SF 2 "register_operand" " x, 0, 0, x,x") - (match_operand:SF 3 "register_operand" " 1, 1, x, 1,x") - (match_operand:V4SF 4 "nonimmediate_operand" " X,xm,xm, 0,0") - (match_operand:V4SF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")] - UNSPEC_COPYSIGN)) - (clobber (match_scratch:V4SF 1 "=x, x, x, x,x"))] - "TARGET_SSE_MATH" - "#") + (set_attr "mode" "QI")]) -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (unspec:SF - [(match_operand:SF 2 "register_operand" "") - (match_operand:SF 3 "register_operand" "") - (match_operand:V4SF 4 "" "") - (match_operand:V4SF 5 "" "")] - UNSPEC_COPYSIGN)) - (clobber (match_scratch:V4SF 1 ""))] - "TARGET_SSE_MATH && reload_completed" - [(const_int 0)] -{ - ix86_split_copysign_var (operands); - DONE; -}) +(define_insn "*negqi2_cmpz" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set 
(match_operand:QI 0 "nonimmediate_operand" "=qm") + (neg:QI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, QImode, operands)" + "neg{b}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) -(define_expand "negdf2" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "ix86_expand_fp_absneg_operator (NEG, DFmode, operands); DONE;") +;; Changing of sign for FP values is doable using integer unit too. -(define_expand "absdf2" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" - "ix86_expand_fp_absneg_operator (ABS, DFmode, operands); DONE;") +(define_expand "2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "ix86_expand_fp_absneg_operator (, mode, operands); DONE;") -(define_insn "*absnegdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=x,x,f,rm") - (match_operator:DF 3 "absneg_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0 ,x,0,0")])) - (use (match_operand:V2DF 2 "nonimmediate_operand" "xm,0,X,X")) +(define_insn "*absneg2_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r") + (match_operator:MODEF 3 "absneg_operator" + [(match_operand:MODEF 1 "register_operand" "0,x,0,0")])) + (use (match_operand: 2 "nonimmediate_operand" "xm,0,X,X")) (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 - && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" + "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)" "#") -(define_insn "*absnegdf2_sse" - [(set (match_operand:DF 0 "nonimmediate_operand" "=x,x,rm") - (match_operator:DF 3 "absneg_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0 ,x,0 ")])) - 
(use (match_operand:V2DF 2 "nonimmediate_operand" "xm,0,X ")) +(define_insn "*absneg2_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,!r") + (match_operator:MODEF 3 "absneg_operator" + [(match_operand:MODEF 1 "register_operand" "0 ,x,0")])) + (use (match_operand: 2 "register_operand" "xm,0,X")) (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 && TARGET_SSE_MATH - && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" "#") -(define_insn "*absnegdf2_i387" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,rm") - (match_operator:DF 3 "absneg_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0,0")])) +(define_insn "*absneg2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r") + (match_operator:X87MODEF 3 "absneg_operator" + [(match_operand:X87MODEF 1 "register_operand" "0,0")])) (use (match_operand 2 "" "")) (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) - && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)" - "#") - -(define_expand "copysigndf3" - [(match_operand:DF 0 "register_operand" "") - (match_operand:DF 1 "nonmemory_operand" "") - (match_operand:DF 2 "register_operand" "")] - "TARGET_SSE2 && TARGET_SSE_MATH" -{ - ix86_expand_copysign (operands); - DONE; -}) - -(define_insn_and_split "copysigndf3_const" - [(set (match_operand:DF 0 "register_operand" "=x") - (unspec:DF - [(match_operand:V2DF 1 "vector_move_operand" "xmC") - (match_operand:DF 2 "register_operand" "0") - (match_operand:V2DF 3 "nonimmediate_operand" "xm")] - UNSPEC_COPYSIGN))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "#" - "&& reload_completed" - [(const_int 0)] -{ - ix86_split_copysign_const (operands); - DONE; -}) - -(define_insn "copysigndf3_var" - [(set (match_operand:DF 0 "register_operand" "=x, x, x, x,x") - (unspec:DF - [(match_operand:DF 2 "register_operand" " x, 0, 0, x,x") - (match_operand:DF 3 "register_operand" " 1, 1, x, 1,x") - 
(match_operand:V2DF 4 "nonimmediate_operand" " X,xm,xm, 0,0") - (match_operand:V2DF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")] - UNSPEC_COPYSIGN)) - (clobber (match_scratch:V2DF 1 "=x, x, x, x,x"))] - "TARGET_SSE2 && TARGET_SSE_MATH" + "TARGET_80387 && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" "#") -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (unspec:DF - [(match_operand:DF 2 "register_operand" "") - (match_operand:DF 3 "register_operand" "") - (match_operand:V2DF 4 "" "") - (match_operand:V2DF 5 "" "")] - UNSPEC_COPYSIGN)) - (clobber (match_scratch:V2DF 1 ""))] - "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" - [(const_int 0)] -{ - ix86_split_copysign_var (operands); - DONE; -}) - -(define_expand "negxf2" - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))] - "TARGET_80387" - "ix86_expand_fp_absneg_operator (NEG, XFmode, operands); DONE;") - -(define_expand "absxf2" - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (abs:XF (match_operand:XF 1 "nonimmediate_operand" "")))] - "TARGET_80387" - "ix86_expand_fp_absneg_operator (ABS, XFmode, operands); DONE;") +(define_expand "tf2" + [(set (match_operand:TF 0 "register_operand" "") + (absneg:TF (match_operand:TF 1 "register_operand" "")))] + "TARGET_64BIT" + "ix86_expand_fp_absneg_operator (, TFmode, operands); DONE;") -(define_insn "*absnegxf2_i387" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,?rm") - (match_operator:XF 3 "absneg_operator" - [(match_operand:XF 1 "nonimmediate_operand" "0,0")])) - (use (match_operand 2 "" "")) +(define_insn "*absnegtf2_sse" + [(set (match_operand:TF 0 "register_operand" "=x,x") + (match_operator:TF 3 "absneg_operator" + [(match_operand:TF 1 "register_operand" "0,x")])) + (use (match_operand:TF 2 "nonimmediate_operand" "xm,0")) (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 - && ix86_unary_operator_ok (GET_CODE (operands[3]), XFmode, operands)" + "TARGET_64BIT" "#") ;; 
Splitters for fp abs and neg. @@ -10207,137 +10547,109 @@ operands[1] = tmp; }) -(define_split - [(set (match_operand 0 "memory_operand" "") - (match_operator 1 "absneg_operator" [(match_dup 0)])) - (use (match_operand 2 "" "")) - (clobber (reg:CC FLAGS_REG))] - "reload_completed" - [(parallel [(set (match_dup 0) (match_dup 1)) - (clobber (reg:CC FLAGS_REG))])] -{ - enum machine_mode mode = GET_MODE (operands[0]); - int size = mode == XFmode ? 10 : GET_MODE_SIZE (mode); - rtx tmp; - - operands[0] = adjust_address (operands[0], QImode, size - 1); - if (GET_CODE (operands[1]) == ABS) - { - tmp = gen_int_mode (0x7f, QImode); - tmp = gen_rtx_AND (QImode, operands[0], tmp); - } - else - { - tmp = gen_int_mode (0x80, QImode); - tmp = gen_rtx_XOR (QImode, operands[0], tmp); - } - operands[1] = tmp; -}) - ;; Conditionalize these after reload. If they match before reload, we ;; lose the clobber and ability to use integer instructions. -(define_insn "*negsf2_1" - [(set (match_operand:SF 0 "register_operand" "=f") - (neg:SF (match_operand:SF 1 "register_operand" "0")))] - "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)" - "fchs" +(define_insn "*2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))] + "TARGET_80387 + && (reload_completed + || !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" + "f" [(set_attr "type" "fsgn") - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) -(define_insn "*negdf2_1" +(define_insn "*extendsfdf2" [(set (match_operand:DF 0 "register_operand" "=f") - (neg:DF (match_operand:DF 1 "register_operand" "0")))] - "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))" - "fchs" + (absneg:DF (float_extend:DF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" + "f" [(set_attr "type" "fsgn") (set_attr "mode" "DF")]) -(define_insn "*negxf2_1" +(define_insn "*extendsfxf2" [(set (match_operand:XF 0 
"register_operand" "=f") - (neg:XF (match_operand:XF 1 "register_operand" "0")))] + (absneg:XF (float_extend:XF + (match_operand:SF 1 "register_operand" "0"))))] "TARGET_80387" - "fchs" + "f" [(set_attr "type" "fsgn") (set_attr "mode" "XF")]) -(define_insn "*abssf2_1" - [(set (match_operand:SF 0 "register_operand" "=f") - (abs:SF (match_operand:SF 1 "register_operand" "0")))] - "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "SF")]) - -(define_insn "*absdf2_1" - [(set (match_operand:DF 0 "register_operand" "=f") - (abs:DF (match_operand:DF 1 "register_operand" "0")))] - "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) - -(define_insn "*absxf2_1" +(define_insn "*extenddfxf2" [(set (match_operand:XF 0 "register_operand" "=f") - (abs:XF (match_operand:XF 1 "register_operand" "0")))] + (absneg:XF (float_extend:XF + (match_operand:DF 1 "register_operand" "0"))))] "TARGET_80387" - "fabs" + "f" [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) + (set_attr "mode" "XF")]) -(define_insn "*negextendsfdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (neg:DF (float_extend:DF - (match_operand:SF 1 "register_operand" "0"))))] - "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) +;; Copysign instructions -(define_insn "*negextenddfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (neg:XF (float_extend:XF - (match_operand:DF 1 "register_operand" "0"))))] - "TARGET_80387" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) +(define_mode_iterator CSGNMODE [SF DF TF]) +(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")]) -(define_insn "*negextendsfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (neg:XF (float_extend:XF - (match_operand:SF 1 "register_operand" "0"))))] - "TARGET_80387" - "fchs" - 
[(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) +(define_expand "copysign3" + [(match_operand:CSGNMODE 0 "register_operand" "") + (match_operand:CSGNMODE 1 "nonmemory_operand" "") + (match_operand:CSGNMODE 2 "register_operand" "")] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_64BIT && (mode == TFmode))" +{ + ix86_expand_copysign (operands); + DONE; +}) -(define_insn "*absextendsfdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (abs:DF (float_extend:DF - (match_operand:SF 1 "register_operand" "0"))))] - "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) +(define_insn_and_split "copysign3_const" + [(set (match_operand:CSGNMODE 0 "register_operand" "=x") + (unspec:CSGNMODE + [(match_operand: 1 "vector_move_operand" "xmC") + (match_operand:CSGNMODE 2 "register_operand" "0") + (match_operand: 3 "nonimmediate_operand" "xm")] + UNSPEC_COPYSIGN))] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_64BIT && (mode == TFmode))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_const (operands); + DONE; +}) -(define_insn "*absextenddfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (abs:XF (float_extend:XF - (match_operand:DF 1 "register_operand" "0"))))] - "TARGET_80387" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) +(define_insn "copysign3_var" + [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x") + (unspec:CSGNMODE + [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x") + (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x") + (match_operand: 4 "nonimmediate_operand" "X,xm,xm,0,0") + (match_operand: 5 "nonimmediate_operand" "0,xm,1,xm,1")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch: 1 "=x,x,x,x,x"))] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_64BIT && (mode == TFmode))" + "#") -(define_insn "*absextendsfxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - 
(abs:XF (float_extend:XF - (match_operand:SF 1 "register_operand" "0"))))] - "TARGET_80387" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) +(define_split + [(set (match_operand:CSGNMODE 0 "register_operand" "") + (unspec:CSGNMODE + [(match_operand:CSGNMODE 2 "register_operand" "") + (match_operand:CSGNMODE 3 "register_operand" "") + (match_operand: 4 "" "") + (match_operand: 5 "" "")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch: 1 ""))] + "((SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_64BIT && (mode == TFmode))) + && reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_var (operands); + DONE; +}) ;; One complement instructions @@ -10596,6 +10908,22 @@ "#" [(set_attr "type" "multi")]) +;; This pattern must be defined before *ashlti3_2 to prevent +;; combine pass from converting sse2_ashlti3 to *ashlti3_2. + +(define_insn "sse2_ashlti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (ashift:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "pslldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + (define_insn "*ashlti3_2" [(set (match_operand:TI 0 "register_operand" "=r") (ashift:TI (match_operand:TI 1 "register_operand" "0") @@ -10639,7 +10967,7 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "DI") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector")]) (define_expand "x86_64_shift_adj" [(set (reg:CCZ FLAGS_REG) @@ -10731,13 +11059,14 @@ (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashift:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, DImode, operands) + "TARGET_64BIT && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && 
(TARGET_SHIFT1 - || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands)" { switch (get_attr_type (insn)) { @@ -10772,13 +11101,14 @@ (match_operand:QI 2 "immediate_operand" "e")) (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, DImode, operands) + "TARGET_64BIT && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && (TARGET_SHIFT1 - || TARGET_DOUBLE_WITH_ADD)))" + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands)" { switch (get_attr_type (insn)) { @@ -10835,7 +11165,7 @@ (match_operand:QI 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) - ? flow2_completed : reload_completed)" + ? 
epilogue_completed : reload_completed)" [(const_int 0)] "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;") @@ -10855,7 +11185,7 @@ (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector")]) (define_expand "x86_shift_adj_1" [(set (reg:CCZ FLAGS_REG) @@ -11059,13 +11389,13 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashift:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, SImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { @@ -11100,13 +11430,13 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, SImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || TARGET_DOUBLE_WITH_ADD)))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { @@ -11142,13 +11472,14 @@ (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, SImode, operands) + "TARGET_64BIT && (optimize_size || 
!TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && (TARGET_SHIFT1 - || TARGET_DOUBLE_WITH_ADD)))" + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { @@ -11266,13 +11597,13 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashift:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" { switch (get_attr_type (insn)) { @@ -11307,13 +11638,13 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || TARGET_DOUBLE_WITH_ADD)))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" { switch (get_attr_type (insn)) { @@ -11470,13 +11801,13 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashift:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, QImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))" 
+ "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" { switch (get_attr_type (insn)) { @@ -11511,13 +11842,13 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, QImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL - || (operands[2] == const1_rtx - && (TARGET_SHIFT1 - || TARGET_DOUBLE_WITH_ADD)))" + "(optimize_size + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" { switch (get_attr_type (insn)) { @@ -11616,7 +11947,7 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "DI") (set_attr "athlon_decode" "vector") - (set_attr "amdfam10_decode" "vector")]) + (set_attr "amdfam10_decode" "vector")]) (define_expand "ashrdi3" [(set (match_operand:DI 0 "shiftdi_operand" "") @@ -11647,8 +11978,9 @@ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11679,8 +12011,9 @@ (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT && (TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t%0" 
[(set_attr "type" "ishift") @@ -11696,8 +12029,9 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT && (TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t%0" [(set_attr "type" "ishift") @@ -11714,10 +12048,10 @@ (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) @@ -11729,10 +12063,10 @@ (match_operand:QI 2 "const_int_operand" "n")) (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) @@ -11766,7 +12100,7 @@ (match_operand:QI 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) - ? flow2_completed : reload_completed)" + ? 
epilogue_completed : reload_completed)" [(const_int 0)] "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;") @@ -11860,8 +12194,8 @@ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11874,8 +12208,9 @@ (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") (match_operand:QI 2 "const1_operand" "")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%k0" [(set_attr "type" "ishift") (set_attr "length" "2")]) @@ -11915,8 +12250,8 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%0" [(set_attr "type" "ishift") @@ -11932,8 +12267,8 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%0" [(set_attr "type" "ishift") @@ -11947,8 +12282,9 @@ (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) + "TARGET_64BIT && (TARGET_SHIFT1 || 
optimize_size) + && ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%k0" [(set_attr "type" "ishift") @@ -11965,10 +12301,9 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -11980,10 +12315,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -11996,10 +12330,10 @@ (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -12017,8 +12351,8 @@ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, HImode, operands) - && 
(TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12049,8 +12383,8 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t%0" [(set_attr "type" "ishift") @@ -12065,9 +12399,9 @@ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" "")) (const_int 0))) - (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + (clobber (match_scratch:HI 0 "=r"))] + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t%0" [(set_attr "type" "ishift") @@ -12084,10 +12418,9 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "HI")]) @@ -12099,10 +12432,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, 
HImode, operands)" "sar{w}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "HI")]) @@ -12120,8 +12452,8 @@ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12134,9 +12466,9 @@ (ashiftrt:QI (match_dup 0) (match_operand:QI 1 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - && (! TARGET_PARTIAL_REG_STALL || optimize_size) - && (TARGET_SHIFT1 || optimize_size)" + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift1") (set (attr "length") @@ -12180,8 +12512,8 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift") @@ -12197,8 +12529,8 @@ (match_operand:QI 2 "const1_operand" "I")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift") @@ -12215,10 +12547,9 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - && 
(optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) @@ -12230,10 +12561,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) @@ -12269,6 +12599,22 @@ "#" [(set_attr "type" "multi")]) +;; This pattern must be defined before *lshrti3_2 to prevent +;; combine pass from converting sse2_lshrti3 to *lshrti3_2. + +(define_insn "sse2_lshrti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "psrldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + (define_insn "*lshrti3_2" [(set (match_operand:TI 0 "register_operand" "=r") (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") @@ -12309,8 +12655,9 @@ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12341,8 +12688,9 @@ 
(const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT && (TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t%0" [(set_attr "type" "ishift") @@ -12358,8 +12706,9 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + "TARGET_64BIT && (TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t%0" [(set_attr "type" "ishift") @@ -12376,10 +12725,10 @@ (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_dup 1) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) @@ -12391,10 +12740,10 @@ (match_operand:QI 2 "const_int_operand" "e")) (const_int 0))) (clobber (match_scratch:DI 0 "=r"))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "DI")]) @@ -12428,7 +12777,7 @@ (match_operand:QI 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) - ? 
flow2_completed : reload_completed)" + ? epilogue_completed : reload_completed)" [(const_int 0)] "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;") @@ -12445,8 +12794,8 @@ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12459,8 +12808,9 @@ (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0")) (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%k0" [(set_attr "type" "ishift") (set_attr "length" "2")]) @@ -12501,8 +12851,8 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%0" [(set_attr "type" "ishift") @@ -12518,8 +12868,8 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%0" [(set_attr "type" "ishift") @@ -12533,8 +12883,9 @@ (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + 
"TARGET_64BIT && (TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%k0" [(set_attr "type" "ishift") @@ -12551,10 +12902,9 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -12566,10 +12916,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:SI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -12582,10 +12931,10 @@ (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "TARGET_64BIT + && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "SI")]) @@ -12603,8 +12952,8 @@ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok 
(LSHIFTRT, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12635,8 +12984,8 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t%0" [(set_attr "type" "ishift") @@ -12652,8 +13001,8 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t%0" [(set_attr "type" "ishift") @@ -12670,10 +13019,9 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "HI")]) @@ -12685,10 +13033,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:HI 0 "=r"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t{%2, %0|%0, %2}" [(set_attr "type" 
"ishift") (set_attr "mode" "HI")]) @@ -12706,8 +13053,8 @@ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, QImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12765,8 +13112,8 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t%0" [(set_attr "type" "ishift") @@ -12782,8 +13129,8 @@ (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_SHIFT1 || optimize_size) + "(TARGET_SHIFT1 || optimize_size) + && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t%0" [(set_attr "type" "ishift") @@ -12800,10 +13147,9 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, QImode, operands) - && (optimize_size - || !TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) @@ -12815,10 +13161,9 @@ (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (clobber (match_scratch:QI 0 "=q"))] - "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (LSHIFTRT, QImode, operands) - && (optimize_size - || 
!TARGET_PARTIAL_FLAG_REG_STALL)" + "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t{%2, %0|%0, %2}" [(set_attr "type" "ishift") (set_attr "mode" "QI")]) @@ -12867,15 +13212,16 @@ (lshiftrt:SI (match_dup 3) (minus:QI (const_int 32) (match_dup 2))))) (clobber (reg:CC FLAGS_REG))])] - "split_di (operands, 1, operands + 4, operands + 5);") + "split_di (&operands[0], 1, &operands[4], &operands[5]);") (define_insn "*rotlsi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATE, DImode, operands)" "rol{q}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -12908,8 +13254,8 @@ (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATE, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATE, SImode, operands)" "rol{l}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -12923,8 +13269,9 @@ (rotate:SI (match_operand:SI 1 "register_operand" "0") (match_operand:QI 2 "const1_operand" "")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATE, SImode, operands)" "rol{l}\t%k0" [(set_attr "type" "rotate") (set_attr "length" "2")]) @@ -12967,8 +13314,8 @@ (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) 
(clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATE, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATE, HImode, operands)" "rol{w}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13025,8 +13372,8 @@ (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATE, QImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATE, QImode, operands)" "rol{b}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13101,15 +13448,16 @@ (ashift:SI (match_dup 3) (minus:QI (const_int 32) (match_dup 2))))) (clobber (reg:CC FLAGS_REG))])] - "split_di (operands, 1, operands + 4, operands + 5);") + "split_di (&operands[0], 1, &operands[4], &operands[5]);") (define_insn "*rotrdi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATERT, DImode, operands)" "ror{q}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13142,8 +13490,8 @@ (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATERT, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATERT, SImode, operands)" "ror{l}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13157,8 +13505,9 @@ (rotatert:SI (match_operand:SI 1 "register_operand" "0") (match_operand:QI 2 "const1_operand" "")))) (clobber (reg:CC 
FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATERT, SImode, operands)" "ror{l}\t%k0" [(set_attr "type" "rotate") (set (attr "length") @@ -13204,8 +13553,8 @@ (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATERT, HImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATERT, HImode, operands)" "ror{w}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13248,8 +13597,8 @@ (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ROTATERT, QImode, operands) - && (TARGET_SHIFT1 || optimize_size)" + "(TARGET_SHIFT1 || optimize_size) + && ix86_binary_operator_ok (ROTATERT, QImode, operands)" "ror{b}\t%0" [(set_attr "type" "rotate") (set (attr "length") @@ -13507,113 +13856,17 @@ ;; to avoid partial register stalls. Otherwise do things the setcc+movzx ;; way, which can later delete the movzx if only QImode is needed. 
-(define_expand "seq" - [(set (match_operand:QI 0 "register_operand" "") - (eq:QI (reg:CC FLAGS_REG) (const_int 0)))] - "" - "if (ix86_expand_setcc (EQ, operands[0])) DONE; else FAIL;") - -(define_expand "sne" - [(set (match_operand:QI 0 "register_operand" "") - (ne:QI (reg:CC FLAGS_REG) (const_int 0)))] - "" - "if (ix86_expand_setcc (NE, operands[0])) DONE; else FAIL;") - -(define_expand "sgt" - [(set (match_operand:QI 0 "register_operand" "") - (gt:QI (reg:CC FLAGS_REG) (const_int 0)))] - "" - "if (ix86_expand_setcc (GT, operands[0])) DONE; else FAIL;") - -(define_expand "sgtu" - [(set (match_operand:QI 0 "register_operand" "") - (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))] - "" - "if (ix86_expand_setcc (GTU, operands[0])) DONE; else FAIL;") - -(define_expand "slt" - [(set (match_operand:QI 0 "register_operand" "") - (lt:QI (reg:CC FLAGS_REG) (const_int 0)))] - "" - "if (ix86_expand_setcc (LT, operands[0])) DONE; else FAIL;") - -(define_expand "sltu" - [(set (match_operand:QI 0 "register_operand" "") - (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))] - "" - "if (ix86_expand_setcc (LTU, operands[0])) DONE; else FAIL;") - -(define_expand "sge" - [(set (match_operand:QI 0 "register_operand" "") - (ge:QI (reg:CC FLAGS_REG) (const_int 0)))] - "" - "if (ix86_expand_setcc (GE, operands[0])) DONE; else FAIL;") - -(define_expand "sgeu" - [(set (match_operand:QI 0 "register_operand" "") - (geu:QI (reg:CC FLAGS_REG) (const_int 0)))] - "" - "if (ix86_expand_setcc (GEU, operands[0])) DONE; else FAIL;") - -(define_expand "sle" - [(set (match_operand:QI 0 "register_operand" "") - (le:QI (reg:CC FLAGS_REG) (const_int 0)))] - "" - "if (ix86_expand_setcc (LE, operands[0])) DONE; else FAIL;") - -(define_expand "sleu" +(define_expand "s<code>" [(set (match_operand:QI 0 "register_operand" "") - (leu:QI (reg:CC FLAGS_REG) (const_int 0)))] + (int_cond:QI (reg:CC FLAGS_REG) (const_int 0)))] "" - "if (ix86_expand_setcc (LEU, operands[0])) DONE; else FAIL;") + "if (ix86_expand_setcc (<CODE>, operands[0]))
DONE; else FAIL;") -(define_expand "sunordered" +(define_expand "s<code>" [(set (match_operand:QI 0 "register_operand" "") - (unordered:QI (reg:CC FLAGS_REG) (const_int 0)))] + (fp_cond:QI (reg:CC FLAGS_REG) (const_int 0)))] "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNORDERED, operands[0])) DONE; else FAIL;") - -(define_expand "sordered" - [(set (match_operand:QI 0 "register_operand" "") - (ordered:QI (reg:CC FLAGS_REG) (const_int 0)))] - "TARGET_80387" - "if (ix86_expand_setcc (ORDERED, operands[0])) DONE; else FAIL;") - -(define_expand "suneq" - [(set (match_operand:QI 0 "register_operand" "") - (uneq:QI (reg:CC FLAGS_REG) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNEQ, operands[0])) DONE; else FAIL;") - -(define_expand "sunge" - [(set (match_operand:QI 0 "register_operand" "") - (unge:QI (reg:CC FLAGS_REG) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNGE, operands[0])) DONE; else FAIL;") - -(define_expand "sungt" - [(set (match_operand:QI 0 "register_operand" "") - (ungt:QI (reg:CC FLAGS_REG) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNGT, operands[0])) DONE; else FAIL;") - -(define_expand "sunle" - [(set (match_operand:QI 0 "register_operand" "") - (unle:QI (reg:CC FLAGS_REG) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNLE, operands[0])) DONE; else FAIL;") - -(define_expand "sunlt" - [(set (match_operand:QI 0 "register_operand" "") - (unlt:QI (reg:CC FLAGS_REG) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (UNLT, operands[0])) DONE; else FAIL;") - -(define_expand "sltgt" - [(set (match_operand:QI 0 "register_operand" "") - (ltgt:QI (reg:CC FLAGS_REG) (const_int 0)))] - "TARGET_80387 || TARGET_SSE" - "if (ix86_expand_setcc (LTGT, operands[0])) DONE; else FAIL;") + "if (ix86_expand_setcc (<CODE>, operands[0])) DONE; else FAIL;") (define_insn "*setcc_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") @@
-13706,177 +13959,52 @@ ;; The SSE store flag instructions saves 0 or 0xffffffff to the result. ;; subsequent logical operations are used to imitate conditional moves. ;; 0xffffffff is NaN, but not in normalized form, so we can't represent -;; it directly. - -(define_insn "*sse_setccsf" - [(set (match_operand:SF 0 "register_operand" "=x") - (match_operator:SF 1 "sse_comparison_operator" - [(match_operand:SF 2 "register_operand" "0") - (match_operand:SF 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE" - "cmp%D1ss\t{%3, %0|%0, %3}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "SF")]) - -(define_insn "*sse_setccdf" - [(set (match_operand:DF 0 "register_operand" "=x") - (match_operator:DF 1 "sse_comparison_operator" - [(match_operand:DF 2 "register_operand" "0") - (match_operand:DF 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE2" - "cmp%D1sd\t{%3, %0|%0, %3}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "DF")]) - -;; Basic conditional jump instructions. -;; We ignore the overflow flag for signed branch instructions. - -;; For all bCOND expanders, also expand the compare or test insn that -;; generates reg FLAGS_REG. Generate an equality comparison if `beq' or `bne'. 
- -(define_expand "beq" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (EQ, operands[0]); DONE;") - -(define_expand "bne" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (NE, operands[0]); DONE;") - -(define_expand "bgt" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (GT, operands[0]); DONE;") - -(define_expand "bgtu" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (GTU, operands[0]); DONE;") - -(define_expand "blt" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (LT, operands[0]); DONE;") - -(define_expand "bltu" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (LTU, operands[0]); DONE;") - -(define_expand "bge" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (GE, operands[0]); DONE;") - -(define_expand "bgeu" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (GEU, operands[0]); DONE;") - -(define_expand "ble" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (LE, operands[0]); DONE;") - -(define_expand "bleu" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "" - "ix86_expand_branch (LEU, operands[0]); DONE;") - -(define_expand "bunordered" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_branch (UNORDERED, operands[0]); DONE;") - -(define_expand "bordered" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref 
(match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_branch (ORDERED, operands[0]); DONE;") +;; it directly. -(define_expand "buneq" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_branch (UNEQ, operands[0]); DONE;") +(define_insn "*sse_setcc" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 1 "sse_comparison_operator" + [(match_operand:MODEF 2 "register_operand" "0") + (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && !TARGET_SSE5" + "cmp%D1s\t{%3, %0|%0, %3}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "")]) -(define_expand "bunge" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_branch (UNGE, operands[0]); DONE;") +(define_insn "*sse5_setcc" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 1 "sse5_comparison_float_operator" + [(match_operand:MODEF 2 "register_operand" "x") + (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] + "TARGET_SSE5" + "com%Y1s\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "")]) -(define_expand "bungt" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_branch (UNGT, operands[0]); DONE;") + +;; Basic conditional jump instructions. +;; We ignore the overflow flag for signed branch instructions. -(define_expand "bunle" - [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) - (pc)))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_branch (UNLE, operands[0]); DONE;") +;; For all bCOND expanders, also expand the compare or test insn that +;; generates reg FLAGS_REG. Generate an equality comparison if `beq' or `bne'. 
-(define_expand "bunlt" +(define_expand "b" [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) + (if_then_else (int_cond:CC (reg:CC FLAGS_REG) + (const_int 0)) + (label_ref (match_operand 0 "")) (pc)))] - "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_branch (UNLT, operands[0]); DONE;") + "" + "ix86_expand_branch (, operands[0]); DONE;") -(define_expand "bltgt" +(define_expand "b" [(set (pc) - (if_then_else (match_dup 1) - (label_ref (match_operand 0 "" "")) + (if_then_else (fp_cond:CC (reg:CC FLAGS_REG) + (const_int 0)) + (label_ref (match_operand 0 "")) (pc)))] "TARGET_80387 || TARGET_SSE_MATH" - "ix86_expand_branch (LTGT, operands[0]); DONE;") + "ix86_expand_branch (, operands[0]); DONE;") (define_insn "*jcc_1" [(set (pc) @@ -14006,8 +14134,8 @@ (pc))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG))] - "TARGET_CMOVE && TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_CMOVE && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -14051,8 +14179,8 @@ (label_ref (match_operand 3 "" "")))) (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG))] - "TARGET_CMOVE && TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_CMOVE && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -14105,8 +14233,7 @@ (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 4 "=a"))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -14121,8 +14248,7 @@ (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 4 "=a"))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE 
(operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" "#") @@ -14137,8 +14263,7 @@ (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 4 "=a"))] - "TARGET_80387 - && FLOAT_MODE_P (GET_MODE (operands[1])) + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2]) && !ix86_use_fcomi_compare (GET_CODE (operands[0])) && SELECT_CC_MODE (GET_CODE (operands[0]), @@ -14162,8 +14287,8 @@ (clobber (reg:CCFP FPSR_REG)) (clobber (reg:CCFP FLAGS_REG)) (clobber (match_scratch:HI 5 "=a,a"))] - "TARGET_80387 && TARGET_USE_MODE_FIOP - && FLOAT_MODE_P (GET_MODE (operands[3])) + "X87_FLOAT_MODE_P (GET_MODE (operands[3])) + && TARGET_USE_MODE_FIOP && GET_MODE (operands[1]) == GET_MODE (operands[3]) && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0]))) && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode @@ -14267,7 +14392,7 @@ (set_attr "modrm" "0")]) (define_expand "indirect_jump" - [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))] + [(set (pc) (match_operand 0 "nonimmediate_operand" ""))] "" "") @@ -14286,7 +14411,7 @@ (set_attr "length_immediate" "0")]) (define_expand "tablejump" - [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "rm")) + [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "")) (use (label_ref (match_operand 1 "" "")))])] "" { @@ -14521,7 +14646,7 @@ [(call (mem:QI (reg:DI R11_REG)) (match_operand 0 "" ""))] "SIBLING_CALL_P (insn) && TARGET_64BIT" - "jmp\t*%%r11" + "jmp\t{*%%}r11" [(set_attr "type" "call")]) @@ -14596,7 +14721,7 @@ registers we stored in the result block. We avoid problems by claiming that all hard registers are used and clobbered at this point. */ - emit_insn (gen_blockage (const0_rtx)); + emit_insn (gen_blockage ()); DONE; }) @@ -14607,7 +14732,15 @@ ;; all of memory. 
This blocks insns from being moved across this point. (define_insn "blockage" - [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_BLOCKAGE)] + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +;; As USE insns aren't meaningful after reload, this is used instead +;; to prevent deleting instructions setting registers for PIC code +(define_insn "prologue_use" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_PROLOGUE_USE)] "" "" [(set_attr "length" "0")]) @@ -14620,9 +14753,9 @@ [(return)] "ix86_can_use_return_insn_p ()" { - if (current_function_pops_args) + if (crtl->args.pops_args) { - rtx popc = GEN_INT (current_function_pops_args); + rtx popc = GEN_INT (crtl->args.pops_args); emit_jump_insn (gen_return_pop_internal (popc)); DONE; } @@ -14643,7 +14776,7 @@ [(return) (unspec [(const_int 0)] UNSPEC_REP)] "reload_completed" - "rep {;} ret" + "rep\;ret" [(set_attr "length" "1") (set_attr "length_immediate" "0") (set_attr "prefix_rep" "1") @@ -14695,7 +14828,7 @@ [(set_attr "length" "16")]) (define_expand "prologue" - [(const_int 1)] + [(const_int 0)] "" "ix86_expand_prologue (); DONE;") @@ -14722,7 +14855,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))] "TARGET_64BIT" - "lea{q}\t_GLOBAL_OFFSET_TABLE_(%%rip), %0" + "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}" [(set_attr "type" "lea") (set_attr "length" "6")]) @@ -14730,7 +14863,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(match_operand:DI 1 "" "")] UNSPEC_SET_RIP))] "TARGET_64BIT" - "lea{q}\t%l1(%%rip), %0" + "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}" [(set_attr "type" "lea") (set_attr "length" "6")]) @@ -14738,17 +14871,17 @@ [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(match_operand:DI 1 "" "")] UNSPEC_SET_GOT_OFFSET))] "TARGET_64BIT" - "movabs{q}\t$_GLOBAL_OFFSET_TABLE_-%l1, %0" + "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET 
FLAT:_GLOBAL_OFFSET_TABLE_-%l1}" [(set_attr "type" "imov") (set_attr "length" "11")]) (define_expand "epilogue" - [(const_int 1)] + [(const_int 0)] "" "ix86_expand_epilogue (1); DONE;") (define_expand "sibcall_epilogue" - [(const_int 1)] + [(const_int 0)] "" "ix86_expand_epilogue (0); DONE;") @@ -14781,7 +14914,7 @@ "!TARGET_64BIT" "#" "reload_completed" - [(const_int 1)] + [(const_int 0)] "ix86_expand_epilogue (2); DONE;") (define_insn_and_split "eh_return_di" @@ -14791,7 +14924,7 @@ "TARGET_64BIT" "#" "reload_completed" - [(const_int 1)] + [(const_int 0)] "ix86_expand_epilogue (2); DONE;") (define_insn "leave" @@ -14817,36 +14950,40 @@ (clobber (match_scratch:SI 2 "")) (clobber (reg:CC FLAGS_REG))])] "" - "") +{ + if (TARGET_CMOVE) + { + emit_insn (gen_ffs_cmove (operands[0], operands[1])); + DONE; + } +}) -(define_insn_and_split "*ffs_cmove" - [(set (match_operand:SI 0 "register_operand" "=r") - (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) - (clobber (match_scratch:SI 2 "=&r")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_CMOVE" - "#" - "&& reload_completed" +(define_expand "ffs_cmove" [(set (match_dup 2) (const_int -1)) - (parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0))) - (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SI 1 "register_operand" "") + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "") + (ctz:SI (match_dup 1)))]) (set (match_dup 0) (if_then_else:SI (eq (reg:CCZ FLAGS_REG) (const_int 0)) (match_dup 2) (match_dup 0))) (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) (clobber (reg:CC FLAGS_REG))])] - "") + "TARGET_CMOVE" + "operands[2] = gen_reg_rtx (SImode);") (define_insn_and_split "*ffs_no_cmove" [(set (match_operand:SI 0 "nonimmediate_operand" "=r") (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) (clobber (match_scratch:SI 2 "=&q")) (clobber (reg:CC FLAGS_REG))] - "" + "!TARGET_CMOVE" "#" - 
"reload_completed" - [(parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0))) + "&& reload_completed" + [(parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_dup 1) (const_int 0))) (set (match_dup 0) (ctz:SI (match_dup 1)))]) (set (strict_low_part (match_dup 3)) (eq:QI (reg:CCZ FLAGS_REG) (const_int 0))) @@ -14872,33 +15009,20 @@ [(set_attr "prefix_0f" "1")]) (define_expand "ffsdi2" - [(parallel - [(set (match_operand:DI 0 "register_operand" "") - (ffs:DI (match_operand:DI 1 "nonimmediate_operand" ""))) - (clobber (match_scratch:DI 2 "")) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_64BIT && TARGET_CMOVE" - "") - -(define_insn_and_split "*ffs_rex64" - [(set (match_operand:DI 0 "register_operand" "=r") - (ffs:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))) - (clobber (match_scratch:DI 2 "=&r")) - (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && TARGET_CMOVE" - "#" - "&& reload_completed" [(set (match_dup 2) (const_int -1)) (parallel [(set (reg:CCZ FLAGS_REG) - (compare:CCZ (match_dup 1) (const_int 0))) - (set (match_dup 0) (ctz:DI (match_dup 1)))]) + (compare:CCZ (match_operand:DI 1 "register_operand" "") + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "") + (ctz:DI (match_dup 1)))]) (set (match_dup 0) (if_then_else:DI (eq (reg:CCZ FLAGS_REG) (const_int 0)) (match_dup 2) (match_dup 0))) (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) (clobber (reg:CC FLAGS_REG))])] - "") + "TARGET_64BIT" + "operands[2] = gen_reg_rtx (DImode);") (define_insn "*ffsdi_1" [(set (reg:CCZ FLAGS_REG) @@ -15177,7 +15301,7 @@ (define_expand "paritydi2" [(set (match_operand:DI 0 "register_operand" "") - (parity:DI (match_operand:DI 1 "nonimmediate_operand" "")))] + (parity:DI (match_operand:DI 1 "register_operand" "")))] "! 
TARGET_POPCNT" { rtx scratch = gen_reg_rtx (QImode); @@ -15205,10 +15329,10 @@ (define_insn_and_split "paritydi2_cmp" [(set (reg:CC FLAGS_REG) - (parity:CC (match_operand:DI 3 "nonimmediate_operand" "0,m"))) - (clobber (match_scratch:DI 0 "=r,X")) - (clobber (match_scratch:SI 1 "=r,r")) - (clobber (match_scratch:HI 2 "=Q,Q"))] + (parity:CC (match_operand:DI 3 "register_operand" "0"))) + (clobber (match_scratch:DI 0 "=r")) + (clobber (match_scratch:SI 1 "=&r")) + (clobber (match_scratch:HI 2 "=Q"))] "! TARGET_POPCNT" "#" "&& reload_completed" @@ -15224,20 +15348,18 @@ { operands[4] = gen_lowpart (SImode, operands[3]); - if (MEM_P (operands[3])) - emit_move_insn (operands[1], gen_highpart (SImode, operands[3])); - else if (! TARGET_64BIT) - operands[1] = gen_highpart (SImode, operands[3]); - else + if (TARGET_64BIT) { emit_move_insn (operands[1], gen_lowpart (SImode, operands[3])); emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32))); } + else + operands[1] = gen_highpart (SImode, operands[3]); }) (define_expand "paritysi2" [(set (match_operand:SI 0 "register_operand" "") - (parity:SI (match_operand:SI 1 "nonimmediate_operand" "")))] + (parity:SI (match_operand:SI 1 "register_operand" "")))] "! TARGET_POPCNT" { rtx scratch = gen_reg_rtx (QImode); @@ -15256,9 +15378,9 @@ (define_insn_and_split "paritysi2_cmp" [(set (reg:CC FLAGS_REG) - (parity:CC (match_operand:SI 2 "nonimmediate_operand" "0,m"))) - (clobber (match_scratch:SI 0 "=r,X")) - (clobber (match_scratch:HI 1 "=Q,Q"))] + (parity:CC (match_operand:SI 2 "register_operand" "0"))) + (clobber (match_scratch:SI 0 "=r")) + (clobber (match_scratch:HI 1 "=&Q"))] "! 
TARGET_POPCNT" "#" "&& reload_completed" @@ -15273,13 +15395,8 @@ { operands[3] = gen_lowpart (HImode, operands[2]); - if (MEM_P (operands[2])) - emit_move_insn (operands[1], gen_highpart (HImode, operands[2])); - else - { - emit_move_insn (operands[1], gen_lowpart (HImode, operands[2])); - emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16))); - } + emit_move_insn (operands[1], gen_lowpart (HImode, operands[2])); + emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16))); }) (define_insn "*parityhi2_cmp" @@ -15368,7 +15485,7 @@ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] UNSPEC_TLS_GD)] "TARGET_64BIT" - ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|%%rdi, %a1@TLSGD[%%rip]}\;.word\t0x6666\;rex64\;call\t%P2" + ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|rdi, %a1@TLSGD[rip]}\;.word\t0x6666\;rex64\;call\t%P2" [(set_attr "type" "multi") (set_attr "length" "16")]) @@ -15446,7 +15563,7 @@ (match_operand:DI 2 "" ""))) (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] "TARGET_64BIT" - "lea{q}\t{%&@TLSLD(%%rip), %%rdi|%%rdi, %&@TLSLD[%%rip]}\;call\t%P1" + "lea{q}\t{%&@TLSLD(%%rip), %%rdi|rdi, %&@TLSLD[rip]}\;call\t%P1" [(set_attr "type" "multi") (set_attr "length" "12")]) @@ -15496,7 +15613,7 @@ [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(const_int 0)] UNSPEC_TP))] "!TARGET_64BIT" - "mov{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}" + "mov{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}" [(set_attr "type" "imov") (set_attr "modrm" "0") (set_attr "length" "7") @@ -15509,7 +15626,7 @@ (match_operand:SI 1 "register_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - "add{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}" + "add{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}" [(set_attr "type" "alu") (set_attr "modrm" "0") (set_attr "length" "7") @@ -15520,7 +15637,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(const_int 0)] UNSPEC_TP))] "TARGET_64BIT" - "mov{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}" + "mov{q}\t{%%fs:0, %0|%0, QWORD 
PTR fs:0}" [(set_attr "type" "imov") (set_attr "modrm" "0") (set_attr "length" "7") @@ -15533,7 +15650,7 @@ (match_operand:DI 1 "register_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" - "add{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}" + "add{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}" [(set_attr "type" "alu") (set_attr "modrm" "0") (set_attr "length" "7") @@ -15556,7 +15673,7 @@ (clobber (reg:CC FLAGS_REG))])] "!TARGET_64BIT && TARGET_GNU2_TLS" { - operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); ix86_tls_descriptor_calls_expanded_in_cfun = true; }) @@ -15605,7 +15722,7 @@ "" [(set (match_dup 0) (match_dup 5))] { - operands[5] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + operands[5] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2])); }) @@ -15620,7 +15737,7 @@ (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && TARGET_GNU2_TLS" { - operands[2] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); ix86_tls_descriptor_calls_expanded_in_cfun = true; }) @@ -15629,7 +15746,7 @@ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] UNSPEC_TLSDESC))] "TARGET_64BIT && TARGET_GNU2_TLS" - "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[%%rip]}" + "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[rip]}" [(set_attr "type" "lea") (set_attr "mode" "DI") (set_attr "length" "7") @@ -15664,7 +15781,7 @@ "" [(set (match_dup 0) (match_dup 4))] { - operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); + operands[4] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1])); }) @@ -15681,294 +15798,159 @@ ;; Gcc is slightly more smart about handling normal two address instructions ;; so use special patterns for add and mull. 
-(define_insn "*fop_sf_comm_mixed" - [(set (match_operand:SF 0 "register_operand" "=f,x") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "nonimmediate_operand" "%0,0") - (match_operand:SF 2 "nonimmediate_operand" "fm,xm")]))] - "TARGET_MIX_SSE_I387 +(define_insn "*fop__comm_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 && COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (if_then_else (eq_attr "alternative" "1") - (if_then_else (match_operand:SF 3 "mult_operator" "") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") (const_string "ssemul") (const_string "sseadd")) - (if_then_else (match_operand:SF 3 "mult_operator" "") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") (const_string "fmul") (const_string "fop")))) - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) -(define_insn "*fop_sf_comm_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "nonimmediate_operand" "%0") - (match_operand:SF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE_MATH +(define_insn "*fop__comm_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (if_then_else (match_operand:SF 3 "mult_operator" "") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") (const_string "ssemul") 
(const_string "sseadd"))) - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) -(define_insn "*fop_sf_comm_i387" - [(set (match_operand:SF 0 "register_operand" "=f") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "nonimmediate_operand" "%0") - (match_operand:SF 2 "nonimmediate_operand" "fm")]))] +(define_insn "*fop__comm_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm")]))] "TARGET_80387 && COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (if_then_else (match_operand:SF 3 "mult_operator" "") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") (const_string "fmul") (const_string "fop"))) - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) -(define_insn "*fop_sf_1_mixed" - [(set (match_operand:SF 0 "register_operand" "=f,f,x") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0,fm,0") - (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm")]))] - "TARGET_MIX_SSE_I387 +(define_insn "*fop__1_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 && !COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(and (eq_attr "alternative" "2") - (match_operand:SF 3 "mult_operator" "")) + (match_operand:MODEF 3 "mult_operator" "")) (const_string "ssemul") (and (eq_attr "alternative" "2") - (match_operand:SF 3 "div_operator" "")) + (match_operand:MODEF 3 "div_operator" "")) (const_string "ssediv") (eq_attr 
"alternative" "2") (const_string "sseadd") - (match_operand:SF 3 "mult_operator" "") + (match_operand:MODEF 3 "mult_operator" "") (const_string "fmul") - (match_operand:SF 3 "div_operator" "") + (match_operand:MODEF 3 "div_operator" "") (const_string "fdiv") ] (const_string "fop"))) - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) -(define_insn "*fop_sf_1_sse" +(define_insn "*rcpsf2_sse" [(set (match_operand:SF 0 "register_operand" "=x") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE_MATH + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RCP))] + "TARGET_SSE_MATH" + "rcpss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +(define_insn "*fop__1_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !COMMUTATIVE_ARITH_P (operands[3])" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:SF 3 "mult_operator" "") + (cond [(match_operand:MODEF 3 "mult_operator" "") (const_string "ssemul") - (match_operand:SF 3 "div_operator" "") + (match_operand:MODEF 3 "div_operator" "") (const_string "ssediv") ] (const_string "sseadd"))) - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) ;; This pattern is not fully shadowed by the pattern above. 
-(define_insn "*fop_sf_1_i387" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "nonimmediate_operand" "0,fm") - (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))] +(define_insn "*fop__1_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0")]))] "TARGET_80387 && !TARGET_SSE_MATH && !COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:SF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:SF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) - -;; ??? Add SSE splitters for these! -(define_insn "*fop_sf_2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (match_operator:SF 3 "binary_fp_operator" - [(float:SF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) - (match_operand:SF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_MODE_FIOP && !TARGET_SSE_MATH" - "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:SF 3 "mult_operator" "") + (cond [(match_operand:MODEF 3 "mult_operator" "") (const_string "fmul") - (match_operand:SF 3 "div_operator" "") + (match_operand:MODEF 3 "div_operator" "") (const_string "fdiv") ] (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "mode" "")]) - -(define_insn "*fop_sf_3_i387" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (match_operator:SF 3 "binary_fp_operator" - [(match_operand:SF 1 "register_operand" "0,0") - (float:SF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_MODE_FIOP && !TARGET_SSE_MATH" - "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:SF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:SF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") (set_attr "mode" "")]) -(define_insn "*fop_df_comm_mixed" - [(set (match_operand:DF 0 "register_operand" "=f,x") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "nonimmediate_operand" "%0,0") - (match_operand:DF 2 "nonimmediate_operand" "fm,xm")]))] - "TARGET_SSE2 && TARGET_MIX_SSE_I387 - && COMMUTATIVE_ARITH_P (operands[3]) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (eq_attr "alternative" "1") - (if_then_else (match_operand:DF 3 "mult_operator" "") - (const_string "ssemul") - (const_string "sseadd")) - (if_then_else (match_operand:DF 3 "mult_operator" "") - (const_string "fmul") - (const_string "fop")))) - (set_attr "mode" "DF")]) - -(define_insn "*fop_df_comm_sse" - [(set (match_operand:DF 0 "register_operand" "=x") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "nonimmediate_operand" "%0") - (match_operand:DF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE2 && TARGET_SSE_MATH - && COMMUTATIVE_ARITH_P (operands[3]) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (match_operand:DF 3 "mult_operator" "") - (const_string "ssemul") - (const_string "sseadd"))) - (set_attr "mode" "DF")]) - -(define_insn "*fop_df_comm_i387" - [(set (match_operand:DF 0 "register_operand" "=f") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "nonimmediate_operand" "%0") - (match_operand:DF 2 "nonimmediate_operand" "fm")]))] - "TARGET_80387 - && COMMUTATIVE_ARITH_P (operands[3]) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "* return output_387_binary_op (insn, 
operands);" - [(set (attr "type") - (if_then_else (match_operand:DF 3 "mult_operator" "") - (const_string "fmul") - (const_string "fop"))) - (set_attr "mode" "DF")]) - -(define_insn "*fop_df_1_mixed" - [(set (match_operand:DF 0 "register_operand" "=f,f,x") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0,fm,0") - (match_operand:DF 2 "nonimmediate_operand" "fm,0,xm")]))] - "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 - && !COMMUTATIVE_ARITH_P (operands[3]) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(and (eq_attr "alternative" "2") - (match_operand:DF 3 "mult_operator" "")) - (const_string "ssemul") - (and (eq_attr "alternative" "2") - (match_operand:DF 3 "div_operator" "")) - (const_string "ssediv") - (eq_attr "alternative" "2") - (const_string "sseadd") - (match_operand:DF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:DF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) - -(define_insn "*fop_df_1_sse" - [(set (match_operand:DF 0 "register_operand" "=x") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE2 && TARGET_SSE_MATH - && !COMMUTATIVE_ARITH_P (operands[3])" - "* return output_387_binary_op (insn, operands);" - [(set_attr "mode" "DF") - (set (attr "type") - (cond [(match_operand:DF 3 "mult_operator" "") - (const_string "ssemul") - (match_operand:DF 3 "div_operator" "") - (const_string "ssediv") - ] - (const_string "sseadd")))]) - -;; This pattern is not fully shadowed by the pattern above. 
-(define_insn "*fop_df_1_i387" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "nonimmediate_operand" "0,fm") - (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))] - "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) - && !COMMUTATIVE_ARITH_P (operands[3]) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:DF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) - ;; ??? Add SSE splitters for these! -(define_insn "*fop_df_2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (match_operator:DF 3 "binary_fp_operator" - [(float:DF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) - (match_operand:DF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_MODE_FIOP - && !(TARGET_SSE2 && TARGET_SSE_MATH)" +(define_insn "*fop__2_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(float:MODEF + (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:MODEF 2 "register_operand" "0,0")]))] + "TARGET_80387 && !TARGET_SSE_MATH + && TARGET_USE_MODE_FIOP" "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator" "") + (cond [(match_operand:MODEF 3 "mult_operator" "") (const_string "fmul") - (match_operand:DF 3 "div_operator" "") + (match_operand:MODEF 3 "div_operator" "") (const_string "fdiv") ] (const_string "fop"))) (set_attr "fp_int_src" "true") - (set_attr "mode" "")]) - -(define_insn "*fop_df_3_i387" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (match_operator:DF 3 "binary_fp_operator" - [(match_operand:DF 1 "register_operand" "0,0") - (float:DF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_MODE_FIOP - && !(TARGET_SSE2 && TARGET_SSE_MATH)" + (set_attr "mode" "")]) + +(define_insn "*fop__3_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "0,0") + (float:MODEF + (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && !TARGET_SSE_MATH + && TARGET_USE_MODE_FIOP" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "mult_operator" "") + (cond [(match_operand:MODEF 3 "mult_operator" "") (const_string "fmul") - (match_operand:DF 3 "div_operator" "") + (match_operand:MODEF 3 "div_operator" "") (const_string "fdiv") ] (const_string "fop"))) @@ -16058,11 +16040,12 @@ (const_string "fop"))) (set_attr "mode" "XF")]) -(define_insn "*fop_xf_2_i387" +(define_insn "*fop_xf_2_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" - [(float:XF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) - (match_operand:XF 2 "register_operand" "0,0")]))] + [(float:XF + (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:XF 2 "register_operand" "0,0")]))] "TARGET_80387 && TARGET_USE_MODE_FIOP" "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") @@ -16075,11 +16058,12 @@ (set_attr "fp_int_src" "true") (set_attr "mode" "")]) -(define_insn "*fop_xf_3_i387" +(define_insn "*fop_xf_3_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,0") - (float:XF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + (float:XF + (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] "TARGET_80387 && TARGET_USE_MODE_FIOP" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") @@ -16096,7 +16080,7 @@ [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(float_extend:XF - (match_operand:X87MODEF12 1 "nonimmediate_operand" "fm,0")) + (match_operand:MODEF 1 "nonimmediate_operand" "fm,0")) (match_operand:XF 2 "register_operand" "0,f")]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" @@ -16107,14 +16091,14 @@ (const_string "fdiv") ] (const_string "fop"))) - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) (define_insn "*fop_xf_5_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,f") (float_extend:XF - (match_operand:X87MODEF12 2 "nonimmediate_operand" "fm,0"))]))] + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -16124,15 +16108,15 @@ (const_string "fdiv") ] (const_string "fop"))) - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) (define_insn "*fop_xf_6_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0,f")) + (match_operand:MODEF 1 "register_operand" "0,f")) (float_extend:XF - (match_operand:X87MODEF12 2 "nonimmediate_operand" "fm,0"))]))] 
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -16142,15 +16126,15 @@ (const_string "fdiv") ] (const_string "fop"))) - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) (define_split [(set (match_operand 0 "register_operand" "") (match_operator 3 "binary_fp_operator" [(float (match_operand:X87MODEI12 1 "register_operand" "")) (match_operand 2 "register_operand" "")]))] - "TARGET_80387 && reload_completed - && FLOAT_MODE_P (GET_MODE (operands[0]))" + "reload_completed + && X87_FLOAT_MODE_P (GET_MODE (operands[0]))" [(const_int 0)] { operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); @@ -16169,8 +16153,8 @@ (match_operator 3 "binary_fp_operator" [(match_operand 1 "register_operand" "") (float (match_operand:X87MODEI12 2 "register_operand" ""))]))] - "TARGET_80387 && reload_completed - && FLOAT_MODE_P (GET_MODE (operands[0]))" + "reload_completed + && X87_FLOAT_MODE_P (GET_MODE (operands[0]))" [(const_int 0)] { operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); @@ -16190,8 +16174,8 @@ ;; all fancy i386 XFmode math functions. 
(define_insn "truncxf2_i387_noop_unspec" - [(set (match_operand:X87MODEF12 0 "register_operand" "=f") - (unspec:X87MODEF12 [(match_operand:XF 1 "register_operand" "f")] + [(set (match_operand:MODEF 0 "register_operand" "=f") + (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")] UNSPEC_TRUNC_NOOP))] "TARGET_USE_FANCY_MATH_387" "* return output_387_reg_move (insn, operands);" @@ -16212,18 +16196,37 @@ [(set (match_operand:XF 0 "register_operand" "=f") (sqrt:XF (float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0"))))] + (match_operand:MODEF 1 "register_operand" "0"))))] "TARGET_USE_FANCY_MATH_387" "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct") + (set_attr "athlon_decode" "direct") (set_attr "amdfam10_decode" "direct")]) +(define_insn "*rsqrtsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RSQRT))] + "TARGET_SSE_MATH" + "rsqrtss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +(define_expand "rsqrtsf2" + [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "")] + UNSPEC_RSQRT))] + "TARGET_SSE_MATH" +{ + ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1); + DONE; +}) + (define_insn "*sqrt2_sse" - [(set (match_operand:SSEMODEF 0 "register_operand" "=x") - (sqrt:SSEMODEF - (match_operand:SSEMODEF 1 "nonimmediate_operand" "xm")))] + [(set (match_operand:MODEF 0 "register_operand" "=x") + (sqrt:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "xm")))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" "sqrts\t{%1, %0|%0, %1}" [(set_attr "type" "sse") @@ -16232,12 +16235,21 @@ (set_attr "amdfam10_decode" "*")]) (define_expand "sqrt2" - [(set (match_operand:X87MODEF12 0 "register_operand" "") - (sqrt:X87MODEF12 - (match_operand:X87MODEF12 1 "nonimmediate_operand" "")))] + [(set (match_operand:MODEF 0 "register_operand" "") + (sqrt:MODEF 
+ (match_operand:MODEF 1 "nonimmediate_operand" "")))] "TARGET_USE_FANCY_MATH_387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" { + if (mode == SFmode + && TARGET_SSE_MATH && TARGET_RECIP && !optimize_size + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0); + DONE; + } + if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) { rtx op0 = gen_reg_rtx (XFmode); @@ -16267,27 +16279,31 @@ (define_expand "fmodxf3" [(use (match_operand:XF 0 "register_operand" "")) - (use (match_operand:XF 1 "register_operand" "")) - (use (match_operand:XF 2 "register_operand" ""))] + (use (match_operand:XF 1 "general_operand" "")) + (use (match_operand:XF 2 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387" { rtx label = gen_label_rtx (); - emit_label (label); + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); - emit_insn (gen_fpremxf4_i387 (operands[1], operands[2], - operands[1], operands[2])); + emit_move_insn (op1, operands[1]); + emit_move_insn (op2, operands[2]); + + emit_label (label); + emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2)); ix86_emit_fp_unordered_jump (label); LABEL_NUSES (label) = 1; - emit_move_insn (operands[0], operands[1]); + emit_move_insn (operands[0], op1); DONE; }) (define_expand "fmod3" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" "")) - (use (match_operand:X87MODEF12 2 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387" { rtx label = gen_label_rtx (); @@ -16331,27 +16347,31 @@ (define_expand "remainderxf3" [(use (match_operand:XF 0 "register_operand" "")) - (use (match_operand:XF 1 "register_operand" "")) - (use (match_operand:XF 2 "register_operand" ""))] + (use (match_operand:XF 1 "general_operand" "")) + (use 
(match_operand:XF 2 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387" { rtx label = gen_label_rtx (); - emit_label (label); + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_move_insn (op1, operands[1]); + emit_move_insn (op2, operands[2]); - emit_insn (gen_fprem1xf4_i387 (operands[1], operands[2], - operands[1], operands[2])); + emit_label (label); + emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2)); ix86_emit_fp_unordered_jump (label); LABEL_NUSES (label) = 1; - emit_move_insn (operands[0], operands[1]); + emit_move_insn (operands[0], op1); DONE; }) (define_expand "remainder3" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" "")) - (use (match_operand:X87MODEF12 2 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387" { rtx label = gen_label_rtx (); @@ -16390,7 +16410,7 @@ (define_insn "*sin_extendxf2_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0"))] + (match_operand:MODEF 1 "register_operand" "0"))] UNSPEC_SIN))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) @@ -16412,7 +16432,7 @@ (define_insn "*cos_extendxf2_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0"))] + (match_operand:MODEF 1 "register_operand" "0"))] UNSPEC_COS))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) @@ -16447,7 +16467,7 @@ (set (match_operand:XF 1 "register_operand" "") (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) - && !reload_completed && !reload_in_progress" + && !(reload_completed || reload_in_progress)" [(set (match_dup 1) (unspec:XF 
[(match_dup 2)] UNSPEC_SIN))] "") @@ -16458,14 +16478,14 @@ (set (match_operand:XF 1 "register_operand" "") (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) - && !reload_completed && !reload_in_progress" + && !(reload_completed || reload_in_progress)" [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))] "") (define_insn "sincos_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" "0"))] + (match_operand:MODEF 2 "register_operand" "0"))] UNSPEC_SINCOS_COS)) (set (match_operand:XF 1 "register_operand" "=u") (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] @@ -16480,31 +16500,31 @@ (define_split [(set (match_operand:XF 0 "register_operand" "") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" ""))] + (match_operand:MODEF 2 "register_operand" ""))] UNSPEC_SINCOS_COS)) (set (match_operand:XF 1 "register_operand" "") (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) - && !reload_completed && !reload_in_progress" + && !(reload_completed || reload_in_progress)" [(set (match_dup 1) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))] "") (define_split [(set (match_operand:XF 0 "register_operand" "") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" ""))] + (match_operand:MODEF 2 "register_operand" ""))] UNSPEC_SINCOS_COS)) (set (match_operand:XF 1 "register_operand" "") (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) - && !reload_completed && !reload_in_progress" + && !(reload_completed || reload_in_progress)" [(set (match_dup 0) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))] "") (define_expand "sincos3" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use 
(match_operand:X87MODEF12 1 "register_operand" "")) - (use (match_operand:X87MODEF12 2 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16533,11 +16553,11 @@ (set_attr "mode" "XF")]) (define_insn "fptan_extendxf4_i387" - [(set (match_operand:X87MODEF12 0 "register_operand" "=f") - (match_operand:X87MODEF12 3 "const_double_operand" "F")) + [(set (match_operand:MODEF 0 "register_operand" "=f") + (match_operand:MODEF 3 "const_double_operand" "F")) (set (match_operand:XF 1 "register_operand" "=u") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" "0"))] + (match_operand:MODEF 2 "register_operand" "0"))] UNSPEC_TAN))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) @@ -16562,8 +16582,8 @@ }) (define_expand "tan2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16595,9 +16615,9 @@ (define_insn "fpatan_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0")) + (match_operand:MODEF 1 "register_operand" "0")) (float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" "u"))] + (match_operand:MODEF 2 "register_operand" "u"))] UNSPEC_FPATAN)) (clobber (match_scratch:XF 3 "=2"))] "TARGET_USE_FANCY_MATH_387 @@ -16619,9 +16639,9 @@ "") (define_expand "atan23" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" "")) - (use 
(match_operand:X87MODEF12 2 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16648,8 +16668,8 @@ }) (define_expand "atan2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16687,8 +16707,8 @@ }) (define_expand "asin2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16725,8 +16745,8 @@ }) (define_expand "acos2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16756,7 +16776,7 @@ (define_insn "fyl2x_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0")) + (match_operand:MODEF 1 "register_operand" "0")) (match_operand:XF 2 "register_operand" "u")] UNSPEC_FYL2X)) (clobber (match_scratch:XF 3 "=2"))] @@ -16781,8 +16801,8 @@ }) (define_expand "log2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 
"register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16811,8 +16831,8 @@ }) (define_expand "log102" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16841,8 +16861,8 @@ }) (define_expand "log22" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16873,7 +16893,7 @@ (define_insn "fyl2xp1_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 1 "register_operand" "0")) + (match_operand:MODEF 1 "register_operand" "0")) (match_operand:XF 2 "register_operand" "u")] UNSPEC_FYL2XP1)) (clobber (match_scratch:XF 3 "=2"))] @@ -16896,8 +16916,8 @@ }) (define_expand "log1p2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16927,7 +16947,7 @@ (define_insn "fxtract_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF12 2 "register_operand" "0"))] + (match_operand:MODEF 2 "register_operand" "0"))] UNSPEC_XTRACT_FRACT)) (set (match_operand:XF 1 "register_operand" "=u") 
(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))] @@ -16952,8 +16972,8 @@ }) (define_expand "logb2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -16983,7 +17003,7 @@ (define_expand "ilogb2" [(use (match_operand:SI 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "register_operand" ""))] + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17059,8 +17079,8 @@ }) (define_expand "exp2" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17089,8 +17109,8 @@ }) (define_expand "exp102" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17119,8 +17139,8 @@ }) (define_expand "exp22" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17173,8 +17193,8 @@ }) (define_expand "expm12" - [(use (match_operand:X87MODEF12 0 
"register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17207,8 +17227,8 @@ }) (define_expand "ldexp3" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" "")) + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) (use (match_operand:SI 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) @@ -17239,9 +17259,9 @@ }) (define_expand "scalb3" - [(use (match_operand:X87MODEF12 0 "register_operand" "")) - (use (match_operand:X87MODEF12 1 "general_operand" "")) - (use (match_operand:X87MODEF12 2 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -17259,6 +17279,17 @@ }) +(define_insn "sse4_1_round2" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_15_operand" "n")] + UNSPEC_ROUND))] + "TARGET_ROUND" + "rounds\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "mode" "")]) + (define_insn "rintxf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] @@ -17270,20 +17301,26 @@ (set_attr "mode" "XF")]) (define_expand "rint2" - [(use (match_operand:SSEMODEF 0 "register_operand" "")) - (use (match_operand:SSEMODEF 1 "register_operand" ""))] + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 
"register_operand" ""))] "(TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math - && !optimize_size)" + && (TARGET_ROUND || !optimize_size))" { if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math - && !optimize_size) - ix86_expand_rint (operand0, operand1); + && (TARGET_ROUND || !optimize_size)) + { + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x04))); + else + ix86_expand_rint (operand0, operand1); + } else { rtx op0 = gen_reg_rtx (XFmode); @@ -17298,13 +17335,13 @@ }) (define_expand "round2" - [(match_operand:SSEMODEF 0 "register_operand" "") - (match_operand:SSEMODEF 1 "nonimmediate_operand" "")] + [(match_operand:MODEF 0 "register_operand" "") + (match_operand:MODEF 1 "nonimmediate_operand" "")] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math && !flag_rounding_math && !optimize_size" { - if ((mode != DFmode) || TARGET_64BIT) + if (TARGET_64BIT || (mode != DFmode)) ix86_expand_round (operand0, operand1); else ix86_expand_rounddf_32 (operand0, operand1); @@ -17312,8 +17349,8 @@ }) (define_insn_and_split "*fistdi2_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") - (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (unspec:DI [(match_operand:XF 1 "register_operand" "")] UNSPEC_FIST))] "TARGET_USE_FANCY_MATH_387 && !(reload_completed || reload_in_progress)" @@ -17348,7 +17385,7 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] UNSPEC_FIST)) - (clobber (match_operand:DI 2 "memory_operand" "=m,m")) + (clobber (match_operand:DI 2 "memory_operand" "=X,m")) (clobber (match_scratch:XF 3 "=&1f,&1f"))] "TARGET_USE_FANCY_MATH_387" "#" @@ -17379,8 +17416,8 @@ "") (define_insn_and_split "*fist2_1" - 
[(set (match_operand:X87MODEI12 0 "register_operand" "=r") - (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")] + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")] UNSPEC_FIST))] "TARGET_USE_FANCY_MATH_387 && !(reload_completed || reload_in_progress)" @@ -17441,18 +17478,18 @@ "TARGET_USE_FANCY_MATH_387" "") -(define_expand "lrint2" +(define_expand "lrint2" [(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "") - (unspec:SSEMODEI24 [(match_operand:SSEMODEF 1 "register_operand" "")] + (unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")] UNSPEC_FIX_NOTRUNC))] - "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && ((mode != DImode) || TARGET_64BIT)" "") -(define_expand "lround2" +(define_expand "lround2" [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] - "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + (match_operand:MODEF 1 "register_operand" "")] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && ((mode != DImode) || TARGET_64BIT) && !flag_trapping_math && !flag_rounding_math && !optimize_size" @@ -17463,8 +17500,8 @@ ;; Rounding mode control word calculation could clobber FLAGS_REG. 
(define_insn_and_split "frndintxf2_floor" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] UNSPEC_FRNDINT_FLOOR)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -17510,20 +17547,25 @@ DONE; }) -(define_expand "floordf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" +(define_expand "floor2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations && !optimize_size) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size))" { - if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math) + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size)) { - if (TARGET_64BIT) + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x01))); + else if (TARGET_64BIT || (mode != DFmode)) ix86_expand_floorceil (operand0, operand1, true); else ix86_expand_floorceildf_32 (operand0, operand1, true); @@ -17533,43 +17575,17 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_floor (op0, op1)); - - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - } - DONE; -}) - -(define_expand "floorsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 
"register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" -{ - if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math) - ix86_expand_floorceil (operand0, operand1, true); - else - { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_floor (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); } DONE; }) (define_insn_and_split "*fist2_floor_1" - [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") - (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")] + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] UNSPEC_FIST_FLOOR)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -17619,7 +17635,7 @@ UNSPEC_FIST_FLOOR)) (use (match_operand:HI 2 "memory_operand" "m,m")) (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:DI 4 "memory_operand" "=m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=X,m")) (clobber (match_scratch:XF 5 "=&1f,&1f"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" @@ -17678,7 +17694,7 @@ UNSPEC_FIST_FLOOR)) (use (match_operand:HI 2 "memory_operand" "m,m")) (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))] + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "#" @@ -17727,7 +17743,7 @@ (define_expand "lfloordi2" [(match_operand:DI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] + (match_operand:MODEF 1 "register_operand" 
"")] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && TARGET_64BIT && !flag_trapping_math && !optimize_size" @@ -17738,7 +17754,7 @@ (define_expand "lfloorsi2" [(match_operand:SI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] + (match_operand:MODEF 1 "register_operand" "")] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math && (!optimize_size || !TARGET_64BIT)" @@ -17749,8 +17765,8 @@ ;; Rounding mode control word calculation could clobber FLAGS_REG. (define_insn_and_split "frndintxf2_ceil" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] UNSPEC_FRNDINT_CEIL)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -17796,20 +17812,25 @@ DONE; }) -(define_expand "ceildf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" +(define_expand "ceil2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations && !optimize_size) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size))" { - if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math) + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size)) { - if (TARGET_64BIT) + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x02))); + else if (TARGET_64BIT || (mode != 
DFmode)) ix86_expand_floorceil (operand0, operand1, false); else ix86_expand_floorceildf_32 (operand0, operand1, false); @@ -17819,43 +17840,17 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_ceil (op0, op1)); - - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - } - DONE; -}) - -(define_expand "ceilsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" -{ - if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math) - ix86_expand_floorceil (operand0, operand1, false); - else - { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_ceil (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); } DONE; }) (define_insn_and_split "*fist2_ceil_1" - [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") - (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")] + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")] UNSPEC_FIST_CEIL)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -17905,7 +17900,7 @@ UNSPEC_FIST_CEIL)) (use (match_operand:HI 2 "memory_operand" "m,m")) (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:DI 4 "memory_operand" "=m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=X,m")) (clobber (match_scratch:XF 5 "=&1f,&1f"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" @@ -17964,7 +17959,7 @@ UNSPEC_FIST_CEIL)) (use 
(match_operand:HI 2 "memory_operand" "m,m")) (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))] + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "#" @@ -18013,7 +18008,7 @@ (define_expand "lceildi2" [(match_operand:DI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] + (match_operand:MODEF 1 "register_operand" "")] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && TARGET_64BIT && !flag_trapping_math" { @@ -18023,7 +18018,7 @@ (define_expand "lceilsi2" [(match_operand:SI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEF 1 "register_operand" "")] + (match_operand:MODEF 1 "register_operand" "")] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math" { @@ -18033,8 +18028,8 @@ ;; Rounding mode control word calculation could clobber FLAGS_REG. (define_insn_and_split "frndintxf2_trunc" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] UNSPEC_FRNDINT_TRUNC)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -18080,20 +18075,25 @@ DONE; }) -(define_expand "btruncdf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" +(define_expand "btrunc2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations && !optimize_size) + || 
(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size))" { - if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math) + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || !optimize_size)) { - if (TARGET_64BIT) + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x03))); + else if (TARGET_64BIT || (mode != DFmode)) ix86_expand_trunc (operand0, operand1); else ix86_expand_truncdf_32 (operand0, operand1); @@ -18103,44 +18103,18 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_trunc (op0, op1)); - - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - } - DONE; -}) - -(define_expand "btruncsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" -{ - if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math) - ix86_expand_trunc (operand0, operand1); - else - { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_trunc (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); } DONE; }) ;; Rounding mode control word calculation could clobber FLAGS_REG. 
(define_insn_and_split "frndintxf2_mask_pm" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] UNSPEC_FRNDINT_MASK_PM)) (clobber (reg:CC FLAGS_REG))] "TARGET_USE_FANCY_MATH_387 @@ -18187,37 +18161,21 @@ DONE; }) -(define_expand "nearbyintdf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" -{ - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_mask_pm (op0, op1)); - - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - DONE; -}) - -(define_expand "nearbyintsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 "register_operand" ""))] +(define_expand "nearbyint2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_mask_pm (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); DONE; }) @@ -18237,8 +18195,7 @@ (use (match_operand:X87MODEF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && TARGET_C99_FUNCTIONS - && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - || TARGET_MIX_SSE_I387)" + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" { rtx mask = GEN_INT (0x45); rtx val = GEN_INT 
(0x05); @@ -18259,9 +18216,31 @@ DONE; }) +(define_expand "signbit2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:X87MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + rtx mask = GEN_INT (0x0200); + + rtx scratch = gen_reg_rtx (HImode); + + emit_insn (gen_fxam2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), mask)); + DONE; +}) ;; Block operation instructions +(define_insn "cld" + [(unspec_volatile [(const_int 0)] UNSPECV_CLD)] + "" + "cld" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + (define_expand "movmemsi" [(use (match_operand:BLK 0 "memory_operand" "")) (use (match_operand:BLK 1 "memory_operand" "")) @@ -18313,7 +18292,9 @@ operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust); operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust); - if (TARGET_SINGLE_STRINGOP || optimize_size) + /* Can't use this if the user has appropriated esi or edi. 
*/ + if ((TARGET_SINGLE_STRINGOP || optimize_size) + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])) { emit_insn (gen_strmov_singleop (operands[0], operands[1], operands[2], operands[3], @@ -18332,7 +18313,7 @@ (set (match_operand 2 "register_operand" "") (match_operand 5 "" ""))])] "TARGET_SINGLE_STRINGOP || optimize_size" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*strmovdi_rex_1" [(set (mem:DI (match_operand:DI 2 "register_operand" "0")) @@ -18359,7 +18340,7 @@ (plus:SI (match_dup 3) (const_int 4)))] "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" - "{movsl|movsd}" + "movs{l|d}" [(set_attr "type" "str") (set_attr "mode" "SI") (set_attr "memory" "both")]) @@ -18374,7 +18355,7 @@ (plus:DI (match_dup 3) (const_int 4)))] "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" - "{movsl|movsd}" + "movs{l|d}" [(set_attr "type" "str") (set_attr "mode" "SI") (set_attr "memory" "both")]) @@ -18449,7 +18430,7 @@ (match_operand 3 "memory_operand" "")) (use (match_dup 4))])] "" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*rep_movdi_rex64" [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) @@ -18464,7 +18445,7 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "TARGET_64BIT" - "{rep\;movsq|rep movsq}" + "rep movsq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -18483,7 +18464,7 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!TARGET_64BIT" - "{rep\;movsl|rep movsd}" + "rep movs{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -18502,7 +18483,7 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "TARGET_64BIT" - "{rep\;movsl|rep movsd}" + "rep movs{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -18519,7 +18500,7 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!TARGET_64BIT" - "{rep\;movsb|rep movsb}" + "rep movsb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") 
@@ -18536,7 +18517,7 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "TARGET_64BIT" - "{rep\;movsb|rep movsb}" + "rep movsb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -18609,7 +18590,7 @@ (set (match_operand 0 "register_operand" "") (match_operand 3 "" ""))])] "TARGET_SINGLE_STRINGOP || optimize_size" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*strsetdi_rex_1" [(set (mem:DI (match_operand:DI 1 "register_operand" "0")) @@ -18630,7 +18611,7 @@ (plus:SI (match_dup 1) (const_int 4)))] "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" - "{stosl|stosd}" + "stos{l|d}" [(set_attr "type" "str") (set_attr "memory" "store") (set_attr "mode" "SI")]) @@ -18642,7 +18623,7 @@ (plus:DI (match_dup 1) (const_int 4)))] "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" - "{stosl|stosd}" + "stos{l|d}" [(set_attr "type" "str") (set_attr "memory" "store") (set_attr "mode" "SI")]) @@ -18703,7 +18684,7 @@ (use (match_operand 3 "register_operand" "")) (use (match_dup 1))])] "" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*rep_stosdi_rex64" [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) @@ -18716,7 +18697,7 @@ (use (match_operand:DI 2 "register_operand" "a")) (use (match_dup 4))] "TARGET_64BIT" - "{rep\;stosq|rep stosq}" + "rep stosq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -18733,7 +18714,7 @@ (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] "!TARGET_64BIT" - "{rep\;stosl|rep stosd}" + "rep stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -18750,7 +18731,7 @@ (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] "TARGET_64BIT" - "{rep\;stosl|rep stosd}" + "rep stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -18766,7 +18747,7 @@ (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4))] 
"!TARGET_64BIT" - "{rep\;stosb|rep stosb}" + "rep stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -18782,7 +18763,7 @@ (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4))] "TARGET_64BIT" - "{rep\;stosb|rep stosb}" + "rep stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -18799,7 +18780,7 @@ rtx addr1, addr2, out, outlow, count, countreg, align; /* Can't use this if the user has appropriated esi or edi. */ - if (global_regs[4] || global_regs[5]) + if (fixed_regs[SI_REG] || fixed_regs[DI_REG]) FAIL; out = operands[0]; @@ -18879,7 +18860,7 @@ (clobber (match_operand 1 "register_operand" "")) (clobber (match_dup 2))])] "" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*cmpstrnqi_nz_1" [(set (reg:CC FLAGS_REG) @@ -18891,7 +18872,7 @@ (clobber (match_operand:SI 1 "register_operand" "=D")) (clobber (match_operand:SI 2 "register_operand" "=c"))] "!TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -18906,7 +18887,7 @@ (clobber (match_operand:DI 1 "register_operand" "=D")) (clobber (match_operand:DI 2 "register_operand" "=c"))] "TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -18926,7 +18907,7 @@ (clobber (match_operand 1 "register_operand" "")) (clobber (match_dup 2))])] "" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*cmpstrnqi_1" [(set (reg:CC FLAGS_REG) @@ -18941,7 +18922,7 @@ (clobber (match_operand:SI 1 "register_operand" "=D")) (clobber (match_operand:SI 2 "register_operand" "=c"))] "!TARGET_64BIT" - "repz{\;| }cmpsb" + "repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -18959,7 +18940,7 @@ (clobber (match_operand:DI 1 "register_operand" "=D")) (clobber (match_operand:DI 2 "register_operand" "=c"))] "TARGET_64BIT" - "repz{\;| }cmpsb" + 
"repz cmpsb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -18995,7 +18976,7 @@ (clobber (match_operand 1 "register_operand" "")) (clobber (reg:CC FLAGS_REG))])] "" - "") + "ix86_current_function_needs_cld = 1;") (define_insn "*strlenqi_1" [(set (match_operand:SI 0 "register_operand" "=&c") @@ -19006,7 +18987,7 @@ (clobber (match_operand:SI 1 "register_operand" "=D")) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - "repnz{\;| }scasb" + "repnz scasb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -19020,7 +19001,7 @@ (clobber (match_operand:DI 1 "register_operand" "=D")) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" - "repnz{\;| }scasb" + "repnz scasb" [(set_attr "type" "str") (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) @@ -19114,7 +19095,7 @@ (match_operand:DI 2 "general_operand" "") (match_operand:DI 3 "general_operand" "")))] "TARGET_64BIT" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn "x86_movdicc_0_m1_rex64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -19133,6 +19114,21 @@ (set_attr "mode" "DI") (set_attr "length_immediate" "0")]) +(define_insn "*x86_movdicc_0_m1_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extract:DI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int 1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{q}\t%0, %0" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "DI") + (set_attr "length_immediate" "0")]) + (define_insn "*movdicc_c_rex64" [(set (match_operand:DI 0 "register_operand" "=r,r") (if_then_else:DI (match_operator 1 "ix86_comparison_operator" @@ -19153,7 +19149,7 @@ (match_operand:SI 2 "general_operand" "") (match_operand:SI 3 "general_operand" "")))] "" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc 
(operands)) DONE; else FAIL;") ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing ;; the register first winds up with `sbbl $0,reg', which is also weird. @@ -19176,6 +19172,21 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "0")]) +(define_insn "*x86_movsicc_0_m1_se" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int 1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{l}\t%0, %0" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + (define_insn "*movsicc_noc" [(set (match_operand:SI 0 "register_operand" "=r,r") (if_then_else:SI (match_operator 1 "ix86_comparison_operator" @@ -19196,7 +19207,7 @@ (match_operand:HI 2 "general_operand" "") (match_operand:HI 3 "general_operand" "")))] "TARGET_HIMODE_MATH" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn "*movhicc_noc" [(set (match_operand:HI 0 "register_operand" "=r,r") @@ -19218,7 +19229,7 @@ (match_operand:QI 2 "general_operand" "") (match_operand:QI 3 "general_operand" "")))] "TARGET_QIMODE_MATH" - "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") (define_insn_and_split "*movqicc_noc" [(set (match_operand:QI 0 "register_operand" "=r,r") @@ -19240,13 +19251,15 @@ [(set_attr "type" "icmov") (set_attr "mode" "SI")]) -(define_expand "movsfcc" - [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (match_operand 1 "comparison_operator" "") - (match_operand:SF 2 "register_operand" "") - (match_operand:SF 3 "register_operand" "")))] - "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH" - "if (! 
ix86_expand_fp_movcc (operands)) FAIL; DONE;") +(define_expand "movcc" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (if_then_else:X87MODEF + (match_operand 1 "comparison_operator" "") + (match_operand:X87MODEF 2 "register_operand" "") + (match_operand:X87MODEF 3 "register_operand" "")))] + "(TARGET_80387 && TARGET_CMOVE) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") (define_insn "*movsfcc_1_387" [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") @@ -19264,14 +19277,6 @@ [(set_attr "type" "fcmov,fcmov,icmov,icmov") (set_attr "mode" "SF,SF,SI,SI")]) -(define_expand "movdfcc" - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (match_operand 1 "comparison_operator" "") - (match_operand:DF 2 "register_operand" "") - (match_operand:DF 3 "register_operand" "")))] - "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)" - "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") - (define_insn "*movdfcc_1" [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" @@ -19315,22 +19320,13 @@ [(set (match_dup 2) (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) (match_dup 5) - (match_dup 7))) + (match_dup 6))) (set (match_dup 3) (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) - (match_dup 6) + (match_dup 7) (match_dup 8)))] - "split_di (operands+2, 1, operands+5, operands+6); - split_di (operands+3, 1, operands+7, operands+8); - split_di (operands, 1, operands+2, operands+3);") - -(define_expand "movxfcc" - [(set (match_operand:XF 0 "register_operand" "") - (if_then_else:XF (match_operand 1 "comparison_operator" "") - (match_operand:XF 2 "register_operand" "") - (match_operand:XF 3 "register_operand" "")))] - "TARGET_80387 && TARGET_CMOVE" - "if (! 
ix86_expand_fp_movcc (operands)) FAIL; DONE;") + "split_di (&operands[2], 2, &operands[5], &operands[7]); + split_di (&operands[0], 1, &operands[2], &operands[3]);") (define_insn "*movxfcc_1" [(set (match_operand:XF 0 "register_operand" "=f,f") @@ -19345,46 +19341,34 @@ [(set_attr "type" "fcmov") (set_attr "mode" "XF")]) +;; All moves in SSE5 pcmov instructions are 128 bits and hence we restrict +;; the scalar versions to have only XMM registers as operands. + +;; SSE5 conditional move +(define_insn "*sse5_pcmov_" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (if_then_else:MODEF + (match_operand:MODEF 1 "register_operand" "x,0") + (match_operand:MODEF 2 "register_operand" "0,x") + (match_operand:MODEF 3 "register_operand" "x,x")))] + "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" + "pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" + [(set_attr "type" "sse4arg")]) + ;; These versions of the min/max patterns are intentionally ignorant of ;; their behavior wrt -0.0 and NaN (via the commutative operand mark). ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator ;; are undefined in this condition, we're certain this is correct. 
-(define_insn "sminsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (smin:SF (match_operand:SF 1 "nonimmediate_operand" "%0") - (match_operand:SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE_MATH" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "smaxsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (smax:SF (match_operand:SF 1 "nonimmediate_operand" "%0") - (match_operand:SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE_MATH" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "smindf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (smin:DF (match_operand:DF 1 "nonimmediate_operand" "%0") - (match_operand:DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "minsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_insn "smaxdf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (smax:DF (match_operand:DF 1 "nonimmediate_operand" "%0") - (match_operand:DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "maxsd\t{%2, %0|%0, %2}" +(define_insn "3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smaxmin:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "s\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) ;; These versions of the min/max patterns implement exactly the operations ;; min = (op1 < op2 ? op1 : op2) @@ -19392,45 +19376,27 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. 
-(define_insn "*ieee_sminsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (unspec:SF [(match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MIN))] - "TARGET_SSE_MATH" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "*ieee_smaxsf3" - [(set (match_operand:SF 0 "register_operand" "=x") - (unspec:SF [(match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MAX))] - "TARGET_SSE_MATH" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "*ieee_smindf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (unspec:DF [(match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MIN))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "minsd\t{%2, %0|%0, %2}" +(define_insn "*ieee_smin3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "mins\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) -(define_insn "*ieee_smaxdf3" - [(set (match_operand:DF 0 "register_operand" "=x") - (unspec:DF [(match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MAX))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "maxsd\t{%2, %0|%0, %2}" +(define_insn "*ieee_smax3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "maxs\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) + (set_attr "mode" "")]) ;; Make two stack loads independent: ;; fld aa 
fld aa @@ -19459,37 +19425,13 @@ operands[1], operands[0]);") ;; Conditional addition patterns -(define_expand "addqicc" - [(match_operand:QI 0 "register_operand" "") - (match_operand 1 "comparison_operator" "") - (match_operand:QI 2 "register_operand" "") - (match_operand:QI 3 "const_int_operand" "")] - "" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") - -(define_expand "addhicc" - [(match_operand:HI 0 "register_operand" "") +(define_expand "addcc" + [(match_operand:SWI 0 "register_operand" "") (match_operand 1 "comparison_operator" "") - (match_operand:HI 2 "register_operand" "") - (match_operand:HI 3 "const_int_operand" "")] + (match_operand:SWI 2 "register_operand" "") + (match_operand:SWI 3 "const_int_operand" "")] "" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") - -(define_expand "addsicc" - [(match_operand:SI 0 "register_operand" "") - (match_operand 1 "comparison_operator" "") - (match_operand:SI 2 "register_operand" "") - (match_operand:SI 3 "const_int_operand" "")] - "" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") - -(define_expand "adddicc" - [(match_operand:DI 0 "register_operand" "") - (match_operand 1 "comparison_operator" "") - (match_operand:DI 2 "register_operand" "") - (match_operand:DI 3 "const_int_operand" "")] - "TARGET_64BIT" - "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") ;; Misc patterns (?) 
@@ -19618,12 +19560,12 @@ (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && TARGET_STACK_PROBE" - "call\t__alloca" + "call\t___chkstk" [(set_attr "type" "multi") (set_attr "length" "5")]) (define_insn "allocate_stack_worker_64" - [(set (match_operand:DI 0 "register_operand" "=a") + [(set (match_operand:DI 0 "register_operand" "+a") (unspec_volatile:DI [(match_dup 0)] UNSPECV_STACK_PROBE)) (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 0))) (clobber (reg:DI R10_REG)) @@ -19727,11 +19669,11 @@ (set (match_operand 1 "register_operand" "") (and (match_dup 3) (match_dup 4)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - /* Ensure that the operand will remain sign-extended immediate. */ - && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode) && ! optimize_size && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX) - || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))" + || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode)) + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)" [(parallel [(set (match_dup 0) (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4)) (const_int 0)])) @@ -19756,10 +19698,10 @@ (match_operand:HI 3 "const_int_operand" "")) (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && reload_completed - /* Ensure that the operand will remain sign-extended immediate. */ - && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode) && ! TARGET_FAST_PREFIX - && ! optimize_size" + && ! optimize_size + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? 
CCNOmode : CCZmode)" [(set (match_dup 0) (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) (const_int 0)]))] @@ -19918,8 +19860,8 @@ (set (match_operand:SI 0 "memory_operand" "") (match_operand:SI 1 "immediate_operand" ""))] "! optimize_size - && get_attr_length (insn) >= ix86_cost->large_insn - && TARGET_SPLIT_LONG_MOVES" + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -19928,8 +19870,9 @@ [(match_scratch:HI 2 "r") (set (match_operand:HI 0 "memory_operand" "") (match_operand:HI 1 "immediate_operand" ""))] - "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn - && TARGET_SPLIT_LONG_MOVES" + "! optimize_size + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -19938,8 +19881,9 @@ [(match_scratch:QI 2 "q") (set (match_operand:QI 0 "memory_operand" "") (match_operand:QI 1 "immediate_operand" ""))] - "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn - && TARGET_SPLIT_LONG_MOVES" + "! optimize_size + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -19951,7 +19895,7 @@ [(match_operand:SI 2 "memory_operand" "") (const_int 0)])) (match_scratch:SI 3 "r")] - "ix86_match_ccmode (insn, CCNOmode) && ! optimize_size" + " ! 
optimize_size && ix86_match_ccmode (insn, CCNOmode)" [(set (match_dup 3) (match_dup 2)) (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))] "") @@ -19971,11 +19915,11 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "") (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] "!optimize_size - && peep2_regno_dead_p (0, FLAGS_REG) && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], SImode))) - || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode)))" + || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode))) + && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (xor:SI (match_dup 1) (const_int -1))) (clobber (reg:CC FLAGS_REG))])] @@ -19985,11 +19929,11 @@ [(set (match_operand:HI 0 "nonimmediate_operand" "") (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] "!optimize_size - && peep2_regno_dead_p (0, FLAGS_REG) && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], HImode))) - || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode)))" + || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode))) + && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (xor:HI (match_dup 1) (const_int -1))) (clobber (reg:CC FLAGS_REG))])] @@ -19999,11 +19943,11 @@ [(set (match_operand:QI 0 "nonimmediate_operand" "") (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] "!optimize_size - && peep2_regno_dead_p (0, FLAGS_REG) && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], QImode))) - || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode)))" + || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode))) + && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (xor:QI (match_dup 1) (const_int -1))) (clobber (reg:CC FLAGS_REG))])] @@ -20023,7 +19967,7 @@ (match_operand:SI 3 
"immediate_operand" "")) (const_int 0)]))] "ix86_match_ccmode (insn, CCNOmode) - && (true_regnum (operands[2]) != 0 + && (true_regnum (operands[2]) != AX_REG || satisfies_constraint_K (operands[3])) && peep2_reg_dead_p (1, operands[2])" [(parallel @@ -20045,7 +19989,7 @@ (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[2]) != 0 + && true_regnum (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" [(parallel [(set (match_dup 0) @@ -20067,7 +20011,7 @@ (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[2]) != 0 + && true_regnum (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" [(parallel [(set (match_dup 0) (match_op_dup 1 @@ -20856,7 +20800,7 @@ (call (mem:QI (reg:DI R11_REG)) (match_operand:DI 1 "" "")))] "SIBLING_CALL_P (insn) && TARGET_64BIT" - "jmp\t*%%r11" + "jmp\t{*%%}r11" [(set_attr "type" "callv")]) ;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5. 
@@ -20903,30 +20847,28 @@ "TARGET_64BIT && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128 && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" - "* { int i; operands[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, operands[0], operands[4])); - output_asm_insn (\"jmp\\t%A1\", operands); + output_asm_insn ("jmp\t%A1", operands); for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) { operands[4] = adjust_address (operands[0], DImode, i*16); operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); PUT_MODE (operands[4], TImode); if (GET_CODE (XEXP (operands[0], 0)) != PLUS) - output_asm_insn (\"rex\", operands); - output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands); + output_asm_insn ("rex", operands); + output_asm_insn ("movaps\t{%5, %4|%4, %5}", operands); } - (*targetm.asm_out.internal_label) (asm_out_file, \"L\", - CODE_LABEL_NUMBER (operands[3])); - return \"\"; + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (operands[3])); + return ""; } - " [(set_attr "type" "other") (set_attr "length_immediate" "0") (set_attr "length_address" "0") - (set_attr "length" "135") + (set_attr "length" "34") (set_attr "memory" "store") (set_attr "modrm" "0") (set_attr "mode" "DI")]) @@ -21064,7 +21006,7 @@ (set (match_scratch:SI 2 "=&r") (const_int 0)) (clobber (reg:CC FLAGS_REG))] "" - "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR %%gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" [(set_attr "type" "multi")]) (define_insn "stack_tls_protect_set_di" @@ -21078,9 +21020,9 @@ system call would not have to trash the userspace segment register, which would be expensive */ if (ix86_cmodel != CM_KERNEL) - return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR %%fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; else - return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR 
%%gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; } [(set_attr "type" "multi")]) @@ -21139,7 +21081,7 @@ UNSPEC_SP_TLS_TEST)) (clobber (match_scratch:SI 3 "=r"))] "" - "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR %%gs:%P2}" + "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR gs:%P2}" [(set_attr "type" "multi")]) (define_insn "stack_tls_protect_test_di" @@ -21154,12 +21096,42 @@ system call would not have to trash the userspace segment register, which would be expensive */ if (ix86_cmodel != CM_KERNEL) - return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}"; + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR fs:%P2}"; else - return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR %%gs:%P2}"; + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR gs:%P2}"; } [(set_attr "type" "multi")]) +(define_mode_iterator CRC32MODE [QI HI SI]) +(define_mode_attr crc32modesuffix [(QI "b") (HI "w") (SI "l")]) +(define_mode_attr crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")]) + +(define_insn "sse4_2_crc32<mode>" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "0") + (match_operand:CRC32MODE 2 "nonimmediate_operand" "<crc32modeconstraint>")] + UNSPEC_CRC32))] + "TARGET_SSE4_2" + "crc32<crc32modesuffix>\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "SI")]) + +(define_insn "sse4_2_crc32di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm")] + UNSPEC_CRC32))] + "TARGET_SSE4_2 && TARGET_64BIT" + "crc32q\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "DI")]) + (include "mmx.md") (include
"sse.md") (include "sync.md")