1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V?TImode, used in move patterns.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 (V2TI "TARGET_AVX") V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
102 ;; ??? We should probably use TImode instead.
103 (define_mode_iterator VIMAX_AVX2
104 [(V2TI "TARGET_AVX2") V1TI])
106 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
107 (define_mode_iterator SSESCALARMODE
108 [(V2TI "TARGET_AVX2") TI])
110 (define_mode_iterator VI12_AVX2
111 [(V32QI "TARGET_AVX2") V16QI
112 (V16HI "TARGET_AVX2") V8HI])
114 (define_mode_iterator VI24_AVX2
115 [(V16HI "TARGET_AVX2") V8HI
116 (V8SI "TARGET_AVX2") V4SI])
118 (define_mode_iterator VI124_AVX2
119 [(V32QI "TARGET_AVX2") V16QI
120 (V16HI "TARGET_AVX2") V8HI
121 (V8SI "TARGET_AVX2") V4SI])
123 (define_mode_iterator VI248_AVX2
124 [(V16HI "TARGET_AVX2") V8HI
125 (V8SI "TARGET_AVX2") V4SI
126 (V4DI "TARGET_AVX2") V2DI])
128 (define_mode_iterator VI48_AVX2
129 [V8SI V4SI V4DI V2DI])
131 (define_mode_iterator VI4SD_AVX2
134 (define_mode_iterator V48_AVX2
137 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
138 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
140 (define_mode_attr sse2_avx2
141 [(V16QI "sse2") (V32QI "avx2")
142 (V8HI "sse2") (V16HI "avx2")
143 (V4SI "sse2") (V8SI "avx2")
144 (V2DI "sse2") (V4DI "avx2")
145 (V1TI "sse2") (V2TI "avx2")])
147 (define_mode_attr ssse3_avx2
148 [(V16QI "ssse3") (V32QI "avx2")
149 (V8HI "ssse3") (V16HI "avx2")
150 (V4SI "ssse3") (V8SI "avx2")
151 (V2DI "ssse3") (V4DI "avx2")
152 (TI "ssse3") (V2TI "avx2")])
154 (define_mode_attr sse4_1_avx2
155 [(V16QI "sse4_1") (V32QI "avx2")
156 (V8HI "sse4_1") (V16HI "avx2")
157 (V4SI "sse4_1") (V8SI "avx2")
158 (V2DI "sse4_1") (V4DI "avx2")])
160 (define_mode_attr avx_avx2
161 [(V4SF "avx") (V2DF "avx")
162 (V8SF "avx") (V4DF "avx")
163 (V4SI "avx2") (V2DI "avx2")
164 (V8SI "avx2") (V4DI "avx2")])
166 (define_mode_attr vec_avx2
167 [(V16QI "vec") (V32QI "avx2")
168 (V8HI "vec") (V16HI "avx2")
169 (V4SI "vec") (V8SI "avx2")
170 (V2DI "vec") (V4DI "avx2")])
172 ;; Mapping of logic-shift operators
173 (define_code_iterator lshift [lshiftrt ashift])
175 ;; Base name for define_insn
176 (define_code_attr lshift_insn [(lshiftrt "srl") (ashift "sll")])
178 ;; Base name for insn mnemonic
179 (define_code_attr lshift [(lshiftrt "lshr") (ashift "lshl")])
181 (define_mode_attr ssedoublemode
182 [(V16HI "V16SI") (V8HI "V8SI")])
184 (define_mode_attr ssebytemode
185 [(V4DI "V32QI") (V2DI "V16QI")])
187 ;; All 128bit vector integer modes
188 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
190 ;; All 256bit vector integer modes
191 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
193 ;; Random 128bit vector integer mode combinations
194 (define_mode_iterator VI12_128 [V16QI V8HI])
195 (define_mode_iterator VI14_128 [V16QI V4SI])
196 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
197 (define_mode_iterator VI24_128 [V8HI V4SI])
198 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
200 ;; Random 256bit vector integer mode combinations
201 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
202 (define_mode_iterator VI248_256 [V16HI V8SI V4DI])
204 ;; Int-float size matches
205 (define_mode_iterator VI4F_128 [V4SI V4SF])
206 (define_mode_iterator VI8F_128 [V2DI V2DF])
207 (define_mode_iterator VI4F_256 [V8SI V8SF])
208 (define_mode_iterator VI8F_256 [V4DI V4DF])
210 ;; Mapping from float mode to required SSE level
211 (define_mode_attr sse
212 [(SF "sse") (DF "sse2")
213 (V4SF "sse") (V2DF "sse2")
214 (V8SF "avx") (V4DF "avx")])
216 (define_mode_attr sse2
217 [(V16QI "sse2") (V32QI "avx")
218 (V2DI "sse2") (V4DI "avx")])
220 (define_mode_attr sse3
221 [(V16QI "sse3") (V32QI "avx")])
223 (define_mode_attr sse4_1
224 [(V4SF "sse4_1") (V2DF "sse4_1")
225 (V8SF "avx") (V4DF "avx")])
227 (define_mode_attr avxsizesuffix
228 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
229 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
230 (V8SF "256") (V4DF "256")
231 (V4SF "") (V2DF "")])
233 ;; SSE instruction mode
234 (define_mode_attr sseinsnmode
235 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
236 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
237 (V8SF "V8SF") (V4DF "V4DF")
238 (V4SF "V4SF") (V2DF "V2DF")
241 ;; Mapping of vector float modes to an integer mode of the same size
242 (define_mode_attr sseintvecmode
243 [(V8SF "V8SI") (V4DF "V4DI")
244 (V4SF "V4SI") (V2DF "V2DI")
245 (V4DF "V4DI") (V8SF "V8SI")
246 (V8SI "V8SI") (V4DI "V4DI")
247 (V4SI "V4SI") (V2DI "V2DI")
248 (V16HI "V16HI") (V8HI "V8HI")
249 (V32QI "V32QI") (V16QI "V16QI")
252 ;; Mapping of vector modes to a vector mode of double size
253 (define_mode_attr ssedoublevecmode
254 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
255 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
256 (V8SF "V16SF") (V4DF "V8DF")
257 (V4SF "V8SF") (V2DF "V4DF")])
259 ;; Mapping of vector modes to a vector mode of half size
260 (define_mode_attr ssehalfvecmode
261 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
262 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
263 (V8SF "V4SF") (V4DF "V2DF")
266 ;; Mapping of vector modes back to the scalar modes
267 (define_mode_attr ssescalarmode
268 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
269 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
270 (V8SF "SF") (V4DF "DF")
271 (V4SF "SF") (V2DF "DF")])
273 ;; Number of scalar elements in each vector type
274 (define_mode_attr ssescalarnum
275 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
276 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
277 (V8SF "8") (V4DF "4")
278 (V4SF "4") (V2DF "2")])
280 ;; SSE scalar suffix for vector modes
281 (define_mode_attr ssescalarmodesuffix
283 (V8SF "ss") (V4DF "sd")
284 (V4SF "ss") (V2DF "sd")
285 (V8SI "ss") (V4DI "sd")
288 ;; Pack/unpack vector modes
289 (define_mode_attr sseunpackmode
290 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
291 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
293 (define_mode_attr ssepackmode
294 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
295 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
297 ;; Mapping of the max integer size for xop rotate immediate constraint
298 (define_mode_attr sserotatemax
299 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
301 ;; Mapping of mode to cast intrinsic name
302 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
304 ;; Instruction suffix for sign and zero extensions.
305 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
307 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
308 (define_mode_attr i128
309 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
310 (V8SI "%~128") (V4DI "%~128")])
313 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
315 (define_mode_iterator AVXMODE48P_DI
316 [V2DI V2DF V4DI V4DF V4SF V4SI])
317 (define_mode_attr AVXMODE48P_DI
318 [(V2DI "V2DI") (V2DF "V2DI")
319 (V4DI "V4DI") (V4DF "V4DI")
320 (V4SI "V2DI") (V4SF "V2DI")
321 (V8SI "V4DI") (V8SF "V4DI")])
322 (define_mode_attr gthrfirstp
323 [(V2DI "p") (V2DF "")
326 (V8SI "p") (V8SF "")])
327 (define_mode_attr gthrlastp
328 [(V2DI "q") (V2DF "pd")
329 (V4DI "q") (V4DF "pd")
330 (V4SI "d") (V4SF "ps")
331 (V8SI "d") (V8SF "ps")])
333 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
335 ;; Mapping of immediate bits for blend instructions
336 (define_mode_attr blendbits
337 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
339 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
341 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
345 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
347 ;; All of these patterns are enabled for SSE1 as well as SSE2.
348 ;; This is essential for maintaining stable calling conventions.
350 (define_expand "mov<mode>"
351 [(set (match_operand:V16 0 "nonimmediate_operand" "")
352 (match_operand:V16 1 "nonimmediate_operand" ""))]
355 ix86_expand_vector_move (<MODE>mode, operands);
359 (define_insn "*mov<mode>_internal"
360 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
361 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
363 && (register_operand (operands[0], <MODE>mode)
364 || register_operand (operands[1], <MODE>mode))"
366 switch (which_alternative)
369 return standard_sse_constant_opcode (insn, operands[1]);
372 switch (get_attr_mode (insn))
377 && (misaligned_operand (operands[0], <MODE>mode)
378 || misaligned_operand (operands[1], <MODE>mode)))
379 return "vmovups\t{%1, %0|%0, %1}";
381 return "%vmovaps\t{%1, %0|%0, %1}";
386 && (misaligned_operand (operands[0], <MODE>mode)
387 || misaligned_operand (operands[1], <MODE>mode)))
388 return "vmovupd\t{%1, %0|%0, %1}";
389 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
390 return "%vmovaps\t{%1, %0|%0, %1}";
392 return "%vmovapd\t{%1, %0|%0, %1}";
397 && (misaligned_operand (operands[0], <MODE>mode)
398 || misaligned_operand (operands[1], <MODE>mode)))
399 return "vmovdqu\t{%1, %0|%0, %1}";
400 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
401 return "%vmovaps\t{%1, %0|%0, %1}";
403 return "%vmovdqa\t{%1, %0|%0, %1}";
412 [(set_attr "type" "sselog1,ssemov,ssemov")
413 (set_attr "prefix" "maybe_vex")
415 (cond [(match_test "TARGET_AVX")
416 (const_string "<sseinsnmode>")
417 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
418 (not (match_test "TARGET_SSE2")))
419 (and (eq_attr "alternative" "2")
420 (match_test "TARGET_SSE_TYPELESS_STORES")))
421 (const_string "V4SF")
422 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
423 (const_string "V4SF")
424 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
425 (const_string "V2DF")
427 (const_string "TI")))])
429 (define_insn "sse2_movq128"
430 [(set (match_operand:V2DI 0 "register_operand" "=x")
433 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
434 (parallel [(const_int 0)]))
437 "%vmovq\t{%1, %0|%0, %1}"
438 [(set_attr "type" "ssemov")
439 (set_attr "prefix" "maybe_vex")
440 (set_attr "mode" "TI")])
442 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
443 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
444 ;; from memory, we'd prefer to load the memory directly into the %xmm
445 ;; register. To facilitate this happy circumstance, this pattern won't
446 ;; split until after register allocation. If the 64-bit value didn't
447 ;; come from memory, this is the best we can do. This is much better
448 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
451 (define_insn_and_split "movdi_to_sse"
453 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
454 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
455 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
456 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
458 "&& reload_completed"
461 if (register_operand (operands[1], DImode))
463 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
464 Assemble the 64-bit DImode value in an xmm register. */
465 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
466 gen_rtx_SUBREG (SImode, operands[1], 0)));
467 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
468 gen_rtx_SUBREG (SImode, operands[1], 4)));
469 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
472 else if (memory_operand (operands[1], DImode))
473 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
474 operands[1], const0_rtx));
480 [(set (match_operand:V4SF 0 "register_operand" "")
481 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
482 "TARGET_SSE && reload_completed"
485 (vec_duplicate:V4SF (match_dup 1))
489 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
490 operands[2] = CONST0_RTX (V4SFmode);
494 [(set (match_operand:V2DF 0 "register_operand" "")
495 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
496 "TARGET_SSE2 && reload_completed"
497 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
499 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
500 operands[2] = CONST0_RTX (DFmode);
503 (define_expand "push<mode>1"
504 [(match_operand:V16 0 "register_operand" "")]
507 ix86_expand_push (<MODE>mode, operands[0]);
511 (define_expand "movmisalign<mode>"
512 [(set (match_operand:V16 0 "nonimmediate_operand" "")
513 (match_operand:V16 1 "nonimmediate_operand" ""))]
516 ix86_expand_vector_move_misalign (<MODE>mode, operands);
520 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
521 [(set (match_operand:VF 0 "nonimmediate_operand" "")
523 [(match_operand:VF 1 "nonimmediate_operand" "")]
527 if (MEM_P (operands[0]) && MEM_P (operands[1]))
528 operands[1] = force_reg (<MODE>mode, operands[1]);
531 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
532 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
534 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
536 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
537 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
538 [(set_attr "type" "ssemov")
539 (set_attr "movu" "1")
540 (set_attr "prefix" "maybe_vex")
541 (set_attr "mode" "<MODE>")])
543 (define_expand "<sse2>_movdqu<avxsizesuffix>"
544 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
545 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
549 if (MEM_P (operands[0]) && MEM_P (operands[1]))
550 operands[1] = force_reg (<MODE>mode, operands[1]);
553 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
554 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
555 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
557 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
558 "%vmovdqu\t{%1, %0|%0, %1}"
559 [(set_attr "type" "ssemov")
560 (set_attr "movu" "1")
561 (set (attr "prefix_data16")
563 (match_test "TARGET_AVX")
566 (set_attr "prefix" "maybe_vex")
567 (set_attr "mode" "<sseinsnmode>")])
569 (define_insn "<sse3>_lddqu<avxsizesuffix>"
570 [(set (match_operand:VI1 0 "register_operand" "=x")
571 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
574 "%vlddqu\t{%1, %0|%0, %1}"
575 [(set_attr "type" "ssemov")
576 (set_attr "movu" "1")
577 (set (attr "prefix_data16")
579 (match_test "TARGET_AVX")
582 (set (attr "prefix_rep")
584 (match_test "TARGET_AVX")
587 (set_attr "prefix" "maybe_vex")
588 (set_attr "mode" "<sseinsnmode>")])
590 (define_insn "sse2_movntsi"
591 [(set (match_operand:SI 0 "memory_operand" "=m")
592 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
595 "movnti\t{%1, %0|%0, %1}"
596 [(set_attr "type" "ssemov")
597 (set_attr "prefix_data16" "0")
598 (set_attr "mode" "V2DF")])
600 (define_insn "<sse>_movnt<mode>"
601 [(set (match_operand:VF 0 "memory_operand" "=m")
602 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
605 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
606 [(set_attr "type" "ssemov")
607 (set_attr "prefix" "maybe_vex")
608 (set_attr "mode" "<MODE>")])
610 (define_insn "<sse2>_movnt<mode>"
611 [(set (match_operand:VI8 0 "memory_operand" "=m")
612 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
615 "%vmovntdq\t{%1, %0|%0, %1}"
616 [(set_attr "type" "ssecvt")
617 (set (attr "prefix_data16")
619 (match_test "TARGET_AVX")
622 (set_attr "prefix" "maybe_vex")
623 (set_attr "mode" "<sseinsnmode>")])
625 ; Expand patterns for non-temporal stores. At the moment, only those
626 ; that directly map to insns are defined; it would be possible to
627 ; define patterns for other modes that would expand to several insns.
629 ;; Modes handled by storent patterns.
630 (define_mode_iterator STORENT_MODE
631 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
633 (V8SF "TARGET_AVX") V4SF
634 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
636 (define_expand "storent<mode>"
637 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
639 [(match_operand:STORENT_MODE 1 "register_operand" "")]
643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
645 ;; Parallel floating point arithmetic
647 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
649 (define_expand "<code><mode>2"
650 [(set (match_operand:VF 0 "register_operand" "")
652 (match_operand:VF 1 "register_operand" "")))]
654 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
656 (define_insn_and_split "*absneg<mode>2"
657 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
658 (match_operator:VF 3 "absneg_operator"
659 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
660 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
663 "&& reload_completed"
666 enum rtx_code absneg_op;
672 if (MEM_P (operands[1]))
673 op1 = operands[2], op2 = operands[1];
675 op1 = operands[1], op2 = operands[2];
680 if (rtx_equal_p (operands[0], operands[1]))
686 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
687 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
688 t = gen_rtx_SET (VOIDmode, operands[0], t);
692 [(set_attr "isa" "noavx,noavx,avx,avx")])
694 (define_expand "<plusminus_insn><mode>3"
695 [(set (match_operand:VF 0 "register_operand" "")
697 (match_operand:VF 1 "nonimmediate_operand" "")
698 (match_operand:VF 2 "nonimmediate_operand" "")))]
700 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
702 (define_insn "*<plusminus_insn><mode>3"
703 [(set (match_operand:VF 0 "register_operand" "=x,x")
705 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
706 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
707 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
709 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
710 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
711 [(set_attr "isa" "noavx,avx")
712 (set_attr "type" "sseadd")
713 (set_attr "prefix" "orig,vex")
714 (set_attr "mode" "<MODE>")])
716 (define_insn "<sse>_vm<plusminus_insn><mode>3"
717 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
720 (match_operand:VF_128 1 "register_operand" "0,x")
721 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
726 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
727 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
728 [(set_attr "isa" "noavx,avx")
729 (set_attr "type" "sseadd")
730 (set_attr "prefix" "orig,vex")
731 (set_attr "mode" "<ssescalarmode>")])
733 (define_expand "mul<mode>3"
734 [(set (match_operand:VF 0 "register_operand" "")
736 (match_operand:VF 1 "nonimmediate_operand" "")
737 (match_operand:VF 2 "nonimmediate_operand" "")))]
739 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
741 (define_insn "*mul<mode>3"
742 [(set (match_operand:VF 0 "register_operand" "=x,x")
744 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
745 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
746 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
748 mul<ssemodesuffix>\t{%2, %0|%0, %2}
749 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
750 [(set_attr "isa" "noavx,avx")
751 (set_attr "type" "ssemul")
752 (set_attr "prefix" "orig,vex")
753 (set_attr "mode" "<MODE>")])
755 (define_insn "<sse>_vmmul<mode>3"
756 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
759 (match_operand:VF_128 1 "register_operand" "0,x")
760 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
765 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
766 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
767 [(set_attr "isa" "noavx,avx")
768 (set_attr "type" "ssemul")
769 (set_attr "prefix" "orig,vex")
770 (set_attr "mode" "<ssescalarmode>")])
772 (define_expand "div<mode>3"
773 [(set (match_operand:VF2 0 "register_operand" "")
774 (div:VF2 (match_operand:VF2 1 "register_operand" "")
775 (match_operand:VF2 2 "nonimmediate_operand" "")))]
777 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
779 (define_expand "div<mode>3"
780 [(set (match_operand:VF1 0 "register_operand" "")
781 (div:VF1 (match_operand:VF1 1 "register_operand" "")
782 (match_operand:VF1 2 "nonimmediate_operand" "")))]
785 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
788 && TARGET_RECIP_VEC_DIV
789 && !optimize_insn_for_size_p ()
790 && flag_finite_math_only && !flag_trapping_math
791 && flag_unsafe_math_optimizations)
793 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
798 (define_insn "<sse>_div<mode>3"
799 [(set (match_operand:VF 0 "register_operand" "=x,x")
801 (match_operand:VF 1 "register_operand" "0,x")
802 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
805 div<ssemodesuffix>\t{%2, %0|%0, %2}
806 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
807 [(set_attr "isa" "noavx,avx")
808 (set_attr "type" "ssediv")
809 (set_attr "prefix" "orig,vex")
810 (set_attr "mode" "<MODE>")])
812 (define_insn "<sse>_vmdiv<mode>3"
813 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
816 (match_operand:VF_128 1 "register_operand" "0,x")
817 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
822 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
823 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
824 [(set_attr "isa" "noavx,avx")
825 (set_attr "type" "ssediv")
826 (set_attr "prefix" "orig,vex")
827 (set_attr "mode" "<ssescalarmode>")])
829 (define_insn "<sse>_rcp<mode>2"
830 [(set (match_operand:VF1 0 "register_operand" "=x")
832 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
834 "%vrcpps\t{%1, %0|%0, %1}"
835 [(set_attr "type" "sse")
836 (set_attr "atom_sse_attr" "rcp")
837 (set_attr "prefix" "maybe_vex")
838 (set_attr "mode" "<MODE>")])
840 (define_insn "sse_vmrcpv4sf2"
841 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
843 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
845 (match_operand:V4SF 2 "register_operand" "0,x")
849 rcpss\t{%1, %0|%0, %1}
850 vrcpss\t{%1, %2, %0|%0, %2, %1}"
851 [(set_attr "isa" "noavx,avx")
852 (set_attr "type" "sse")
853 (set_attr "atom_sse_attr" "rcp")
854 (set_attr "prefix" "orig,vex")
855 (set_attr "mode" "SF")])
857 (define_expand "sqrt<mode>2"
858 [(set (match_operand:VF2 0 "register_operand" "")
859 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
862 (define_expand "sqrt<mode>2"
863 [(set (match_operand:VF1 0 "register_operand" "")
864 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
868 && TARGET_RECIP_VEC_SQRT
869 && !optimize_insn_for_size_p ()
870 && flag_finite_math_only && !flag_trapping_math
871 && flag_unsafe_math_optimizations)
873 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
878 (define_insn "<sse>_sqrt<mode>2"
879 [(set (match_operand:VF 0 "register_operand" "=x")
880 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
882 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
883 [(set_attr "type" "sse")
884 (set_attr "atom_sse_attr" "sqrt")
885 (set_attr "prefix" "maybe_vex")
886 (set_attr "mode" "<MODE>")])
888 (define_insn "<sse>_vmsqrt<mode>2"
889 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
892 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
893 (match_operand:VF_128 2 "register_operand" "0,x")
897 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
898 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
899 [(set_attr "isa" "noavx,avx")
900 (set_attr "type" "sse")
901 (set_attr "atom_sse_attr" "sqrt")
902 (set_attr "prefix" "orig,vex")
903 (set_attr "mode" "<ssescalarmode>")])
905 (define_expand "rsqrt<mode>2"
906 [(set (match_operand:VF1 0 "register_operand" "")
908 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
911 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
915 (define_insn "<sse>_rsqrt<mode>2"
916 [(set (match_operand:VF1 0 "register_operand" "=x")
918 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
920 "%vrsqrtps\t{%1, %0|%0, %1}"
921 [(set_attr "type" "sse")
922 (set_attr "prefix" "maybe_vex")
923 (set_attr "mode" "<MODE>")])
925 (define_insn "sse_vmrsqrtv4sf2"
926 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
928 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
930 (match_operand:V4SF 2 "register_operand" "0,x")
934 rsqrtss\t{%1, %0|%0, %1}
935 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
936 [(set_attr "isa" "noavx,avx")
937 (set_attr "type" "sse")
938 (set_attr "prefix" "orig,vex")
939 (set_attr "mode" "SF")])
941 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
942 ;; isn't really correct, as those rtl operators aren't defined when
943 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
945 (define_expand "<code><mode>3"
946 [(set (match_operand:VF 0 "register_operand" "")
948 (match_operand:VF 1 "nonimmediate_operand" "")
949 (match_operand:VF 2 "nonimmediate_operand" "")))]
952 if (!flag_finite_math_only)
953 operands[1] = force_reg (<MODE>mode, operands[1]);
954 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
957 (define_insn "*<code><mode>3_finite"
958 [(set (match_operand:VF 0 "register_operand" "=x,x")
960 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
961 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
962 "TARGET_SSE && flag_finite_math_only
963 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
965 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
966 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
967 [(set_attr "isa" "noavx,avx")
968 (set_attr "type" "sseadd")
969 (set_attr "prefix" "orig,vex")
970 (set_attr "mode" "<MODE>")])
972 (define_insn "*<code><mode>3"
973 [(set (match_operand:VF 0 "register_operand" "=x,x")
975 (match_operand:VF 1 "register_operand" "0,x")
976 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
977 "TARGET_SSE && !flag_finite_math_only"
979 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
980 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
981 [(set_attr "isa" "noavx,avx")
982 (set_attr "type" "sseadd")
983 (set_attr "prefix" "orig,vex")
984 (set_attr "mode" "<MODE>")])
986 (define_insn "<sse>_vm<code><mode>3"
987 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
990 (match_operand:VF_128 1 "register_operand" "0,x")
991 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
996 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
997 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
998 [(set_attr "isa" "noavx,avx")
999 (set_attr "type" "sse")
1000 (set_attr "prefix" "orig,vex")
1001 (set_attr "mode" "<ssescalarmode>")])
1003 ;; These versions of the min/max patterns implement exactly the operations
1004 ;; min = (op1 < op2 ? op1 : op2)
1005 ;; max = (!(op1 < op2) ? op1 : op2)
1006 ;; Their operands are not commutative, and thus they may be used in the
1007 ;; presence of -0.0 and NaN.
1009 (define_insn "*ieee_smin<mode>3"
1010 [(set (match_operand:VF 0 "register_operand" "=x,x")
1012 [(match_operand:VF 1 "register_operand" "0,x")
1013 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1017 min<ssemodesuffix>\t{%2, %0|%0, %2}
1018 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1019 [(set_attr "isa" "noavx,avx")
1020 (set_attr "type" "sseadd")
1021 (set_attr "prefix" "orig,vex")
1022 (set_attr "mode" "<MODE>")])
1024 (define_insn "*ieee_smax<mode>3"
1025 [(set (match_operand:VF 0 "register_operand" "=x,x")
1027 [(match_operand:VF 1 "register_operand" "0,x")
1028 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1032 max<ssemodesuffix>\t{%2, %0|%0, %2}
1033 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1034 [(set_attr "isa" "noavx,avx")
1035 (set_attr "type" "sseadd")
1036 (set_attr "prefix" "orig,vex")
1037 (set_attr "mode" "<MODE>")])
1039 (define_insn "avx_addsubv4df3"
1040 [(set (match_operand:V4DF 0 "register_operand" "=x")
1043 (match_operand:V4DF 1 "register_operand" "x")
1044 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1045 (minus:V4DF (match_dup 1) (match_dup 2))
1048 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1049 [(set_attr "type" "sseadd")
1050 (set_attr "prefix" "vex")
1051 (set_attr "mode" "V4DF")])
1053 (define_insn "sse3_addsubv2df3"
1054 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1057 (match_operand:V2DF 1 "register_operand" "0,x")
1058 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1059 (minus:V2DF (match_dup 1) (match_dup 2))
1063 addsubpd\t{%2, %0|%0, %2}
1064 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "isa" "noavx,avx")
1066 (set_attr "type" "sseadd")
1067 (set_attr "atom_unit" "complex")
1068 (set_attr "prefix" "orig,vex")
1069 (set_attr "mode" "V2DF")])
1071 (define_insn "avx_addsubv8sf3"
1072 [(set (match_operand:V8SF 0 "register_operand" "=x")
1075 (match_operand:V8SF 1 "register_operand" "x")
1076 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1077 (minus:V8SF (match_dup 1) (match_dup 2))
1080 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1081 [(set_attr "type" "sseadd")
1082 (set_attr "prefix" "vex")
1083 (set_attr "mode" "V8SF")])
1085 (define_insn "sse3_addsubv4sf3"
1086 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1089 (match_operand:V4SF 1 "register_operand" "0,x")
1090 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1091 (minus:V4SF (match_dup 1) (match_dup 2))
1095 addsubps\t{%2, %0|%0, %2}
1096 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1097 [(set_attr "isa" "noavx,avx")
1098 (set_attr "type" "sseadd")
1099 (set_attr "prefix" "orig,vex")
1100 (set_attr "prefix_rep" "1,*")
1101 (set_attr "mode" "V4SF")])
1103 (define_insn "avx_h<plusminus_insn>v4df3"
1104 [(set (match_operand:V4DF 0 "register_operand" "=x")
1109 (match_operand:V4DF 1 "register_operand" "x")
1110 (parallel [(const_int 0)]))
1111 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1113 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1114 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1118 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1119 (parallel [(const_int 0)]))
1120 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1122 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1123 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1125 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1126 [(set_attr "type" "sseadd")
1127 (set_attr "prefix" "vex")
1128 (set_attr "mode" "V4DF")])
1130 (define_insn "sse3_h<plusminus_insn>v2df3"
1131 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1135 (match_operand:V2DF 1 "register_operand" "0,x")
1136 (parallel [(const_int 0)]))
1137 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1140 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1141 (parallel [(const_int 0)]))
1142 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1145 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1146 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1147 [(set_attr "isa" "noavx,avx")
1148 (set_attr "type" "sseadd")
1149 (set_attr "prefix" "orig,vex")
1150 (set_attr "mode" "V2DF")])
1152 (define_insn "avx_h<plusminus_insn>v8sf3"
1153 [(set (match_operand:V8SF 0 "register_operand" "=x")
1159 (match_operand:V8SF 1 "register_operand" "x")
1160 (parallel [(const_int 0)]))
1161 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1163 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1164 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1168 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1169 (parallel [(const_int 0)]))
1170 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1172 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1173 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1177 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1178 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1180 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1181 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1184 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1185 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1187 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1188 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1190 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1191 [(set_attr "type" "sseadd")
1192 (set_attr "prefix" "vex")
1193 (set_attr "mode" "V8SF")])
1195 (define_insn "sse3_h<plusminus_insn>v4sf3"
1196 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1201 (match_operand:V4SF 1 "register_operand" "0,x")
1202 (parallel [(const_int 0)]))
1203 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1205 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1206 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1210 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1211 (parallel [(const_int 0)]))
1212 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1214 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1215 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1218 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1219 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1220 [(set_attr "isa" "noavx,avx")
1221 (set_attr "type" "sseadd")
1222 (set_attr "atom_unit" "complex")
1223 (set_attr "prefix" "orig,vex")
1224 (set_attr "prefix_rep" "1,*")
1225 (set_attr "mode" "V4SF")])
1227 (define_expand "reduc_splus_v4df"
1228 [(match_operand:V4DF 0 "register_operand" "")
1229 (match_operand:V4DF 1 "register_operand" "")]
1232 rtx tmp = gen_reg_rtx (V4DFmode);
1233 rtx tmp2 = gen_reg_rtx (V4DFmode);
1234 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1235 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1236 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1240 (define_expand "reduc_splus_v2df"
1241 [(match_operand:V2DF 0 "register_operand" "")
1242 (match_operand:V2DF 1 "register_operand" "")]
1245 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1249 (define_expand "reduc_splus_v8sf"
1250 [(match_operand:V8SF 0 "register_operand" "")
1251 (match_operand:V8SF 1 "register_operand" "")]
1254 rtx tmp = gen_reg_rtx (V8SFmode);
1255 rtx tmp2 = gen_reg_rtx (V8SFmode);
1256 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1257 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1258 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1259 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1263 (define_expand "reduc_splus_v4sf"
1264 [(match_operand:V4SF 0 "register_operand" "")
1265 (match_operand:V4SF 1 "register_operand" "")]
1270 rtx tmp = gen_reg_rtx (V4SFmode);
1271 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1272 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1275 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1279 ;; Modes handled by reduc_sm{in,ax}* patterns.
1280 (define_mode_iterator REDUC_SMINMAX_MODE
1281 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1282 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1283 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1284 (V4SF "TARGET_SSE")])
1286 (define_expand "reduc_<code>_<mode>"
1287 [(smaxmin:REDUC_SMINMAX_MODE
1288 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1289 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1292 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1296 (define_expand "reduc_<code>_<mode>"
1298 (match_operand:VI_256 0 "register_operand" "")
1299 (match_operand:VI_256 1 "register_operand" ""))]
1302 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1306 (define_expand "reduc_umin_v8hi"
1308 (match_operand:V8HI 0 "register_operand" "")
1309 (match_operand:V8HI 1 "register_operand" ""))]
1312 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1316 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1318 ;; Parallel floating point comparisons
1320 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1322 (define_insn "avx_cmp<mode>3"
1323 [(set (match_operand:VF 0 "register_operand" "=x")
1325 [(match_operand:VF 1 "register_operand" "x")
1326 (match_operand:VF 2 "nonimmediate_operand" "xm")
1327 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1330 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1331 [(set_attr "type" "ssecmp")
1332 (set_attr "length_immediate" "1")
1333 (set_attr "prefix" "vex")
1334 (set_attr "mode" "<MODE>")])
1336 (define_insn "avx_vmcmp<mode>3"
1337 [(set (match_operand:VF_128 0 "register_operand" "=x")
1340 [(match_operand:VF_128 1 "register_operand" "x")
1341 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1342 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1347 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1348 [(set_attr "type" "ssecmp")
1349 (set_attr "length_immediate" "1")
1350 (set_attr "prefix" "vex")
1351 (set_attr "mode" "<ssescalarmode>")])
1353 (define_insn "*<sse>_maskcmp<mode>3_comm"
1354 [(set (match_operand:VF 0 "register_operand" "=x,x")
1355 (match_operator:VF 3 "sse_comparison_operator"
1356 [(match_operand:VF 1 "register_operand" "%0,x")
1357 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1359 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1361 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1362 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1363 [(set_attr "isa" "noavx,avx")
1364 (set_attr "type" "ssecmp")
1365 (set_attr "length_immediate" "1")
1366 (set_attr "prefix" "orig,vex")
1367 (set_attr "mode" "<MODE>")])
1369 (define_insn "<sse>_maskcmp<mode>3"
1370 [(set (match_operand:VF 0 "register_operand" "=x,x")
1371 (match_operator:VF 3 "sse_comparison_operator"
1372 [(match_operand:VF 1 "register_operand" "0,x")
1373 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1376 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1377 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1378 [(set_attr "isa" "noavx,avx")
1379 (set_attr "type" "ssecmp")
1380 (set_attr "length_immediate" "1")
1381 (set_attr "prefix" "orig,vex")
1382 (set_attr "mode" "<MODE>")])
1384 (define_insn "<sse>_vmmaskcmp<mode>3"
1385 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1387 (match_operator:VF_128 3 "sse_comparison_operator"
1388 [(match_operand:VF_128 1 "register_operand" "0,x")
1389 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1394 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1395 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1396 [(set_attr "isa" "noavx,avx")
1397 (set_attr "type" "ssecmp")
1398 (set_attr "length_immediate" "1,*")
1399 (set_attr "prefix" "orig,vex")
1400 (set_attr "mode" "<ssescalarmode>")])
1402 (define_insn "<sse>_comi"
1403 [(set (reg:CCFP FLAGS_REG)
1406 (match_operand:<ssevecmode> 0 "register_operand" "x")
1407 (parallel [(const_int 0)]))
1409 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1410 (parallel [(const_int 0)]))))]
1411 "SSE_FLOAT_MODE_P (<MODE>mode)"
1412 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1413 [(set_attr "type" "ssecomi")
1414 (set_attr "prefix" "maybe_vex")
1415 (set_attr "prefix_rep" "0")
1416 (set (attr "prefix_data16")
1417 (if_then_else (eq_attr "mode" "DF")
1419 (const_string "0")))
1420 (set_attr "mode" "<MODE>")])
1422 (define_insn "<sse>_ucomi"
1423 [(set (reg:CCFPU FLAGS_REG)
1426 (match_operand:<ssevecmode> 0 "register_operand" "x")
1427 (parallel [(const_int 0)]))
1429 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1430 (parallel [(const_int 0)]))))]
1431 "SSE_FLOAT_MODE_P (<MODE>mode)"
1432 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1433 [(set_attr "type" "ssecomi")
1434 (set_attr "prefix" "maybe_vex")
1435 (set_attr "prefix_rep" "0")
1436 (set (attr "prefix_data16")
1437 (if_then_else (eq_attr "mode" "DF")
1439 (const_string "0")))
1440 (set_attr "mode" "<MODE>")])
1442 (define_expand "vcond<V_256:mode><VF_256:mode>"
1443 [(set (match_operand:V_256 0 "register_operand" "")
1445 (match_operator 3 ""
1446 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1447 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1448 (match_operand:V_256 1 "general_operand" "")
1449 (match_operand:V_256 2 "general_operand" "")))]
1451 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1452 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1454 bool ok = ix86_expand_fp_vcond (operands);
1459 (define_expand "vcond<V_128:mode><VF_128:mode>"
1460 [(set (match_operand:V_128 0 "register_operand" "")
1462 (match_operator 3 ""
1463 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1464 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1465 (match_operand:V_128 1 "general_operand" "")
1466 (match_operand:V_128 2 "general_operand" "")))]
1468 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1469 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1471 bool ok = ix86_expand_fp_vcond (operands);
1476 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1478 ;; Parallel floating point logical operations
1480 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1482 (define_insn "<sse>_andnot<mode>3"
1483 [(set (match_operand:VF 0 "register_operand" "=x,x")
1486 (match_operand:VF 1 "register_operand" "0,x"))
1487 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1490 static char buf[32];
1493 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1495 switch (which_alternative)
1498 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1501 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1507 snprintf (buf, sizeof (buf), insn, suffix);
1510 [(set_attr "isa" "noavx,avx")
1511 (set_attr "type" "sselog")
1512 (set_attr "prefix" "orig,vex")
1513 (set_attr "mode" "<MODE>")])
1515 (define_expand "<code><mode>3"
1516 [(set (match_operand:VF 0 "register_operand" "")
1518 (match_operand:VF 1 "nonimmediate_operand" "")
1519 (match_operand:VF 2 "nonimmediate_operand" "")))]
1521 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1523 (define_insn "*<code><mode>3"
1524 [(set (match_operand:VF 0 "register_operand" "=x,x")
1526 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1527 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1528 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1530 static char buf[32];
1533 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1535 switch (which_alternative)
1538 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1541 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1547 snprintf (buf, sizeof (buf), insn, suffix);
1550 [(set_attr "isa" "noavx,avx")
1551 (set_attr "type" "sselog")
1552 (set_attr "prefix" "orig,vex")
1553 (set_attr "mode" "<MODE>")])
1555 (define_expand "copysign<mode>3"
1558 (not:VF (match_dup 3))
1559 (match_operand:VF 1 "nonimmediate_operand" "")))
1561 (and:VF (match_dup 3)
1562 (match_operand:VF 2 "nonimmediate_operand" "")))
1563 (set (match_operand:VF 0 "register_operand" "")
1564 (ior:VF (match_dup 4) (match_dup 5)))]
1567 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1569 operands[4] = gen_reg_rtx (<MODE>mode);
1570 operands[5] = gen_reg_rtx (<MODE>mode);
1573 ;; Also define scalar versions. These are used for abs, neg, and
1574 ;; conditional move. Using subregs into vector modes causes register
1575 ;; allocation lossage. These patterns do not allow memory operands
1576 ;; because the native instructions read the full 128-bits.
1578 (define_insn "*andnot<mode>3"
1579 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1582 (match_operand:MODEF 1 "register_operand" "0,x"))
1583 (match_operand:MODEF 2 "register_operand" "x,x")))]
1584 "SSE_FLOAT_MODE_P (<MODE>mode)"
1586 static char buf[32];
1589 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1591 switch (which_alternative)
1594 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1597 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1603 snprintf (buf, sizeof (buf), insn, suffix);
1606 [(set_attr "isa" "noavx,avx")
1607 (set_attr "type" "sselog")
1608 (set_attr "prefix" "orig,vex")
1609 (set_attr "mode" "<ssevecmode>")])
1611 (define_insn "*<code><mode>3"
1612 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1614 (match_operand:MODEF 1 "register_operand" "%0,x")
1615 (match_operand:MODEF 2 "register_operand" "x,x")))]
1616 "SSE_FLOAT_MODE_P (<MODE>mode)"
1618 static char buf[32];
1621 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1623 switch (which_alternative)
1626 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1629 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1635 snprintf (buf, sizeof (buf), insn, suffix);
1638 [(set_attr "isa" "noavx,avx")
1639 (set_attr "type" "sselog")
1640 (set_attr "prefix" "orig,vex")
1641 (set_attr "mode" "<ssevecmode>")])
1643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1645 ;; FMA4 floating point multiply/accumulate instructions. This
1646 ;; includes the scalar version of the instructions as well as the
1649 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1651 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1652 ;; combine to generate a multiply/add with two memory references. We then
1653 ;; split this insn, into loading up the destination register with one of the
1654 ;; memory operations. If we don't manage to split the insn, reload will
1655 ;; generate the appropriate moves. The reason this is needed, is that combine
1656 ;; has already folded one of the memory references into both the multiply and
1657 ;; add insns, and it can't generate a new pseudo. I.e.:
1658 ;; (set (reg1) (mem (addr1)))
1659 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1660 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1662 ;; ??? This is historic, pre-dating the gimple fma transformation.
1663 ;; We could now properly represent that only one memory operand is
1664 ;; allowed and not be penalized during optimization.
1666 ;; Intrinsic FMA operations.
1668 ;; The standard names for fma is only available with SSE math enabled.
1669 (define_expand "fma<mode>4"
1670 [(set (match_operand:FMAMODE 0 "register_operand")
1672 (match_operand:FMAMODE 1 "nonimmediate_operand")
1673 (match_operand:FMAMODE 2 "nonimmediate_operand")
1674 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1675 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1677 (define_expand "fms<mode>4"
1678 [(set (match_operand:FMAMODE 0 "register_operand")
1680 (match_operand:FMAMODE 1 "nonimmediate_operand")
1681 (match_operand:FMAMODE 2 "nonimmediate_operand")
1682 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1683 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1685 (define_expand "fnma<mode>4"
1686 [(set (match_operand:FMAMODE 0 "register_operand")
1688 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1689 (match_operand:FMAMODE 2 "nonimmediate_operand")
1690 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1691 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1693 (define_expand "fnms<mode>4"
1694 [(set (match_operand:FMAMODE 0 "register_operand")
1696 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1697 (match_operand:FMAMODE 2 "nonimmediate_operand")
1698 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1699 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1701 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
1702 (define_expand "fma4i_fmadd_<mode>"
1703 [(set (match_operand:FMAMODE 0 "register_operand")
1705 (match_operand:FMAMODE 1 "nonimmediate_operand")
1706 (match_operand:FMAMODE 2 "nonimmediate_operand")
1707 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1708 "TARGET_FMA || TARGET_FMA4")
1710 (define_insn "*fma4i_fmadd_<mode>"
1711 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1713 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1714 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1715 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1717 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1718 [(set_attr "type" "ssemuladd")
1719 (set_attr "mode" "<MODE>")])
1721 (define_insn "*fma4i_fmsub_<mode>"
1722 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1724 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1725 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1727 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1729 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1730 [(set_attr "type" "ssemuladd")
1731 (set_attr "mode" "<MODE>")])
1733 (define_insn "*fma4i_fnmadd_<mode>"
1734 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1737 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1738 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1739 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1741 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1742 [(set_attr "type" "ssemuladd")
1743 (set_attr "mode" "<MODE>")])
1745 (define_insn "*fma4i_fnmsub_<mode>"
1746 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1749 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1750 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1752 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1754 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1755 [(set_attr "type" "ssemuladd")
1756 (set_attr "mode" "<MODE>")])
1758 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1759 ;; entire destination register, with the high-order elements zeroed.
1761 (define_expand "fma4i_vmfmadd_<mode>"
1762 [(set (match_operand:VF_128 0 "register_operand")
1765 (match_operand:VF_128 1 "nonimmediate_operand")
1766 (match_operand:VF_128 2 "nonimmediate_operand")
1767 (match_operand:VF_128 3 "nonimmediate_operand"))
1772 operands[4] = CONST0_RTX (<MODE>mode);
1775 (define_expand "fmai_vmfmadd_<mode>"
1776 [(set (match_operand:VF_128 0 "register_operand")
1779 (match_operand:VF_128 1 "nonimmediate_operand")
1780 (match_operand:VF_128 2 "nonimmediate_operand")
1781 (match_operand:VF_128 3 "nonimmediate_operand"))
1786 (define_insn "*fmai_fmadd_<mode>"
1787 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1790 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1791 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1792 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1797 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1798 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1799 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1800 [(set_attr "type" "ssemuladd")
1801 (set_attr "mode" "<MODE>")])
1803 (define_insn "*fmai_fmsub_<mode>"
1804 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1807 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1808 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1810 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1815 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1816 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1817 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1818 [(set_attr "type" "ssemuladd")
1819 (set_attr "mode" "<MODE>")])
1821 (define_insn "*fmai_fnmadd_<mode>"
1822 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1826 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1827 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1828 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1833 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1834 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1835 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1836 [(set_attr "type" "ssemuladd")
1837 (set_attr "mode" "<MODE>")])
1839 (define_insn "*fmai_fnmsub_<mode>"
1840 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1844 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1845 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1847 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1852 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1853 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1854 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1855 [(set_attr "type" "ssemuladd")
1856 (set_attr "mode" "<MODE>")])
1858 (define_insn "*fma4i_vmfmadd_<mode>"
1859 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1862 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1863 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1864 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1865 (match_operand:VF_128 4 "const0_operand" "")
1868 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1869 [(set_attr "type" "ssemuladd")
1870 (set_attr "mode" "<MODE>")])
1872 (define_insn "*fma4i_vmfmsub_<mode>"
1873 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1876 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1877 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1879 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1880 (match_operand:VF_128 4 "const0_operand" "")
1883 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1884 [(set_attr "type" "ssemuladd")
1885 (set_attr "mode" "<MODE>")])
1887 (define_insn "*fma4i_vmfnmadd_<mode>"
1888 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1892 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1893 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1894 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1895 (match_operand:VF_128 4 "const0_operand" "")
1898 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1899 [(set_attr "type" "ssemuladd")
1900 (set_attr "mode" "<MODE>")])
1902 (define_insn "*fma4i_vmfnmsub_<mode>"
1903 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1907 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1908 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1910 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1911 (match_operand:VF_128 4 "const0_operand" "")
1914 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1915 [(set_attr "type" "ssemuladd")
1916 (set_attr "mode" "<MODE>")])
1918 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1920 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1922 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1924 ;; It would be possible to represent these without the UNSPEC as
1927 ;; (fma op1 op2 op3)
1928 ;; (fma op1 op2 (neg op3))
1931 ;; But this doesn't seem useful in practice.
1933 (define_expand "fmaddsub_<mode>"
1934 [(set (match_operand:VF 0 "register_operand")
1936 [(match_operand:VF 1 "nonimmediate_operand")
1937 (match_operand:VF 2 "nonimmediate_operand")
1938 (match_operand:VF 3 "nonimmediate_operand")]
1940 "TARGET_FMA || TARGET_FMA4")
1942 (define_insn "*fma4_fmaddsub_<mode>"
1943 [(set (match_operand:VF 0 "register_operand" "=x,x")
1945 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1946 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1947 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1950 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1951 [(set_attr "type" "ssemuladd")
1952 (set_attr "mode" "<MODE>")])
1954 (define_insn "*fma4_fmsubadd_<mode>"
1955 [(set (match_operand:VF 0 "register_operand" "=x,x")
1957 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1958 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1960 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1963 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1964 [(set_attr "type" "ssemuladd")
1965 (set_attr "mode" "<MODE>")])
1967 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1969 ;; FMA3 floating point multiply/accumulate instructions.
1971 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1973 (define_insn "*fma_fmadd_<mode>"
1974 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1976 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1977 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1978 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1981 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1982 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1983 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1984 [(set_attr "type" "ssemuladd")
1985 (set_attr "mode" "<MODE>")])
1987 (define_insn "*fma_fmsub_<mode>"
1988 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1990 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1991 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1993 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1996 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1997 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1998 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1999 [(set_attr "type" "ssemuladd")
2000 (set_attr "mode" "<MODE>")])
2002 (define_insn "*fma_fnmadd_<mode>"
2003 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2006 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2007 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2008 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2011 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2012 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2013 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2014 [(set_attr "type" "ssemuladd")
2015 (set_attr "mode" "<MODE>")])
2017 (define_insn "*fma_fnmsub_<mode>"
2018 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2021 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2022 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2024 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2027 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2028 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2029 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2030 [(set_attr "type" "ssemuladd")
2031 (set_attr "mode" "<MODE>")])
2033 (define_insn "*fma_fmaddsub_<mode>"
2034 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2036 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2037 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2038 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2042 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2043 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2044 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2045 [(set_attr "type" "ssemuladd")
2046 (set_attr "mode" "<MODE>")])
2048 (define_insn "*fma_fmsubadd_<mode>"
2049 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2051 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2052 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2054 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2058 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2059 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2060 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2061 [(set_attr "type" "ssemuladd")
2062 (set_attr "mode" "<MODE>")])
2064 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2066 ;; Parallel single-precision floating point conversion operations
2068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2070 (define_insn "sse_cvtpi2ps"
2071 [(set (match_operand:V4SF 0 "register_operand" "=x")
2074 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2075 (match_operand:V4SF 1 "register_operand" "0")
2078 "cvtpi2ps\t{%2, %0|%0, %2}"
2079 [(set_attr "type" "ssecvt")
2080 (set_attr "mode" "V4SF")])
2082 (define_insn "sse_cvtps2pi"
2083 [(set (match_operand:V2SI 0 "register_operand" "=y")
2085 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2087 (parallel [(const_int 0) (const_int 1)])))]
2089 "cvtps2pi\t{%1, %0|%0, %1}"
2090 [(set_attr "type" "ssecvt")
2091 (set_attr "unit" "mmx")
2092 (set_attr "mode" "DI")])
2094 (define_insn "sse_cvttps2pi"
2095 [(set (match_operand:V2SI 0 "register_operand" "=y")
2097 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2098 (parallel [(const_int 0) (const_int 1)])))]
2100 "cvttps2pi\t{%1, %0|%0, %1}"
2101 [(set_attr "type" "ssecvt")
2102 (set_attr "unit" "mmx")
2103 (set_attr "prefix_rep" "0")
2104 (set_attr "mode" "SF")])
2106 (define_insn "sse_cvtsi2ss"
2107 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2110 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2111 (match_operand:V4SF 1 "register_operand" "0,0,x")
2115 cvtsi2ss\t{%2, %0|%0, %2}
2116 cvtsi2ss\t{%2, %0|%0, %2}
2117 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2118 [(set_attr "isa" "noavx,noavx,avx")
2119 (set_attr "type" "sseicvt")
2120 (set_attr "athlon_decode" "vector,double,*")
2121 (set_attr "amdfam10_decode" "vector,double,*")
2122 (set_attr "bdver1_decode" "double,direct,*")
2123 (set_attr "prefix" "orig,orig,vex")
2124 (set_attr "mode" "SF")])
2126 (define_insn "sse_cvtsi2ssq"
2127 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2130 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2131 (match_operand:V4SF 1 "register_operand" "0,0,x")
2133 "TARGET_SSE && TARGET_64BIT"
2135 cvtsi2ssq\t{%2, %0|%0, %2}
2136 cvtsi2ssq\t{%2, %0|%0, %2}
2137 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2138 [(set_attr "isa" "noavx,noavx,avx")
2139 (set_attr "type" "sseicvt")
2140 (set_attr "athlon_decode" "vector,double,*")
2141 (set_attr "amdfam10_decode" "vector,double,*")
2142 (set_attr "bdver1_decode" "double,direct,*")
2143 (set_attr "length_vex" "*,*,4")
2144 (set_attr "prefix_rex" "1,1,*")
2145 (set_attr "prefix" "orig,orig,vex")
2146 (set_attr "mode" "SF")])
2148 (define_insn "sse_cvtss2si"
2149 [(set (match_operand:SI 0 "register_operand" "=r,r")
2152 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2153 (parallel [(const_int 0)]))]
2154 UNSPEC_FIX_NOTRUNC))]
2156 "%vcvtss2si\t{%1, %0|%0, %1}"
2157 [(set_attr "type" "sseicvt")
2158 (set_attr "athlon_decode" "double,vector")
2159 (set_attr "bdver1_decode" "double,double")
2160 (set_attr "prefix_rep" "1")
2161 (set_attr "prefix" "maybe_vex")
2162 (set_attr "mode" "SI")])
2164 (define_insn "sse_cvtss2si_2"
2165 [(set (match_operand:SI 0 "register_operand" "=r,r")
2166 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2167 UNSPEC_FIX_NOTRUNC))]
2169 "%vcvtss2si\t{%1, %0|%0, %1}"
2170 [(set_attr "type" "sseicvt")
2171 (set_attr "athlon_decode" "double,vector")
2172 (set_attr "amdfam10_decode" "double,double")
2173 (set_attr "bdver1_decode" "double,double")
2174 (set_attr "prefix_rep" "1")
2175 (set_attr "prefix" "maybe_vex")
2176 (set_attr "mode" "SI")])
2178 (define_insn "sse_cvtss2siq"
2179 [(set (match_operand:DI 0 "register_operand" "=r,r")
2182 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2183 (parallel [(const_int 0)]))]
2184 UNSPEC_FIX_NOTRUNC))]
2185 "TARGET_SSE && TARGET_64BIT"
2186 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2187 [(set_attr "type" "sseicvt")
2188 (set_attr "athlon_decode" "double,vector")
2189 (set_attr "bdver1_decode" "double,double")
2190 (set_attr "prefix_rep" "1")
2191 (set_attr "prefix" "maybe_vex")
2192 (set_attr "mode" "DI")])
2194 (define_insn "sse_cvtss2siq_2"
2195 [(set (match_operand:DI 0 "register_operand" "=r,r")
2196 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2197 UNSPEC_FIX_NOTRUNC))]
2198 "TARGET_SSE && TARGET_64BIT"
2199 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2200 [(set_attr "type" "sseicvt")
2201 (set_attr "athlon_decode" "double,vector")
2202 (set_attr "amdfam10_decode" "double,double")
2203 (set_attr "bdver1_decode" "double,double")
2204 (set_attr "prefix_rep" "1")
2205 (set_attr "prefix" "maybe_vex")
2206 (set_attr "mode" "DI")])
2208 (define_insn "sse_cvttss2si"
2209 [(set (match_operand:SI 0 "register_operand" "=r,r")
2212 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2213 (parallel [(const_int 0)]))))]
2215 "%vcvttss2si\t{%1, %0|%0, %1}"
2216 [(set_attr "type" "sseicvt")
2217 (set_attr "athlon_decode" "double,vector")
2218 (set_attr "amdfam10_decode" "double,double")
2219 (set_attr "bdver1_decode" "double,double")
2220 (set_attr "prefix_rep" "1")
2221 (set_attr "prefix" "maybe_vex")
2222 (set_attr "mode" "SI")])
2224 (define_insn "sse_cvttss2siq"
2225 [(set (match_operand:DI 0 "register_operand" "=r,r")
2228 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2229 (parallel [(const_int 0)]))))]
2230 "TARGET_SSE && TARGET_64BIT"
2231 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2232 [(set_attr "type" "sseicvt")
2233 (set_attr "athlon_decode" "double,vector")
2234 (set_attr "amdfam10_decode" "double,double")
2235 (set_attr "bdver1_decode" "double,double")
2236 (set_attr "prefix_rep" "1")
2237 (set_attr "prefix" "maybe_vex")
2238 (set_attr "mode" "DI")])
2240 (define_insn "avx_cvtdq2ps256"
2241 [(set (match_operand:V8SF 0 "register_operand" "=x")
2242 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2244 "vcvtdq2ps\t{%1, %0|%0, %1}"
2245 [(set_attr "type" "ssecvt")
2246 (set_attr "prefix" "vex")
2247 (set_attr "mode" "V8SF")])
2249 (define_insn "sse2_cvtdq2ps"
2250 [(set (match_operand:V4SF 0 "register_operand" "=x")
2251 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2253 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2254 [(set_attr "type" "ssecvt")
2255 (set_attr "prefix" "maybe_vex")
2256 (set_attr "mode" "V4SF")])
2258 (define_expand "sse2_cvtudq2ps"
2260 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2262 (lt:V4SF (match_dup 5) (match_dup 3)))
2264 (and:V4SF (match_dup 6) (match_dup 4)))
2265 (set (match_operand:V4SF 0 "register_operand" "")
2266 (plus:V4SF (match_dup 5) (match_dup 7)))]
2269 REAL_VALUE_TYPE TWO32r;
2273 real_ldexp (&TWO32r, &dconst1, 32);
2274 x = const_double_from_real_value (TWO32r, SFmode);
2276 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2277 operands[4] = force_reg (V4SFmode,
2278 ix86_build_const_vector (V4SFmode, 1, x));
2280 for (i = 5; i < 8; i++)
2281 operands[i] = gen_reg_rtx (V4SFmode);
2284 (define_insn "avx_cvtps2dq256"
2285 [(set (match_operand:V8SI 0 "register_operand" "=x")
2286 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2287 UNSPEC_FIX_NOTRUNC))]
2289 "vcvtps2dq\t{%1, %0|%0, %1}"
2290 [(set_attr "type" "ssecvt")
2291 (set_attr "prefix" "vex")
2292 (set_attr "mode" "OI")])
2294 (define_insn "sse2_cvtps2dq"
2295 [(set (match_operand:V4SI 0 "register_operand" "=x")
2296 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2297 UNSPEC_FIX_NOTRUNC))]
2299 "%vcvtps2dq\t{%1, %0|%0, %1}"
2300 [(set_attr "type" "ssecvt")
2301 (set (attr "prefix_data16")
2303 (match_test "TARGET_AVX")
2305 (const_string "1")))
2306 (set_attr "prefix" "maybe_vex")
2307 (set_attr "mode" "TI")])
2309 (define_insn "avx_cvttps2dq256"
2310 [(set (match_operand:V8SI 0 "register_operand" "=x")
2311 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2313 "vcvttps2dq\t{%1, %0|%0, %1}"
2314 [(set_attr "type" "ssecvt")
2315 (set_attr "prefix" "vex")
2316 (set_attr "mode" "OI")])
2318 (define_insn "sse2_cvttps2dq"
2319 [(set (match_operand:V4SI 0 "register_operand" "=x")
2320 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2322 "%vcvttps2dq\t{%1, %0|%0, %1}"
2323 [(set_attr "type" "ssecvt")
2324 (set (attr "prefix_rep")
2326 (match_test "TARGET_AVX")
2328 (const_string "1")))
2329 (set (attr "prefix_data16")
2331 (match_test "TARGET_AVX")
2333 (const_string "0")))
2334 (set_attr "prefix_data16" "0")
2335 (set_attr "prefix" "maybe_vex")
2336 (set_attr "mode" "TI")])
2338 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2340 ;; Parallel double-precision floating point conversion operations
2342 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2344 (define_insn "sse2_cvtpi2pd"
2345 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2346 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2348 "cvtpi2pd\t{%1, %0|%0, %1}"
2349 [(set_attr "type" "ssecvt")
2350 (set_attr "unit" "mmx,*")
2351 (set_attr "prefix_data16" "1,*")
2352 (set_attr "mode" "V2DF")])
2354 (define_insn "sse2_cvtpd2pi"
2355 [(set (match_operand:V2SI 0 "register_operand" "=y")
2356 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2357 UNSPEC_FIX_NOTRUNC))]
2359 "cvtpd2pi\t{%1, %0|%0, %1}"
2360 [(set_attr "type" "ssecvt")
2361 (set_attr "unit" "mmx")
2362 (set_attr "bdver1_decode" "double")
2363 (set_attr "prefix_data16" "1")
2364 (set_attr "mode" "DI")])
2366 (define_insn "sse2_cvttpd2pi"
2367 [(set (match_operand:V2SI 0 "register_operand" "=y")
2368 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2370 "cvttpd2pi\t{%1, %0|%0, %1}"
2371 [(set_attr "type" "ssecvt")
2372 (set_attr "unit" "mmx")
2373 (set_attr "bdver1_decode" "double")
2374 (set_attr "prefix_data16" "1")
2375 (set_attr "mode" "TI")])
2377 (define_insn "sse2_cvtsi2sd"
2378 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2381 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2382 (match_operand:V2DF 1 "register_operand" "0,0,x")
2386 cvtsi2sd\t{%2, %0|%0, %2}
2387 cvtsi2sd\t{%2, %0|%0, %2}
2388 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2389 [(set_attr "isa" "noavx,noavx,avx")
2390 (set_attr "type" "sseicvt")
2391 (set_attr "athlon_decode" "double,direct,*")
2392 (set_attr "amdfam10_decode" "vector,double,*")
2393 (set_attr "bdver1_decode" "double,direct,*")
2394 (set_attr "prefix" "orig,orig,vex")
2395 (set_attr "mode" "DF")])
2397 (define_insn "sse2_cvtsi2sdq"
2398 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2401 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2402 (match_operand:V2DF 1 "register_operand" "0,0,x")
2404 "TARGET_SSE2 && TARGET_64BIT"
2406 cvtsi2sdq\t{%2, %0|%0, %2}
2407 cvtsi2sdq\t{%2, %0|%0, %2}
2408 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2409 [(set_attr "isa" "noavx,noavx,avx")
2410 (set_attr "type" "sseicvt")
2411 (set_attr "athlon_decode" "double,direct,*")
2412 (set_attr "amdfam10_decode" "vector,double,*")
2413 (set_attr "bdver1_decode" "double,direct,*")
2414 (set_attr "length_vex" "*,*,4")
2415 (set_attr "prefix_rex" "1,1,*")
2416 (set_attr "prefix" "orig,orig,vex")
2417 (set_attr "mode" "DF")])
2419 (define_insn "sse2_cvtsd2si"
2420 [(set (match_operand:SI 0 "register_operand" "=r,r")
2423 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2424 (parallel [(const_int 0)]))]
2425 UNSPEC_FIX_NOTRUNC))]
2427 "%vcvtsd2si\t{%1, %0|%0, %1}"
2428 [(set_attr "type" "sseicvt")
2429 (set_attr "athlon_decode" "double,vector")
2430 (set_attr "bdver1_decode" "double,double")
2431 (set_attr "prefix_rep" "1")
2432 (set_attr "prefix" "maybe_vex")
2433 (set_attr "mode" "SI")])
2435 (define_insn "sse2_cvtsd2si_2"
2436 [(set (match_operand:SI 0 "register_operand" "=r,r")
2437 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2438 UNSPEC_FIX_NOTRUNC))]
2440 "%vcvtsd2si\t{%1, %0|%0, %1}"
2441 [(set_attr "type" "sseicvt")
2442 (set_attr "athlon_decode" "double,vector")
2443 (set_attr "amdfam10_decode" "double,double")
2444 (set_attr "bdver1_decode" "double,double")
2445 (set_attr "prefix_rep" "1")
2446 (set_attr "prefix" "maybe_vex")
2447 (set_attr "mode" "SI")])
2449 (define_insn "sse2_cvtsd2siq"
2450 [(set (match_operand:DI 0 "register_operand" "=r,r")
2453 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2454 (parallel [(const_int 0)]))]
2455 UNSPEC_FIX_NOTRUNC))]
2456 "TARGET_SSE2 && TARGET_64BIT"
2457 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2458 [(set_attr "type" "sseicvt")
2459 (set_attr "athlon_decode" "double,vector")
2460 (set_attr "bdver1_decode" "double,double")
2461 (set_attr "prefix_rep" "1")
2462 (set_attr "prefix" "maybe_vex")
2463 (set_attr "mode" "DI")])
2465 (define_insn "sse2_cvtsd2siq_2"
2466 [(set (match_operand:DI 0 "register_operand" "=r,r")
2467 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2468 UNSPEC_FIX_NOTRUNC))]
2469 "TARGET_SSE2 && TARGET_64BIT"
2470 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2471 [(set_attr "type" "sseicvt")
2472 (set_attr "athlon_decode" "double,vector")
2473 (set_attr "amdfam10_decode" "double,double")
2474 (set_attr "bdver1_decode" "double,double")
2475 (set_attr "prefix_rep" "1")
2476 (set_attr "prefix" "maybe_vex")
2477 (set_attr "mode" "DI")])
2479 (define_insn "sse2_cvttsd2si"
2480 [(set (match_operand:SI 0 "register_operand" "=r,r")
2483 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2484 (parallel [(const_int 0)]))))]
2486 "%vcvttsd2si\t{%1, %0|%0, %1}"
2487 [(set_attr "type" "sseicvt")
2488 (set_attr "athlon_decode" "double,vector")
2489 (set_attr "amdfam10_decode" "double,double")
2490 (set_attr "bdver1_decode" "double,double")
2491 (set_attr "prefix_rep" "1")
2492 (set_attr "prefix" "maybe_vex")
2493 (set_attr "mode" "SI")])
2495 (define_insn "sse2_cvttsd2siq"
2496 [(set (match_operand:DI 0 "register_operand" "=r,r")
2499 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2500 (parallel [(const_int 0)]))))]
2501 "TARGET_SSE2 && TARGET_64BIT"
2502 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2503 [(set_attr "type" "sseicvt")
2504 (set_attr "athlon_decode" "double,vector")
2505 (set_attr "amdfam10_decode" "double,double")
2506 (set_attr "bdver1_decode" "double,double")
2507 (set_attr "prefix_rep" "1")
2508 (set_attr "prefix" "maybe_vex")
2509 (set_attr "mode" "DI")])
2511 (define_insn "avx_cvtdq2pd256"
2512 [(set (match_operand:V4DF 0 "register_operand" "=x")
2513 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2515 "vcvtdq2pd\t{%1, %0|%0, %1}"
2516 [(set_attr "type" "ssecvt")
2517 (set_attr "prefix" "vex")
2518 (set_attr "mode" "V4DF")])
2520 (define_insn "avx_cvtdq2pd256_2"
2521 [(set (match_operand:V4DF 0 "register_operand" "=x")
2524 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2525 (parallel [(const_int 0) (const_int 1)
2526 (const_int 2) (const_int 3)]))))]
2528 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2529 [(set_attr "type" "ssecvt")
2530 (set_attr "prefix" "vex")
2531 (set_attr "mode" "V4DF")])
2533 (define_insn "sse2_cvtdq2pd"
2534 [(set (match_operand:V2DF 0 "register_operand" "=x")
2537 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2538 (parallel [(const_int 0) (const_int 1)]))))]
2540 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2541 [(set_attr "type" "ssecvt")
2542 (set_attr "prefix" "maybe_vex")
2543 (set_attr "mode" "V2DF")])
2545 (define_insn "avx_cvtpd2dq256"
2546 [(set (match_operand:V4SI 0 "register_operand" "=x")
2547 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2548 UNSPEC_FIX_NOTRUNC))]
2550 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2551 [(set_attr "type" "ssecvt")
2552 (set_attr "prefix" "vex")
2553 (set_attr "mode" "OI")])
2555 (define_expand "sse2_cvtpd2dq"
2556 [(set (match_operand:V4SI 0 "register_operand" "")
2558 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2562 "operands[2] = CONST0_RTX (V2SImode);")
2564 (define_insn "*sse2_cvtpd2dq"
2565 [(set (match_operand:V4SI 0 "register_operand" "=x")
2567 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2569 (match_operand:V2SI 2 "const0_operand" "")))]
2573 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2575 return "cvtpd2dq\t{%1, %0|%0, %1}";
2577 [(set_attr "type" "ssecvt")
2578 (set_attr "prefix_rep" "1")
2579 (set_attr "prefix_data16" "0")
2580 (set_attr "prefix" "maybe_vex")
2581 (set_attr "mode" "TI")
2582 (set_attr "amdfam10_decode" "double")
2583 (set_attr "athlon_decode" "vector")
2584 (set_attr "bdver1_decode" "double")])
2586 (define_insn "avx_cvttpd2dq256"
2587 [(set (match_operand:V4SI 0 "register_operand" "=x")
2588 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2590 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2591 [(set_attr "type" "ssecvt")
2592 (set_attr "prefix" "vex")
2593 (set_attr "mode" "OI")])
2595 (define_expand "sse2_cvttpd2dq"
2596 [(set (match_operand:V4SI 0 "register_operand" "")
2598 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2601 "operands[2] = CONST0_RTX (V2SImode);")
2603 (define_insn "*sse2_cvttpd2dq"
2604 [(set (match_operand:V4SI 0 "register_operand" "=x")
2606 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2607 (match_operand:V2SI 2 "const0_operand" "")))]
2611 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2613 return "cvttpd2dq\t{%1, %0|%0, %1}";
2615 [(set_attr "type" "ssecvt")
2616 (set_attr "amdfam10_decode" "double")
2617 (set_attr "athlon_decode" "vector")
2618 (set_attr "bdver1_decode" "double")
2619 (set_attr "prefix" "maybe_vex")
2620 (set_attr "mode" "TI")])
2622 (define_insn "sse2_cvtsd2ss"
2623 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2626 (float_truncate:V2SF
2627 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2628 (match_operand:V4SF 1 "register_operand" "0,0,x")
2632 cvtsd2ss\t{%2, %0|%0, %2}
2633 cvtsd2ss\t{%2, %0|%0, %2}
2634 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2635 [(set_attr "isa" "noavx,noavx,avx")
2636 (set_attr "type" "ssecvt")
2637 (set_attr "athlon_decode" "vector,double,*")
2638 (set_attr "amdfam10_decode" "vector,double,*")
2639 (set_attr "bdver1_decode" "direct,direct,*")
2640 (set_attr "prefix" "orig,orig,vex")
2641 (set_attr "mode" "SF")])
2643 (define_insn "sse2_cvtss2sd"
2644 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2648 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2649 (parallel [(const_int 0) (const_int 1)])))
2650 (match_operand:V2DF 1 "register_operand" "0,0,x")
2654 cvtss2sd\t{%2, %0|%0, %2}
2655 cvtss2sd\t{%2, %0|%0, %2}
2656 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2657 [(set_attr "isa" "noavx,noavx,avx")
2658 (set_attr "type" "ssecvt")
2659 (set_attr "amdfam10_decode" "vector,double,*")
2660 (set_attr "athlon_decode" "direct,direct,*")
2661 (set_attr "bdver1_decode" "direct,direct,*")
2662 (set_attr "prefix" "orig,orig,vex")
2663 (set_attr "mode" "DF")])
2665 (define_insn "avx_cvtpd2ps256"
2666 [(set (match_operand:V4SF 0 "register_operand" "=x")
2667 (float_truncate:V4SF
2668 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2670 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2671 [(set_attr "type" "ssecvt")
2672 (set_attr "prefix" "vex")
2673 (set_attr "mode" "V4SF")])
2675 (define_expand "sse2_cvtpd2ps"
2676 [(set (match_operand:V4SF 0 "register_operand" "")
2678 (float_truncate:V2SF
2679 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2682 "operands[2] = CONST0_RTX (V2SFmode);")
2684 (define_insn "*sse2_cvtpd2ps"
2685 [(set (match_operand:V4SF 0 "register_operand" "=x")
2687 (float_truncate:V2SF
2688 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2689 (match_operand:V2SF 2 "const0_operand" "")))]
2693 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2695 return "cvtpd2ps\t{%1, %0|%0, %1}";
2697 [(set_attr "type" "ssecvt")
2698 (set_attr "amdfam10_decode" "double")
2699 (set_attr "athlon_decode" "vector")
2700 (set_attr "bdver1_decode" "double")
2701 (set_attr "prefix_data16" "1")
2702 (set_attr "prefix" "maybe_vex")
2703 (set_attr "mode" "V4SF")])
2705 (define_insn "avx_cvtps2pd256"
2706 [(set (match_operand:V4DF 0 "register_operand" "=x")
2708 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2710 "vcvtps2pd\t{%1, %0|%0, %1}"
2711 [(set_attr "type" "ssecvt")
2712 (set_attr "prefix" "vex")
2713 (set_attr "mode" "V4DF")])
2715 (define_insn "*avx_cvtps2pd256_2"
2716 [(set (match_operand:V4DF 0 "register_operand" "=x")
2719 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2720 (parallel [(const_int 0) (const_int 1)
2721 (const_int 2) (const_int 3)]))))]
2723 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2724 [(set_attr "type" "ssecvt")
2725 (set_attr "prefix" "vex")
2726 (set_attr "mode" "V4DF")])
2728 (define_insn "sse2_cvtps2pd"
2729 [(set (match_operand:V2DF 0 "register_operand" "=x")
2732 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2733 (parallel [(const_int 0) (const_int 1)]))))]
2735 "%vcvtps2pd\t{%1, %0|%0, %1}"
2736 [(set_attr "type" "ssecvt")
2737 (set_attr "amdfam10_decode" "direct")
2738 (set_attr "athlon_decode" "double")
2739 (set_attr "bdver1_decode" "double")
2740 (set_attr "prefix_data16" "0")
2741 (set_attr "prefix" "maybe_vex")
2742 (set_attr "mode" "V2DF")])
2744 (define_expand "vec_unpacks_hi_v4sf"
2749 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2750 (parallel [(const_int 6) (const_int 7)
2751 (const_int 2) (const_int 3)])))
2752 (set (match_operand:V2DF 0 "register_operand" "")
2756 (parallel [(const_int 0) (const_int 1)]))))]
2758 "operands[2] = gen_reg_rtx (V4SFmode);")
2760 (define_expand "vec_unpacks_hi_v8sf"
2763 (match_operand:V8SF 1 "nonimmediate_operand" "")
2764 (parallel [(const_int 4) (const_int 5)
2765 (const_int 6) (const_int 7)])))
2766 (set (match_operand:V4DF 0 "register_operand" "")
2770 "operands[2] = gen_reg_rtx (V4SFmode);")
2772 (define_expand "vec_unpacks_lo_v4sf"
2773 [(set (match_operand:V2DF 0 "register_operand" "")
2776 (match_operand:V4SF 1 "nonimmediate_operand" "")
2777 (parallel [(const_int 0) (const_int 1)]))))]
2780 (define_expand "vec_unpacks_lo_v8sf"
2781 [(set (match_operand:V4DF 0 "register_operand" "")
2784 (match_operand:V8SF 1 "nonimmediate_operand" "")
2785 (parallel [(const_int 0) (const_int 1)
2786 (const_int 2) (const_int 3)]))))]
2789 (define_mode_attr sseunpackfltmode
2790 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
2792 (define_expand "vec_unpacks_float_hi_<mode>"
2793 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2794 (match_operand:VI2_AVX2 1 "register_operand" "")]
2797 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2799 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2800 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2801 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2805 (define_expand "vec_unpacks_float_lo_<mode>"
2806 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2807 (match_operand:VI2_AVX2 1 "register_operand" "")]
2810 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2812 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2813 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2814 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2818 (define_expand "vec_unpacku_float_hi_<mode>"
2819 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2820 (match_operand:VI2_AVX2 1 "register_operand" "")]
2823 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2825 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2826 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2827 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2831 (define_expand "vec_unpacku_float_lo_<mode>"
2832 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2833 (match_operand:VI2_AVX2 1 "register_operand" "")]
2836 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2838 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2839 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2840 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2844 (define_expand "vec_unpacks_float_hi_v4si"
2847 (match_operand:V4SI 1 "nonimmediate_operand" "")
2848 (parallel [(const_int 2) (const_int 3)
2849 (const_int 2) (const_int 3)])))
2850 (set (match_operand:V2DF 0 "register_operand" "")
2854 (parallel [(const_int 0) (const_int 1)]))))]
2856 "operands[2] = gen_reg_rtx (V4SImode);")
2858 (define_expand "vec_unpacks_float_lo_v4si"
2859 [(set (match_operand:V2DF 0 "register_operand" "")
2862 (match_operand:V4SI 1 "nonimmediate_operand" "")
2863 (parallel [(const_int 0) (const_int 1)]))))]
2866 (define_expand "vec_unpacks_float_hi_v8si"
2869 (match_operand:V8SI 1 "nonimmediate_operand" "")
2870 (parallel [(const_int 4) (const_int 5)
2871 (const_int 6) (const_int 7)])))
2872 (set (match_operand:V4DF 0 "register_operand" "")
2876 "operands[2] = gen_reg_rtx (V4SImode);")
2878 (define_expand "vec_unpacks_float_lo_v8si"
2879 [(set (match_operand:V4DF 0 "register_operand" "")
2882 (match_operand:V8SI 1 "nonimmediate_operand" "")
2883 (parallel [(const_int 0) (const_int 1)
2884 (const_int 2) (const_int 3)]))))]
2887 (define_expand "vec_unpacku_float_hi_v4si"
2890 (match_operand:V4SI 1 "nonimmediate_operand" "")
2891 (parallel [(const_int 2) (const_int 3)
2892 (const_int 2) (const_int 3)])))
2897 (parallel [(const_int 0) (const_int 1)]))))
2899 (lt:V2DF (match_dup 6) (match_dup 3)))
2901 (and:V2DF (match_dup 7) (match_dup 4)))
2902 (set (match_operand:V2DF 0 "register_operand" "")
2903 (plus:V2DF (match_dup 6) (match_dup 8)))]
2906 REAL_VALUE_TYPE TWO32r;
2910 real_ldexp (&TWO32r, &dconst1, 32);
2911 x = const_double_from_real_value (TWO32r, DFmode);
2913 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2914 operands[4] = force_reg (V2DFmode,
2915 ix86_build_const_vector (V2DFmode, 1, x));
2917 operands[5] = gen_reg_rtx (V4SImode);
2919 for (i = 6; i < 9; i++)
2920 operands[i] = gen_reg_rtx (V2DFmode);
2923 (define_expand "vec_unpacku_float_lo_v4si"
2927 (match_operand:V4SI 1 "nonimmediate_operand" "")
2928 (parallel [(const_int 0) (const_int 1)]))))
2930 (lt:V2DF (match_dup 5) (match_dup 3)))
2932 (and:V2DF (match_dup 6) (match_dup 4)))
2933 (set (match_operand:V2DF 0 "register_operand" "")
2934 (plus:V2DF (match_dup 5) (match_dup 7)))]
2937 REAL_VALUE_TYPE TWO32r;
2941 real_ldexp (&TWO32r, &dconst1, 32);
2942 x = const_double_from_real_value (TWO32r, DFmode);
2944 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2945 operands[4] = force_reg (V2DFmode,
2946 ix86_build_const_vector (V2DFmode, 1, x));
2948 for (i = 5; i < 8; i++)
2949 operands[i] = gen_reg_rtx (V2DFmode);
2952 (define_expand "vec_unpacku_float_hi_v8si"
2953 [(match_operand:V4DF 0 "register_operand" "")
2954 (match_operand:V8SI 1 "register_operand" "")]
2957 REAL_VALUE_TYPE TWO32r;
2961 real_ldexp (&TWO32r, &dconst1, 32);
2962 x = const_double_from_real_value (TWO32r, DFmode);
2964 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2965 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2966 tmp[5] = gen_reg_rtx (V4SImode);
2968 for (i = 2; i < 5; i++)
2969 tmp[i] = gen_reg_rtx (V4DFmode);
2970 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
2971 emit_insn (gen_avx_cvtdq2pd256 (tmp[2], tmp[5]));
2972 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2973 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2974 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2975 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
2979 (define_expand "vec_unpacku_float_lo_v8si"
2980 [(match_operand:V4DF 0 "register_operand" "")
2981 (match_operand:V8SI 1 "nonimmediate_operand" "")]
2984 REAL_VALUE_TYPE TWO32r;
2988 real_ldexp (&TWO32r, &dconst1, 32);
2989 x = const_double_from_real_value (TWO32r, DFmode);
2991 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2992 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2994 for (i = 2; i < 5; i++)
2995 tmp[i] = gen_reg_rtx (V4DFmode);
2996 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
2997 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2998 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2999 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3000 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
3004 (define_expand "vec_pack_trunc_v4df"
3006 (float_truncate:V4SF
3007 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3009 (float_truncate:V4SF
3010 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3011 (set (match_operand:V8SF 0 "register_operand" "")
3017 operands[3] = gen_reg_rtx (V4SFmode);
3018 operands[4] = gen_reg_rtx (V4SFmode);
3021 (define_expand "vec_pack_trunc_v2df"
3022 [(match_operand:V4SF 0 "register_operand" "")
3023 (match_operand:V2DF 1 "nonimmediate_operand" "")
3024 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3029 r1 = gen_reg_rtx (V4SFmode);
3030 r2 = gen_reg_rtx (V4SFmode);
3032 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3033 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3034 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
3038 (define_expand "vec_pack_sfix_trunc_v2df"
3039 [(match_operand:V4SI 0 "register_operand" "")
3040 (match_operand:V2DF 1 "nonimmediate_operand" "")
3041 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3046 r1 = gen_reg_rtx (V4SImode);
3047 r2 = gen_reg_rtx (V4SImode);
3049 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3050 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3051 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3052 gen_lowpart (V2DImode, r1),
3053 gen_lowpart (V2DImode, r2)));
3057 (define_expand "vec_pack_sfix_v2df"
3058 [(match_operand:V4SI 0 "register_operand" "")
3059 (match_operand:V2DF 1 "nonimmediate_operand" "")
3060 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3065 r1 = gen_reg_rtx (V4SImode);
3066 r2 = gen_reg_rtx (V4SImode);
3068 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3069 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3070 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3071 gen_lowpart (V2DImode, r1),
3072 gen_lowpart (V2DImode, r2)));
3076 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3078 ;; Parallel single-precision floating point element swizzling
3080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3082 (define_expand "sse_movhlps_exp"
3083 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3086 (match_operand:V4SF 1 "nonimmediate_operand" "")
3087 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3088 (parallel [(const_int 6)
3094 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3096 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3098 /* Fix up the destination if needed. */
3099 if (dst != operands[0])
3100 emit_move_insn (operands[0], dst);
3105 (define_insn "sse_movhlps"
3106 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3109 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3110 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3111 (parallel [(const_int 6)
3115 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3117 movhlps\t{%2, %0|%0, %2}
3118 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3119 movlps\t{%H2, %0|%0, %H2}
3120 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3121 %vmovhps\t{%2, %0|%0, %2}"
3122 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3123 (set_attr "type" "ssemov")
3124 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3125 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3127 (define_expand "sse_movlhps_exp"
3128 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3131 (match_operand:V4SF 1 "nonimmediate_operand" "")
3132 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3133 (parallel [(const_int 0)
3139 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3141 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3143 /* Fix up the destination if needed. */
3144 if (dst != operands[0])
3145 emit_move_insn (operands[0], dst);
3150 (define_insn "sse_movlhps"
3151 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3154 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3155 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3156 (parallel [(const_int 0)
3160 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3162 movlhps\t{%2, %0|%0, %2}
3163 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3164 movhps\t{%2, %0|%0, %2}
3165 vmovhps\t{%2, %1, %0|%0, %1, %2}
3166 %vmovlps\t{%2, %H0|%H0, %2}"
3167 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3168 (set_attr "type" "ssemov")
3169 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3170 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3172 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3173 (define_insn "avx_unpckhps256"
3174 [(set (match_operand:V8SF 0 "register_operand" "=x")
3177 (match_operand:V8SF 1 "register_operand" "x")
3178 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3179 (parallel [(const_int 2) (const_int 10)
3180 (const_int 3) (const_int 11)
3181 (const_int 6) (const_int 14)
3182 (const_int 7) (const_int 15)])))]
3184 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3185 [(set_attr "type" "sselog")
3186 (set_attr "prefix" "vex")
3187 (set_attr "mode" "V8SF")])
3189 (define_expand "vec_interleave_highv8sf"
3193 (match_operand:V8SF 1 "register_operand" "x")
3194 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3195 (parallel [(const_int 0) (const_int 8)
3196 (const_int 1) (const_int 9)
3197 (const_int 4) (const_int 12)
3198 (const_int 5) (const_int 13)])))
3204 (parallel [(const_int 2) (const_int 10)
3205 (const_int 3) (const_int 11)
3206 (const_int 6) (const_int 14)
3207 (const_int 7) (const_int 15)])))
3208 (set (match_operand:V8SF 0 "register_operand" "")
3213 (parallel [(const_int 4) (const_int 5)
3214 (const_int 6) (const_int 7)
3215 (const_int 12) (const_int 13)
3216 (const_int 14) (const_int 15)])))]
3219 operands[3] = gen_reg_rtx (V8SFmode);
3220 operands[4] = gen_reg_rtx (V8SFmode);
3223 (define_insn "vec_interleave_highv4sf"
3224 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3227 (match_operand:V4SF 1 "register_operand" "0,x")
3228 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3229 (parallel [(const_int 2) (const_int 6)
3230 (const_int 3) (const_int 7)])))]
3233 unpckhps\t{%2, %0|%0, %2}
3234 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3235 [(set_attr "isa" "noavx,avx")
3236 (set_attr "type" "sselog")
3237 (set_attr "prefix" "orig,vex")
3238 (set_attr "mode" "V4SF")])
3240 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3241 (define_insn "avx_unpcklps256"
3242 [(set (match_operand:V8SF 0 "register_operand" "=x")
3245 (match_operand:V8SF 1 "register_operand" "x")
3246 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3247 (parallel [(const_int 0) (const_int 8)
3248 (const_int 1) (const_int 9)
3249 (const_int 4) (const_int 12)
3250 (const_int 5) (const_int 13)])))]
3252 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3253 [(set_attr "type" "sselog")
3254 (set_attr "prefix" "vex")
3255 (set_attr "mode" "V8SF")])
3257 (define_expand "vec_interleave_lowv8sf"
3261 (match_operand:V8SF 1 "register_operand" "x")
3262 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3263 (parallel [(const_int 0) (const_int 8)
3264 (const_int 1) (const_int 9)
3265 (const_int 4) (const_int 12)
3266 (const_int 5) (const_int 13)])))
3272 (parallel [(const_int 2) (const_int 10)
3273 (const_int 3) (const_int 11)
3274 (const_int 6) (const_int 14)
3275 (const_int 7) (const_int 15)])))
3276 (set (match_operand:V8SF 0 "register_operand" "")
3281 (parallel [(const_int 0) (const_int 1)
3282 (const_int 2) (const_int 3)
3283 (const_int 8) (const_int 9)
3284 (const_int 10) (const_int 11)])))]
3287 operands[3] = gen_reg_rtx (V8SFmode);
3288 operands[4] = gen_reg_rtx (V8SFmode);
3291 (define_insn "vec_interleave_lowv4sf"
3292 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3295 (match_operand:V4SF 1 "register_operand" "0,x")
3296 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3297 (parallel [(const_int 0) (const_int 4)
3298 (const_int 1) (const_int 5)])))]
3301 unpcklps\t{%2, %0|%0, %2}
3302 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3303 [(set_attr "isa" "noavx,avx")
3304 (set_attr "type" "sselog")
3305 (set_attr "prefix" "orig,vex")
3306 (set_attr "mode" "V4SF")])
3308 ;; These are modeled with the same vec_concat as the others so that we
3309 ;; capture users of shufps that can use the new instructions
3310 (define_insn "avx_movshdup256"
3311 [(set (match_operand:V8SF 0 "register_operand" "=x")
3314 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3316 (parallel [(const_int 1) (const_int 1)
3317 (const_int 3) (const_int 3)
3318 (const_int 5) (const_int 5)
3319 (const_int 7) (const_int 7)])))]
3321 "vmovshdup\t{%1, %0|%0, %1}"
3322 [(set_attr "type" "sse")
3323 (set_attr "prefix" "vex")
3324 (set_attr "mode" "V8SF")])
3326 (define_insn "sse3_movshdup"
3327 [(set (match_operand:V4SF 0 "register_operand" "=x")
3330 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3332 (parallel [(const_int 1)
3337 "%vmovshdup\t{%1, %0|%0, %1}"
3338 [(set_attr "type" "sse")
3339 (set_attr "prefix_rep" "1")
3340 (set_attr "prefix" "maybe_vex")
3341 (set_attr "mode" "V4SF")])
3343 (define_insn "avx_movsldup256"
3344 [(set (match_operand:V8SF 0 "register_operand" "=x")
3347 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3349 (parallel [(const_int 0) (const_int 0)
3350 (const_int 2) (const_int 2)
3351 (const_int 4) (const_int 4)
3352 (const_int 6) (const_int 6)])))]
3354 "vmovsldup\t{%1, %0|%0, %1}"
3355 [(set_attr "type" "sse")
3356 (set_attr "prefix" "vex")
3357 (set_attr "mode" "V8SF")])
3359 (define_insn "sse3_movsldup"
3360 [(set (match_operand:V4SF 0 "register_operand" "=x")
3363 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3365 (parallel [(const_int 0)
3370 "%vmovsldup\t{%1, %0|%0, %1}"
3371 [(set_attr "type" "sse")
3372 (set_attr "prefix_rep" "1")
3373 (set_attr "prefix" "maybe_vex")
3374 (set_attr "mode" "V4SF")])
3376 (define_expand "avx_shufps256"
3377 [(match_operand:V8SF 0 "register_operand" "")
3378 (match_operand:V8SF 1 "register_operand" "")
3379 (match_operand:V8SF 2 "nonimmediate_operand" "")
3380 (match_operand:SI 3 "const_int_operand" "")]
3383 int mask = INTVAL (operands[3]);
3384 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3385 GEN_INT ((mask >> 0) & 3),
3386 GEN_INT ((mask >> 2) & 3),
3387 GEN_INT (((mask >> 4) & 3) + 8),
3388 GEN_INT (((mask >> 6) & 3) + 8),
3389 GEN_INT (((mask >> 0) & 3) + 4),
3390 GEN_INT (((mask >> 2) & 3) + 4),
3391 GEN_INT (((mask >> 4) & 3) + 12),
3392 GEN_INT (((mask >> 6) & 3) + 12)));
3396 ;; One bit in mask selects 2 elements.
3397 (define_insn "avx_shufps256_1"
3398 [(set (match_operand:V8SF 0 "register_operand" "=x")
3401 (match_operand:V8SF 1 "register_operand" "x")
3402 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3403 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3404 (match_operand 4 "const_0_to_3_operand" "")
3405 (match_operand 5 "const_8_to_11_operand" "")
3406 (match_operand 6 "const_8_to_11_operand" "")
3407 (match_operand 7 "const_4_to_7_operand" "")
3408 (match_operand 8 "const_4_to_7_operand" "")
3409 (match_operand 9 "const_12_to_15_operand" "")
3410 (match_operand 10 "const_12_to_15_operand" "")])))]
3412 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3413 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3414 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3415 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3418 mask = INTVAL (operands[3]);
3419 mask |= INTVAL (operands[4]) << 2;
3420 mask |= (INTVAL (operands[5]) - 8) << 4;
3421 mask |= (INTVAL (operands[6]) - 8) << 6;
3422 operands[3] = GEN_INT (mask);
3424 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3426 [(set_attr "type" "sselog")
3427 (set_attr "length_immediate" "1")
3428 (set_attr "prefix" "vex")
3429 (set_attr "mode" "V8SF")])
3431 (define_expand "sse_shufps"
3432 [(match_operand:V4SF 0 "register_operand" "")
3433 (match_operand:V4SF 1 "register_operand" "")
3434 (match_operand:V4SF 2 "nonimmediate_operand" "")
3435 (match_operand:SI 3 "const_int_operand" "")]
3438 int mask = INTVAL (operands[3]);
3439 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3440 GEN_INT ((mask >> 0) & 3),
3441 GEN_INT ((mask >> 2) & 3),
3442 GEN_INT (((mask >> 4) & 3) + 4),
3443 GEN_INT (((mask >> 6) & 3) + 4)));
3447 (define_insn "sse_shufps_<mode>"
3448 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3449 (vec_select:VI4F_128
3450 (vec_concat:<ssedoublevecmode>
3451 (match_operand:VI4F_128 1 "register_operand" "0,x")
3452 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3453 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3454 (match_operand 4 "const_0_to_3_operand" "")
3455 (match_operand 5 "const_4_to_7_operand" "")
3456 (match_operand 6 "const_4_to_7_operand" "")])))]
3460 mask |= INTVAL (operands[3]) << 0;
3461 mask |= INTVAL (operands[4]) << 2;
3462 mask |= (INTVAL (operands[5]) - 4) << 4;
3463 mask |= (INTVAL (operands[6]) - 4) << 6;
3464 operands[3] = GEN_INT (mask);
3466 switch (which_alternative)
3469 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3471 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3476 [(set_attr "isa" "noavx,avx")
3477 (set_attr "type" "sselog")
3478 (set_attr "length_immediate" "1")
3479 (set_attr "prefix" "orig,vex")
3480 (set_attr "mode" "V4SF")])
3482 (define_insn "sse_storehps"
3483 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3485 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3486 (parallel [(const_int 2) (const_int 3)])))]
3489 %vmovhps\t{%1, %0|%0, %1}
3490 %vmovhlps\t{%1, %d0|%d0, %1}
3491 %vmovlps\t{%H1, %d0|%d0, %H1}"
3492 [(set_attr "type" "ssemov")
3493 (set_attr "prefix" "maybe_vex")
3494 (set_attr "mode" "V2SF,V4SF,V2SF")])
3496 (define_expand "sse_loadhps_exp"
3497 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3500 (match_operand:V4SF 1 "nonimmediate_operand" "")
3501 (parallel [(const_int 0) (const_int 1)]))
3502 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3505 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3507 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3509 /* Fix up the destination if needed. */
3510 if (dst != operands[0])
3511 emit_move_insn (operands[0], dst);
3516 (define_insn "sse_loadhps"
3517 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3520 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3521 (parallel [(const_int 0) (const_int 1)]))
3522 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3525 movhps\t{%2, %0|%0, %2}
3526 vmovhps\t{%2, %1, %0|%0, %1, %2}
3527 movlhps\t{%2, %0|%0, %2}
3528 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3529 %vmovlps\t{%2, %H0|%H0, %2}"
3530 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3531 (set_attr "type" "ssemov")
3532 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3533 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
3535 (define_insn "sse_storelps"
3536 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3538 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3539 (parallel [(const_int 0) (const_int 1)])))]
3542 %vmovlps\t{%1, %0|%0, %1}
3543 %vmovaps\t{%1, %0|%0, %1}
3544 %vmovlps\t{%1, %d0|%d0, %1}"
3545 [(set_attr "type" "ssemov")
3546 (set_attr "prefix" "maybe_vex")
3547 (set_attr "mode" "V2SF,V4SF,V2SF")])
3549 (define_expand "sse_loadlps_exp"
3550 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3552 (match_operand:V2SF 2 "nonimmediate_operand" "")
3554 (match_operand:V4SF 1 "nonimmediate_operand" "")
3555 (parallel [(const_int 2) (const_int 3)]))))]
3558 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3560 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3562 /* Fix up the destination if needed. */
3563 if (dst != operands[0])
3564 emit_move_insn (operands[0], dst);
3569 (define_insn "sse_loadlps"
3570 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3572 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3574 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3575 (parallel [(const_int 2) (const_int 3)]))))]
3578 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3579 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3580 movlps\t{%2, %0|%0, %2}
3581 vmovlps\t{%2, %1, %0|%0, %1, %2}
3582 %vmovlps\t{%2, %0|%0, %2}"
3583 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3584 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3585 (set_attr "length_immediate" "1,1,*,*,*")
3586 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3587 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3589 (define_insn "sse_movss"
3590 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3592 (match_operand:V4SF 2 "register_operand" " x,x")
3593 (match_operand:V4SF 1 "register_operand" " 0,x")
3597 movss\t{%2, %0|%0, %2}
3598 vmovss\t{%2, %1, %0|%0, %1, %2}"
3599 [(set_attr "isa" "noavx,avx")
3600 (set_attr "type" "ssemov")
3601 (set_attr "prefix" "orig,vex")
3602 (set_attr "mode" "SF")])
3604 (define_expand "vec_dupv4sf"
3605 [(set (match_operand:V4SF 0 "register_operand" "")
3607 (match_operand:SF 1 "nonimmediate_operand" "")))]
3611 operands[1] = force_reg (SFmode, operands[1]);
3614 (define_insn "avx2_vec_dupv4sf"
3615 [(set (match_operand:V4SF 0 "register_operand" "=x")
3618 (match_operand:V4SF 1 "register_operand" "x")
3619 (parallel [(const_int 0)]))))]
3621 "vbroadcastss\t{%1, %0|%0, %1}"
3622 [(set_attr "type" "sselog1")
3623 (set_attr "prefix" "vex")
3624 (set_attr "mode" "V4SF")])
3626 (define_insn "*vec_dupv4sf_avx"
3627 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3629 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3632 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3633 vbroadcastss\t{%1, %0|%0, %1}"
3634 [(set_attr "type" "sselog1,ssemov")
3635 (set_attr "length_immediate" "1,0")
3636 (set_attr "prefix_extra" "0,1")
3637 (set_attr "prefix" "vex")
3638 (set_attr "mode" "V4SF")])
3640 (define_insn "avx2_vec_dupv8sf"
3641 [(set (match_operand:V8SF 0 "register_operand" "=x")
3644 (match_operand:V4SF 1 "register_operand" "x")
3645 (parallel [(const_int 0)]))))]
3647 "vbroadcastss\t{%1, %0|%0, %1}"
3648 [(set_attr "type" "sselog1")
3649 (set_attr "prefix" "vex")
3650 (set_attr "mode" "V8SF")])
3652 (define_insn "*vec_dupv4sf"
3653 [(set (match_operand:V4SF 0 "register_operand" "=x")
3655 (match_operand:SF 1 "register_operand" "0")))]
3657 "shufps\t{$0, %0, %0|%0, %0, 0}"
3658 [(set_attr "type" "sselog1")
3659 (set_attr "length_immediate" "1")
3660 (set_attr "mode" "V4SF")])
3662 ;; Although insertps takes register source, we prefer
3663 ;; unpcklps with register source since it is shorter.
3664 (define_insn "*vec_concatv2sf_sse4_1"
3665 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3667 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3668 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3671 unpcklps\t{%2, %0|%0, %2}
3672 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3673 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3674 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3675 %vmovss\t{%1, %0|%0, %1}
3676 punpckldq\t{%2, %0|%0, %2}
3677 movd\t{%1, %0|%0, %1}"
3678 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3679 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3680 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3681 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3682 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3683 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3684 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3686 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3687 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3688 ;; alternatives pretty much forces the MMX alternative to be chosen.
3689 (define_insn "*vec_concatv2sf_sse"
3690 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3692 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3693 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3696 unpcklps\t{%2, %0|%0, %2}
3697 movss\t{%1, %0|%0, %1}
3698 punpckldq\t{%2, %0|%0, %2}
3699 movd\t{%1, %0|%0, %1}"
3700 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3701 (set_attr "mode" "V4SF,SF,DI,DI")])
3703 (define_insn "*vec_concatv4sf"
3704 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3706 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3707 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3710 movlhps\t{%2, %0|%0, %2}
3711 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3712 movhps\t{%2, %0|%0, %2}
3713 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3714 [(set_attr "isa" "noavx,avx,noavx,avx")
3715 (set_attr "type" "ssemov")
3716 (set_attr "prefix" "orig,vex,orig,vex")
3717 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
3719 (define_expand "vec_init<mode>"
3720 [(match_operand:V_128 0 "register_operand" "")
3721 (match_operand 1 "" "")]
3724 ix86_expand_vector_init (false, operands[0], operands[1]);
3728 ;; Avoid combining registers from different units in a single alternative,
3729 ;; see comment above inline_secondary_memory_needed function in i386.c
3730 (define_insn "vec_set<mode>_0"
3731 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3732 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3734 (vec_duplicate:VI4F_128
3735 (match_operand:<ssescalarmode> 2 "general_operand"
3736 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3737 (match_operand:VI4F_128 1 "vector_move_operand"
3738 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3742 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3743 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3744 %vmovd\t{%2, %0|%0, %2}
3745 movss\t{%2, %0|%0, %2}
3746 movss\t{%2, %0|%0, %2}
3747 vmovss\t{%2, %1, %0|%0, %1, %2}
3748 pinsrd\t{$0, %2, %0|%0, %2, 0}
3749 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3753 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3755 (cond [(eq_attr "alternative" "0,6,7")
3756 (const_string "sselog")
3757 (eq_attr "alternative" "9")
3758 (const_string "fmov")
3759 (eq_attr "alternative" "10")
3760 (const_string "imov")
3762 (const_string "ssemov")))
3763 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3764 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3765 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3766 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3768 ;; A subset is vec_setv4sf.
3769 (define_insn "*vec_setv4sf_sse4_1"
3770 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3773 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3774 (match_operand:V4SF 1 "register_operand" "0,x")
3775 (match_operand:SI 3 "const_int_operand" "")))]
3777 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3778 < GET_MODE_NUNITS (V4SFmode))"
3780 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3781 switch (which_alternative)
3784 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3786 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3791 [(set_attr "isa" "noavx,avx")
3792 (set_attr "type" "sselog")
3793 (set_attr "prefix_data16" "1,*")
3794 (set_attr "prefix_extra" "1")
3795 (set_attr "length_immediate" "1")
3796 (set_attr "prefix" "orig,vex")
3797 (set_attr "mode" "V4SF")])
3799 (define_insn "sse4_1_insertps"
3800 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3801 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3802 (match_operand:V4SF 1 "register_operand" "0,x")
3803 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3807 if (MEM_P (operands[2]))
3809 unsigned count_s = INTVAL (operands[3]) >> 6;
3811 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3812 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3814 switch (which_alternative)
3817 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3819 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3824 [(set_attr "isa" "noavx,avx")
3825 (set_attr "type" "sselog")
3826 (set_attr "prefix_data16" "1,*")
3827 (set_attr "prefix_extra" "1")
3828 (set_attr "length_immediate" "1")
3829 (set_attr "prefix" "orig,vex")
3830 (set_attr "mode" "V4SF")])
3833 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3835 (vec_duplicate:VI4F_128
3836 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3839 "TARGET_SSE && reload_completed"
3842 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
3847 (define_expand "vec_set<mode>"
3848 [(match_operand:V 0 "register_operand" "")
3849 (match_operand:<ssescalarmode> 1 "register_operand" "")
3850 (match_operand 2 "const_int_operand" "")]
3853 ix86_expand_vector_set (false, operands[0], operands[1],
3854 INTVAL (operands[2]));
3858 (define_insn_and_split "*vec_extractv4sf_0"
3859 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3861 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3862 (parallel [(const_int 0)])))]
3863 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3865 "&& reload_completed"
3868 rtx op1 = operands[1];
3870 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3872 op1 = gen_lowpart (SFmode, op1);
3873 emit_move_insn (operands[0], op1);
3877 (define_expand "avx_vextractf128<mode>"
3878 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3879 (match_operand:V_256 1 "register_operand" "")
3880 (match_operand:SI 2 "const_0_to_1_operand" "")]
3883 rtx (*insn)(rtx, rtx);
3885 switch (INTVAL (operands[2]))
3888 insn = gen_vec_extract_lo_<mode>;
3891 insn = gen_vec_extract_hi_<mode>;
3897 emit_insn (insn (operands[0], operands[1]));
3901 (define_insn_and_split "vec_extract_lo_<mode>"
3902 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3903 (vec_select:<ssehalfvecmode>
3904 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3905 (parallel [(const_int 0) (const_int 1)])))]
3906 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3908 "&& reload_completed"
3911 rtx op1 = operands[1];
3913 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3915 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3916 emit_move_insn (operands[0], op1);
3920 (define_insn "vec_extract_hi_<mode>"
3921 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3922 (vec_select:<ssehalfvecmode>
3923 (match_operand:VI8F_256 1 "register_operand" "x,x")
3924 (parallel [(const_int 2) (const_int 3)])))]
3926 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
3927 [(set_attr "type" "sselog")
3928 (set_attr "prefix_extra" "1")
3929 (set_attr "length_immediate" "1")
3930 (set_attr "memory" "none,store")
3931 (set_attr "prefix" "vex")
3932 (set_attr "mode" "<sseinsnmode>")])
3934 (define_insn_and_split "vec_extract_lo_<mode>"
3935 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3936 (vec_select:<ssehalfvecmode>
3937 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3938 (parallel [(const_int 0) (const_int 1)
3939 (const_int 2) (const_int 3)])))]
3940 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3942 "&& reload_completed"
3945 rtx op1 = operands[1];
3947 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3949 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3950 emit_move_insn (operands[0], op1);
3954 (define_insn "vec_extract_hi_<mode>"
3955 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3956 (vec_select:<ssehalfvecmode>
3957 (match_operand:VI4F_256 1 "register_operand" "x,x")
3958 (parallel [(const_int 4) (const_int 5)
3959 (const_int 6) (const_int 7)])))]
3961 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
3962 [(set_attr "type" "sselog")
3963 (set_attr "prefix_extra" "1")
3964 (set_attr "length_immediate" "1")
3965 (set_attr "memory" "none,store")
3966 (set_attr "prefix" "vex")
3967 (set_attr "mode" "<sseinsnmode>")])
3969 (define_insn_and_split "vec_extract_lo_v16hi"
3970 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3972 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3973 (parallel [(const_int 0) (const_int 1)
3974 (const_int 2) (const_int 3)
3975 (const_int 4) (const_int 5)
3976 (const_int 6) (const_int 7)])))]
3977 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3979 "&& reload_completed"
3982 rtx op1 = operands[1];
3984 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3986 op1 = gen_lowpart (V8HImode, op1);
3987 emit_move_insn (operands[0], op1);
3991 (define_insn "vec_extract_hi_v16hi"
3992 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3994 (match_operand:V16HI 1 "register_operand" "x,x")
3995 (parallel [(const_int 8) (const_int 9)
3996 (const_int 10) (const_int 11)
3997 (const_int 12) (const_int 13)
3998 (const_int 14) (const_int 15)])))]
4000 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4001 [(set_attr "type" "sselog")
4002 (set_attr "prefix_extra" "1")
4003 (set_attr "length_immediate" "1")
4004 (set_attr "memory" "none,store")
4005 (set_attr "prefix" "vex")
4006 (set_attr "mode" "OI")])
4008 (define_insn_and_split "vec_extract_lo_v32qi"
4009 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4011 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4012 (parallel [(const_int 0) (const_int 1)
4013 (const_int 2) (const_int 3)
4014 (const_int 4) (const_int 5)
4015 (const_int 6) (const_int 7)
4016 (const_int 8) (const_int 9)
4017 (const_int 10) (const_int 11)
4018 (const_int 12) (const_int 13)
4019 (const_int 14) (const_int 15)])))]
4020 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4022 "&& reload_completed"
4025 rtx op1 = operands[1];
4027 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4029 op1 = gen_lowpart (V16QImode, op1);
4030 emit_move_insn (operands[0], op1);
4034 (define_insn "vec_extract_hi_v32qi"
4035 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4037 (match_operand:V32QI 1 "register_operand" "x,x")