1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V?TImode, used in move patterns.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 (V2TI "TARGET_AVX") V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
31 ;; All vector modes
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
66 (define_mode_iterator VF_256
67 [V8SF V4DF])
69 ;; All vector integer modes
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
102 ;; ??? We should probably use TImode instead.
103 (define_mode_iterator VIMAX_AVX2
104 [(V2TI "TARGET_AVX2") V1TI])
106 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
107 (define_mode_iterator SSESCALARMODE
108 [(V2TI "TARGET_AVX2") TI])
110 (define_mode_iterator VI12_AVX2
111 [(V32QI "TARGET_AVX2") V16QI
112 (V16HI "TARGET_AVX2") V8HI])
114 (define_mode_iterator VI24_AVX2
115 [(V16HI "TARGET_AVX2") V8HI
116 (V8SI "TARGET_AVX2") V4SI])
118 (define_mode_iterator VI124_AVX2
119 [(V32QI "TARGET_AVX2") V16QI
120 (V16HI "TARGET_AVX2") V8HI
121 (V8SI "TARGET_AVX2") V4SI])
123 (define_mode_iterator VI248_AVX2
124 [(V16HI "TARGET_AVX2") V8HI
125 (V8SI "TARGET_AVX2") V4SI
126 (V4DI "TARGET_AVX2") V2DI])
128 (define_mode_iterator VI48_AVX2
129 [(V8SI "TARGET_AVX2") V4SI
130 (V4DI "TARGET_AVX2") V2DI])
132 (define_mode_iterator V48_AVX2
133 [V4SF V2DF
134 V8SF V4DF
135 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
136 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
138 (define_mode_attr sse2_avx2
139 [(V16QI "sse2") (V32QI "avx2")
140 (V8HI "sse2") (V16HI "avx2")
141 (V4SI "sse2") (V8SI "avx2")
142 (V2DI "sse2") (V4DI "avx2")
143 (V1TI "sse2") (V2TI "avx2")])
145 (define_mode_attr ssse3_avx2
146 [(V16QI "ssse3") (V32QI "avx2")
147 (V8HI "ssse3") (V16HI "avx2")
148 (V4SI "ssse3") (V8SI "avx2")
149 (V2DI "ssse3") (V4DI "avx2")
150 (TI "ssse3") (V2TI "avx2")])
152 (define_mode_attr sse4_1_avx2
153 [(V16QI "sse4_1") (V32QI "avx2")
154 (V8HI "sse4_1") (V16HI "avx2")
155 (V4SI "sse4_1") (V8SI "avx2")
156 (V2DI "sse4_1") (V4DI "avx2")])
158 (define_mode_attr avx_avx2
159 [(V4SF "avx") (V2DF "avx")
160 (V8SF "avx") (V4DF "avx")
161 (V4SI "avx2") (V2DI "avx2")
162 (V8SI "avx2") (V4DI "avx2")])
164 (define_mode_attr vec_avx2
165 [(V16QI "vec") (V32QI "avx2")
166 (V8HI "vec") (V16HI "avx2")
167 (V4SI "vec") (V8SI "avx2")
168 (V2DI "vec") (V4DI "avx2")])
170 ;; Mapping of logic-shift operators
171 (define_code_iterator any_lshift [ashift lshiftrt])
173 (define_mode_attr ssedoublemode
174 [(V16HI "V16SI") (V8HI "V8SI")])
176 (define_mode_attr ssebytemode
177 [(V4DI "V32QI") (V2DI "V16QI")])
179 ;; All 128bit vector integer modes
180 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
182 ;; All 256bit vector integer modes
183 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
185 ;; Random 128bit vector integer mode combinations
186 (define_mode_iterator VI12_128 [V16QI V8HI])
187 (define_mode_iterator VI14_128 [V16QI V4SI])
188 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
189 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
190 (define_mode_iterator VI24_128 [V8HI V4SI])
191 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
192 (define_mode_iterator VI48_128 [V4SI V2DI])
194 ;; Random 256bit vector integer mode combinations
195 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
196 (define_mode_iterator VI48_256 [V8SI V4DI])
198 ;; Int-float size matches
199 (define_mode_iterator VI4F_128 [V4SI V4SF])
200 (define_mode_iterator VI8F_128 [V2DI V2DF])
201 (define_mode_iterator VI4F_256 [V8SI V8SF])
202 (define_mode_iterator VI8F_256 [V4DI V4DF])
204 ;; Mapping from float mode to required SSE level
205 (define_mode_attr sse
206 [(SF "sse") (DF "sse2")
207 (V4SF "sse") (V2DF "sse2")
208 (V8SF "avx") (V4DF "avx")])
210 (define_mode_attr sse2
211 [(V16QI "sse2") (V32QI "avx")
212 (V2DI "sse2") (V4DI "avx")])
214 (define_mode_attr sse3
215 [(V16QI "sse3") (V32QI "avx")])
217 (define_mode_attr sse4_1
218 [(V4SF "sse4_1") (V2DF "sse4_1")
219 (V8SF "avx") (V4DF "avx")])
221 (define_mode_attr avxsizesuffix
222 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
223 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
224 (V8SF "256") (V4DF "256")
225 (V4SF "") (V2DF "")])
227 ;; SSE instruction mode
228 (define_mode_attr sseinsnmode
229 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
230 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
231 (V8SF "V8SF") (V4DF "V4DF")
232 (V4SF "V4SF") (V2DF "V2DF")
233 (TI "TI")])
235 ;; Mapping of vector float modes to an integer mode of the same size
236 (define_mode_attr sseintvecmode
237 [(V8SF "V8SI") (V4DF "V4DI")
238 (V4SF "V4SI") (V2DF "V2DI")
239 (V4DF "V4DI") (V8SF "V8SI")
240 (V8SI "V8SI") (V4DI "V4DI")
241 (V4SI "V4SI") (V2DI "V2DI")
242 (V16HI "V16HI") (V8HI "V8HI")
243 (V32QI "V32QI") (V16QI "V16QI")])
245 ;; Mapping of vector modes to a vector mode of double size
246 (define_mode_attr ssedoublevecmode
247 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
248 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
249 (V8SF "V16SF") (V4DF "V8DF")
250 (V4SF "V8SF") (V2DF "V4DF")])
252 ;; Mapping of vector modes to a vector mode of half size
253 (define_mode_attr ssehalfvecmode
254 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
255 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
256 (V8SF "V4SF") (V4DF "V2DF")
257 (V4SF "V2SF")])
259 ;; Mapping of vector modes back to the scalar modes
260 (define_mode_attr ssescalarmode
261 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
262 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
263 (V8SF "SF") (V4DF "DF")
264 (V4SF "SF") (V2DF "DF")])
266 ;; Number of scalar elements in each vector type
267 (define_mode_attr ssescalarnum
268 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
269 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
270 (V8SF "8") (V4DF "4")
271 (V4SF "4") (V2DF "2")])
273 ;; SSE prefix for integer vector modes
274 (define_mode_attr sseintprefix
275 [(V2DI "p") (V2DF "")
276 (V4DI "p") (V4DF "")
277 (V4SI "p") (V4SF "")
278 (V8SI "p") (V8SF "")])
280 ;; SSE scalar suffix for vector modes
281 (define_mode_attr ssescalarmodesuffix
282 [(SF "ss") (DF "sd")
283 (V8SF "ss") (V4DF "sd")
284 (V4SF "ss") (V2DF "sd")
285 (V8SI "ss") (V4DI "sd")
288 ;; Pack/unpack vector modes
289 (define_mode_attr sseunpackmode
290 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
291 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
293 (define_mode_attr ssepackmode
294 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
295 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
297 ;; Mapping of the max integer size for xop rotate immediate constraint
298 (define_mode_attr sserotatemax
299 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
301 ;; Mapping of mode to cast intrinsic name
302 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
304 ;; Instruction suffix for sign and zero extensions.
305 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
307 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
308 (define_mode_attr i128
309 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
310 (V8SI "%~128") (V4DI "%~128")])
313 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
315 (define_mode_iterator AVXMODE48P_DI
316 [V2DI V2DF V4DI V4DF V4SF V4SI])
317 (define_mode_attr AVXMODE48P_DI
318 [(V2DI "V2DI") (V2DF "V2DI")
319 (V4DI "V4DI") (V4DF "V4DI")
320 (V4SI "V2DI") (V4SF "V2DI")
321 (V8SI "V4DI") (V8SF "V4DI")])
323 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
325 ;; Mapping of immediate bits for blend instructions
326 (define_mode_attr blendbits
327 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
329 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
331 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
335 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
337 ;; All of these patterns are enabled for SSE1 as well as SSE2.
338 ;; This is essential for maintaining stable calling conventions.
340 (define_expand "mov<mode>"
341 [(set (match_operand:V16 0 "nonimmediate_operand" "")
342 (match_operand:V16 1 "nonimmediate_operand" ""))]
345 ix86_expand_vector_move (<MODE>mode, operands);
349 (define_insn "*mov<mode>_internal"
350 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
351 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
353 && (register_operand (operands[0], <MODE>mode)
354 || register_operand (operands[1], <MODE>mode))"
356 switch (which_alternative)
359 return standard_sse_constant_opcode (insn, operands[1]);
362 switch (get_attr_mode (insn))
367 && (misaligned_operand (operands[0], <MODE>mode)
368 || misaligned_operand (operands[1], <MODE>mode)))
369 return "vmovups\t{%1, %0|%0, %1}";
371 return "%vmovaps\t{%1, %0|%0, %1}";
376 && (misaligned_operand (operands[0], <MODE>mode)
377 || misaligned_operand (operands[1], <MODE>mode)))
378 return "vmovupd\t{%1, %0|%0, %1}";
379 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
380 return "%vmovaps\t{%1, %0|%0, %1}";
382 return "%vmovapd\t{%1, %0|%0, %1}";
387 && (misaligned_operand (operands[0], <MODE>mode)
388 || misaligned_operand (operands[1], <MODE>mode)))
389 return "vmovdqu\t{%1, %0|%0, %1}";
390 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
391 return "%vmovaps\t{%1, %0|%0, %1}";
393 return "%vmovdqa\t{%1, %0|%0, %1}";
402 [(set_attr "type" "sselog1,ssemov,ssemov")
403 (set_attr "prefix" "maybe_vex")
405 (cond [(match_test "TARGET_AVX")
406 (const_string "<sseinsnmode>")
407 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
408 (not (match_test "TARGET_SSE2")))
409 (and (eq_attr "alternative" "2")
410 (match_test "TARGET_SSE_TYPELESS_STORES")))
411 (const_string "V4SF")
412 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
413 (const_string "V4SF")
414 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
415 (const_string "V2DF")
417 (const_string "TI")))])
419 (define_insn "sse2_movq128"
420 [(set (match_operand:V2DI 0 "register_operand" "=x")
423 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
424 (parallel [(const_int 0)]))
427 "%vmovq\t{%1, %0|%0, %1}"
428 [(set_attr "type" "ssemov")
429 (set_attr "prefix" "maybe_vex")
430 (set_attr "mode" "TI")])
432 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
433 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
434 ;; from memory, we'd prefer to load the memory directly into the %xmm
435 ;; register. To facilitate this happy circumstance, this pattern won't
436 ;; split until after register allocation. If the 64-bit value didn't
437 ;; come from memory, this is the best we can do. This is much better
438 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
439 ;; register from there.
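;;
;; As an illustration only (this fragment is not part of GCC itself), ia32
;; code of the following shape can reach this pattern: the 64-bit value is
;; produced in a register pair rather than loaded from memory, and then has
;; to end up in an %xmm register:
;;
;;   typedef long long v2di __attribute__ ((vector_size (16)));
;;   v2di f (long long x) { return (v2di) { x + 1, 0 }; }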
441 (define_insn_and_split "movdi_to_sse"
443 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
444 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
445 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
446 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
448 "&& reload_completed"
451 if (register_operand (operands[1], DImode))
453 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
454 Assemble the 64-bit DImode value in an xmm register. */
455 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
456 gen_rtx_SUBREG (SImode, operands[1], 0)));
457 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
458 gen_rtx_SUBREG (SImode, operands[1], 4)));
459 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
462 else if (memory_operand (operands[1], DImode))
463 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
464 operands[1], const0_rtx));
470 [(set (match_operand:V4SF 0 "register_operand" "")
471 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
472 "TARGET_SSE && reload_completed"
475 (vec_duplicate:V4SF (match_dup 1))
479 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
480 operands[2] = CONST0_RTX (V4SFmode);
484 [(set (match_operand:V2DF 0 "register_operand" "")
485 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
486 "TARGET_SSE2 && reload_completed"
487 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
489 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
490 operands[2] = CONST0_RTX (DFmode);
493 (define_expand "push<mode>1"
494 [(match_operand:V16 0 "register_operand" "")]
497 ix86_expand_push (<MODE>mode, operands[0]);
501 (define_expand "movmisalign<mode>"
502 [(set (match_operand:V16 0 "nonimmediate_operand" "")
503 (match_operand:V16 1 "nonimmediate_operand" ""))]
506 ix86_expand_vector_move_misalign (<MODE>mode, operands);
510 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
511 [(set (match_operand:VF 0 "nonimmediate_operand" "")
513 [(match_operand:VF 1 "nonimmediate_operand" "")]
517 if (MEM_P (operands[0]) && MEM_P (operands[1]))
518 operands[1] = force_reg (<MODE>mode, operands[1]);
521 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
522 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
524 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
526 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
527 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
528 [(set_attr "type" "ssemov")
529 (set_attr "movu" "1")
530 (set_attr "prefix" "maybe_vex")
531 (set_attr "mode" "<MODE>")])
533 (define_expand "<sse2>_movdqu<avxsizesuffix>"
534 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
535 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
539 if (MEM_P (operands[0]) && MEM_P (operands[1]))
540 operands[1] = force_reg (<MODE>mode, operands[1]);
543 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
544 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
545 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
547 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
548 "%vmovdqu\t{%1, %0|%0, %1}"
549 [(set_attr "type" "ssemov")
550 (set_attr "movu" "1")
551 (set (attr "prefix_data16")
553 (match_test "TARGET_AVX")
556 (set_attr "prefix" "maybe_vex")
557 (set_attr "mode" "<sseinsnmode>")])
559 (define_insn "<sse3>_lddqu<avxsizesuffix>"
560 [(set (match_operand:VI1 0 "register_operand" "=x")
561 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
564 "%vlddqu\t{%1, %0|%0, %1}"
565 [(set_attr "type" "ssemov")
566 (set_attr "movu" "1")
567 (set (attr "prefix_data16")
569 (match_test "TARGET_AVX")
572 (set (attr "prefix_rep")
574 (match_test "TARGET_AVX")
577 (set_attr "prefix" "maybe_vex")
578 (set_attr "mode" "<sseinsnmode>")])
580 (define_insn "sse2_movntsi"
581 [(set (match_operand:SI 0 "memory_operand" "=m")
582 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
585 "movnti\t{%1, %0|%0, %1}"
586 [(set_attr "type" "ssemov")
587 (set_attr "prefix_data16" "0")
588 (set_attr "mode" "V2DF")])
590 (define_insn "<sse>_movnt<mode>"
591 [(set (match_operand:VF 0 "memory_operand" "=m")
592 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
595 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
596 [(set_attr "type" "ssemov")
597 (set_attr "prefix" "maybe_vex")
598 (set_attr "mode" "<MODE>")])
600 (define_insn "<sse2>_movnt<mode>"
601 [(set (match_operand:VI8 0 "memory_operand" "=m")
602 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
605 "%vmovntdq\t{%1, %0|%0, %1}"
606 [(set_attr "type" "ssecvt")
607 (set (attr "prefix_data16")
609 (match_test "TARGET_AVX")
612 (set_attr "prefix" "maybe_vex")
613 (set_attr "mode" "<sseinsnmode>")])
615 ; Expand patterns for non-temporal stores. At the moment, only those
616 ; that directly map to insns are defined; it would be possible to
617 ; define patterns for other modes that would expand to several insns.
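;;
;; For reference (an illustration, not part of this file), the V4SF storent
;; case corresponds to the movntps instruction, reachable from C through
;; the _mm_stream_ps intrinsic:
;;
;;   #include <xmmintrin.h>
;;   void store_nt (float *p, __m128 v) { _mm_stream_ps (p, v); }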
619 ;; Modes handled by storent patterns.
620 (define_mode_iterator STORENT_MODE
621 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
623 (V8SF "TARGET_AVX") V4SF
624 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
626 (define_expand "storent<mode>"
627 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
629 [(match_operand:STORENT_MODE 1 "register_operand" "")]
633 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
635 ;; Parallel floating point arithmetic
637 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
639 (define_expand "<code><mode>2"
640 [(set (match_operand:VF 0 "register_operand" "")
642 (match_operand:VF 1 "register_operand" "")))]
644 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
646 (define_insn_and_split "*absneg<mode>2"
647 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
648 (match_operator:VF 3 "absneg_operator"
649 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
650 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
653 "&& reload_completed"
656 enum rtx_code absneg_op;
662 if (MEM_P (operands[1]))
663 op1 = operands[2], op2 = operands[1];
665 op1 = operands[1], op2 = operands[2];
670 if (rtx_equal_p (operands[0], operands[1]))
676 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
677 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
678 t = gen_rtx_SET (VOIDmode, operands[0], t);
682 [(set_attr "isa" "noavx,noavx,avx,avx")])
684 (define_expand "<plusminus_insn><mode>3"
685 [(set (match_operand:VF 0 "register_operand" "")
687 (match_operand:VF 1 "nonimmediate_operand" "")
688 (match_operand:VF 2 "nonimmediate_operand" "")))]
690 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
692 (define_insn "*<plusminus_insn><mode>3"
693 [(set (match_operand:VF 0 "register_operand" "=x,x")
695 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
696 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
697 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
699 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
700 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
701 [(set_attr "isa" "noavx,avx")
702 (set_attr "type" "sseadd")
703 (set_attr "prefix" "orig,vex")
704 (set_attr "mode" "<MODE>")])
706 (define_insn "<sse>_vm<plusminus_insn><mode>3"
707 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
710 (match_operand:VF_128 1 "register_operand" "0,x")
711 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
716 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
717 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
718 [(set_attr "isa" "noavx,avx")
719 (set_attr "type" "sseadd")
720 (set_attr "prefix" "orig,vex")
721 (set_attr "mode" "<ssescalarmode>")])
723 (define_expand "mul<mode>3"
724 [(set (match_operand:VF 0 "register_operand" "")
726 (match_operand:VF 1 "nonimmediate_operand" "")
727 (match_operand:VF 2 "nonimmediate_operand" "")))]
729 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
731 (define_insn "*mul<mode>3"
732 [(set (match_operand:VF 0 "register_operand" "=x,x")
734 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
735 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
736 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
738 mul<ssemodesuffix>\t{%2, %0|%0, %2}
739 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
740 [(set_attr "isa" "noavx,avx")
741 (set_attr "type" "ssemul")
742 (set_attr "prefix" "orig,vex")
743 (set_attr "mode" "<MODE>")])
745 (define_insn "<sse>_vmmul<mode>3"
746 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
749 (match_operand:VF_128 1 "register_operand" "0,x")
750 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
755 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
756 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
757 [(set_attr "isa" "noavx,avx")
758 (set_attr "type" "ssemul")
759 (set_attr "prefix" "orig,vex")
760 (set_attr "mode" "<ssescalarmode>")])
762 (define_expand "div<mode>3"
763 [(set (match_operand:VF2 0 "register_operand" "")
764 (div:VF2 (match_operand:VF2 1 "register_operand" "")
765 (match_operand:VF2 2 "nonimmediate_operand" "")))]
767 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
769 (define_expand "div<mode>3"
770 [(set (match_operand:VF1 0 "register_operand" "")
771 (div:VF1 (match_operand:VF1 1 "register_operand" "")
772 (match_operand:VF1 2 "nonimmediate_operand" "")))]
775 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
778 && TARGET_RECIP_VEC_DIV
779 && !optimize_insn_for_size_p ()
780 && flag_finite_math_only && !flag_trapping_math
781 && flag_unsafe_math_optimizations)
783 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
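;;
;; Informal sketch (not taken from this file): ix86_emit_swdivsf replaces
;; the IEEE division by a reciprocal estimate plus one Newton-Raphson step,
;; roughly
;;
;;   a / b  ~=  a * e * (2.0 - b * e),  where e = rcpps (b)
;;
;; which is why the transformation requires flag_unsafe_math_optimizations.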
788 (define_insn "<sse>_div<mode>3"
789 [(set (match_operand:VF 0 "register_operand" "=x,x")
791 (match_operand:VF 1 "register_operand" "0,x")
792 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
795 div<ssemodesuffix>\t{%2, %0|%0, %2}
796 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
797 [(set_attr "isa" "noavx,avx")
798 (set_attr "type" "ssediv")
799 (set_attr "prefix" "orig,vex")
800 (set_attr "mode" "<MODE>")])
802 (define_insn "<sse>_vmdiv<mode>3"
803 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
806 (match_operand:VF_128 1 "register_operand" "0,x")
807 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
812 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
813 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
814 [(set_attr "isa" "noavx,avx")
815 (set_attr "type" "ssediv")
816 (set_attr "prefix" "orig,vex")
817 (set_attr "mode" "<ssescalarmode>")])
819 (define_insn "<sse>_rcp<mode>2"
820 [(set (match_operand:VF1 0 "register_operand" "=x")
822 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
824 "%vrcpps\t{%1, %0|%0, %1}"
825 [(set_attr "type" "sse")
826 (set_attr "atom_sse_attr" "rcp")
827 (set_attr "prefix" "maybe_vex")
828 (set_attr "mode" "<MODE>")])
830 (define_insn "sse_vmrcpv4sf2"
831 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
833 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
835 (match_operand:V4SF 2 "register_operand" "0,x")
839 rcpss\t{%1, %0|%0, %1}
840 vrcpss\t{%1, %2, %0|%0, %2, %1}"
841 [(set_attr "isa" "noavx,avx")
842 (set_attr "type" "sse")
843 (set_attr "atom_sse_attr" "rcp")
844 (set_attr "prefix" "orig,vex")
845 (set_attr "mode" "SF")])
847 (define_expand "sqrt<mode>2"
848 [(set (match_operand:VF2 0 "register_operand" "")
849 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
852 (define_expand "sqrt<mode>2"
853 [(set (match_operand:VF1 0 "register_operand" "")
854 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
858 && TARGET_RECIP_VEC_SQRT
859 && !optimize_insn_for_size_p ()
860 && flag_finite_math_only && !flag_trapping_math
861 && flag_unsafe_math_optimizations)
863 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
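;;
;; Informal sketch (not taken from this file): the software square root
;; similarly starts from the rsqrtps estimate and applies one
;; Newton-Raphson refinement, roughly
;;
;;   sqrt (a)  ~=  a * e * (1.5 - 0.5 * a * e * e),  where e = rsqrtps (a)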
868 (define_insn "<sse>_sqrt<mode>2"
869 [(set (match_operand:VF 0 "register_operand" "=x")
870 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
872 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
873 [(set_attr "type" "sse")
874 (set_attr "atom_sse_attr" "sqrt")
875 (set_attr "prefix" "maybe_vex")
876 (set_attr "mode" "<MODE>")])
878 (define_insn "<sse>_vmsqrt<mode>2"
879 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
882 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
883 (match_operand:VF_128 2 "register_operand" "0,x")
887 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
888 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
889 [(set_attr "isa" "noavx,avx")
890 (set_attr "type" "sse")
891 (set_attr "atom_sse_attr" "sqrt")
892 (set_attr "prefix" "orig,vex")
893 (set_attr "mode" "<ssescalarmode>")])
895 (define_expand "rsqrt<mode>2"
896 [(set (match_operand:VF1 0 "register_operand" "")
898 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
901 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
905 (define_insn "<sse>_rsqrt<mode>2"
906 [(set (match_operand:VF1 0 "register_operand" "=x")
908 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
910 "%vrsqrtps\t{%1, %0|%0, %1}"
911 [(set_attr "type" "sse")
912 (set_attr "prefix" "maybe_vex")
913 (set_attr "mode" "<MODE>")])
915 (define_insn "sse_vmrsqrtv4sf2"
916 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
918 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
920 (match_operand:V4SF 2 "register_operand" "0,x")
924 rsqrtss\t{%1, %0|%0, %1}
925 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
926 [(set_attr "isa" "noavx,avx")
927 (set_attr "type" "sse")
928 (set_attr "prefix" "orig,vex")
929 (set_attr "mode" "SF")])
931 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
932 ;; isn't really correct, as those rtl operators aren't defined when
933 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
935 (define_expand "<code><mode>3"
936 [(set (match_operand:VF 0 "register_operand" "")
938 (match_operand:VF 1 "nonimmediate_operand" "")
939 (match_operand:VF 2 "nonimmediate_operand" "")))]
942 if (!flag_finite_math_only)
943 operands[1] = force_reg (<MODE>mode, operands[1]);
944 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
947 (define_insn "*<code><mode>3_finite"
948 [(set (match_operand:VF 0 "register_operand" "=x,x")
950 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
951 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
952 "TARGET_SSE && flag_finite_math_only
953 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
955 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
956 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
957 [(set_attr "isa" "noavx,avx")
958 (set_attr "type" "sseadd")
959 (set_attr "prefix" "orig,vex")
960 (set_attr "mode" "<MODE>")])
962 (define_insn "*<code><mode>3"
963 [(set (match_operand:VF 0 "register_operand" "=x,x")
965 (match_operand:VF 1 "register_operand" "0,x")
966 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
967 "TARGET_SSE && !flag_finite_math_only"
969 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
970 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
971 [(set_attr "isa" "noavx,avx")
972 (set_attr "type" "sseadd")
973 (set_attr "prefix" "orig,vex")
974 (set_attr "mode" "<MODE>")])
976 (define_insn "<sse>_vm<code><mode>3"
977 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
980 (match_operand:VF_128 1 "register_operand" "0,x")
981 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
986 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
987 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
988 [(set_attr "isa" "noavx,avx")
989 (set_attr "type" "sse")
990 (set_attr "prefix" "orig,vex")
991 (set_attr "mode" "<ssescalarmode>")])
993 ;; These versions of the min/max patterns implement exactly the operations
994 ;; min = (op1 < op2 ? op1 : op2)
995 ;; max = (!(op1 < op2) ? op1 : op2)
996 ;; Their operands are not commutative, and thus they may be used in the
997 ;; presence of -0.0 and NaN.
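;;
;; For example (illustrative C, not part of this file), MINPS on each
;; element behaves like
;;
;;   float sse_min (float a, float b) { return a < b ? a : b; }
;;
;; so whenever either operand is a NaN the second operand is returned:
;; sse_min (NAN, 1.0f) is 1.0f while sse_min (1.0f, NAN) is NaN, which is
;; why the operand order must be preserved.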
999 (define_insn "*ieee_smin<mode>3"
1000 [(set (match_operand:VF 0 "register_operand" "=x,x")
1002 [(match_operand:VF 1 "register_operand" "0,x")
1003 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1007 min<ssemodesuffix>\t{%2, %0|%0, %2}
1008 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1009 [(set_attr "isa" "noavx,avx")
1010 (set_attr "type" "sseadd")
1011 (set_attr "prefix" "orig,vex")
1012 (set_attr "mode" "<MODE>")])
1014 (define_insn "*ieee_smax<mode>3"
1015 [(set (match_operand:VF 0 "register_operand" "=x,x")
1017 [(match_operand:VF 1 "register_operand" "0,x")
1018 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1022 max<ssemodesuffix>\t{%2, %0|%0, %2}
1023 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1024 [(set_attr "isa" "noavx,avx")
1025 (set_attr "type" "sseadd")
1026 (set_attr "prefix" "orig,vex")
1027 (set_attr "mode" "<MODE>")])
1029 (define_insn "avx_addsubv4df3"
1030 [(set (match_operand:V4DF 0 "register_operand" "=x")
1033 (match_operand:V4DF 1 "register_operand" "x")
1034 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1035 (minus:V4DF (match_dup 1) (match_dup 2))
1038 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1039 [(set_attr "type" "sseadd")
1040 (set_attr "prefix" "vex")
1041 (set_attr "mode" "V4DF")])
1043 (define_insn "sse3_addsubv2df3"
1044 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1047 (match_operand:V2DF 1 "register_operand" "0,x")
1048 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1049 (minus:V2DF (match_dup 1) (match_dup 2))
1053 addsubpd\t{%2, %0|%0, %2}
1054 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1055 [(set_attr "isa" "noavx,avx")
1056 (set_attr "type" "sseadd")
1057 (set_attr "atom_unit" "complex")
1058 (set_attr "prefix" "orig,vex")
1059 (set_attr "mode" "V2DF")])
1061 (define_insn "avx_addsubv8sf3"
1062 [(set (match_operand:V8SF 0 "register_operand" "=x")
1065 (match_operand:V8SF 1 "register_operand" "x")
1066 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1067 (minus:V8SF (match_dup 1) (match_dup 2))
1070 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1071 [(set_attr "type" "sseadd")
1072 (set_attr "prefix" "vex")
1073 (set_attr "mode" "V8SF")])
1075 (define_insn "sse3_addsubv4sf3"
1076 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1079 (match_operand:V4SF 1 "register_operand" "0,x")
1080 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1081 (minus:V4SF (match_dup 1) (match_dup 2))
1085 addsubps\t{%2, %0|%0, %2}
1086 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1087 [(set_attr "isa" "noavx,avx")
1088 (set_attr "type" "sseadd")
1089 (set_attr "prefix" "orig,vex")
1090 (set_attr "prefix_rep" "1,*")
1091 (set_attr "mode" "V4SF")])
1093 (define_insn "avx_h<plusminus_insn>v4df3"
1094 [(set (match_operand:V4DF 0 "register_operand" "=x")
1099 (match_operand:V4DF 1 "register_operand" "x")
1100 (parallel [(const_int 0)]))
1101 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1103 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1104 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1108 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1109 (parallel [(const_int 0)]))
1110 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1112 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1113 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1115 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1116 [(set_attr "type" "sseadd")
1117 (set_attr "prefix" "vex")
1118 (set_attr "mode" "V4DF")])
1120 (define_insn "sse3_h<plusminus_insn>v2df3"
1121 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1125 (match_operand:V2DF 1 "register_operand" "0,x")
1126 (parallel [(const_int 0)]))
1127 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1130 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1131 (parallel [(const_int 0)]))
1132 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1135 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1136 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1137 [(set_attr "isa" "noavx,avx")
1138 (set_attr "type" "sseadd")
1139 (set_attr "prefix" "orig,vex")
1140 (set_attr "mode" "V2DF")])
1142 (define_insn "avx_h<plusminus_insn>v8sf3"
1143 [(set (match_operand:V8SF 0 "register_operand" "=x")
1149 (match_operand:V8SF 1 "register_operand" "x")
1150 (parallel [(const_int 0)]))
1151 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1153 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1154 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1158 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1159 (parallel [(const_int 0)]))
1160 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1162 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1163 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1167 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1168 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1170 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1171 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1174 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1175 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1177 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1178 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1180 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1181 [(set_attr "type" "sseadd")
1182 (set_attr "prefix" "vex")
1183 (set_attr "mode" "V8SF")])
1185 (define_insn "sse3_h<plusminus_insn>v4sf3"
1186 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1191 (match_operand:V4SF 1 "register_operand" "0,x")
1192 (parallel [(const_int 0)]))
1193 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1195 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1196 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1200 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1201 (parallel [(const_int 0)]))
1202 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1204 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1205 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1208 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1209 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1210 [(set_attr "isa" "noavx,avx")
1211 (set_attr "type" "sseadd")
1212 (set_attr "atom_unit" "complex")
1213 (set_attr "prefix" "orig,vex")
1214 (set_attr "prefix_rep" "1,*")
1215 (set_attr "mode" "V4SF")])
1217 (define_expand "reduc_splus_v4df"
1218 [(match_operand:V4DF 0 "register_operand" "")
1219 (match_operand:V4DF 1 "register_operand" "")]
1222 rtx tmp = gen_reg_rtx (V4DFmode);
1223 rtx tmp2 = gen_reg_rtx (V4DFmode);
1224 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1225 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1226 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1230 (define_expand "reduc_splus_v2df"
1231 [(match_operand:V2DF 0 "register_operand" "")
1232 (match_operand:V2DF 1 "register_operand" "")]
1235 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1239 (define_expand "reduc_splus_v8sf"
1240 [(match_operand:V8SF 0 "register_operand" "")
1241 (match_operand:V8SF 1 "register_operand" "")]
1244 rtx tmp = gen_reg_rtx (V8SFmode);
1245 rtx tmp2 = gen_reg_rtx (V8SFmode);
1246 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1247 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1248 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1249 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1253 (define_expand "reduc_splus_v4sf"
1254 [(match_operand:V4SF 0 "register_operand" "")
1255 (match_operand:V4SF 1 "register_operand" "")]
1260 rtx tmp = gen_reg_rtx (V4SFmode);
1261 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1262 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1265 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1269 ;; Modes handled by reduc_sm{in,ax}* patterns.
1270 (define_mode_iterator REDUC_SMINMAX_MODE
1271 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1272 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1273 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1274 (V4SF "TARGET_SSE")])
1276 (define_expand "reduc_<code>_<mode>"
1277 [(smaxmin:REDUC_SMINMAX_MODE
1278 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1279 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1282 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1286 (define_expand "reduc_<code>_<mode>"
1288 (match_operand:VI_256 0 "register_operand" "")
1289 (match_operand:VI_256 1 "register_operand" ""))]
1292 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1296 (define_expand "reduc_umin_v8hi"
1298 (match_operand:V8HI 0 "register_operand" "")
1299 (match_operand:V8HI 1 "register_operand" ""))]
1302 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1306 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1308 ;; Parallel floating point comparisons
1310 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1312 (define_insn "avx_cmp<mode>3"
1313 [(set (match_operand:VF 0 "register_operand" "=x")
1315 [(match_operand:VF 1 "register_operand" "x")
1316 (match_operand:VF 2 "nonimmediate_operand" "xm")
1317 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1320 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1321 [(set_attr "type" "ssecmp")
1322 (set_attr "length_immediate" "1")
1323 (set_attr "prefix" "vex")
1324 (set_attr "mode" "<MODE>")])
1326 (define_insn "avx_vmcmp<mode>3"
1327 [(set (match_operand:VF_128 0 "register_operand" "=x")
1330 [(match_operand:VF_128 1 "register_operand" "x")
1331 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1332 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1337 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1338 [(set_attr "type" "ssecmp")
1339 (set_attr "length_immediate" "1")
1340 (set_attr "prefix" "vex")
1341 (set_attr "mode" "<ssescalarmode>")])
1343 (define_insn "*<sse>_maskcmp<mode>3_comm"
1344 [(set (match_operand:VF 0 "register_operand" "=x,x")
1345 (match_operator:VF 3 "sse_comparison_operator"
1346 [(match_operand:VF 1 "register_operand" "%0,x")
1347 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1349 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1351 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1352 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1353 [(set_attr "isa" "noavx,avx")
1354 (set_attr "type" "ssecmp")
1355 (set_attr "length_immediate" "1")
1356 (set_attr "prefix" "orig,vex")
1357 (set_attr "mode" "<MODE>")])
1359 (define_insn "<sse>_maskcmp<mode>3"
1360 [(set (match_operand:VF 0 "register_operand" "=x,x")
1361 (match_operator:VF 3 "sse_comparison_operator"
1362 [(match_operand:VF 1 "register_operand" "0,x")
1363 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1366 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1367 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1368 [(set_attr "isa" "noavx,avx")
1369 (set_attr "type" "ssecmp")
1370 (set_attr "length_immediate" "1")
1371 (set_attr "prefix" "orig,vex")
1372 (set_attr "mode" "<MODE>")])
1374 (define_insn "<sse>_vmmaskcmp<mode>3"
1375 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1377 (match_operator:VF_128 3 "sse_comparison_operator"
1378 [(match_operand:VF_128 1 "register_operand" "0,x")
1379 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1384 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1385 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1386 [(set_attr "isa" "noavx,avx")
1387 (set_attr "type" "ssecmp")
1388 (set_attr "length_immediate" "1,*")
1389 (set_attr "prefix" "orig,vex")
1390 (set_attr "mode" "<ssescalarmode>")])
1392 (define_insn "<sse>_comi"
1393 [(set (reg:CCFP FLAGS_REG)
1396 (match_operand:<ssevecmode> 0 "register_operand" "x")
1397 (parallel [(const_int 0)]))
1399 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1400 (parallel [(const_int 0)]))))]
1401 "SSE_FLOAT_MODE_P (<MODE>mode)"
1402 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1403 [(set_attr "type" "ssecomi")
1404 (set_attr "prefix" "maybe_vex")
1405 (set_attr "prefix_rep" "0")
1406 (set (attr "prefix_data16")
1407 (if_then_else (eq_attr "mode" "DF")
1409 (const_string "0")))
1410 (set_attr "mode" "<MODE>")])
1412 (define_insn "<sse>_ucomi"
1413 [(set (reg:CCFPU FLAGS_REG)
1416 (match_operand:<ssevecmode> 0 "register_operand" "x")
1417 (parallel [(const_int 0)]))
1419 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1420 (parallel [(const_int 0)]))))]
1421 "SSE_FLOAT_MODE_P (<MODE>mode)"
1422 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1423 [(set_attr "type" "ssecomi")
1424 (set_attr "prefix" "maybe_vex")
1425 (set_attr "prefix_rep" "0")
1426 (set (attr "prefix_data16")
1427 (if_then_else (eq_attr "mode" "DF")
1429 (const_string "0")))
1430 (set_attr "mode" "<MODE>")])
1432 (define_expand "vcond<V_256:mode><VF_256:mode>"
1433 [(set (match_operand:V_256 0 "register_operand" "")
1435 (match_operator 3 ""
1436 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1437 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1438 (match_operand:V_256 1 "general_operand" "")
1439 (match_operand:V_256 2 "general_operand" "")))]
1441 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1442 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1444 bool ok = ix86_expand_fp_vcond (operands);
1449 (define_expand "vcond<V_128:mode><VF_128:mode>"
1450 [(set (match_operand:V_128 0 "register_operand" "")
1452 (match_operator 3 ""
1453 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1454 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1455 (match_operand:V_128 1 "general_operand" "")
1456 (match_operand:V_128 2 "general_operand" "")))]
1458 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1459 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1461 bool ok = ix86_expand_fp_vcond (operands);
1466 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1468 ;; Parallel floating point logical operations
1470 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1472 (define_insn "<sse>_andnot<mode>3"
1473 [(set (match_operand:VF 0 "register_operand" "=x,x")
1476 (match_operand:VF 1 "register_operand" "0,x"))
1477 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1480 static char buf[32];
1483 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1485 switch (which_alternative)
1488 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1491 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1497 snprintf (buf, sizeof (buf), insn, suffix);
1500 [(set_attr "isa" "noavx,avx")
1501 (set_attr "type" "sselog")
1502 (set_attr "prefix" "orig,vex")
1503 (set_attr "mode" "<MODE>")])
1505 (define_expand "<code><mode>3"
1506 [(set (match_operand:VF 0 "register_operand" "")
1508 (match_operand:VF 1 "nonimmediate_operand" "")
1509 (match_operand:VF 2 "nonimmediate_operand" "")))]
1511 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1513 (define_insn "*<code><mode>3"
1514 [(set (match_operand:VF 0 "register_operand" "=x,x")
1516 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1517 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1518 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1520 static char buf[32];
1523 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1525 switch (which_alternative)
1528 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1531 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1537 snprintf (buf, sizeof (buf), insn, suffix);
1540 [(set_attr "isa" "noavx,avx")
1541 (set_attr "type" "sselog")
1542 (set_attr "prefix" "orig,vex")
1543 (set_attr "mode" "<MODE>")])
1545 (define_expand "copysign<mode>3"
1548 (not:VF (match_dup 3))
1549 (match_operand:VF 1 "nonimmediate_operand" "")))
1551 (and:VF (match_dup 3)
1552 (match_operand:VF 2 "nonimmediate_operand" "")))
1553 (set (match_operand:VF 0 "register_operand" "")
1554 (ior:VF (match_dup 4) (match_dup 5)))]
1557 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1559 operands[4] = gen_reg_rtx (<MODE>mode);
1560 operands[5] = gen_reg_rtx (<MODE>mode);
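;;
;; The expansion above is the usual sign-transfer bit trick; as scalar C it
;; is roughly (illustration only, not part of this file):
;;
;;   float copysignf_bits (float mag, float sgn)
;;   {
;;     unsigned int m, s, mask = 0x80000000u;   /* sign bit of an SFmode value */
;;     __builtin_memcpy (&m, &mag, sizeof m);
;;     __builtin_memcpy (&s, &sgn, sizeof s);
;;     m = (m & ~mask) | (s & mask);            /* magnitude of MAG, sign of SGN */
;;     __builtin_memcpy (&mag, &m, sizeof m);
;;     return mag;
;;   }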
1563 ;; Also define scalar versions. These are used for abs, neg, and
1564 ;; conditional move. Using subregs into vector modes causes register
1565 ;; allocation lossage. These patterns do not allow memory operands
1566 ;; because the native instructions read the full 128-bits.
1568 (define_insn "*andnot<mode>3"
1569 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1572 (match_operand:MODEF 1 "register_operand" "0,x"))
1573 (match_operand:MODEF 2 "register_operand" "x,x")))]
1574 "SSE_FLOAT_MODE_P (<MODE>mode)"
1576 static char buf[32];
1579 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1581 switch (which_alternative)
1584 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1587 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1593 snprintf (buf, sizeof (buf), insn, suffix);
1596 [(set_attr "isa" "noavx,avx")
1597 (set_attr "type" "sselog")
1598 (set_attr "prefix" "orig,vex")
1599 (set_attr "mode" "<ssevecmode>")])
1601 (define_insn "*<code><mode>3"
1602 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1604 (match_operand:MODEF 1 "register_operand" "%0,x")
1605 (match_operand:MODEF 2 "register_operand" "x,x")))]
1606 "SSE_FLOAT_MODE_P (<MODE>mode)"
1608 static char buf[32];
1611 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1613 switch (which_alternative)
1616 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1619 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1625 snprintf (buf, sizeof (buf), insn, suffix);
1628 [(set_attr "isa" "noavx,avx")
1629 (set_attr "type" "sselog")
1630 (set_attr "prefix" "orig,vex")
1631 (set_attr "mode" "<ssevecmode>")])
1633 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1635 ;; FMA4 floating point multiply/accumulate instructions. This
1636 ;; includes the scalar version of the instructions as well as the
1637 ;; vector.
1639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1641 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1642 ;; combine to generate a multiply/add with two memory references. We then
1643 ;; split this insn into loading up the destination register with one of the
1644 ;; memory operations. If we don't manage to split the insn, reload will
1645 ;; generate the appropriate moves. The reason this is needed is that combine
1646 ;; has already folded one of the memory references into both the multiply and
1647 ;; add insns, and it can't generate a new pseudo. I.e.:
1648 ;; (set (reg1) (mem (addr1)))
1649 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1650 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1652 ;; ??? This is historic, pre-dating the gimple fma transformation.
1653 ;; We could now properly represent that only one memory operand is
1654 ;; allowed and not be penalized during optimization.
1656 ;; Intrinsic FMA operations.
1658 ;; The standard names for fma are only available with SSE math enabled.
1659 (define_expand "fma<mode>4"
1660 [(set (match_operand:FMAMODE 0 "register_operand")
1662 (match_operand:FMAMODE 1 "nonimmediate_operand")
1663 (match_operand:FMAMODE 2 "nonimmediate_operand")
1664 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1665 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1667 (define_expand "fms<mode>4"
1668 [(set (match_operand:FMAMODE 0 "register_operand")
1670 (match_operand:FMAMODE 1 "nonimmediate_operand")
1671 (match_operand:FMAMODE 2 "nonimmediate_operand")
1672 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1673 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1675 (define_expand "fnma<mode>4"
1676 [(set (match_operand:FMAMODE 0 "register_operand")
1678 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1679 (match_operand:FMAMODE 2 "nonimmediate_operand")
1680 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1681 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1683 (define_expand "fnms<mode>4"
1684 [(set (match_operand:FMAMODE 0 "register_operand")
1686 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1687 (match_operand:FMAMODE 2 "nonimmediate_operand")
1688 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1689 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1691 ;; The builtin for fma4intrin.h is not constrained by SSE math being enabled.
1692 (define_expand "fma4i_fmadd_<mode>"
1693 [(set (match_operand:FMAMODE 0 "register_operand")
1695 (match_operand:FMAMODE 1 "nonimmediate_operand")
1696 (match_operand:FMAMODE 2 "nonimmediate_operand")
1697 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1698 "TARGET_FMA || TARGET_FMA4")
1700 (define_insn "*fma4i_fmadd_<mode>"
1701 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1703 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1704 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1705 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1707 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1708 [(set_attr "type" "ssemuladd")
1709 (set_attr "mode" "<MODE>")])
1711 (define_insn "*fma4i_fmsub_<mode>"
1712 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1714 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1715 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1717 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1719 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1720 [(set_attr "type" "ssemuladd")
1721 (set_attr "mode" "<MODE>")])
1723 (define_insn "*fma4i_fnmadd_<mode>"
1724 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1727 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1728 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1729 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1731 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1732 [(set_attr "type" "ssemuladd")
1733 (set_attr "mode" "<MODE>")])
1735 (define_insn "*fma4i_fnmsub_<mode>"
1736 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1739 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1740 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1742 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1744 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1745 [(set_attr "type" "ssemuladd")
1746 (set_attr "mode" "<MODE>")])
1748 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1749 ;; entire destination register, with the high-order elements zeroed.
1751 (define_expand "fma4i_vmfmadd_<mode>"
1752 [(set (match_operand:VF_128 0 "register_operand")
1755 (match_operand:VF_128 1 "nonimmediate_operand")
1756 (match_operand:VF_128 2 "nonimmediate_operand")
1757 (match_operand:VF_128 3 "nonimmediate_operand"))
1762 operands[4] = CONST0_RTX (<MODE>mode);
1765 (define_expand "fmai_vmfmadd_<mode>"
1766 [(set (match_operand:VF_128 0 "register_operand")
1769 (match_operand:VF_128 1 "nonimmediate_operand")
1770 (match_operand:VF_128 2 "nonimmediate_operand")
1771 (match_operand:VF_128 3 "nonimmediate_operand"))
1776 (define_insn "*fmai_fmadd_<mode>"
1777 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1780 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1781 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1782 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1787 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1788 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1789 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1790 [(set_attr "type" "ssemuladd")
1791 (set_attr "mode" "<MODE>")])
1793 (define_insn "*fmai_fmsub_<mode>"
1794 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1797 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1798 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1800 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1805 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1806 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1807 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1808 [(set_attr "type" "ssemuladd")
1809 (set_attr "mode" "<MODE>")])
1811 (define_insn "*fmai_fnmadd_<mode>"
1812 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1816 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1817 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1818 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1823 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1824 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1825 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1826 [(set_attr "type" "ssemuladd")
1827 (set_attr "mode" "<MODE>")])
1829 (define_insn "*fmai_fnmsub_<mode>"
1830 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1834 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1835 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1837 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1842 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1843 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1844 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1845 [(set_attr "type" "ssemuladd")
1846 (set_attr "mode" "<MODE>")])
1848 (define_insn "*fma4i_vmfmadd_<mode>"
1849 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1852 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1853 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1854 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1855 (match_operand:VF_128 4 "const0_operand" "")
1858 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1859 [(set_attr "type" "ssemuladd")
1860 (set_attr "mode" "<MODE>")])
1862 (define_insn "*fma4i_vmfmsub_<mode>"
1863 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1866 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1867 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1869 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1870 (match_operand:VF_128 4 "const0_operand" "")
1873 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1874 [(set_attr "type" "ssemuladd")
1875 (set_attr "mode" "<MODE>")])
1877 (define_insn "*fma4i_vmfnmadd_<mode>"
1878 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1882 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1883 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1884 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1885 (match_operand:VF_128 4 "const0_operand" "")
1888 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1889 [(set_attr "type" "ssemuladd")
1890 (set_attr "mode" "<MODE>")])
1892 (define_insn "*fma4i_vmfnmsub_<mode>"
1893 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1897 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1898 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1900 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1901 (match_operand:VF_128 4 "const0_operand" "")
1904 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1905 [(set_attr "type" "ssemuladd")
1906 (set_attr "mode" "<MODE>")])
1908 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1910 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1912 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1914 ;; It would be possible to represent these without the UNSPEC as
1916 ;; (vec_merge
1917 ;;   (fma op1 op2 op3)
1918 ;;   (fma op1 op2 (neg op3))
1919 ;;   (merge-const))
1921 ;; But this doesn't seem useful in practice.
1923 (define_expand "fmaddsub_<mode>"
1924 [(set (match_operand:VF 0 "register_operand")
1926 [(match_operand:VF 1 "nonimmediate_operand")
1927 (match_operand:VF 2 "nonimmediate_operand")
1928 (match_operand:VF 3 "nonimmediate_operand")]
1930 "TARGET_FMA || TARGET_FMA4")
1932 (define_insn "*fma4_fmaddsub_<mode>"
1933 [(set (match_operand:VF 0 "register_operand" "=x,x")
1935 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1936 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1937 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1940 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1941 [(set_attr "type" "ssemuladd")
1942 (set_attr "mode" "<MODE>")])
1944 (define_insn "*fma4_fmsubadd_<mode>"
1945 [(set (match_operand:VF 0 "register_operand" "=x,x")
1947 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1948 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1950 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1953 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1954 [(set_attr "type" "ssemuladd")
1955 (set_attr "mode" "<MODE>")])
1957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1959 ;; FMA3 floating point multiply/accumulate instructions.
1961 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1963 (define_insn "*fma_fmadd_<mode>"
1964 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1966 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1967 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1968 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1971 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1972 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1973 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1974 [(set_attr "type" "ssemuladd")
1975 (set_attr "mode" "<MODE>")])
1977 (define_insn "*fma_fmsub_<mode>"
1978 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1980 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1981 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1983 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1986 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1987 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1988 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1989 [(set_attr "type" "ssemuladd")
1990 (set_attr "mode" "<MODE>")])
1992 (define_insn "*fma_fnmadd_<mode>"
1993 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1996 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1997 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1998 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2001 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2002 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2003 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2004 [(set_attr "type" "ssemuladd")
2005 (set_attr "mode" "<MODE>")])
2007 (define_insn "*fma_fnmsub_<mode>"
2008 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2011 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2012 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2014 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2017 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2018 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2019 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2020 [(set_attr "type" "ssemuladd")
2021 (set_attr "mode" "<MODE>")])
2023 (define_insn "*fma_fmaddsub_<mode>"
2024 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2026 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2027 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2028 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2032 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2033 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2034 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2035 [(set_attr "type" "ssemuladd")
2036 (set_attr "mode" "<MODE>")])
2038 (define_insn "*fma_fmsubadd_<mode>"
2039 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2041 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2042 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2044 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2048 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2049 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2050 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2051 [(set_attr "type" "ssemuladd")
2052 (set_attr "mode" "<MODE>")])
2054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2056 ;; Parallel single-precision floating point conversion operations
2058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2060 (define_insn "sse_cvtpi2ps"
2061 [(set (match_operand:V4SF 0 "register_operand" "=x")
2064 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2065 (match_operand:V4SF 1 "register_operand" "0")
2068 "cvtpi2ps\t{%2, %0|%0, %2}"
2069 [(set_attr "type" "ssecvt")
2070 (set_attr "mode" "V4SF")])
2072 (define_insn "sse_cvtps2pi"
2073 [(set (match_operand:V2SI 0 "register_operand" "=y")
2075 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2077 (parallel [(const_int 0) (const_int 1)])))]
2079 "cvtps2pi\t{%1, %0|%0, %1}"
2080 [(set_attr "type" "ssecvt")
2081 (set_attr "unit" "mmx")
2082 (set_attr "mode" "DI")])
2084 (define_insn "sse_cvttps2pi"
2085 [(set (match_operand:V2SI 0 "register_operand" "=y")
2087 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2088 (parallel [(const_int 0) (const_int 1)])))]
2090 "cvttps2pi\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "ssecvt")
2092 (set_attr "unit" "mmx")
2093 (set_attr "prefix_rep" "0")
2094 (set_attr "mode" "SF")])
2096 (define_insn "sse_cvtsi2ss"
2097 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2100 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2101 (match_operand:V4SF 1 "register_operand" "0,0,x")
2105 cvtsi2ss\t{%2, %0|%0, %2}
2106 cvtsi2ss\t{%2, %0|%0, %2}
2107 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2108 [(set_attr "isa" "noavx,noavx,avx")
2109 (set_attr "type" "sseicvt")
2110 (set_attr "athlon_decode" "vector,double,*")
2111 (set_attr "amdfam10_decode" "vector,double,*")
2112 (set_attr "bdver1_decode" "double,direct,*")
2113 (set_attr "prefix" "orig,orig,vex")
2114 (set_attr "mode" "SF")])
2116 (define_insn "sse_cvtsi2ssq"
2117 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2120 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2121 (match_operand:V4SF 1 "register_operand" "0,0,x")
2123 "TARGET_SSE && TARGET_64BIT"
2125 cvtsi2ssq\t{%2, %0|%0, %2}
2126 cvtsi2ssq\t{%2, %0|%0, %2}
2127 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2128 [(set_attr "isa" "noavx,noavx,avx")
2129 (set_attr "type" "sseicvt")
2130 (set_attr "athlon_decode" "vector,double,*")
2131 (set_attr "amdfam10_decode" "vector,double,*")
2132 (set_attr "bdver1_decode" "double,direct,*")
2133 (set_attr "length_vex" "*,*,4")
2134 (set_attr "prefix_rex" "1,1,*")
2135 (set_attr "prefix" "orig,orig,vex")
2136 (set_attr "mode" "SF")])
2138 (define_insn "sse_cvtss2si"
2139 [(set (match_operand:SI 0 "register_operand" "=r,r")
2142 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2143 (parallel [(const_int 0)]))]
2144 UNSPEC_FIX_NOTRUNC))]
2146 "%vcvtss2si\t{%1, %0|%0, %1}"
2147 [(set_attr "type" "sseicvt")
2148 (set_attr "athlon_decode" "double,vector")
2149 (set_attr "bdver1_decode" "double,double")
2150 (set_attr "prefix_rep" "1")
2151 (set_attr "prefix" "maybe_vex")
2152 (set_attr "mode" "SI")])
2154 (define_insn "sse_cvtss2si_2"
2155 [(set (match_operand:SI 0 "register_operand" "=r,r")
2156 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2157 UNSPEC_FIX_NOTRUNC))]
2159 "%vcvtss2si\t{%1, %0|%0, %1}"
2160 [(set_attr "type" "sseicvt")
2161 (set_attr "athlon_decode" "double,vector")
2162 (set_attr "amdfam10_decode" "double,double")
2163 (set_attr "bdver1_decode" "double,double")
2164 (set_attr "prefix_rep" "1")
2165 (set_attr "prefix" "maybe_vex")
2166 (set_attr "mode" "SI")])
2168 (define_insn "sse_cvtss2siq"
2169 [(set (match_operand:DI 0 "register_operand" "=r,r")
2172 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2173 (parallel [(const_int 0)]))]
2174 UNSPEC_FIX_NOTRUNC))]
2175 "TARGET_SSE && TARGET_64BIT"
2176 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2177 [(set_attr "type" "sseicvt")
2178 (set_attr "athlon_decode" "double,vector")
2179 (set_attr "bdver1_decode" "double,double")
2180 (set_attr "prefix_rep" "1")
2181 (set_attr "prefix" "maybe_vex")
2182 (set_attr "mode" "DI")])
2184 (define_insn "sse_cvtss2siq_2"
2185 [(set (match_operand:DI 0 "register_operand" "=r,r")
2186 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2187 UNSPEC_FIX_NOTRUNC))]
2188 "TARGET_SSE && TARGET_64BIT"
2189 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2190 [(set_attr "type" "sseicvt")
2191 (set_attr "athlon_decode" "double,vector")
2192 (set_attr "amdfam10_decode" "double,double")
2193 (set_attr "bdver1_decode" "double,double")
2194 (set_attr "prefix_rep" "1")
2195 (set_attr "prefix" "maybe_vex")
2196 (set_attr "mode" "DI")])
2198 (define_insn "sse_cvttss2si"
2199 [(set (match_operand:SI 0 "register_operand" "=r,r")
2202 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2203 (parallel [(const_int 0)]))))]
2205 "%vcvttss2si\t{%1, %0|%0, %1}"
2206 [(set_attr "type" "sseicvt")
2207 (set_attr "athlon_decode" "double,vector")
2208 (set_attr "amdfam10_decode" "double,double")
2209 (set_attr "bdver1_decode" "double,double")
2210 (set_attr "prefix_rep" "1")
2211 (set_attr "prefix" "maybe_vex")
2212 (set_attr "mode" "SI")])
2214 (define_insn "sse_cvttss2siq"
2215 [(set (match_operand:DI 0 "register_operand" "=r,r")
2218 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2219 (parallel [(const_int 0)]))))]
2220 "TARGET_SSE && TARGET_64BIT"
2221 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2222 [(set_attr "type" "sseicvt")
2223 (set_attr "athlon_decode" "double,vector")
2224 (set_attr "amdfam10_decode" "double,double")
2225 (set_attr "bdver1_decode" "double,double")
2226 (set_attr "prefix_rep" "1")
2227 (set_attr "prefix" "maybe_vex")
2228 (set_attr "mode" "DI")])
2230 (define_insn "avx_cvtdq2ps256"
2231 [(set (match_operand:V8SF 0 "register_operand" "=x")
2232 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2234 "vcvtdq2ps\t{%1, %0|%0, %1}"
2235 [(set_attr "type" "ssecvt")
2236 (set_attr "prefix" "vex")
2237 (set_attr "mode" "V8SF")])
2239 (define_insn "sse2_cvtdq2ps"
2240 [(set (match_operand:V4SF 0 "register_operand" "=x")
2241 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2243 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2244 [(set_attr "type" "ssecvt")
2245 (set_attr "prefix" "maybe_vex")
2246 (set_attr "mode" "V4SF")])
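;; There is no unsigned-conversion instruction here, so sse2_cvtudq2ps below
;; converts as signed and then adds 2^32 to every lane whose signed result
;; came out negative (i.e. whose bit 31 was set).  A minimal scalar C sketch
;; of the same idea, assuming two's complement wraparound in the cast:
;;
;;   #include <stdint.h>
;;
;;   static float
;;   u32_to_float (uint32_t u)
;;   {
;;     float f = (float) (int32_t) u;  /* signed conversion (cvtdq2ps)  */
;;     if (f < 0.0f)                   /* bit 31 was set in the input   */
;;       f += 4294967296.0f;           /* add 2^32 to correct           */
;;     return f;
;;   }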
2248 (define_expand "sse2_cvtudq2ps"
2250 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2252 (lt:V4SF (match_dup 5) (match_dup 3)))
2254 (and:V4SF (match_dup 6) (match_dup 4)))
2255 (set (match_operand:V4SF 0 "register_operand" "")
2256 (plus:V4SF (match_dup 5) (match_dup 7)))]
2259 REAL_VALUE_TYPE TWO32r;
2263 real_ldexp (&TWO32r, &dconst1, 32);
2264 x = const_double_from_real_value (TWO32r, SFmode);
2266 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2267 operands[4] = force_reg (V4SFmode,
2268 ix86_build_const_vector (V4SFmode, 1, x));
2270 for (i = 5; i < 8; i++)
2271 operands[i] = gen_reg_rtx (V4SFmode);
2274 (define_insn "avx_cvtps2dq256"
2275 [(set (match_operand:V8SI 0 "register_operand" "=x")
2276 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2277 UNSPEC_FIX_NOTRUNC))]
2279 "vcvtps2dq\t{%1, %0|%0, %1}"
2280 [(set_attr "type" "ssecvt")
2281 (set_attr "prefix" "vex")
2282 (set_attr "mode" "OI")])
2284 (define_insn "sse2_cvtps2dq"
2285 [(set (match_operand:V4SI 0 "register_operand" "=x")
2286 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2287 UNSPEC_FIX_NOTRUNC))]
2289 "%vcvtps2dq\t{%1, %0|%0, %1}"
2290 [(set_attr "type" "ssecvt")
2291 (set (attr "prefix_data16")
2293 (match_test "TARGET_AVX")
2295 (const_string "1")))
2296 (set_attr "prefix" "maybe_vex")
2297 (set_attr "mode" "TI")])
2299 (define_insn "avx_cvttps2dq256"
2300 [(set (match_operand:V8SI 0 "register_operand" "=x")
2301 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2303 "vcvttps2dq\t{%1, %0|%0, %1}"
2304 [(set_attr "type" "ssecvt")
2305 (set_attr "prefix" "vex")
2306 (set_attr "mode" "OI")])
2308 (define_insn "sse2_cvttps2dq"
2309 [(set (match_operand:V4SI 0 "register_operand" "=x")
2310 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2312 "%vcvttps2dq\t{%1, %0|%0, %1}"
2313 [(set_attr "type" "ssecvt")
2314 (set (attr "prefix_rep")
2316 (match_test "TARGET_AVX")
2318 (const_string "1")))
2319 (set (attr "prefix_data16")
2321 (match_test "TARGET_AVX")
2323 (const_string "0")))
2324 (set_attr "prefix_data16" "0")
2325 (set_attr "prefix" "maybe_vex")
2326 (set_attr "mode" "TI")])
2328 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2330 ;; Parallel double-precision floating point conversion operations
2332 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2334 (define_insn "sse2_cvtpi2pd"
2335 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2336 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2338 "cvtpi2pd\t{%1, %0|%0, %1}"
2339 [(set_attr "type" "ssecvt")
2340 (set_attr "unit" "mmx,*")
2341 (set_attr "prefix_data16" "1,*")
2342 (set_attr "mode" "V2DF")])
2344 (define_insn "sse2_cvtpd2pi"
2345 [(set (match_operand:V2SI 0 "register_operand" "=y")
2346 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2347 UNSPEC_FIX_NOTRUNC))]
2349 "cvtpd2pi\t{%1, %0|%0, %1}"
2350 [(set_attr "type" "ssecvt")
2351 (set_attr "unit" "mmx")
2352 (set_attr "bdver1_decode" "double")
2353 (set_attr "prefix_data16" "1")
2354 (set_attr "mode" "DI")])
2356 (define_insn "sse2_cvttpd2pi"
2357 [(set (match_operand:V2SI 0 "register_operand" "=y")
2358 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2360 "cvttpd2pi\t{%1, %0|%0, %1}"
2361 [(set_attr "type" "ssecvt")
2362 (set_attr "unit" "mmx")
2363 (set_attr "bdver1_decode" "double")
2364 (set_attr "prefix_data16" "1")
2365 (set_attr "mode" "TI")])
2367 (define_insn "sse2_cvtsi2sd"
2368 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2371 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2372 (match_operand:V2DF 1 "register_operand" "0,0,x")
2376 cvtsi2sd\t{%2, %0|%0, %2}
2377 cvtsi2sd\t{%2, %0|%0, %2}
2378 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2379 [(set_attr "isa" "noavx,noavx,avx")
2380 (set_attr "type" "sseicvt")
2381 (set_attr "athlon_decode" "double,direct,*")
2382 (set_attr "amdfam10_decode" "vector,double,*")
2383 (set_attr "bdver1_decode" "double,direct,*")
2384 (set_attr "prefix" "orig,orig,vex")
2385 (set_attr "mode" "DF")])
2387 (define_insn "sse2_cvtsi2sdq"
2388 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2391 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2392 (match_operand:V2DF 1 "register_operand" "0,0,x")
2394 "TARGET_SSE2 && TARGET_64BIT"
2396 cvtsi2sdq\t{%2, %0|%0, %2}
2397 cvtsi2sdq\t{%2, %0|%0, %2}
2398 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2399 [(set_attr "isa" "noavx,noavx,avx")
2400 (set_attr "type" "sseicvt")
2401 (set_attr "athlon_decode" "double,direct,*")
2402 (set_attr "amdfam10_decode" "vector,double,*")
2403 (set_attr "bdver1_decode" "double,direct,*")
2404 (set_attr "length_vex" "*,*,4")
2405 (set_attr "prefix_rex" "1,1,*")
2406 (set_attr "prefix" "orig,orig,vex")
2407 (set_attr "mode" "DF")])
2409 (define_insn "sse2_cvtsd2si"
2410 [(set (match_operand:SI 0 "register_operand" "=r,r")
2413 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2414 (parallel [(const_int 0)]))]
2415 UNSPEC_FIX_NOTRUNC))]
2417 "%vcvtsd2si\t{%1, %0|%0, %1}"
2418 [(set_attr "type" "sseicvt")
2419 (set_attr "athlon_decode" "double,vector")
2420 (set_attr "bdver1_decode" "double,double")
2421 (set_attr "prefix_rep" "1")
2422 (set_attr "prefix" "maybe_vex")
2423 (set_attr "mode" "SI")])
2425 (define_insn "sse2_cvtsd2si_2"
2426 [(set (match_operand:SI 0 "register_operand" "=r,r")
2427 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2428 UNSPEC_FIX_NOTRUNC))]
2430 "%vcvtsd2si\t{%1, %0|%0, %1}"
2431 [(set_attr "type" "sseicvt")
2432 (set_attr "athlon_decode" "double,vector")
2433 (set_attr "amdfam10_decode" "double,double")
2434 (set_attr "bdver1_decode" "double,double")
2435 (set_attr "prefix_rep" "1")
2436 (set_attr "prefix" "maybe_vex")
2437 (set_attr "mode" "SI")])
2439 (define_insn "sse2_cvtsd2siq"
2440 [(set (match_operand:DI 0 "register_operand" "=r,r")
2443 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2444 (parallel [(const_int 0)]))]
2445 UNSPEC_FIX_NOTRUNC))]
2446 "TARGET_SSE2 && TARGET_64BIT"
2447 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2448 [(set_attr "type" "sseicvt")
2449 (set_attr "athlon_decode" "double,vector")
2450 (set_attr "bdver1_decode" "double,double")
2451 (set_attr "prefix_rep" "1")
2452 (set_attr "prefix" "maybe_vex")
2453 (set_attr "mode" "DI")])
2455 (define_insn "sse2_cvtsd2siq_2"
2456 [(set (match_operand:DI 0 "register_operand" "=r,r")
2457 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2458 UNSPEC_FIX_NOTRUNC))]
2459 "TARGET_SSE2 && TARGET_64BIT"
2460 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2461 [(set_attr "type" "sseicvt")
2462 (set_attr "athlon_decode" "double,vector")
2463 (set_attr "amdfam10_decode" "double,double")
2464 (set_attr "bdver1_decode" "double,double")
2465 (set_attr "prefix_rep" "1")
2466 (set_attr "prefix" "maybe_vex")
2467 (set_attr "mode" "DI")])
2469 (define_insn "sse2_cvttsd2si"
2470 [(set (match_operand:SI 0 "register_operand" "=r,r")
2473 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2474 (parallel [(const_int 0)]))))]
2476 "%vcvttsd2si\t{%1, %0|%0, %1}"
2477 [(set_attr "type" "sseicvt")
2478 (set_attr "athlon_decode" "double,vector")
2479 (set_attr "amdfam10_decode" "double,double")
2480 (set_attr "bdver1_decode" "double,double")
2481 (set_attr "prefix_rep" "1")
2482 (set_attr "prefix" "maybe_vex")
2483 (set_attr "mode" "SI")])
2485 (define_insn "sse2_cvttsd2siq"
2486 [(set (match_operand:DI 0 "register_operand" "=r,r")
2489 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2490 (parallel [(const_int 0)]))))]
2491 "TARGET_SSE2 && TARGET_64BIT"
2492 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2493 [(set_attr "type" "sseicvt")
2494 (set_attr "athlon_decode" "double,vector")
2495 (set_attr "amdfam10_decode" "double,double")
2496 (set_attr "bdver1_decode" "double,double")
2497 (set_attr "prefix_rep" "1")
2498 (set_attr "prefix" "maybe_vex")
2499 (set_attr "mode" "DI")])
2501 (define_insn "avx_cvtdq2pd256"
2502 [(set (match_operand:V4DF 0 "register_operand" "=x")
2503 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2505 "vcvtdq2pd\t{%1, %0|%0, %1}"
2506 [(set_attr "type" "ssecvt")
2507 (set_attr "prefix" "vex")
2508 (set_attr "mode" "V4DF")])
2510 (define_insn "avx_cvtdq2pd256_2"
2511 [(set (match_operand:V4DF 0 "register_operand" "=x")
2514 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2515 (parallel [(const_int 0) (const_int 1)
2516 (const_int 2) (const_int 3)]))))]
2518 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2519 [(set_attr "type" "ssecvt")
2520 (set_attr "prefix" "vex")
2521 (set_attr "mode" "V4DF")])
2523 (define_insn "sse2_cvtdq2pd"
2524 [(set (match_operand:V2DF 0 "register_operand" "=x")
2527 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2528 (parallel [(const_int 0) (const_int 1)]))))]
2530 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2531 [(set_attr "type" "ssecvt")
2532 (set_attr "prefix" "maybe_vex")
2533 (set_attr "mode" "V2DF")])
2535 (define_insn "avx_cvtpd2dq256"
2536 [(set (match_operand:V4SI 0 "register_operand" "=x")
2537 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2538 UNSPEC_FIX_NOTRUNC))]
2540 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2541 [(set_attr "type" "ssecvt")
2542 (set_attr "prefix" "vex")
2543 (set_attr "mode" "OI")])
2545 (define_expand "avx_cvtpd2dq256_2"
2546 [(set (match_operand:V8SI 0 "register_operand" "")
2548 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2552 "operands[2] = CONST0_RTX (V4SImode);")
2554 (define_insn "*avx_cvtpd2dq256_2"
2555 [(set (match_operand:V8SI 0 "register_operand" "=x")
2557 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2559 (match_operand:V4SI 2 "const0_operand" "")))]
2561 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2562 [(set_attr "type" "ssecvt")
2563 (set_attr "prefix" "vex")
2564 (set_attr "mode" "OI")])
2566 (define_expand "sse2_cvtpd2dq"
2567 [(set (match_operand:V4SI 0 "register_operand" "")
2569 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2573 "operands[2] = CONST0_RTX (V2SImode);")
2575 (define_insn "*sse2_cvtpd2dq"
2576 [(set (match_operand:V4SI 0 "register_operand" "=x")
2578 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2580 (match_operand:V2SI 2 "const0_operand" "")))]
2584 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2586 return "cvtpd2dq\t{%1, %0|%0, %1}";
2588 [(set_attr "type" "ssecvt")
2589 (set_attr "prefix_rep" "1")
2590 (set_attr "prefix_data16" "0")
2591 (set_attr "prefix" "maybe_vex")
2592 (set_attr "mode" "TI")
2593 (set_attr "amdfam10_decode" "double")
2594 (set_attr "athlon_decode" "vector")
2595 (set_attr "bdver1_decode" "double")])
2597 (define_insn "avx_cvttpd2dq256"
2598 [(set (match_operand:V4SI 0 "register_operand" "=x")
2599 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2601 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2602 [(set_attr "type" "ssecvt")
2603 (set_attr "prefix" "vex")
2604 (set_attr "mode" "OI")])
2606 (define_expand "avx_cvttpd2dq256_2"
2607 [(set (match_operand:V8SI 0 "register_operand" "")
2609 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2612 "operands[2] = CONST0_RTX (V4SImode);")
2614 (define_insn "*avx_cvttpd2dq256_2"
2615 [(set (match_operand:V8SI 0 "register_operand" "=x")
2617 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2618 (match_operand:V4SI 2 "const0_operand" "")))]
2620 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2621 [(set_attr "type" "ssecvt")
2622 (set_attr "prefix" "vex")
2623 (set_attr "mode" "OI")])
2625 (define_expand "sse2_cvttpd2dq"
2626 [(set (match_operand:V4SI 0 "register_operand" "")
2628 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2631 "operands[2] = CONST0_RTX (V2SImode);")
2633 (define_insn "*sse2_cvttpd2dq"
2634 [(set (match_operand:V4SI 0 "register_operand" "=x")
2636 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2637 (match_operand:V2SI 2 "const0_operand" "")))]
2641 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2643 return "cvttpd2dq\t{%1, %0|%0, %1}";
2645 [(set_attr "type" "ssecvt")
2646 (set_attr "amdfam10_decode" "double")
2647 (set_attr "athlon_decode" "vector")
2648 (set_attr "bdver1_decode" "double")
2649 (set_attr "prefix" "maybe_vex")
2650 (set_attr "mode" "TI")])
2652 (define_insn "sse2_cvtsd2ss"
2653 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2656 (float_truncate:V2SF
2657 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2658 (match_operand:V4SF 1 "register_operand" "0,0,x")
2662 cvtsd2ss\t{%2, %0|%0, %2}
2663 cvtsd2ss\t{%2, %0|%0, %2}
2664 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2665 [(set_attr "isa" "noavx,noavx,avx")
2666 (set_attr "type" "ssecvt")
2667 (set_attr "athlon_decode" "vector,double,*")
2668 (set_attr "amdfam10_decode" "vector,double,*")
2669 (set_attr "bdver1_decode" "direct,direct,*")
2670 (set_attr "prefix" "orig,orig,vex")
2671 (set_attr "mode" "SF")])
2673 (define_insn "sse2_cvtss2sd"
2674 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2678 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2679 (parallel [(const_int 0) (const_int 1)])))
2680 (match_operand:V2DF 1 "register_operand" "0,0,x")
2684 cvtss2sd\t{%2, %0|%0, %2}
2685 cvtss2sd\t{%2, %0|%0, %2}
2686 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2687 [(set_attr "isa" "noavx,noavx,avx")
2688 (set_attr "type" "ssecvt")
2689 (set_attr "amdfam10_decode" "vector,double,*")
2690 (set_attr "athlon_decode" "direct,direct,*")
2691 (set_attr "bdver1_decode" "direct,direct,*")
2692 (set_attr "prefix" "orig,orig,vex")
2693 (set_attr "mode" "DF")])
2695 (define_insn "avx_cvtpd2ps256"
2696 [(set (match_operand:V4SF 0 "register_operand" "=x")
2697 (float_truncate:V4SF
2698 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2700 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2701 [(set_attr "type" "ssecvt")
2702 (set_attr "prefix" "vex")
2703 (set_attr "mode" "V4SF")])
2705 (define_expand "sse2_cvtpd2ps"
2706 [(set (match_operand:V4SF 0 "register_operand" "")
2708 (float_truncate:V2SF
2709 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2712 "operands[2] = CONST0_RTX (V2SFmode);")
2714 (define_insn "*sse2_cvtpd2ps"
2715 [(set (match_operand:V4SF 0 "register_operand" "=x")
2717 (float_truncate:V2SF
2718 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2719 (match_operand:V2SF 2 "const0_operand" "")))]
2723 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2725 return "cvtpd2ps\t{%1, %0|%0, %1}";
2727 [(set_attr "type" "ssecvt")
2728 (set_attr "amdfam10_decode" "double")
2729 (set_attr "athlon_decode" "vector")
2730 (set_attr "bdver1_decode" "double")
2731 (set_attr "prefix_data16" "1")
2732 (set_attr "prefix" "maybe_vex")
2733 (set_attr "mode" "V4SF")])
2735 (define_insn "avx_cvtps2pd256"
2736 [(set (match_operand:V4DF 0 "register_operand" "=x")
2738 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2740 "vcvtps2pd\t{%1, %0|%0, %1}"
2741 [(set_attr "type" "ssecvt")
2742 (set_attr "prefix" "vex")
2743 (set_attr "mode" "V4DF")])
2745 (define_insn "*avx_cvtps2pd256_2"
2746 [(set (match_operand:V4DF 0 "register_operand" "=x")
2749 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2750 (parallel [(const_int 0) (const_int 1)
2751 (const_int 2) (const_int 3)]))))]
2753 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2754 [(set_attr "type" "ssecvt")
2755 (set_attr "prefix" "vex")
2756 (set_attr "mode" "V4DF")])
2758 (define_insn "sse2_cvtps2pd"
2759 [(set (match_operand:V2DF 0 "register_operand" "=x")
2762 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2763 (parallel [(const_int 0) (const_int 1)]))))]
2765 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2766 [(set_attr "type" "ssecvt")
2767 (set_attr "amdfam10_decode" "direct")
2768 (set_attr "athlon_decode" "double")
2769 (set_attr "bdver1_decode" "double")
2770 (set_attr "prefix_data16" "0")
2771 (set_attr "prefix" "maybe_vex")
2772 (set_attr "mode" "V2DF")])
2774 (define_expand "vec_unpacks_hi_v4sf"
2779 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2780 (parallel [(const_int 6) (const_int 7)
2781 (const_int 2) (const_int 3)])))
2782 (set (match_operand:V2DF 0 "register_operand" "")
2786 (parallel [(const_int 0) (const_int 1)]))))]
2788 "operands[2] = gen_reg_rtx (V4SFmode);")
2790 (define_expand "vec_unpacks_hi_v8sf"
2793 (match_operand:V8SF 1 "nonimmediate_operand" "")
2794 (parallel [(const_int 4) (const_int 5)
2795 (const_int 6) (const_int 7)])))
2796 (set (match_operand:V4DF 0 "register_operand" "")
2800 "operands[2] = gen_reg_rtx (V4SFmode);")
2802 (define_expand "vec_unpacks_lo_v4sf"
2803 [(set (match_operand:V2DF 0 "register_operand" "")
2806 (match_operand:V4SF 1 "nonimmediate_operand" "")
2807 (parallel [(const_int 0) (const_int 1)]))))]
2810 (define_expand "vec_unpacks_lo_v8sf"
2811 [(set (match_operand:V4DF 0 "register_operand" "")
2814 (match_operand:V8SF 1 "nonimmediate_operand" "")
2815 (parallel [(const_int 0) (const_int 1)
2816 (const_int 2) (const_int 3)]))))]
2819 (define_mode_attr sseunpackfltmode
2820 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
2822 (define_expand "vec_unpacks_float_hi_<mode>"
2823 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2824 (match_operand:VI2_AVX2 1 "register_operand" "")]
2827 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2829 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2830 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2831 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2835 (define_expand "vec_unpacks_float_lo_<mode>"
2836 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2837 (match_operand:VI2_AVX2 1 "register_operand" "")]
2840 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2842 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2843 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2844 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2848 (define_expand "vec_unpacku_float_hi_<mode>"
2849 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2850 (match_operand:VI2_AVX2 1 "register_operand" "")]
2853 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2855 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2856 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2857 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2861 (define_expand "vec_unpacku_float_lo_<mode>"
2862 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2863 (match_operand:VI2_AVX2 1 "register_operand" "")]
2866 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2868 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2869 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2870 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2874 (define_expand "vec_unpacks_float_hi_v4si"
2877 (match_operand:V4SI 1 "nonimmediate_operand" "")
2878 (parallel [(const_int 2) (const_int 3)
2879 (const_int 2) (const_int 3)])))
2880 (set (match_operand:V2DF 0 "register_operand" "")
2884 (parallel [(const_int 0) (const_int 1)]))))]
2886 "operands[2] = gen_reg_rtx (V4SImode);")
2888 (define_expand "vec_unpacks_float_lo_v4si"
2889 [(set (match_operand:V2DF 0 "register_operand" "")
2892 (match_operand:V4SI 1 "nonimmediate_operand" "")
2893 (parallel [(const_int 0) (const_int 1)]))))]
2896 (define_expand "vec_unpacks_float_hi_v8si"
2899 (match_operand:V8SI 1 "nonimmediate_operand" "")
2900 (parallel [(const_int 4) (const_int 5)
2901 (const_int 6) (const_int 7)])))
2902 (set (match_operand:V4DF 0 "register_operand" "")
2906 "operands[2] = gen_reg_rtx (V4SImode);")
2908 (define_expand "vec_unpacks_float_lo_v8si"
2909 [(set (match_operand:V4DF 0 "register_operand" "")
2912 (match_operand:V8SI 1 "nonimmediate_operand" "")
2913 (parallel [(const_int 0) (const_int 1)
2914 (const_int 2) (const_int 3)]))))]
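;; The vec_unpacku_float_* expansions below use the same correction as
;; sse2_cvtudq2ps above: widen, convert as signed with cvtdq2pd, and add
;; 2^32 to the lanes that compare below zero.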
2917 (define_expand "vec_unpacku_float_hi_v4si"
2920 (match_operand:V4SI 1 "nonimmediate_operand" "")
2921 (parallel [(const_int 2) (const_int 3)
2922 (const_int 2) (const_int 3)])))
2927 (parallel [(const_int 0) (const_int 1)]))))
2929 (lt:V2DF (match_dup 6) (match_dup 3)))
2931 (and:V2DF (match_dup 7) (match_dup 4)))
2932 (set (match_operand:V2DF 0 "register_operand" "")
2933 (plus:V2DF (match_dup 6) (match_dup 8)))]
2936 REAL_VALUE_TYPE TWO32r;
2940 real_ldexp (&TWO32r, &dconst1, 32);
2941 x = const_double_from_real_value (TWO32r, DFmode);
2943 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2944 operands[4] = force_reg (V2DFmode,
2945 ix86_build_const_vector (V2DFmode, 1, x));
2947 operands[5] = gen_reg_rtx (V4SImode);
2949 for (i = 6; i < 9; i++)
2950 operands[i] = gen_reg_rtx (V2DFmode);
2953 (define_expand "vec_unpacku_float_lo_v4si"
2957 (match_operand:V4SI 1 "nonimmediate_operand" "")
2958 (parallel [(const_int 0) (const_int 1)]))))
2960 (lt:V2DF (match_dup 5) (match_dup 3)))
2962 (and:V2DF (match_dup 6) (match_dup 4)))
2963 (set (match_operand:V2DF 0 "register_operand" "")
2964 (plus:V2DF (match_dup 5) (match_dup 7)))]
2967 REAL_VALUE_TYPE TWO32r;
2971 real_ldexp (&TWO32r, &dconst1, 32);
2972 x = const_double_from_real_value (TWO32r, DFmode);
2974 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2975 operands[4] = force_reg (V2DFmode,
2976 ix86_build_const_vector (V2DFmode, 1, x));
2978 for (i = 5; i < 8; i++)
2979 operands[i] = gen_reg_rtx (V2DFmode);
2982 (define_expand "vec_unpacku_float_hi_v8si"
2983 [(match_operand:V4DF 0 "register_operand" "")
2984 (match_operand:V8SI 1 "register_operand" "")]
2987 REAL_VALUE_TYPE TWO32r;
2991 real_ldexp (&TWO32r, &dconst1, 32);
2992 x = const_double_from_real_value (TWO32r, DFmode);
2994 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2995 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2996 tmp[5] = gen_reg_rtx (V4SImode);
2998 for (i = 2; i < 5; i++)
2999 tmp[i] = gen_reg_rtx (V4DFmode);
3000 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
3001 emit_insn (gen_avx_cvtdq2pd256 (tmp[2], tmp[5]));
3002 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3003 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3004 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3005 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
3009 (define_expand "vec_unpacku_float_lo_v8si"
3010 [(match_operand:V4DF 0 "register_operand" "")
3011 (match_operand:V8SI 1 "nonimmediate_operand" "")]
3014 REAL_VALUE_TYPE TWO32r;
3018 real_ldexp (&TWO32r, &dconst1, 32);
3019 x = const_double_from_real_value (TWO32r, DFmode);
3021 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3022 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3024 for (i = 2; i < 5; i++)
3025 tmp[i] = gen_reg_rtx (V4DFmode);
3026 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3027 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3028 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3029 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3030 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
3034 (define_expand "vec_pack_trunc_v4df"
3036 (float_truncate:V4SF
3037 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3039 (float_truncate:V4SF
3040 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3041 (set (match_operand:V8SF 0 "register_operand" "")
3047 operands[3] = gen_reg_rtx (V4SFmode);
3048 operands[4] = gen_reg_rtx (V4SFmode);
3051 (define_expand "vec_pack_trunc_v2df"
3052 [(match_operand:V4SF 0 "register_operand" "")
3053 (match_operand:V2DF 1 "nonimmediate_operand" "")
3054 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3059 r1 = gen_reg_rtx (V4SFmode);
3060 r2 = gen_reg_rtx (V4SFmode);
3062 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3063 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
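/* Each cvtpd2ps result holds its two narrowed values in the low 64 bits
   (the upper half is zeroed), so movlhps concatenates r1 and r2.  */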
3064 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
3068 (define_expand "vec_pack_sfix_trunc_v4df"
3069 [(match_operand:V8SI 0 "register_operand" "")
3070 (match_operand:V4DF 1 "nonimmediate_operand" "")
3071 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3076 r1 = gen_reg_rtx (V8SImode);
3077 r2 = gen_reg_rtx (V8SImode);
3079 emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1]));
3080 emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2]));
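/* vperm2f128 selector 0x20 takes the low 128-bit half of r1 and the low
   half of r2, concatenating the two four-element results.  */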
3081 emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
3085 (define_expand "vec_pack_sfix_trunc_v2df"
3086 [(match_operand:V4SI 0 "register_operand" "")
3087 (match_operand:V2DF 1 "nonimmediate_operand" "")
3088 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3093 r1 = gen_reg_rtx (V4SImode);
3094 r2 = gen_reg_rtx (V4SImode);
3096 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3097 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3098 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3099 gen_lowpart (V2DImode, r1),
3100 gen_lowpart (V2DImode, r2)));
3104 (define_expand "vec_pack_sfix_v4df"
3105 [(match_operand:V8SI 0 "register_operand" "")
3106 (match_operand:V4DF 1 "nonimmediate_operand" "")
3107 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3112 r1 = gen_reg_rtx (V8SImode);
3113 r2 = gen_reg_rtx (V8SImode);
3115 emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1]));
3116 emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2]));
3117 emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
3121 (define_expand "vec_pack_sfix_v2df"
3122 [(match_operand:V4SI 0 "register_operand" "")
3123 (match_operand:V2DF 1 "nonimmediate_operand" "")
3124 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3129 r1 = gen_reg_rtx (V4SImode);
3130 r2 = gen_reg_rtx (V4SImode);
3132 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3133 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3134 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3135 gen_lowpart (V2DImode, r1),
3136 gen_lowpart (V2DImode, r2)));
3140 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3142 ;; Parallel single-precision floating point element swizzling
3144 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3146 (define_expand "sse_movhlps_exp"
3147 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3150 (match_operand:V4SF 1 "nonimmediate_operand" "")
3151 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3152 (parallel [(const_int 6)
3158 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3160 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3162 /* Fix up the destination if needed. */
3163 if (dst != operands[0])
3164 emit_move_insn (operands[0], dst);
3169 (define_insn "sse_movhlps"
3170 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3173 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3174 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3175 (parallel [(const_int 6)
3179 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3181 movhlps\t{%2, %0|%0, %2}
3182 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3183 movlps\t{%H2, %0|%0, %H2}
3184 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3185 %vmovhps\t{%2, %0|%0, %2}"
3186 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3187 (set_attr "type" "ssemov")
3188 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3189 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3191 (define_expand "sse_movlhps_exp"
3192 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3195 (match_operand:V4SF 1 "nonimmediate_operand" "")
3196 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3197 (parallel [(const_int 0)
3203 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3205 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3207 /* Fix up the destination if needed. */
3208 if (dst != operands[0])
3209 emit_move_insn (operands[0], dst);
3214 (define_insn "sse_movlhps"
3215 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3218 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3219 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3220 (parallel [(const_int 0)
3224 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3226 movlhps\t{%2, %0|%0, %2}
3227 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3228 movhps\t{%2, %0|%0, %2}
3229 vmovhps\t{%2, %1, %0|%0, %1, %2}
3230 %vmovlps\t{%2, %H0|%H0, %2}"
3231 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3232 (set_attr "type" "ssemov")
3233 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3234 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3236 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
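;; A minimal C sketch of that lane behaviour, using the AVX intrinsic
;; (illustrative only):
;;
;;   #include <immintrin.h>
;;
;;   static __m256 unpackhi256 (__m256 a, __m256 b)
;;   {
;;     /* With a = {a0..a7} and b = {b0..b7} (low element first), the
;;        result is { a2,b2,a3,b3,  a6,b6,a7,b7 }: each 128-bit lane is
;;        interleaved on its own and no element crosses the boundary.  */
;;     return _mm256_unpackhi_ps (a, b);
;;   }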
3237 (define_insn "avx_unpckhps256"
3238 [(set (match_operand:V8SF 0 "register_operand" "=x")
3241 (match_operand:V8SF 1 "register_operand" "x")
3242 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3243 (parallel [(const_int 2) (const_int 10)
3244 (const_int 3) (const_int 11)
3245 (const_int 6) (const_int 14)
3246 (const_int 7) (const_int 15)])))]
3248 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3249 [(set_attr "type" "sselog")
3250 (set_attr "prefix" "vex")
3251 (set_attr "mode" "V8SF")])
3253 (define_expand "vec_interleave_highv8sf"
3257 (match_operand:V8SF 1 "register_operand" "x")
3258 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3259 (parallel [(const_int 0) (const_int 8)
3260 (const_int 1) (const_int 9)
3261 (const_int 4) (const_int 12)
3262 (const_int 5) (const_int 13)])))
3268 (parallel [(const_int 2) (const_int 10)
3269 (const_int 3) (const_int 11)
3270 (const_int 6) (const_int 14)
3271 (const_int 7) (const_int 15)])))
3272 (set (match_operand:V8SF 0 "register_operand" "")
3277 (parallel [(const_int 4) (const_int 5)
3278 (const_int 6) (const_int 7)
3279 (const_int 12) (const_int 13)
3280 (const_int 14) (const_int 15)])))]
3283 operands[3] = gen_reg_rtx (V8SFmode);
3284 operands[4] = gen_reg_rtx (V8SFmode);
3287 (define_insn "vec_interleave_highv4sf"
3288 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3291 (match_operand:V4SF 1 "register_operand" "0,x")
3292 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3293 (parallel [(const_int 2) (const_int 6)
3294 (const_int 3) (const_int 7)])))]
3297 unpckhps\t{%2, %0|%0, %2}
3298 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3299 [(set_attr "isa" "noavx,avx")
3300 (set_attr "type" "sselog")
3301 (set_attr "prefix" "orig,vex")
3302 (set_attr "mode" "V4SF")])
3304 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3305 (define_insn "avx_unpcklps256"
3306 [(set (match_operand:V8SF 0 "register_operand" "=x")
3309 (match_operand:V8SF 1 "register_operand" "x")
3310 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3311 (parallel [(const_int 0) (const_int 8)
3312 (const_int 1) (const_int 9)
3313 (const_int 4) (const_int 12)
3314 (const_int 5) (const_int 13)])))]
3316 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3317 [(set_attr "type" "sselog")
3318 (set_attr "prefix" "vex")
3319 (set_attr "mode" "V8SF")])
3321 (define_expand "vec_interleave_lowv8sf"
3325 (match_operand:V8SF 1 "register_operand" "x")
3326 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3327 (parallel [(const_int 0) (const_int 8)
3328 (const_int 1) (const_int 9)
3329 (const_int 4) (const_int 12)
3330 (const_int 5) (const_int 13)])))
3336 (parallel [(const_int 2) (const_int 10)
3337 (const_int 3) (const_int 11)
3338 (const_int 6) (const_int 14)
3339 (const_int 7) (const_int 15)])))
3340 (set (match_operand:V8SF 0 "register_operand" "")
3345 (parallel [(const_int 0) (const_int 1)
3346 (const_int 2) (const_int 3)
3347 (const_int 8) (const_int 9)
3348 (const_int 10) (const_int 11)])))]
3351 operands[3] = gen_reg_rtx (V8SFmode);
3352 operands[4] = gen_reg_rtx (V8SFmode);
3355 (define_insn "vec_interleave_lowv4sf"
3356 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3359 (match_operand:V4SF 1 "register_operand" "0,x")
3360 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3361 (parallel [(const_int 0) (const_int 4)
3362 (const_int 1) (const_int 5)])))]
3365 unpcklps\t{%2, %0|%0, %2}
3366 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3367 [(set_attr "isa" "noavx,avx")
3368 (set_attr "type" "sselog")
3369 (set_attr "prefix" "orig,vex")
3370 (set_attr "mode" "V4SF")])
3372 ;; These are modeled with the same vec_concat as the others so that we
3373 ;; capture users of shufps that can use the new instructions
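;; For reference, movshdup duplicates the odd-numbered elements
;; ({a0,a1,a2,a3} -> {a1,a1,a3,a3}) and movsldup the even-numbered ones
;; ({a0,a1,a2,a3} -> {a0,a0,a2,a2}); the 256-bit forms do the same within
;; each 128-bit lane.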
3374 (define_insn "avx_movshdup256"
3375 [(set (match_operand:V8SF 0 "register_operand" "=x")
3378 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3380 (parallel [(const_int 1) (const_int 1)
3381 (const_int 3) (const_int 3)
3382 (const_int 5) (const_int 5)
3383 (const_int 7) (const_int 7)])))]
3385 "vmovshdup\t{%1, %0|%0, %1}"
3386 [(set_attr "type" "sse")
3387 (set_attr "prefix" "vex")
3388 (set_attr "mode" "V8SF")])
3390 (define_insn "sse3_movshdup"
3391 [(set (match_operand:V4SF 0 "register_operand" "=x")
3394 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3396 (parallel [(const_int 1)
3401 "%vmovshdup\t{%1, %0|%0, %1}"
3402 [(set_attr "type" "sse")
3403 (set_attr "prefix_rep" "1")
3404 (set_attr "prefix" "maybe_vex")
3405 (set_attr "mode" "V4SF")])
3407 (define_insn "avx_movsldup256"
3408 [(set (match_operand:V8SF 0 "register_operand" "=x")
3411 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3413 (parallel [(const_int 0) (const_int 0)
3414 (const_int 2) (const_int 2)
3415 (const_int 4) (const_int 4)
3416 (const_int 6) (const_int 6)])))]
3418 "vmovsldup\t{%1, %0|%0, %1}"
3419 [(set_attr "type" "sse")
3420 (set_attr "prefix" "vex")
3421 (set_attr "mode" "V8SF")])
3423 (define_insn "sse3_movsldup"
3424 [(set (match_operand:V4SF 0 "register_operand" "=x")
3427 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3429 (parallel [(const_int 0)
3434 "%vmovsldup\t{%1, %0|%0, %1}"
3435 [(set_attr "type" "sse")
3436 (set_attr "prefix_rep" "1")
3437 (set_attr "prefix" "maybe_vex")
3438 (set_attr "mode" "V4SF")])
3440 (define_expand "avx_shufps256"
3441 [(match_operand:V8SF 0 "register_operand" "")
3442 (match_operand:V8SF 1 "register_operand" "")
3443 (match_operand:V8SF 2 "nonimmediate_operand" "")
3444 (match_operand:SI 3 "const_int_operand" "")]
3447 int mask = INTVAL (operands[3]);
3448 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3449 GEN_INT ((mask >> 0) & 3),
3450 GEN_INT ((mask >> 2) & 3),
3451 GEN_INT (((mask >> 4) & 3) + 8),
3452 GEN_INT (((mask >> 6) & 3) + 8),
3453 GEN_INT (((mask >> 0) & 3) + 4),
3454 GEN_INT (((mask >> 2) & 3) + 4),
3455 GEN_INT (((mask >> 4) & 3) + 12),
3456 GEN_INT (((mask >> 6) & 3) + 12)));
3460 ;; One bit in mask selects 2 elements.
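;; A scalar C sketch of how the immediate is consumed (illustrative only;
;; a and b are the two sources, low element first):
;;
;;   static void
;;   shufps256 (float r[8], const float a[8], const float b[8], int imm)
;;   {
;;     r[0] = a[(imm >> 0) & 3];   /* two elements from a ...            */
;;     r[1] = a[(imm >> 2) & 3];
;;     r[2] = b[(imm >> 4) & 3];   /* ... then two from b                */
;;     r[3] = b[(imm >> 6) & 3];
;;     /* The upper lane repeats the same selection, so each two-bit
;;        field picks one element in each lane.  */
;;     r[4] = a[4 + ((imm >> 0) & 3)];
;;     r[5] = a[4 + ((imm >> 2) & 3)];
;;     r[6] = b[4 + ((imm >> 4) & 3)];
;;     r[7] = b[4 + ((imm >> 6) & 3)];
;;   }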
3461 (define_insn "avx_shufps256_1"
3462 [(set (match_operand:V8SF 0 "register_operand" "=x")
3465 (match_operand:V8SF 1 "register_operand" "x")
3466 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3467 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3468 (match_operand 4 "const_0_to_3_operand" "")
3469 (match_operand 5 "const_8_to_11_operand" "")
3470 (match_operand 6 "const_8_to_11_operand" "")
3471 (match_operand 7 "const_4_to_7_operand" "")
3472 (match_operand 8 "const_4_to_7_operand" "")
3473 (match_operand 9 "const_12_to_15_operand" "")
3474 (match_operand 10 "const_12_to_15_operand" "")])))]
3476 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3477 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3478 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3479 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3482 mask = INTVAL (operands[3]);
3483 mask |= INTVAL (operands[4]) << 2;
3484 mask |= (INTVAL (operands[5]) - 8) << 4;
3485 mask |= (INTVAL (operands[6]) - 8) << 6;
3486 operands[3] = GEN_INT (mask);
3488 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3490 [(set_attr "type" "sselog")
3491 (set_attr "length_immediate" "1")
3492 (set_attr "prefix" "vex")
3493 (set_attr "mode" "V8SF")])
3495 (define_expand "sse_shufps"
3496 [(match_operand:V4SF 0 "register_operand" "")
3497 (match_operand:V4SF 1 "register_operand" "")
3498 (match_operand:V4SF 2 "nonimmediate_operand" "")
3499 (match_operand:SI 3 "const_int_operand" "")]
3502 int mask = INTVAL (operands[3]);
3503 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3504 GEN_INT ((mask >> 0) & 3),
3505 GEN_INT ((mask >> 2) & 3),
3506 GEN_INT (((mask >> 4) & 3) + 4),
3507 GEN_INT (((mask >> 6) & 3) + 4)));
3511 (define_insn "sse_shufps_<mode>"
3512 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3513 (vec_select:VI4F_128
3514 (vec_concat:<ssedoublevecmode>
3515 (match_operand:VI4F_128 1 "register_operand" "0,x")
3516 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3517 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3518 (match_operand 4 "const_0_to_3_operand" "")
3519 (match_operand 5 "const_4_to_7_operand" "")
3520 (match_operand 6 "const_4_to_7_operand" "")])))]
3524 mask |= INTVAL (operands[3]) << 0;
3525 mask |= INTVAL (operands[4]) << 2;
3526 mask |= (INTVAL (operands[5]) - 4) << 4;
3527 mask |= (INTVAL (operands[6]) - 4) << 6;
3528 operands[3] = GEN_INT (mask);
3530 switch (which_alternative)
3533 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3535 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3540 [(set_attr "isa" "noavx,avx")
3541 (set_attr "type" "sselog")
3542 (set_attr "length_immediate" "1")
3543 (set_attr "prefix" "orig,vex")
3544 (set_attr "mode" "V4SF")])
3546 (define_insn "sse_storehps"
3547 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3549 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3550 (parallel [(const_int 2) (const_int 3)])))]
3553 %vmovhps\t{%1, %0|%0, %1}
3554 %vmovhlps\t{%1, %d0|%d0, %1}
3555 %vmovlps\t{%H1, %d0|%d0, %H1}"
3556 [(set_attr "type" "ssemov")
3557 (set_attr "prefix" "maybe_vex")
3558 (set_attr "mode" "V2SF,V4SF,V2SF")])
3560 (define_expand "sse_loadhps_exp"
3561 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3564 (match_operand:V4SF 1 "nonimmediate_operand" "")
3565 (parallel [(const_int 0) (const_int 1)]))
3566 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3569 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3571 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3573 /* Fix up the destination if needed. */
3574 if (dst != operands[0])
3575 emit_move_insn (operands[0], dst);
3580 (define_insn "sse_loadhps"
3581 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3584 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3585 (parallel [(const_int 0) (const_int 1)]))
3586 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3589 movhps\t{%2, %0|%0, %2}
3590 vmovhps\t{%2, %1, %0|%0, %1, %2}
3591 movlhps\t{%2, %0|%0, %2}
3592 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3593 %vmovlps\t{%2, %H0|%H0, %2}"
3594 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3595 (set_attr "type" "ssemov")
3596 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3597 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
3599 (define_insn "sse_storelps"
3600 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3602 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3603 (parallel [(const_int 0) (const_int 1)])))]
3606 %vmovlps\t{%1, %0|%0, %1}
3607 %vmovaps\t{%1, %0|%0, %1}
3608 %vmovlps\t{%1, %d0|%d0, %1}"
3609 [(set_attr "type" "ssemov")
3610 (set_attr "prefix" "maybe_vex")
3611 (set_attr "mode" "V2SF,V4SF,V2SF")])
3613 (define_expand "sse_loadlps_exp"
3614 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3616 (match_operand:V2SF 2 "nonimmediate_operand" "")
3618 (match_operand:V4SF 1 "nonimmediate_operand" "")
3619 (parallel [(const_int 2) (const_int 3)]))))]
3622 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3624 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3626 /* Fix up the destination if needed. */
3627 if (dst != operands[0])
3628 emit_move_insn (operands[0], dst);
3633 (define_insn "sse_loadlps"
3634 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3636 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3638 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3639 (parallel [(const_int 2) (const_int 3)]))))]
3642 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3643 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3644 movlps\t{%2, %0|%0, %2}
3645 vmovlps\t{%2, %1, %0|%0, %1, %2}
3646 %vmovlps\t{%2, %0|%0, %2}"
3647 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3648 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3649 (set_attr "length_immediate" "1,1,*,*,*")
3650 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3651 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3653 (define_insn "sse_movss"
3654 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3656 (match_operand:V4SF 2 "register_operand" " x,x")
3657 (match_operand:V4SF 1 "register_operand" " 0,x")
3661 movss\t{%2, %0|%0, %2}
3662 vmovss\t{%2, %1, %0|%0, %1, %2}"
3663 [(set_attr "isa" "noavx,avx")
3664 (set_attr "type" "ssemov")
3665 (set_attr "prefix" "orig,vex")
3666 (set_attr "mode" "SF")])
3668 (define_expand "vec_dupv4sf"
3669 [(set (match_operand:V4SF 0 "register_operand" "")
3671 (match_operand:SF 1 "nonimmediate_operand" "")))]
3675 operands[1] = force_reg (SFmode, operands[1]);
3678 (define_insn "avx2_vec_dupv4sf"
3679 [(set (match_operand:V4SF 0 "register_operand" "=x")
3682 (match_operand:V4SF 1 "register_operand" "x")
3683 (parallel [(const_int 0)]))))]
3685 "vbroadcastss\t{%1, %0|%0, %1}"
3686 [(set_attr "type" "sselog1")
3687 (set_attr "prefix" "vex")
3688 (set_attr "mode" "V4SF")])
3690 (define_insn "*vec_dupv4sf_avx"
3691 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3693 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3696 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3697 vbroadcastss\t{%1, %0|%0, %1}"
3698 [(set_attr "type" "sselog1,ssemov")
3699 (set_attr "length_immediate" "1,0")
3700 (set_attr "prefix_extra" "0,1")
3701 (set_attr "prefix" "vex")
3702 (set_attr "mode" "V4SF")])
3704 (define_insn "avx2_vec_dupv8sf"
3705 [(set (match_operand:V8SF 0 "register_operand" "=x")
3708 (match_operand:V4SF 1 "register_operand" "x")
3709 (parallel [(const_int 0)]))))]
3711 "vbroadcastss\t{%1, %0|%0, %1}"
3712 [(set_attr "type" "sselog1")
3713 (set_attr "prefix" "vex")
3714 (set_attr "mode" "V8SF")])
3716 (define_insn "*vec_dupv4sf"
3717 [(set (match_operand:V4SF 0 "register_operand" "=x")
3719 (match_operand:SF 1 "register_operand" "0")))]
3721 "shufps\t{$0, %0, %0|%0, %0, 0}"
3722 [(set_attr "type" "sselog1")
3723 (set_attr "length_immediate" "1")
3724 (set_attr "mode" "V4SF")])
3726 ;; Although insertps takes register source, we prefer
3727 ;; unpcklps with register source since it is shorter.
3728 (define_insn "*vec_concatv2sf_sse4_1"
3729 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3731 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3732 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3735 unpcklps\t{%2, %0|%0, %2}
3736 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3737 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3738 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3739 %vmovss\t{%1, %0|%0, %1}
3740 punpckldq\t{%2, %0|%0, %2}
3741 movd\t{%1, %0|%0, %1}"
3742 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3743 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3744 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3745 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3746 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3747 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3748 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3750 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3751 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3752 ;; alternatives pretty much forces the MMX alternative to be chosen.
3753 (define_insn "*vec_concatv2sf_sse"
3754 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3756 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3757 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3760 unpcklps\t{%2, %0|%0, %2}
3761 movss\t{%1, %0|%0, %1}
3762 punpckldq\t{%2, %0|%0, %2}
3763 movd\t{%1, %0|%0, %1}"
3764 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3765 (set_attr "mode" "V4SF,SF,DI,DI")])
3767 (define_insn "*vec_concatv4sf"
3768 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3770 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3771 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3774 movlhps\t{%2, %0|%0, %2}
3775 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3776 movhps\t{%2, %0|%0, %2}
3777 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3778 [(set_attr "isa" "noavx,avx,noavx,avx")
3779 (set_attr "type" "ssemov")
3780 (set_attr "prefix" "orig,vex,orig,vex")
3781 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
3783 (define_expand "vec_init<mode>"
3784 [(match_operand:V_128 0 "register_operand" "")
3785 (match_operand 1 "" "")]
3788 ix86_expand_vector_init (false, operands[0], operands[1]);
3792 ;; Avoid combining registers from different units in a single alternative,
3793 ;; see comment above inline_secondary_memory_needed function in i386.c
3794 (define_insn "vec_set<mode>_0"
3795 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3796 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3798 (vec_duplicate:VI4F_128
3799 (match_operand:<ssescalarmode> 2 "general_operand"
3800 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3801 (match_operand:VI4F_128 1 "vector_move_operand"
3802 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3806 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3807 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3808 %vmovd\t{%2, %0|%0, %2}
3809 movss\t{%2, %0|%0, %2}
3810 movss\t{%2, %0|%0, %2}
3811 vmovss\t{%2, %1, %0|%0, %1, %2}
3812 pinsrd\t{$0, %2, %0|%0, %2, 0}
3813 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3817 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3819 (cond [(eq_attr "alternative" "0,6,7")
3820 (const_string "sselog")
3821 (eq_attr "alternative" "9")
3822 (const_string "fmov")
3823 (eq_attr "alternative" "10")
3824 (const_string "imov")
3826 (const_string "ssemov")))
3827 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3828 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3829 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3830 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
;; A subset is vec_setv4sf.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
          (match_operand:V4SF 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_int_operand" "")))]
  "TARGET_SSE4_1
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (V4SFmode))"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
  switch (which_alternative)
    {
    case 0:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
                      (match_operand:V4SF 1 "register_operand" "0,x")
                      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
                     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
{
  if (MEM_P (operands[2]))
    {
      unsigned count_s = INTVAL (operands[3]) >> 6;
      if (count_s)
        operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
    }
  switch (which_alternative)
    {
    case 0:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

(define_split
  [(set (match_operand:VI4F_128 0 "memory_operand" "")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
                  operands[1]);
  DONE;
})

(define_expand "vec_set<mode>"
  [(match_operand:V 0 "register_operand" "")
   (match_operand:<ssescalarmode> 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

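;; Illustrative C only (a sketch; the exact tree-to-RTL path is an
;; assumption made for illustration): writing one element of a vector held
;; in a register can be expanded through ix86_expand_vector_set with the
;; constant element index as the last argument.
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;   v4si f (v4si v, int x) { v[2] = x; return v; }
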
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (SFmode, REGNO (op1));
  else
    op1 = gen_lowpart (SFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

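;; Illustrative C only (a sketch, not taken from this file): extracting
;; element 0 needs no shuffle; the split above retypes the operand and
;; emits a plain SFmode move.
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;   float f (v4sf v) { return v[0]; }
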
(define_insn_and_split "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
        (vec_select:SF
          (match_operand:V4SF 1 "register_operand" "x,0,x")
          (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "@
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  "&& reload_completed && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
  switch (INTVAL (operands[2]))
    {
    case 1:
    case 3:
      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
                                      operands[2], operands[2],
                                      GEN_INT (INTVAL (operands[2]) + 4),
                                      GEN_INT (INTVAL (operands[2]) + 4)));
      break;
    case 2:
      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
      break;
    default:
      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
      gcc_unreachable ();
    }
  DONE;
}
  [(set_attr "isa" "*,noavx,avx")
   (set_attr "type" "sselog,*,*")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1,*,*")
   (set_attr "length_immediate" "1,*,*")
   (set_attr "prefix" "maybe_vex,*,*")
   (set_attr "mode" "V4SF,*,*")])

(define_insn_and_split "*vec_extract_v4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
        (vec_select:SF
          (match_operand:V4SF 1 "memory_operand" "o,o,o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i = INTVAL (operands[2]);

  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
  DONE;
})

(define_expand "avx_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
   (match_operand:V_256 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  rtx (*insn)(rtx, rtx);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_<mode>;
      break;
    case 1:
      insn = gen_vec_extract_hi_<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (operands[0], operands[1]));
  DONE;
})

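;; Illustrative C only (an assumption about the builtin mapping, shown for
;; context): the 128-bit extract intrinsics reach this expander with the
;; lane selector as operand 2 and are routed to the lo/hi patterns below.
;;   #include <immintrin.h>
;;   __m128d f (__m256d x) { return _mm256_extractf128_pd (x, 1); }
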
(define_insn_and_split "vec_extract_lo_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
  else
    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

(define_insn "vec_extract_hi_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "register_operand" "x,x")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_AVX"
  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"