1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V?TImode, used in move patterns.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 (V2TI "TARGET_AVX") V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
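;; Note: an entry written as (V32QI "TARGET_AVX") includes that mode in the
;; iterator only when the condition holds, so a single pattern such as the
;; "mov<mode>" expander below is instantiated for V16QI ... V2DF
;; unconditionally, while the 256-bit instantiations (movv32qi, ..., movv4df)
;; are additionally restricted to TARGET_AVX.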
31 ;; All vector modes
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
66 (define_mode_iterator VF_256
67 [V8SF V4DF])
69 ;; All vector integer modes
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
102 ;; ??? We should probably use TImode instead.
103 (define_mode_iterator VIMAX_AVX2
104 [(V2TI "TARGET_AVX2") V1TI])
106 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
107 (define_mode_iterator SSESCALARMODE
108 [(V2TI "TARGET_AVX2") TI])
110 (define_mode_iterator VI12_AVX2
111 [(V32QI "TARGET_AVX2") V16QI
112 (V16HI "TARGET_AVX2") V8HI])
114 (define_mode_iterator VI24_AVX2
115 [(V16HI "TARGET_AVX2") V8HI
116 (V8SI "TARGET_AVX2") V4SI])
118 (define_mode_iterator VI124_AVX2
119 [(V32QI "TARGET_AVX2") V16QI
120 (V16HI "TARGET_AVX2") V8HI
121 (V8SI "TARGET_AVX2") V4SI])
123 (define_mode_iterator VI248_AVX2
124 [(V16HI "TARGET_AVX2") V8HI
125 (V8SI "TARGET_AVX2") V4SI
126 (V4DI "TARGET_AVX2") V2DI])
128 (define_mode_iterator VI48_AVX2
129 [(V8SI "TARGET_AVX2") V4SI
130 (V4DI "TARGET_AVX2") V2DI])
132 (define_mode_iterator V48_AVX2
133 [V4SF V2DF
134 V8SF V4DF
135 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
136 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
138 (define_mode_attr sse2_avx2
139 [(V16QI "sse2") (V32QI "avx2")
140 (V8HI "sse2") (V16HI "avx2")
141 (V4SI "sse2") (V8SI "avx2")
142 (V2DI "sse2") (V4DI "avx2")
143 (V1TI "sse2") (V2TI "avx2")])
145 (define_mode_attr ssse3_avx2
146 [(V16QI "ssse3") (V32QI "avx2")
147 (V8HI "ssse3") (V16HI "avx2")
148 (V4SI "ssse3") (V8SI "avx2")
149 (V2DI "ssse3") (V4DI "avx2")
150 (TI "ssse3") (V2TI "avx2")])
152 (define_mode_attr sse4_1_avx2
153 [(V16QI "sse4_1") (V32QI "avx2")
154 (V8HI "sse4_1") (V16HI "avx2")
155 (V4SI "sse4_1") (V8SI "avx2")
156 (V2DI "sse4_1") (V4DI "avx2")])
158 (define_mode_attr avx_avx2
159 [(V4SF "avx") (V2DF "avx")
160 (V8SF "avx") (V4DF "avx")
161 (V4SI "avx2") (V2DI "avx2")
162 (V8SI "avx2") (V4DI "avx2")])
164 (define_mode_attr vec_avx2
165 [(V16QI "vec") (V32QI "avx2")
166 (V8HI "vec") (V16HI "avx2")
167 (V4SI "vec") (V8SI "avx2")
168 (V2DI "vec") (V4DI "avx2")])
170 (define_mode_attr ssedoublemode
171 [(V16HI "V16SI") (V8HI "V8SI")])
173 (define_mode_attr ssebytemode
174 [(V4DI "V32QI") (V2DI "V16QI")])
176 ;; All 128bit vector integer modes
177 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
179 ;; All 256bit vector integer modes
180 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
182 ;; Random 128bit vector integer mode combinations
183 (define_mode_iterator VI12_128 [V16QI V8HI])
184 (define_mode_iterator VI14_128 [V16QI V4SI])
185 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
186 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
187 (define_mode_iterator VI24_128 [V8HI V4SI])
188 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
189 (define_mode_iterator VI48_128 [V4SI V2DI])
191 ;; Random 256bit vector integer mode combinations
192 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
193 (define_mode_iterator VI48_256 [V8SI V4DI])
195 ;; Int-float size matches
196 (define_mode_iterator VI4F_128 [V4SI V4SF])
197 (define_mode_iterator VI8F_128 [V2DI V2DF])
198 (define_mode_iterator VI4F_256 [V8SI V8SF])
199 (define_mode_iterator VI8F_256 [V4DI V4DF])
201 ;; Mapping from float mode to required SSE level
202 (define_mode_attr sse
203 [(SF "sse") (DF "sse2")
204 (V4SF "sse") (V2DF "sse2")
205 (V8SF "avx") (V4DF "avx")])
207 (define_mode_attr sse2
208 [(V16QI "sse2") (V32QI "avx")
209 (V2DI "sse2") (V4DI "avx")])
211 (define_mode_attr sse3
212 [(V16QI "sse3") (V32QI "avx")])
214 (define_mode_attr sse4_1
215 [(V4SF "sse4_1") (V2DF "sse4_1")
216 (V8SF "avx") (V4DF "avx")])
218 (define_mode_attr avxsizesuffix
219 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
220 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
221 (V8SF "256") (V4DF "256")
222 (V4SF "") (V2DF "")])
224 ;; SSE instruction mode
225 (define_mode_attr sseinsnmode
226 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
227 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
228 (V8SF "V8SF") (V4DF "V4DF")
229 (V4SF "V4SF") (V2DF "V2DF")
230 (TI "TI")])
232 ;; Mapping of vector modes to an integer vector mode of the same size
233 (define_mode_attr sseintvecmode
234 [(V8SF "V8SI") (V4DF "V4DI")
235 (V4SF "V4SI") (V2DF "V2DI")
236 (V4DF "V4DI") (V8SF "V8SI")
237 (V8SI "V8SI") (V4DI "V4DI")
238 (V4SI "V4SI") (V2DI "V2DI")
239 (V16HI "V16HI") (V8HI "V8HI")
240 (V32QI "V32QI") (V16QI "V16QI")])
242 ;; Mapping of vector modes to a vector mode of double size
243 (define_mode_attr ssedoublevecmode
244 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
245 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
246 (V8SF "V16SF") (V4DF "V8DF")
247 (V4SF "V8SF") (V2DF "V4DF")])
249 ;; Mapping of vector modes to a vector mode of half size
250 (define_mode_attr ssehalfvecmode
251 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
252 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
253 (V8SF "V4SF") (V4DF "V2DF")
254 (V4SF "V2SF")])
256 ;; Mapping of vector modes back to the scalar modes
257 (define_mode_attr ssescalarmode
258 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
259 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
260 (V8SF "SF") (V4DF "DF")
261 (V4SF "SF") (V2DF "DF")])
263 ;; Number of scalar elements in each vector type
264 (define_mode_attr ssescalarnum
265 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
266 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
267 (V8SF "8") (V4DF "4")
268 (V4SF "4") (V2DF "2")])
270 ;; SSE prefix for integer vector modes
271 (define_mode_attr sseintprefix
272 [(V2DI "p") (V2DF "")
273 (V4DI "p") (V4DF "")
274 (V4SI "p") (V4SF "")
275 (V8SI "p") (V8SF "")])
277 ;; SSE scalar suffix for vector modes
278 (define_mode_attr ssescalarmodesuffix
279 [(SF "ss") (DF "sd")
280 (V8SF "ss") (V4DF "sd")
281 (V4SF "ss") (V2DF "sd")
282 (V8SI "ss") (V4DI "sd")
283 (V4SI "d")])
285 ;; Pack/unpack vector modes
286 (define_mode_attr sseunpackmode
287 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
288 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
290 (define_mode_attr ssepackmode
291 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
292 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
294 ;; Mapping of the max integer size for xop rotate immediate constraint
295 (define_mode_attr sserotatemax
296 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
298 ;; Mapping of mode to cast intrinsic name
299 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
301 ;; Instruction suffix for sign and zero extensions.
302 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
304 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
305 (define_mode_attr i128
306 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
307 (V8SI "%~128") (V4DI "%~128")])
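;; For example, the 128-bit lane insert/extract patterns use <i128> to pick
;; between the vinsertf128/vextractf128 mnemonics for float vectors and,
;; through the %~ output modifier, vinserti128/vextracti128 for integer
;; vectors when AVX2 is available (plain AVX only has the f128 forms).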
310 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
312 (define_mode_iterator AVXMODE48P_DI
313 [V2DI V2DF V4DI V4DF V4SF V4SI])
314 (define_mode_attr AVXMODE48P_DI
315 [(V2DI "V2DI") (V2DF "V2DI")
316 (V4DI "V4DI") (V4DF "V4DI")
317 (V4SI "V2DI") (V4SF "V2DI")
318 (V8SI "V4DI") (V8SF "V4DI")])
320 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
322 ;; Mapping of immediate bits for blend instructions
323 (define_mode_attr blendbits
324 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
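;; (One immediate bit per element, i.e. the largest valid mask is
;; (1 << nelts) - 1: 255 for 8 elements, 15 for 4, 3 for 2.)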
326 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
328 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
332 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
334 ;; All of these patterns are enabled for SSE1 as well as SSE2.
335 ;; This is essential for maintaining stable calling conventions.
337 (define_expand "mov<mode>"
338 [(set (match_operand:V16 0 "nonimmediate_operand" "")
339 (match_operand:V16 1 "nonimmediate_operand" ""))]
342 ix86_expand_vector_move (<MODE>mode, operands);
346 (define_insn "*mov<mode>_internal"
347 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
348 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
350 && (register_operand (operands[0], <MODE>mode)
351 || register_operand (operands[1], <MODE>mode))"
353 switch (which_alternative)
356 return standard_sse_constant_opcode (insn, operands[1]);
359 switch (get_attr_mode (insn))
364 && (misaligned_operand (operands[0], <MODE>mode)
365 || misaligned_operand (operands[1], <MODE>mode)))
366 return "vmovups\t{%1, %0|%0, %1}";
368 return "%vmovaps\t{%1, %0|%0, %1}";
373 && (misaligned_operand (operands[0], <MODE>mode)
374 || misaligned_operand (operands[1], <MODE>mode)))
375 return "vmovupd\t{%1, %0|%0, %1}";
376 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
377 return "%vmovaps\t{%1, %0|%0, %1}";
379 return "%vmovapd\t{%1, %0|%0, %1}";
384 && (misaligned_operand (operands[0], <MODE>mode)
385 || misaligned_operand (operands[1], <MODE>mode)))
386 return "vmovdqu\t{%1, %0|%0, %1}";
387 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
388 return "%vmovaps\t{%1, %0|%0, %1}";
390 return "%vmovdqa\t{%1, %0|%0, %1}";
399 [(set_attr "type" "sselog1,ssemov,ssemov")
400 (set_attr "prefix" "maybe_vex")
402 (cond [(match_test "TARGET_AVX")
403 (const_string "<sseinsnmode>")
404 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
405 (not (match_test "TARGET_SSE2")))
406 (and (eq_attr "alternative" "2")
407 (match_test "TARGET_SSE_TYPELESS_STORES")))
408 (const_string "V4SF")
409 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
410 (const_string "V4SF")
411 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
412 (const_string "V2DF")
414 (const_string "TI")))])
416 (define_insn "sse2_movq128"
417 [(set (match_operand:V2DI 0 "register_operand" "=x")
420 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
421 (parallel [(const_int 0)]))
424 "%vmovq\t{%1, %0|%0, %1}"
425 [(set_attr "type" "ssemov")
426 (set_attr "prefix" "maybe_vex")
427 (set_attr "mode" "TI")])
429 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
430 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
431 ;; from memory, we'd prefer to load the memory directly into the %xmm
432 ;; register. To facilitate this happy circumstance, this pattern won't
433 ;; split until after register allocation. If the 64-bit value didn't
434 ;; come from memory, this is the best we can do. This is much better
435 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
436 ;; register from the stack.
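;; As a rough sketch (register numbers are only illustrative), the
;; register-pair alternative below splits into
;;
;;	movd	%eax, %xmm0
;;	movd	%edx, %xmm2
;;	punpckldq %xmm2, %xmm0
;;
;; i.e. load each 32-bit half with movd and interleave the low dwords.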
438 (define_insn_and_split "movdi_to_sse"
440 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
441 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
442 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
443 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
445 "&& reload_completed"
448 if (register_operand (operands[1], DImode))
450 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
451 Assemble the 64-bit DImode value in an xmm register. */
452 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
453 gen_rtx_SUBREG (SImode, operands[1], 0)));
454 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
455 gen_rtx_SUBREG (SImode, operands[1], 4)));
456 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
459 else if (memory_operand (operands[1], DImode))
460 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
461 operands[1], const0_rtx));
467 [(set (match_operand:V4SF 0 "register_operand" "")
468 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
469 "TARGET_SSE && reload_completed"
472 (vec_duplicate:V4SF (match_dup 1))
476 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
477 operands[2] = CONST0_RTX (V4SFmode);
481 [(set (match_operand:V2DF 0 "register_operand" "")
482 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
483 "TARGET_SSE2 && reload_completed"
484 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
486 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
487 operands[2] = CONST0_RTX (DFmode);
490 (define_expand "push<mode>1"
491 [(match_operand:V16 0 "register_operand" "")]
494 ix86_expand_push (<MODE>mode, operands[0]);
498 (define_expand "movmisalign<mode>"
499 [(set (match_operand:V16 0 "nonimmediate_operand" "")
500 (match_operand:V16 1 "nonimmediate_operand" ""))]
503 ix86_expand_vector_move_misalign (<MODE>mode, operands);
507 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
508 [(set (match_operand:VF 0 "nonimmediate_operand" "")
510 [(match_operand:VF 1 "nonimmediate_operand" "")]
514 if (MEM_P (operands[0]) && MEM_P (operands[1]))
515 operands[1] = force_reg (<MODE>mode, operands[1]);
518 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
519 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
521 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
523 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
524 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
525 [(set_attr "type" "ssemov")
526 (set_attr "movu" "1")
527 (set_attr "prefix" "maybe_vex")
528 (set_attr "mode" "<MODE>")])
530 (define_expand "<sse2>_movdqu<avxsizesuffix>"
531 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
532 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
536 if (MEM_P (operands[0]) && MEM_P (operands[1]))
537 operands[1] = force_reg (<MODE>mode, operands[1]);
540 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
541 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
542 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
544 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
545 "%vmovdqu\t{%1, %0|%0, %1}"
546 [(set_attr "type" "ssemov")
547 (set_attr "movu" "1")
548 (set (attr "prefix_data16")
550 (match_test "TARGET_AVX")
553 (set_attr "prefix" "maybe_vex")
554 (set_attr "mode" "<sseinsnmode>")])
556 (define_insn "<sse3>_lddqu<avxsizesuffix>"
557 [(set (match_operand:VI1 0 "register_operand" "=x")
558 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
561 "%vlddqu\t{%1, %0|%0, %1}"
562 [(set_attr "type" "ssemov")
563 (set_attr "movu" "1")
564 (set (attr "prefix_data16")
566 (match_test "TARGET_AVX")
569 (set (attr "prefix_rep")
571 (match_test "TARGET_AVX")
574 (set_attr "prefix" "maybe_vex")
575 (set_attr "mode" "<sseinsnmode>")])
577 (define_insn "sse2_movntsi"
578 [(set (match_operand:SI 0 "memory_operand" "=m")
579 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
582 "movnti\t{%1, %0|%0, %1}"
583 [(set_attr "type" "ssemov")
584 (set_attr "prefix_data16" "0")
585 (set_attr "mode" "V2DF")])
587 (define_insn "<sse>_movnt<mode>"
588 [(set (match_operand:VF 0 "memory_operand" "=m")
589 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
592 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
593 [(set_attr "type" "ssemov")
594 (set_attr "prefix" "maybe_vex")
595 (set_attr "mode" "<MODE>")])
597 (define_insn "<sse2>_movnt<mode>"
598 [(set (match_operand:VI8 0 "memory_operand" "=m")
599 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
602 "%vmovntdq\t{%1, %0|%0, %1}"
603 [(set_attr "type" "ssecvt")
604 (set (attr "prefix_data16")
606 (match_test "TARGET_AVX")
609 (set_attr "prefix" "maybe_vex")
610 (set_attr "mode" "<sseinsnmode>")])
612 ; Expand patterns for non-temporal stores. At the moment, only those
613 ; that directly map to insns are defined; it would be possible to
614 ; define patterns for other modes that would expand to several insns.
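;; As an illustration, the same movnt insns defined above back the
;; streaming-store intrinsics; a minimal user of the V4SF variant is
;;
;;	#include <xmmintrin.h>
;;	void f (float *p, __m128 v) { _mm_stream_ps (p, v); }
;;
;; which stores v with movntps, bypassing the cache hierarchy.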
616 ;; Modes handled by storent patterns.
617 (define_mode_iterator STORENT_MODE
618 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
619 (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
620 (V8SF "TARGET_AVX") V4SF
621 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
623 (define_expand "storent<mode>"
624 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
626 [(match_operand:STORENT_MODE 1 "register_operand" "")]
630 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
632 ;; Parallel floating point arithmetic
634 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
636 (define_expand "<code><mode>2"
637 [(set (match_operand:VF 0 "register_operand" "")
639 (match_operand:VF 1 "register_operand" "")))]
641 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
643 (define_insn_and_split "*absneg<mode>2"
644 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
645 (match_operator:VF 3 "absneg_operator"
646 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
647 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
650 "&& reload_completed"
653 enum rtx_code absneg_op;
659 if (MEM_P (operands[1]))
660 op1 = operands[2], op2 = operands[1];
662 op1 = operands[1], op2 = operands[2];
667 if (rtx_equal_p (operands[0], operands[1]))
673 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
674 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
675 t = gen_rtx_SET (VOIDmode, operands[0], t);
679 [(set_attr "isa" "noavx,noavx,avx,avx")])
681 (define_expand "<plusminus_insn><mode>3"
682 [(set (match_operand:VF 0 "register_operand" "")
684 (match_operand:VF 1 "nonimmediate_operand" "")
685 (match_operand:VF 2 "nonimmediate_operand" "")))]
687 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
689 (define_insn "*<plusminus_insn><mode>3"
690 [(set (match_operand:VF 0 "register_operand" "=x,x")
692 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
693 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
694 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
696 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
697 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
698 [(set_attr "isa" "noavx,avx")
699 (set_attr "type" "sseadd")
700 (set_attr "prefix" "orig,vex")
701 (set_attr "mode" "<MODE>")])
703 (define_insn "<sse>_vm<plusminus_insn><mode>3"
704 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
707 (match_operand:VF_128 1 "register_operand" "0,x")
708 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
713 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
714 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
715 [(set_attr "isa" "noavx,avx")
716 (set_attr "type" "sseadd")
717 (set_attr "prefix" "orig,vex")
718 (set_attr "mode" "<ssescalarmode>")])
720 (define_expand "mul<mode>3"
721 [(set (match_operand:VF 0 "register_operand" "")
723 (match_operand:VF 1 "nonimmediate_operand" "")
724 (match_operand:VF 2 "nonimmediate_operand" "")))]
726 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
728 (define_insn "*mul<mode>3"
729 [(set (match_operand:VF 0 "register_operand" "=x,x")
731 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
732 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
733 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
735 mul<ssemodesuffix>\t{%2, %0|%0, %2}
736 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
737 [(set_attr "isa" "noavx,avx")
738 (set_attr "type" "ssemul")
739 (set_attr "prefix" "orig,vex")
740 (set_attr "mode" "<MODE>")])
742 (define_insn "<sse>_vmmul<mode>3"
743 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
746 (match_operand:VF_128 1 "register_operand" "0,x")
747 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
752 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
753 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
754 [(set_attr "isa" "noavx,avx")
755 (set_attr "type" "ssemul")
756 (set_attr "prefix" "orig,vex")
757 (set_attr "mode" "<ssescalarmode>")])
759 (define_expand "div<mode>3"
760 [(set (match_operand:VF2 0 "register_operand" "")
761 (div:VF2 (match_operand:VF2 1 "register_operand" "")
762 (match_operand:VF2 2 "nonimmediate_operand" "")))]
764 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
766 (define_expand "div<mode>3"
767 [(set (match_operand:VF1 0 "register_operand" "")
768 (div:VF1 (match_operand:VF1 1 "register_operand" "")
769 (match_operand:VF1 2 "nonimmediate_operand" "")))]
772 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
775 && TARGET_RECIP_VEC_DIV
776 && !optimize_insn_for_size_p ()
777 && flag_finite_math_only && !flag_trapping_math
778 && flag_unsafe_math_optimizations)
780 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
785 (define_insn "<sse>_div<mode>3"
786 [(set (match_operand:VF 0 "register_operand" "=x,x")
788 (match_operand:VF 1 "register_operand" "0,x")
789 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
792 div<ssemodesuffix>\t{%2, %0|%0, %2}
793 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
794 [(set_attr "isa" "noavx,avx")
795 (set_attr "type" "ssediv")
796 (set_attr "prefix" "orig,vex")
797 (set_attr "mode" "<MODE>")])
799 (define_insn "<sse>_vmdiv<mode>3"
800 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
803 (match_operand:VF_128 1 "register_operand" "0,x")
804 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
809 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
810 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
811 [(set_attr "isa" "noavx,avx")
812 (set_attr "type" "ssediv")
813 (set_attr "prefix" "orig,vex")
814 (set_attr "mode" "<ssescalarmode>")])
816 (define_insn "<sse>_rcp<mode>2"
817 [(set (match_operand:VF1 0 "register_operand" "=x")
819 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
821 "%vrcpps\t{%1, %0|%0, %1}"
822 [(set_attr "type" "sse")
823 (set_attr "atom_sse_attr" "rcp")
824 (set_attr "prefix" "maybe_vex")
825 (set_attr "mode" "<MODE>")])
827 (define_insn "sse_vmrcpv4sf2"
828 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
830 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
832 (match_operand:V4SF 2 "register_operand" "0,x")
836 rcpss\t{%1, %0|%0, %1}
837 vrcpss\t{%1, %2, %0|%0, %2, %1}"
838 [(set_attr "isa" "noavx,avx")
839 (set_attr "type" "sse")
840 (set_attr "atom_sse_attr" "rcp")
841 (set_attr "prefix" "orig,vex")
842 (set_attr "mode" "SF")])
844 (define_expand "sqrt<mode>2"
845 [(set (match_operand:VF2 0 "register_operand" "")
846 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
849 (define_expand "sqrt<mode>2"
850 [(set (match_operand:VF1 0 "register_operand" "")
851 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
855 && TARGET_RECIP_VEC_SQRT
856 && !optimize_insn_for_size_p ()
857 && flag_finite_math_only && !flag_trapping_math
858 && flag_unsafe_math_optimizations)
860 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
865 (define_insn "<sse>_sqrt<mode>2"
866 [(set (match_operand:VF 0 "register_operand" "=x")
867 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
869 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
870 [(set_attr "type" "sse")
871 (set_attr "atom_sse_attr" "sqrt")
872 (set_attr "prefix" "maybe_vex")
873 (set_attr "mode" "<MODE>")])
875 (define_insn "<sse>_vmsqrt<mode>2"
876 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
879 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
880 (match_operand:VF_128 2 "register_operand" "0,x")
884 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
885 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
886 [(set_attr "isa" "noavx,avx")
887 (set_attr "type" "sse")
888 (set_attr "atom_sse_attr" "sqrt")
889 (set_attr "prefix" "orig,vex")
890 (set_attr "mode" "<ssescalarmode>")])
892 (define_expand "rsqrt<mode>2"
893 [(set (match_operand:VF1 0 "register_operand" "")
895 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
898 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
902 (define_insn "<sse>_rsqrt<mode>2"
903 [(set (match_operand:VF1 0 "register_operand" "=x")
905 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
907 "%vrsqrtps\t{%1, %0|%0, %1}"
908 [(set_attr "type" "sse")
909 (set_attr "prefix" "maybe_vex")
910 (set_attr "mode" "<MODE>")])
912 (define_insn "sse_vmrsqrtv4sf2"
913 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
915 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
917 (match_operand:V4SF 2 "register_operand" "0,x")
921 rsqrtss\t{%1, %0|%0, %1}
922 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
923 [(set_attr "isa" "noavx,avx")
924 (set_attr "type" "sse")
925 (set_attr "prefix" "orig,vex")
926 (set_attr "mode" "SF")])
928 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
929 ;; isn't really correct, as those rtl operators aren't defined when
930 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
932 (define_expand "<code><mode>3"
933 [(set (match_operand:VF 0 "register_operand" "")
935 (match_operand:VF 1 "nonimmediate_operand" "")
936 (match_operand:VF 2 "nonimmediate_operand" "")))]
939 if (!flag_finite_math_only)
940 operands[1] = force_reg (<MODE>mode, operands[1]);
941 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
944 (define_insn "*<code><mode>3_finite"
945 [(set (match_operand:VF 0 "register_operand" "=x,x")
947 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
948 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
949 "TARGET_SSE && flag_finite_math_only
950 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
952 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
953 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
954 [(set_attr "isa" "noavx,avx")
955 (set_attr "type" "sseadd")
956 (set_attr "prefix" "orig,vex")
957 (set_attr "mode" "<MODE>")])
959 (define_insn "*<code><mode>3"
960 [(set (match_operand:VF 0 "register_operand" "=x,x")
962 (match_operand:VF 1 "register_operand" "0,x")
963 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
964 "TARGET_SSE && !flag_finite_math_only"
966 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
967 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
968 [(set_attr "isa" "noavx,avx")
969 (set_attr "type" "sseadd")
970 (set_attr "prefix" "orig,vex")
971 (set_attr "mode" "<MODE>")])
973 (define_insn "<sse>_vm<code><mode>3"
974 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
977 (match_operand:VF_128 1 "register_operand" "0,x")
978 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
983 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
984 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
985 [(set_attr "isa" "noavx,avx")
986 (set_attr "type" "sse")
987 (set_attr "prefix" "orig,vex")
988 (set_attr "mode" "<ssescalarmode>")])
990 ;; These versions of the min/max patterns implement exactly the operations
991 ;; min = (op1 < op2 ? op1 : op2)
992 ;; max = (!(op1 < op2) ? op1 : op2)
993 ;; Their operands are not commutative, and thus they may be used in the
994 ;; presence of -0.0 and NaN.
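;; For example, following min = (op1 < op2 ? op1 : op2) above, a NaN in
;; either operand makes the comparison false and op2 is returned:
;;	min (NaN, 5.0) = 5.0   whereas   min (5.0, NaN) = NaN
;; so the operands cannot be swapped or commuted.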
996 (define_insn "*ieee_smin<mode>3"
997 [(set (match_operand:VF 0 "register_operand" "=x,x")
999 [(match_operand:VF 1 "register_operand" "0,x")
1000 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1004 min<ssemodesuffix>\t{%2, %0|%0, %2}
1005 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1006 [(set_attr "isa" "noavx,avx")
1007 (set_attr "type" "sseadd")
1008 (set_attr "prefix" "orig,vex")
1009 (set_attr "mode" "<MODE>")])
1011 (define_insn "*ieee_smax<mode>3"
1012 [(set (match_operand:VF 0 "register_operand" "=x,x")
1014 [(match_operand:VF 1 "register_operand" "0,x")
1015 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1019 max<ssemodesuffix>\t{%2, %0|%0, %2}
1020 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1021 [(set_attr "isa" "noavx,avx")
1022 (set_attr "type" "sseadd")
1023 (set_attr "prefix" "orig,vex")
1024 (set_attr "mode" "<MODE>")])
1026 (define_insn "avx_addsubv4df3"
1027 [(set (match_operand:V4DF 0 "register_operand" "=x")
1030 (match_operand:V4DF 1 "register_operand" "x")
1031 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1032 (minus:V4DF (match_dup 1) (match_dup 2))
1035 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1036 [(set_attr "type" "sseadd")
1037 (set_attr "prefix" "vex")
1038 (set_attr "mode" "V4DF")])
1040 (define_insn "sse3_addsubv2df3"
1041 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1044 (match_operand:V2DF 1 "register_operand" "0,x")
1045 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1046 (minus:V2DF (match_dup 1) (match_dup 2))
1050 addsubpd\t{%2, %0|%0, %2}
1051 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1052 [(set_attr "isa" "noavx,avx")
1053 (set_attr "type" "sseadd")
1054 (set_attr "atom_unit" "complex")
1055 (set_attr "prefix" "orig,vex")
1056 (set_attr "mode" "V2DF")])
1058 (define_insn "avx_addsubv8sf3"
1059 [(set (match_operand:V8SF 0 "register_operand" "=x")
1062 (match_operand:V8SF 1 "register_operand" "x")
1063 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1064 (minus:V8SF (match_dup 1) (match_dup 2))
1067 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1068 [(set_attr "type" "sseadd")
1069 (set_attr "prefix" "vex")
1070 (set_attr "mode" "V8SF")])
1072 (define_insn "sse3_addsubv4sf3"
1073 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1076 (match_operand:V4SF 1 "register_operand" "0,x")
1077 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1078 (minus:V4SF (match_dup 1) (match_dup 2))
1082 addsubps\t{%2, %0|%0, %2}
1083 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1084 [(set_attr "isa" "noavx,avx")
1085 (set_attr "type" "sseadd")
1086 (set_attr "prefix" "orig,vex")
1087 (set_attr "prefix_rep" "1,*")
1088 (set_attr "mode" "V4SF")])
1090 (define_insn "avx_h<plusminus_insn>v4df3"
1091 [(set (match_operand:V4DF 0 "register_operand" "=x")
1096 (match_operand:V4DF 1 "register_operand" "x")
1097 (parallel [(const_int 0)]))
1098 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1100 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1101 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1105 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1106 (parallel [(const_int 0)]))
1107 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1109 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1110 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1112 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1113 [(set_attr "type" "sseadd")
1114 (set_attr "prefix" "vex")
1115 (set_attr "mode" "V4DF")])
1117 (define_insn "sse3_h<plusminus_insn>v2df3"
1118 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1122 (match_operand:V2DF 1 "register_operand" "0,x")
1123 (parallel [(const_int 0)]))
1124 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1127 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1128 (parallel [(const_int 0)]))
1129 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1132 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1133 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1134 [(set_attr "isa" "noavx,avx")
1135 (set_attr "type" "sseadd")
1136 (set_attr "prefix" "orig,vex")
1137 (set_attr "mode" "V2DF")])
1139 (define_insn "avx_h<plusminus_insn>v8sf3"
1140 [(set (match_operand:V8SF 0 "register_operand" "=x")
1146 (match_operand:V8SF 1 "register_operand" "x")
1147 (parallel [(const_int 0)]))
1148 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1150 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1151 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1155 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1156 (parallel [(const_int 0)]))
1157 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1159 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1160 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1164 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1165 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1167 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1168 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1171 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1172 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1174 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1175 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1177 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1178 [(set_attr "type" "sseadd")
1179 (set_attr "prefix" "vex")
1180 (set_attr "mode" "V8SF")])
1182 (define_insn "sse3_h<plusminus_insn>v4sf3"
1183 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1188 (match_operand:V4SF 1 "register_operand" "0,x")
1189 (parallel [(const_int 0)]))
1190 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1192 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1193 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1197 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1198 (parallel [(const_int 0)]))
1199 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1201 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1202 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1205 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1206 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1207 [(set_attr "isa" "noavx,avx")
1208 (set_attr "type" "sseadd")
1209 (set_attr "atom_unit" "complex")
1210 (set_attr "prefix" "orig,vex")
1211 (set_attr "prefix_rep" "1,*")
1212 (set_attr "mode" "V4SF")])
1214 (define_expand "reduc_splus_v4df"
1215 [(match_operand:V4DF 0 "register_operand" "")
1216 (match_operand:V4DF 1 "register_operand" "")]
1219 rtx tmp = gen_reg_rtx (V4DFmode);
1220 rtx tmp2 = gen_reg_rtx (V4DFmode);
1221 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1222 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1223 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1227 (define_expand "reduc_splus_v2df"
1228 [(match_operand:V2DF 0 "register_operand" "")
1229 (match_operand:V2DF 1 "register_operand" "")]
1232 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1236 (define_expand "reduc_splus_v8sf"
1237 [(match_operand:V8SF 0 "register_operand" "")
1238 (match_operand:V8SF 1 "register_operand" "")]
1241 rtx tmp = gen_reg_rtx (V8SFmode);
1242 rtx tmp2 = gen_reg_rtx (V8SFmode);
1243 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1244 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1245 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1246 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1250 (define_expand "reduc_splus_v4sf"
1251 [(match_operand:V4SF 0 "register_operand" "")
1252 (match_operand:V4SF 1 "register_operand" "")]
1257 rtx tmp = gen_reg_rtx (V4SFmode);
1258 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1259 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1262 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1266 ;; Modes handled by reduc_sm{in,ax}* patterns.
1267 (define_mode_iterator REDUC_SMINMAX_MODE
1268 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1269 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1270 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1271 (V4SF "TARGET_SSE")])
1273 (define_expand "reduc_<code>_<mode>"
1274 [(smaxmin:REDUC_SMINMAX_MODE
1275 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1276 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1279 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1283 (define_expand "reduc_<code>_<mode>"
1285 (match_operand:VI_256 0 "register_operand" "")
1286 (match_operand:VI_256 1 "register_operand" ""))]
1289 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1293 (define_expand "reduc_umin_v8hi"
1295 (match_operand:V8HI 0 "register_operand" "")
1296 (match_operand:V8HI 1 "register_operand" ""))]
1299 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1305 ;; Parallel floating point comparisons
1307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1309 (define_insn "avx_cmp<mode>3"
1310 [(set (match_operand:VF 0 "register_operand" "=x")
1312 [(match_operand:VF 1 "register_operand" "x")
1313 (match_operand:VF 2 "nonimmediate_operand" "xm")
1314 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1317 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1318 [(set_attr "type" "ssecmp")
1319 (set_attr "length_immediate" "1")
1320 (set_attr "prefix" "vex")
1321 (set_attr "mode" "<MODE>")])
1323 (define_insn "avx_vmcmp<mode>3"
1324 [(set (match_operand:VF_128 0 "register_operand" "=x")
1327 [(match_operand:VF_128 1 "register_operand" "x")
1328 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1329 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1334 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1335 [(set_attr "type" "ssecmp")
1336 (set_attr "length_immediate" "1")
1337 (set_attr "prefix" "vex")
1338 (set_attr "mode" "<ssescalarmode>")])
1340 (define_insn "*<sse>_maskcmp<mode>3_comm"
1341 [(set (match_operand:VF 0 "register_operand" "=x,x")
1342 (match_operator:VF 3 "sse_comparison_operator"
1343 [(match_operand:VF 1 "register_operand" "%0,x")
1344 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1346 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1348 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1349 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1350 [(set_attr "isa" "noavx,avx")
1351 (set_attr "type" "ssecmp")
1352 (set_attr "length_immediate" "1")
1353 (set_attr "prefix" "orig,vex")
1354 (set_attr "mode" "<MODE>")])
1356 (define_insn "<sse>_maskcmp<mode>3"
1357 [(set (match_operand:VF 0 "register_operand" "=x,x")
1358 (match_operator:VF 3 "sse_comparison_operator"
1359 [(match_operand:VF 1 "register_operand" "0,x")
1360 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1363 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1364 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1365 [(set_attr "isa" "noavx,avx")
1366 (set_attr "type" "ssecmp")
1367 (set_attr "length_immediate" "1")
1368 (set_attr "prefix" "orig,vex")
1369 (set_attr "mode" "<MODE>")])
1371 (define_insn "<sse>_vmmaskcmp<mode>3"
1372 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1374 (match_operator:VF_128 3 "sse_comparison_operator"
1375 [(match_operand:VF_128 1 "register_operand" "0,x")
1376 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1381 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1382 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1383 [(set_attr "isa" "noavx,avx")
1384 (set_attr "type" "ssecmp")
1385 (set_attr "length_immediate" "1,*")
1386 (set_attr "prefix" "orig,vex")
1387 (set_attr "mode" "<ssescalarmode>")])
1389 (define_insn "<sse>_comi"
1390 [(set (reg:CCFP FLAGS_REG)
1393 (match_operand:<ssevecmode> 0 "register_operand" "x")
1394 (parallel [(const_int 0)]))
1396 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1397 (parallel [(const_int 0)]))))]
1398 "SSE_FLOAT_MODE_P (<MODE>mode)"
1399 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1400 [(set_attr "type" "ssecomi")
1401 (set_attr "prefix" "maybe_vex")
1402 (set_attr "prefix_rep" "0")
1403 (set (attr "prefix_data16")
1404 (if_then_else (eq_attr "mode" "DF")
1406 (const_string "0")))
1407 (set_attr "mode" "<MODE>")])
1409 (define_insn "<sse>_ucomi"
1410 [(set (reg:CCFPU FLAGS_REG)
1413 (match_operand:<ssevecmode> 0 "register_operand" "x")
1414 (parallel [(const_int 0)]))
1416 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1417 (parallel [(const_int 0)]))))]
1418 "SSE_FLOAT_MODE_P (<MODE>mode)"
1419 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1420 [(set_attr "type" "ssecomi")
1421 (set_attr "prefix" "maybe_vex")
1422 (set_attr "prefix_rep" "0")
1423 (set (attr "prefix_data16")
1424 (if_then_else (eq_attr "mode" "DF")
1426 (const_string "0")))
1427 (set_attr "mode" "<MODE>")])
1429 (define_expand "vcond<V_256:mode><VF_256:mode>"
1430 [(set (match_operand:V_256 0 "register_operand" "")
1432 (match_operator 3 ""
1433 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1434 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1435 (match_operand:V_256 1 "general_operand" "")
1436 (match_operand:V_256 2 "general_operand" "")))]
1438 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1439 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1441 bool ok = ix86_expand_fp_vcond (operands);
1446 (define_expand "vcond<V_128:mode><VF_128:mode>"
1447 [(set (match_operand:V_128 0 "register_operand" "")
1449 (match_operator 3 ""
1450 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1451 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1452 (match_operand:V_128 1 "general_operand" "")
1453 (match_operand:V_128 2 "general_operand" "")))]
1455 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1456 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1458 bool ok = ix86_expand_fp_vcond (operands);
1463 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1465 ;; Parallel floating point logical operations
1467 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1469 (define_insn "<sse>_andnot<mode>3"
1470 [(set (match_operand:VF 0 "register_operand" "=x,x")
1473 (match_operand:VF 1 "register_operand" "0,x"))
1474 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1477 static char buf[32];
1480 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1482 switch (which_alternative)
1485 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1488 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1494 snprintf (buf, sizeof (buf), insn, suffix);
1497 [(set_attr "isa" "noavx,avx")
1498 (set_attr "type" "sselog")
1499 (set_attr "prefix" "orig,vex")
1500 (set_attr "mode" "<MODE>")])
1502 (define_expand "<code><mode>3"
1503 [(set (match_operand:VF 0 "register_operand" "")
1505 (match_operand:VF 1 "nonimmediate_operand" "")
1506 (match_operand:VF 2 "nonimmediate_operand" "")))]
1508 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1510 (define_insn "*<code><mode>3"
1511 [(set (match_operand:VF 0 "register_operand" "=x,x")
1513 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1514 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1515 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1517 static char buf[32];
1520 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1522 switch (which_alternative)
1525 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1528 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1534 snprintf (buf, sizeof (buf), insn, suffix);
1537 [(set_attr "isa" "noavx,avx")
1538 (set_attr "type" "sselog")
1539 (set_attr "prefix" "orig,vex")
1540 (set_attr "mode" "<MODE>")])
1542 (define_expand "copysign<mode>3"
1545 (not:VF (match_dup 3))
1546 (match_operand:VF 1 "nonimmediate_operand" "")))
1548 (and:VF (match_dup 3)
1549 (match_operand:VF 2 "nonimmediate_operand" "")))
1550 (set (match_operand:VF 0 "register_operand" "")
1551 (ior:VF (match_dup 4) (match_dup 5)))]
1554 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1556 operands[4] = gen_reg_rtx (<MODE>mode);
1557 operands[5] = gen_reg_rtx (<MODE>mode);
1560 ;; Also define scalar versions. These are used for abs, neg, and
1561 ;; conditional move. Using subregs into vector modes causes register
1562 ;; allocation lossage. These patterns do not allow memory operands
1563 ;; because the native instructions read the full 128 bits.
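;; A typical consumer (sketch, assuming -msse2 -mfpmath=sse):
;;
;;	double f (double x) { return __builtin_fabs (x); }
;;
;; which expands to an AND of the value with a constant that clears only
;; the sign bit, using the DFmode logical patterns below instead of a
;; subreg to V2DFmode.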
1565 (define_insn "*andnot<mode>3"
1566 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1569 (match_operand:MODEF 1 "register_operand" "0,x"))
1570 (match_operand:MODEF 2 "register_operand" "x,x")))]
1571 "SSE_FLOAT_MODE_P (<MODE>mode)"
1573 static char buf[32];
1576 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1578 switch (which_alternative)
1581 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1584 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1590 snprintf (buf, sizeof (buf), insn, suffix);
1593 [(set_attr "isa" "noavx,avx")
1594 (set_attr "type" "sselog")
1595 (set_attr "prefix" "orig,vex")
1596 (set_attr "mode" "<ssevecmode>")])
1598 (define_insn "*<code><mode>3"
1599 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1601 (match_operand:MODEF 1 "register_operand" "%0,x")
1602 (match_operand:MODEF 2 "register_operand" "x,x")))]
1603 "SSE_FLOAT_MODE_P (<MODE>mode)"
1605 static char buf[32];
1608 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1610 switch (which_alternative)
1613 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1616 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1622 snprintf (buf, sizeof (buf), insn, suffix);
1625 [(set_attr "isa" "noavx,avx")
1626 (set_attr "type" "sselog")
1627 (set_attr "prefix" "orig,vex")
1628 (set_attr "mode" "<ssevecmode>")])
1630 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1632 ;; FMA4 floating point multiply/accumulate instructions. This
1633 ;; includes the scalar version of the instructions as well as the
1636 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1638 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1639 ;; combine to generate a multiply/add with two memory references. We then
1640 ;; split this insn into loading up the destination register with one of the
1641 ;; memory operations. If we don't manage to split the insn, reload will
1642 ;; generate the appropriate moves. The reason this is needed, is that combine
1643 ;; has already folded one of the memory references into both the multiply and
1644 ;; add insns, and it can't generate a new pseudo. I.e.:
1645 ;; (set (reg1) (mem (addr1)))
1646 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1647 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1649 ;; ??? This is historic, pre-dating the gimple fma transformation.
1650 ;; We could now properly represent that only one memory operand is
1651 ;; allowed and not be penalized during optimization.
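;; A small example (sketch) of source code that presents combine with the
;; three-insn sequence above, two memory references still unfolded:
;;
;;	void f (double *d, const double *a, const double *b, const double *c)
;;	{
;;	  *d = (*a * *b) + *c;
;;	}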
1653 ;; Intrinsic FMA operations.
1655 ;; The standard name for fma is only available with SSE math enabled.
1656 (define_expand "fma<mode>4"
1657 [(set (match_operand:FMAMODE 0 "register_operand")
1659 (match_operand:FMAMODE 1 "nonimmediate_operand")
1660 (match_operand:FMAMODE 2 "nonimmediate_operand")
1661 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1662 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1664 (define_expand "fms<mode>4"
1665 [(set (match_operand:FMAMODE 0 "register_operand")
1667 (match_operand:FMAMODE 1 "nonimmediate_operand")
1668 (match_operand:FMAMODE 2 "nonimmediate_operand")
1669 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1670 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1672 (define_expand "fnma<mode>4"
1673 [(set (match_operand:FMAMODE 0 "register_operand")
1675 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1676 (match_operand:FMAMODE 2 "nonimmediate_operand")
1677 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1678 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1680 (define_expand "fnms<mode>4"
1681 [(set (match_operand:FMAMODE 0 "register_operand")
1683 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1684 (match_operand:FMAMODE 2 "nonimmediate_operand")
1685 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1686 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1688 ;; The builtin for fma4intrin.h is not constrained by SSE math being enabled.
1689 (define_expand "fma4i_fmadd_<mode>"
1690 [(set (match_operand:FMAMODE 0 "register_operand")
1692 (match_operand:FMAMODE 1 "nonimmediate_operand")
1693 (match_operand:FMAMODE 2 "nonimmediate_operand")
1694 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1695 "TARGET_FMA || TARGET_FMA4")
1697 (define_insn "*fma4i_fmadd_<mode>"
1698 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1700 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1701 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1702 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1704 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1705 [(set_attr "type" "ssemuladd")
1706 (set_attr "mode" "<MODE>")])
1708 (define_insn "*fma4i_fmsub_<mode>"
1709 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1711 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1712 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1714 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1716 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1717 [(set_attr "type" "ssemuladd")
1718 (set_attr "mode" "<MODE>")])
1720 (define_insn "*fma4i_fnmadd_<mode>"
1721 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1724 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1725 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1726 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1728 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1729 [(set_attr "type" "ssemuladd")
1730 (set_attr "mode" "<MODE>")])
1732 (define_insn "*fma4i_fnmsub_<mode>"
1733 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1736 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1737 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1739 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1741 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1742 [(set_attr "type" "ssemuladd")
1743 (set_attr "mode" "<MODE>")])
1745 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1746 ;; entire destination register, with the high-order elements zeroed.
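;; E.g. for V4SF operands a, b and c the result is modeled as
;;	{ a[0] * b[0] + c[0], 0, 0, 0 }
;; i.e. a vec_merge of the fma result with a zero vector in the patterns
;; below.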
1748 (define_expand "fma4i_vmfmadd_<mode>"
1749 [(set (match_operand:VF_128 0 "register_operand")
1752 (match_operand:VF_128 1 "nonimmediate_operand")
1753 (match_operand:VF_128 2 "nonimmediate_operand")
1754 (match_operand:VF_128 3 "nonimmediate_operand"))
1759 operands[4] = CONST0_RTX (<MODE>mode);
1762 (define_expand "fmai_vmfmadd_<mode>"
1763 [(set (match_operand:VF_128 0 "register_operand")
1766 (match_operand:VF_128 1 "nonimmediate_operand")
1767 (match_operand:VF_128 2 "nonimmediate_operand")
1768 (match_operand:VF_128 3 "nonimmediate_operand"))
1773 (define_insn "*fmai_fmadd_<mode>"
1774 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1777 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1778 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1779 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1784 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1785 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1786 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1787 [(set_attr "type" "ssemuladd")
1788 (set_attr "mode" "<MODE>")])
1790 (define_insn "*fmai_fmsub_<mode>"
1791 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1794 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1795 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1797 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1802 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1803 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1804 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1805 [(set_attr "type" "ssemuladd")
1806 (set_attr "mode" "<MODE>")])
1808 (define_insn "*fmai_fnmadd_<mode>"
1809 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1813 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1814 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1815 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1820 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1821 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1822 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1823 [(set_attr "type" "ssemuladd")
1824 (set_attr "mode" "<MODE>")])
1826 (define_insn "*fmai_fnmsub_<mode>"
1827 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1831 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1832 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1834 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1839 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1840 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1841 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1842 [(set_attr "type" "ssemuladd")
1843 (set_attr "mode" "<MODE>")])
1845 (define_insn "*fma4i_vmfmadd_<mode>"
1846 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1849 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1850 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1851 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1852 (match_operand:VF_128 4 "const0_operand" "")
1855 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1856 [(set_attr "type" "ssemuladd")
1857 (set_attr "mode" "<MODE>")])
1859 (define_insn "*fma4i_vmfmsub_<mode>"
1860 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1863 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1864 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1866 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1867 (match_operand:VF_128 4 "const0_operand" "")
1870 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1871 [(set_attr "type" "ssemuladd")
1872 (set_attr "mode" "<MODE>")])
1874 (define_insn "*fma4i_vmfnmadd_<mode>"
1875 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1879 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1880 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1881 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1882 (match_operand:VF_128 4 "const0_operand" "")
1885 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1886 [(set_attr "type" "ssemuladd")
1887 (set_attr "mode" "<MODE>")])
1889 (define_insn "*fma4i_vmfnmsub_<mode>"
1890 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1894 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1895 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1897 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1898 (match_operand:VF_128 4 "const0_operand" "")
1901 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1902 [(set_attr "type" "ssemuladd")
1903 (set_attr "mode" "<MODE>")])
1905 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1907 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1911 ;; It would be possible to represent these without the UNSPEC as
1912 ;;
1913 ;; (vec_merge
1914 ;;   (fma op1 op2 op3)
1915 ;;   (fma op1 op2 (neg op3))
1916 ;;   (merge-const))
1917 ;;
1918 ;; But this doesn't seem useful in practice.
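;; Concretely, the fmaddsub patterns alternate: even-numbered elements are
;; multiply-subtract and odd-numbered elements are multiply-add, e.g. for
;; V2DF the result is
;;	{ a[0] * b[0] - c[0], a[1] * b[1] + c[1] }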
1920 (define_expand "fmaddsub_<mode>"
1921 [(set (match_operand:VF 0 "register_operand")
1923 [(match_operand:VF 1 "nonimmediate_operand")
1924 (match_operand:VF 2 "nonimmediate_operand")
1925 (match_operand:VF 3 "nonimmediate_operand")]
1927 "TARGET_FMA || TARGET_FMA4")
1929 (define_insn "*fma4_fmaddsub_<mode>"
1930 [(set (match_operand:VF 0 "register_operand" "=x,x")
1932 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1933 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1934 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1937 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1938 [(set_attr "type" "ssemuladd")
1939 (set_attr "mode" "<MODE>")])
1941 (define_insn "*fma4_fmsubadd_<mode>"
1942 [(set (match_operand:VF 0 "register_operand" "=x,x")
1944 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1945 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1947 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1950 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1951 [(set_attr "type" "ssemuladd")
1952 (set_attr "mode" "<MODE>")])
1954 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1956 ;; FMA3 floating point multiply/accumulate instructions.
1958 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
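;; FMA3 instructions are destructive: each of the three alternatives below
;; ties operand 1, 2 or 3 to the destination and selects the matching
;; 132/213/231 form, so any one of the three sources may double as the
;; result and the memory operand can appear in any position.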
1960 (define_insn "*fma_fmadd_<mode>"
1961 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1963 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1964 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1965 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1968 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1969 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1970 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1971 [(set_attr "type" "ssemuladd")
1972 (set_attr "mode" "<MODE>")])
1974 (define_insn "*fma_fmsub_<mode>"
1975 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1977 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1978 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1980 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1983 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1984 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1985 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1986 [(set_attr "type" "ssemuladd")
1987 (set_attr "mode" "<MODE>")])
1989 (define_insn "*fma_fnmadd_<mode>"
1990 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1993 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1994 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1995 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1998 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1999 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2000 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2001 [(set_attr "type" "ssemuladd")
2002 (set_attr "mode" "<MODE>")])
2004 (define_insn "*fma_fnmsub_<mode>"
2005 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2008 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2009 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2011 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2014 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2015 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2016 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2017 [(set_attr "type" "ssemuladd")
2018 (set_attr "mode" "<MODE>")])
2020 (define_insn "*fma_fmaddsub_<mode>"
2021 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2023 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2024 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2025 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2029 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2030 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2031 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2032 [(set_attr "type" "ssemuladd")
2033 (set_attr "mode" "<MODE>")])
2035 (define_insn "*fma_fmsubadd_<mode>"
2036 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2038 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2039 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2041 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2045 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2046 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2047 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2048 [(set_attr "type" "ssemuladd")
2049 (set_attr "mode" "<MODE>")])
2051 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2053 ;; Parallel single-precision floating point conversion operations
2055 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
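;; Patterns using UNSPEC_FIX_NOTRUNC model the cvt* instructions, which
;; round according to the current MXCSR rounding mode; plain (fix:...) RTL
;; models the cvtt* instructions, which always truncate toward zero.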
2057 (define_insn "sse_cvtpi2ps"
2058 [(set (match_operand:V4SF 0 "register_operand" "=x")
2061 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2062 (match_operand:V4SF 1 "register_operand" "0")
2065 "cvtpi2ps\t{%2, %0|%0, %2}"
2066 [(set_attr "type" "ssecvt")
2067 (set_attr "mode" "V4SF")])
2069 (define_insn "sse_cvtps2pi"
2070 [(set (match_operand:V2SI 0 "register_operand" "=y")
2072 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2074 (parallel [(const_int 0) (const_int 1)])))]
2076 "cvtps2pi\t{%1, %0|%0, %1}"
2077 [(set_attr "type" "ssecvt")
2078 (set_attr "unit" "mmx")
2079 (set_attr "mode" "DI")])
2081 (define_insn "sse_cvttps2pi"
2082 [(set (match_operand:V2SI 0 "register_operand" "=y")
2084 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2085 (parallel [(const_int 0) (const_int 1)])))]
2087 "cvttps2pi\t{%1, %0|%0, %1}"
2088 [(set_attr "type" "ssecvt")
2089 (set_attr "unit" "mmx")
2090 (set_attr "prefix_rep" "0")
2091 (set_attr "mode" "SF")])
2093 (define_insn "sse_cvtsi2ss"
2094 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2097 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2098 (match_operand:V4SF 1 "register_operand" "0,0,x")
2102 cvtsi2ss\t{%2, %0|%0, %2}
2103 cvtsi2ss\t{%2, %0|%0, %2}
2104 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2105 [(set_attr "isa" "noavx,noavx,avx")
2106 (set_attr "type" "sseicvt")
2107 (set_attr "athlon_decode" "vector,double,*")
2108 (set_attr "amdfam10_decode" "vector,double,*")
2109 (set_attr "bdver1_decode" "double,direct,*")
2110 (set_attr "prefix" "orig,orig,vex")
2111 (set_attr "mode" "SF")])
2113 (define_insn "sse_cvtsi2ssq"
2114 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2117 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2118 (match_operand:V4SF 1 "register_operand" "0,0,x")
2120 "TARGET_SSE && TARGET_64BIT"
2122 cvtsi2ssq\t{%2, %0|%0, %2}
2123 cvtsi2ssq\t{%2, %0|%0, %2}
2124 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2125 [(set_attr "isa" "noavx,noavx,avx")
2126 (set_attr "type" "sseicvt")
2127 (set_attr "athlon_decode" "vector,double,*")
2128 (set_attr "amdfam10_decode" "vector,double,*")
2129 (set_attr "bdver1_decode" "double,direct,*")
2130 (set_attr "length_vex" "*,*,4")
2131 (set_attr "prefix_rex" "1,1,*")
2132 (set_attr "prefix" "orig,orig,vex")
2133 (set_attr "mode" "SF")])
2135 (define_insn "sse_cvtss2si"
2136 [(set (match_operand:SI 0 "register_operand" "=r,r")
2139 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2140 (parallel [(const_int 0)]))]
2141 UNSPEC_FIX_NOTRUNC))]
2143 "%vcvtss2si\t{%1, %0|%0, %1}"
2144 [(set_attr "type" "sseicvt")
2145 (set_attr "athlon_decode" "double,vector")
2146 (set_attr "bdver1_decode" "double,double")
2147 (set_attr "prefix_rep" "1")
2148 (set_attr "prefix" "maybe_vex")
2149 (set_attr "mode" "SI")])
2151 (define_insn "sse_cvtss2si_2"
2152 [(set (match_operand:SI 0 "register_operand" "=r,r")
2153 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2154 UNSPEC_FIX_NOTRUNC))]
2156 "%vcvtss2si\t{%1, %0|%0, %1}"
2157 [(set_attr "type" "sseicvt")
2158 (set_attr "athlon_decode" "double,vector")
2159 (set_attr "amdfam10_decode" "double,double")
2160 (set_attr "bdver1_decode" "double,double")
2161 (set_attr "prefix_rep" "1")
2162 (set_attr "prefix" "maybe_vex")
2163 (set_attr "mode" "SI")])
2165 (define_insn "sse_cvtss2siq"
2166 [(set (match_operand:DI 0 "register_operand" "=r,r")
2169 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2170 (parallel [(const_int 0)]))]
2171 UNSPEC_FIX_NOTRUNC))]
2172 "TARGET_SSE && TARGET_64BIT"
2173 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2174 [(set_attr "type" "sseicvt")
2175 (set_attr "athlon_decode" "double,vector")
2176 (set_attr "bdver1_decode" "double,double")
2177 (set_attr "prefix_rep" "1")
2178 (set_attr "prefix" "maybe_vex")
2179 (set_attr "mode" "DI")])
2181 (define_insn "sse_cvtss2siq_2"
2182 [(set (match_operand:DI 0 "register_operand" "=r,r")
2183 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2184 UNSPEC_FIX_NOTRUNC))]
2185 "TARGET_SSE && TARGET_64BIT"
2186 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2187 [(set_attr "type" "sseicvt")
2188 (set_attr "athlon_decode" "double,vector")
2189 (set_attr "amdfam10_decode" "double,double")
2190 (set_attr "bdver1_decode" "double,double")
2191 (set_attr "prefix_rep" "1")
2192 (set_attr "prefix" "maybe_vex")
2193 (set_attr "mode" "DI")])
2195 (define_insn "sse_cvttss2si"
2196 [(set (match_operand:SI 0 "register_operand" "=r,r")
2199 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2200 (parallel [(const_int 0)]))))]
2202 "%vcvttss2si\t{%1, %0|%0, %1}"
2203 [(set_attr "type" "sseicvt")
2204 (set_attr "athlon_decode" "double,vector")
2205 (set_attr "amdfam10_decode" "double,double")
2206 (set_attr "bdver1_decode" "double,double")
2207 (set_attr "prefix_rep" "1")
2208 (set_attr "prefix" "maybe_vex")
2209 (set_attr "mode" "SI")])
2211 (define_insn "sse_cvttss2siq"
2212 [(set (match_operand:DI 0 "register_operand" "=r,r")
2215 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2216 (parallel [(const_int 0)]))))]
2217 "TARGET_SSE && TARGET_64BIT"
2218 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2219 [(set_attr "type" "sseicvt")
2220 (set_attr "athlon_decode" "double,vector")
2221 (set_attr "amdfam10_decode" "double,double")
2222 (set_attr "bdver1_decode" "double,double")
2223 (set_attr "prefix_rep" "1")
2224 (set_attr "prefix" "maybe_vex")
2225 (set_attr "mode" "DI")])
2227 (define_insn "avx_cvtdq2ps256"
2228 [(set (match_operand:V8SF 0 "register_operand" "=x")
2229 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2231 "vcvtdq2ps\t{%1, %0|%0, %1}"
2232 [(set_attr "type" "ssecvt")
2233 (set_attr "prefix" "vex")
2234 (set_attr "mode" "V8SF")])
2236 (define_insn "sse2_cvtdq2ps"
2237 [(set (match_operand:V4SF 0 "register_operand" "=x")
2238 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2240 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2241 [(set_attr "type" "ssecvt")
2242 (set_attr "prefix" "maybe_vex")
2243 (set_attr "mode" "V4SF")])
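;; There is no unsigned variant of cvtdq2ps, so sse2_cvtudq2ps converts the
;; input as signed and then adds 2^32 to every element whose signed result
;; came out negative (i.e. whose original value had bit 31 set), using a
;; (lt ...) compare mask ANDed with a constant 2^32 vector.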
2245 (define_expand "sse2_cvtudq2ps"
2247 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2249 (lt:V4SF (match_dup 5) (match_dup 3)))
2251 (and:V4SF (match_dup 6) (match_dup 4)))
2252 (set (match_operand:V4SF 0 "register_operand" "")
2253 (plus:V4SF (match_dup 5) (match_dup 7)))]
2256 REAL_VALUE_TYPE TWO32r;
2260 real_ldexp (&TWO32r, &dconst1, 32);
2261 x = const_double_from_real_value (TWO32r, SFmode);
2263 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2264 operands[4] = force_reg (V4SFmode,
2265 ix86_build_const_vector (V4SFmode, 1, x));
2267 for (i = 5; i < 8; i++)
2268 operands[i] = gen_reg_rtx (V4SFmode);
2271 (define_insn "avx_cvtps2dq256"
2272 [(set (match_operand:V8SI 0 "register_operand" "=x")
2273 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2274 UNSPEC_FIX_NOTRUNC))]
2276 "vcvtps2dq\t{%1, %0|%0, %1}"
2277 [(set_attr "type" "ssecvt")
2278 (set_attr "prefix" "vex")
2279 (set_attr "mode" "OI")])
2281 (define_insn "sse2_cvtps2dq"
2282 [(set (match_operand:V4SI 0 "register_operand" "=x")
2283 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2284 UNSPEC_FIX_NOTRUNC))]
2286 "%vcvtps2dq\t{%1, %0|%0, %1}"
2287 [(set_attr "type" "ssecvt")
2288 (set (attr "prefix_data16")
2290 (match_test "TARGET_AVX")
2292 (const_string "1")))
2293 (set_attr "prefix" "maybe_vex")
2294 (set_attr "mode" "TI")])
2296 (define_insn "avx_cvttps2dq256"
2297 [(set (match_operand:V8SI 0 "register_operand" "=x")
2298 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2300 "vcvttps2dq\t{%1, %0|%0, %1}"
2301 [(set_attr "type" "ssecvt")
2302 (set_attr "prefix" "vex")
2303 (set_attr "mode" "OI")])
2305 (define_insn "sse2_cvttps2dq"
2306 [(set (match_operand:V4SI 0 "register_operand" "=x")
2307 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2309 "%vcvttps2dq\t{%1, %0|%0, %1}"
2310 [(set_attr "type" "ssecvt")
2311 (set (attr "prefix_rep")
2313 (match_test "TARGET_AVX")
2315 (const_string "1")))
2316 (set (attr "prefix_data16")
2318 (match_test "TARGET_AVX")
2320 (const_string "0")))
2321 (set_attr "prefix_data16" "0")
2322 (set_attr "prefix" "maybe_vex")
2323 (set_attr "mode" "TI")])
2325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2327 ;; Parallel double-precision floating point conversion operations
2329 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2331 (define_insn "sse2_cvtpi2pd"
2332 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2333 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2335 "cvtpi2pd\t{%1, %0|%0, %1}"
2336 [(set_attr "type" "ssecvt")
2337 (set_attr "unit" "mmx,*")
2338 (set_attr "prefix_data16" "1,*")
2339 (set_attr "mode" "V2DF")])
2341 (define_insn "sse2_cvtpd2pi"
2342 [(set (match_operand:V2SI 0 "register_operand" "=y")
2343 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2344 UNSPEC_FIX_NOTRUNC))]
2346 "cvtpd2pi\t{%1, %0|%0, %1}"
2347 [(set_attr "type" "ssecvt")
2348 (set_attr "unit" "mmx")
2349 (set_attr "bdver1_decode" "double")
2350 (set_attr "prefix_data16" "1")
2351 (set_attr "mode" "DI")])
2353 (define_insn "sse2_cvttpd2pi"
2354 [(set (match_operand:V2SI 0 "register_operand" "=y")
2355 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2357 "cvttpd2pi\t{%1, %0|%0, %1}"
2358 [(set_attr "type" "ssecvt")
2359 (set_attr "unit" "mmx")
2360 (set_attr "bdver1_decode" "double")
2361 (set_attr "prefix_data16" "1")
2362 (set_attr "mode" "TI")])
2364 (define_insn "sse2_cvtsi2sd"
2365 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2368 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2369 (match_operand:V2DF 1 "register_operand" "0,0,x")
2373 cvtsi2sd\t{%2, %0|%0, %2}
2374 cvtsi2sd\t{%2, %0|%0, %2}
2375 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2376 [(set_attr "isa" "noavx,noavx,avx")
2377 (set_attr "type" "sseicvt")
2378 (set_attr "athlon_decode" "double,direct,*")
2379 (set_attr "amdfam10_decode" "vector,double,*")
2380 (set_attr "bdver1_decode" "double,direct,*")
2381 (set_attr "prefix" "orig,orig,vex")
2382 (set_attr "mode" "DF")])
2384 (define_insn "sse2_cvtsi2sdq"
2385 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2388 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2389 (match_operand:V2DF 1 "register_operand" "0,0,x")
2391 "TARGET_SSE2 && TARGET_64BIT"
2393 cvtsi2sdq\t{%2, %0|%0, %2}
2394 cvtsi2sdq\t{%2, %0|%0, %2}
2395 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2396 [(set_attr "isa" "noavx,noavx,avx")
2397 (set_attr "type" "sseicvt")
2398 (set_attr "athlon_decode" "double,direct,*")
2399 (set_attr "amdfam10_decode" "vector,double,*")
2400 (set_attr "bdver1_decode" "double,direct,*")
2401 (set_attr "length_vex" "*,*,4")
2402 (set_attr "prefix_rex" "1,1,*")
2403 (set_attr "prefix" "orig,orig,vex")
2404 (set_attr "mode" "DF")])
2406 (define_insn "sse2_cvtsd2si"
2407 [(set (match_operand:SI 0 "register_operand" "=r,r")
2410 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2411 (parallel [(const_int 0)]))]
2412 UNSPEC_FIX_NOTRUNC))]
2414 "%vcvtsd2si\t{%1, %0|%0, %1}"
2415 [(set_attr "type" "sseicvt")
2416 (set_attr "athlon_decode" "double,vector")
2417 (set_attr "bdver1_decode" "double,double")
2418 (set_attr "prefix_rep" "1")
2419 (set_attr "prefix" "maybe_vex")
2420 (set_attr "mode" "SI")])
2422 (define_insn "sse2_cvtsd2si_2"
2423 [(set (match_operand:SI 0 "register_operand" "=r,r")
2424 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2425 UNSPEC_FIX_NOTRUNC))]
2427 "%vcvtsd2si\t{%1, %0|%0, %1}"
2428 [(set_attr "type" "sseicvt")
2429 (set_attr "athlon_decode" "double,vector")
2430 (set_attr "amdfam10_decode" "double,double")
2431 (set_attr "bdver1_decode" "double,double")
2432 (set_attr "prefix_rep" "1")
2433 (set_attr "prefix" "maybe_vex")
2434 (set_attr "mode" "SI")])
2436 (define_insn "sse2_cvtsd2siq"
2437 [(set (match_operand:DI 0 "register_operand" "=r,r")
2440 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2441 (parallel [(const_int 0)]))]
2442 UNSPEC_FIX_NOTRUNC))]
2443 "TARGET_SSE2 && TARGET_64BIT"
2444 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2445 [(set_attr "type" "sseicvt")
2446 (set_attr "athlon_decode" "double,vector")
2447 (set_attr "bdver1_decode" "double,double")
2448 (set_attr "prefix_rep" "1")
2449 (set_attr "prefix" "maybe_vex")
2450 (set_attr "mode" "DI")])
2452 (define_insn "sse2_cvtsd2siq_2"
2453 [(set (match_operand:DI 0 "register_operand" "=r,r")
2454 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2455 UNSPEC_FIX_NOTRUNC))]
2456 "TARGET_SSE2 && TARGET_64BIT"
2457 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2458 [(set_attr "type" "sseicvt")
2459 (set_attr "athlon_decode" "double,vector")
2460 (set_attr "amdfam10_decode" "double,double")
2461 (set_attr "bdver1_decode" "double,double")
2462 (set_attr "prefix_rep" "1")
2463 (set_attr "prefix" "maybe_vex")
2464 (set_attr "mode" "DI")])
2466 (define_insn "sse2_cvttsd2si"
2467 [(set (match_operand:SI 0 "register_operand" "=r,r")
2470 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2471 (parallel [(const_int 0)]))))]
2473 "%vcvttsd2si\t{%1, %0|%0, %1}"
2474 [(set_attr "type" "sseicvt")
2475 (set_attr "athlon_decode" "double,vector")
2476 (set_attr "amdfam10_decode" "double,double")
2477 (set_attr "bdver1_decode" "double,double")
2478 (set_attr "prefix_rep" "1")
2479 (set_attr "prefix" "maybe_vex")
2480 (set_attr "mode" "SI")])
2482 (define_insn "sse2_cvttsd2siq"
2483 [(set (match_operand:DI 0 "register_operand" "=r,r")
2486 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2487 (parallel [(const_int 0)]))))]
2488 "TARGET_SSE2 && TARGET_64BIT"
2489 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2490 [(set_attr "type" "sseicvt")
2491 (set_attr "athlon_decode" "double,vector")
2492 (set_attr "amdfam10_decode" "double,double")
2493 (set_attr "bdver1_decode" "double,double")
2494 (set_attr "prefix_rep" "1")
2495 (set_attr "prefix" "maybe_vex")
2496 (set_attr "mode" "DI")])
2498 (define_insn "avx_cvtdq2pd256"
2499 [(set (match_operand:V4DF 0 "register_operand" "=x")
2500 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2502 "vcvtdq2pd\t{%1, %0|%0, %1}"
2503 [(set_attr "type" "ssecvt")
2504 (set_attr "prefix" "vex")
2505 (set_attr "mode" "V4DF")])
2507 (define_insn "avx_cvtdq2pd256_2"
2508 [(set (match_operand:V4DF 0 "register_operand" "=x")
2511 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2512 (parallel [(const_int 0) (const_int 1)
2513 (const_int 2) (const_int 3)]))))]
2515 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2516 [(set_attr "type" "ssecvt")
2517 (set_attr "prefix" "vex")
2518 (set_attr "mode" "V4DF")])
2520 (define_insn "sse2_cvtdq2pd"
2521 [(set (match_operand:V2DF 0 "register_operand" "=x")
2524 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2525 (parallel [(const_int 0) (const_int 1)]))))]
2527 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2528 [(set_attr "type" "ssecvt")
2529 (set_attr "prefix" "maybe_vex")
2530 (set_attr "mode" "V2DF")])
2532 (define_insn "avx_cvtpd2dq256"
2533 [(set (match_operand:V4SI 0 "register_operand" "=x")
2534 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2535 UNSPEC_FIX_NOTRUNC))]
2537 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2538 [(set_attr "type" "ssecvt")
2539 (set_attr "prefix" "vex")
2540 (set_attr "mode" "OI")])
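;; The *_2 variants below express the result in the full-width mode: the
;; converted values occupy the low half and the high half is an explicit
;; zero vector, matching the zeroing the VEX-encoded instruction performs
;; on the upper part of the destination register.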
2542 (define_expand "avx_cvtpd2dq256_2"
2543 [(set (match_operand:V8SI 0 "register_operand" "")
2545 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2549 "operands[2] = CONST0_RTX (V4SImode);")
2551 (define_insn "*avx_cvtpd2dq256_2"
2552 [(set (match_operand:V8SI 0 "register_operand" "=x")
2554 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2556 (match_operand:V4SI 2 "const0_operand" "")))]
2558 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2559 [(set_attr "type" "ssecvt")
2560 (set_attr "prefix" "vex")
2561 (set_attr "mode" "OI")])
2563 (define_expand "sse2_cvtpd2dq"
2564 [(set (match_operand:V4SI 0 "register_operand" "")
2566 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2570 "operands[2] = CONST0_RTX (V2SImode);")
2572 (define_insn "*sse2_cvtpd2dq"
2573 [(set (match_operand:V4SI 0 "register_operand" "=x")
2575 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2577 (match_operand:V2SI 2 "const0_operand" "")))]
2581 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2583 return "cvtpd2dq\t{%1, %0|%0, %1}";
2585 [(set_attr "type" "ssecvt")
2586 (set_attr "prefix_rep" "1")
2587 (set_attr "prefix_data16" "0")
2588 (set_attr "prefix" "maybe_vex")
2589 (set_attr "mode" "TI")
2590 (set_attr "amdfam10_decode" "double")
2591 (set_attr "athlon_decode" "vector")
2592 (set_attr "bdver1_decode" "double")])
2594 (define_insn "avx_cvttpd2dq256"
2595 [(set (match_operand:V4SI 0 "register_operand" "=x")
2596 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2598 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2599 [(set_attr "type" "ssecvt")
2600 (set_attr "prefix" "vex")
2601 (set_attr "mode" "OI")])
2603 (define_expand "avx_cvttpd2dq256_2"
2604 [(set (match_operand:V8SI 0 "register_operand" "")
2606 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2609 "operands[2] = CONST0_RTX (V4SImode);")
2611 (define_insn "*avx_cvttpd2dq256_2"
2612 [(set (match_operand:V8SI 0 "register_operand" "=x")
2614 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2615 (match_operand:V4SI 2 "const0_operand" "")))]
2617 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2618 [(set_attr "type" "ssecvt")
2619 (set_attr "prefix" "vex")
2620 (set_attr "mode" "OI")])
2622 (define_expand "sse2_cvttpd2dq"
2623 [(set (match_operand:V4SI 0 "register_operand" "")
2625 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2628 "operands[2] = CONST0_RTX (V2SImode);")
2630 (define_insn "*sse2_cvttpd2dq"
2631 [(set (match_operand:V4SI 0 "register_operand" "=x")
2633 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2634 (match_operand:V2SI 2 "const0_operand" "")))]
2638 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2640 return "cvttpd2dq\t{%1, %0|%0, %1}";
2642 [(set_attr "type" "ssecvt")
2643 (set_attr "amdfam10_decode" "double")
2644 (set_attr "athlon_decode" "vector")
2645 (set_attr "bdver1_decode" "double")
2646 (set_attr "prefix" "maybe_vex")
2647 (set_attr "mode" "TI")])
2649 (define_insn "sse2_cvtsd2ss"
2650 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2653 (float_truncate:V2SF
2654 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2655 (match_operand:V4SF 1 "register_operand" "0,0,x")
2659 cvtsd2ss\t{%2, %0|%0, %2}
2660 cvtsd2ss\t{%2, %0|%0, %2}
2661 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2662 [(set_attr "isa" "noavx,noavx,avx")
2663 (set_attr "type" "ssecvt")
2664 (set_attr "athlon_decode" "vector,double,*")
2665 (set_attr "amdfam10_decode" "vector,double,*")
2666 (set_attr "bdver1_decode" "direct,direct,*")
2667 (set_attr "prefix" "orig,orig,vex")
2668 (set_attr "mode" "SF")])
2670 (define_insn "sse2_cvtss2sd"
2671 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2675 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2676 (parallel [(const_int 0) (const_int 1)])))
2677 (match_operand:V2DF 1 "register_operand" "0,0,x")
2681 cvtss2sd\t{%2, %0|%0, %2}
2682 cvtss2sd\t{%2, %0|%0, %2}
2683 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2684 [(set_attr "isa" "noavx,noavx,avx")
2685 (set_attr "type" "ssecvt")
2686 (set_attr "amdfam10_decode" "vector,double,*")
2687 (set_attr "athlon_decode" "direct,direct,*")
2688 (set_attr "bdver1_decode" "direct,direct,*")
2689 (set_attr "prefix" "orig,orig,vex")
2690 (set_attr "mode" "DF")])
2692 (define_insn "avx_cvtpd2ps256"
2693 [(set (match_operand:V4SF 0 "register_operand" "=x")
2694 (float_truncate:V4SF
2695 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2697 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2698 [(set_attr "type" "ssecvt")
2699 (set_attr "prefix" "vex")
2700 (set_attr "mode" "V4SF")])
2702 (define_expand "sse2_cvtpd2ps"
2703 [(set (match_operand:V4SF 0 "register_operand" "")
2705 (float_truncate:V2SF
2706 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2709 "operands[2] = CONST0_RTX (V2SFmode);")
2711 (define_insn "*sse2_cvtpd2ps"
2712 [(set (match_operand:V4SF 0 "register_operand" "=x")
2714 (float_truncate:V2SF
2715 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2716 (match_operand:V2SF 2 "const0_operand" "")))]
2720 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2722 return "cvtpd2ps\t{%1, %0|%0, %1}";
2724 [(set_attr "type" "ssecvt")
2725 (set_attr "amdfam10_decode" "double")
2726 (set_attr "athlon_decode" "vector")
2727 (set_attr "bdver1_decode" "double")
2728 (set_attr "prefix_data16" "1")
2729 (set_attr "prefix" "maybe_vex")
2730 (set_attr "mode" "V4SF")])
2732 (define_insn "avx_cvtps2pd256"
2733 [(set (match_operand:V4DF 0 "register_operand" "=x")
2735 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2737 "vcvtps2pd\t{%1, %0|%0, %1}"
2738 [(set_attr "type" "ssecvt")
2739 (set_attr "prefix" "vex")
2740 (set_attr "mode" "V4DF")])
2742 (define_insn "*avx_cvtps2pd256_2"
2743 [(set (match_operand:V4DF 0 "register_operand" "=x")
2746 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2747 (parallel [(const_int 0) (const_int 1)
2748 (const_int 2) (const_int 3)]))))]
2750 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2751 [(set_attr "type" "ssecvt")
2752 (set_attr "prefix" "vex")
2753 (set_attr "mode" "V4DF")])
2755 (define_insn "sse2_cvtps2pd"
2756 [(set (match_operand:V2DF 0 "register_operand" "=x")
2759 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2760 (parallel [(const_int 0) (const_int 1)]))))]
2762 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2763 [(set_attr "type" "ssecvt")
2764 (set_attr "amdfam10_decode" "direct")
2765 (set_attr "athlon_decode" "double")
2766 (set_attr "bdver1_decode" "double")
2767 (set_attr "prefix_data16" "0")
2768 (set_attr "prefix" "maybe_vex")
2769 (set_attr "mode" "V2DF")])
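;; cvtps2pd only reads the low half of its input, so the vec_unpacks_hi
;; expanders below first move the high SF elements into the low positions
;; of a scratch register and then widen that.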
2771 (define_expand "vec_unpacks_hi_v4sf"
2776 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2777 (parallel [(const_int 6) (const_int 7)
2778 (const_int 2) (const_int 3)])))
2779 (set (match_operand:V2DF 0 "register_operand" "")
2783 (parallel [(const_int 0) (const_int 1)]))))]
2785 "operands[2] = gen_reg_rtx (V4SFmode);")
2787 (define_expand "vec_unpacks_hi_v8sf"
2790 (match_operand:V8SF 1 "nonimmediate_operand" "")
2791 (parallel [(const_int 4) (const_int 5)
2792 (const_int 6) (const_int 7)])))
2793 (set (match_operand:V4DF 0 "register_operand" "")
2797 "operands[2] = gen_reg_rtx (V4SFmode);")
2799 (define_expand "vec_unpacks_lo_v4sf"
2800 [(set (match_operand:V2DF 0 "register_operand" "")
2803 (match_operand:V4SF 1 "nonimmediate_operand" "")
2804 (parallel [(const_int 0) (const_int 1)]))))]
2807 (define_expand "vec_unpacks_lo_v8sf"
2808 [(set (match_operand:V4DF 0 "register_operand" "")
2811 (match_operand:V8SF 1 "nonimmediate_operand" "")
2812 (parallel [(const_int 0) (const_int 1)
2813 (const_int 2) (const_int 3)]))))]
2816 (define_mode_attr sseunpackfltmode
2817 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
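;; HImode elements have no direct int->float conversion, so these expanders
;; first widen the elements with the corresponding vec_unpacks/vec_unpacku
;; pattern and then convert the widened vector.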
2819 (define_expand "vec_unpacks_float_hi_<mode>"
2820 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2821 (match_operand:VI2_AVX2 1 "register_operand" "")]
2824 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2826 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2827 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2828 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2832 (define_expand "vec_unpacks_float_lo_<mode>"
2833 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2834 (match_operand:VI2_AVX2 1 "register_operand" "")]
2837 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2839 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2840 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2841 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2845 (define_expand "vec_unpacku_float_hi_<mode>"
2846 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2847 (match_operand:VI2_AVX2 1 "register_operand" "")]
2850 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2852 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2853 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2854 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2858 (define_expand "vec_unpacku_float_lo_<mode>"
2859 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2860 (match_operand:VI2_AVX2 1 "register_operand" "")]
2863 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2865 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2866 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2867 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2871 (define_expand "vec_unpacks_float_hi_v4si"
2874 (match_operand:V4SI 1 "nonimmediate_operand" "")
2875 (parallel [(const_int 2) (const_int 3)
2876 (const_int 2) (const_int 3)])))
2877 (set (match_operand:V2DF 0 "register_operand" "")
2881 (parallel [(const_int 0) (const_int 1)]))))]
2883 "operands[2] = gen_reg_rtx (V4SImode);")
2885 (define_expand "vec_unpacks_float_lo_v4si"
2886 [(set (match_operand:V2DF 0 "register_operand" "")
2889 (match_operand:V4SI 1 "nonimmediate_operand" "")
2890 (parallel [(const_int 0) (const_int 1)]))))]
2893 (define_expand "vec_unpacks_float_hi_v8si"
2896 (match_operand:V8SI 1 "nonimmediate_operand" "")
2897 (parallel [(const_int 4) (const_int 5)
2898 (const_int 6) (const_int 7)])))
2899 (set (match_operand:V4DF 0 "register_operand" "")
2903 "operands[2] = gen_reg_rtx (V4SImode);")
2905 (define_expand "vec_unpacks_float_lo_v8si"
2906 [(set (match_operand:V4DF 0 "register_operand" "")
2909 (match_operand:V8SI 1 "nonimmediate_operand" "")
2910 (parallel [(const_int 0) (const_int 1)
2911 (const_int 2) (const_int 3)]))))]
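;; The unsigned SImode variants reuse the signed cvtdq2pd conversion and
;; then add 2^32 to any element whose signed interpretation was negative,
;; via a (lt ...) compare mask ANDed with a constant 2^32 vector.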
2914 (define_expand "vec_unpacku_float_hi_v4si"
2917 (match_operand:V4SI 1 "nonimmediate_operand" "")
2918 (parallel [(const_int 2) (const_int 3)
2919 (const_int 2) (const_int 3)])))
2924 (parallel [(const_int 0) (const_int 1)]))))
2926 (lt:V2DF (match_dup 6) (match_dup 3)))
2928 (and:V2DF (match_dup 7) (match_dup 4)))
2929 (set (match_operand:V2DF 0 "register_operand" "")
2930 (plus:V2DF (match_dup 6) (match_dup 8)))]
2933 REAL_VALUE_TYPE TWO32r;
2937 real_ldexp (&TWO32r, &dconst1, 32);
2938 x = const_double_from_real_value (TWO32r, DFmode);
2940 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2941 operands[4] = force_reg (V2DFmode,
2942 ix86_build_const_vector (V2DFmode, 1, x));
2944 operands[5] = gen_reg_rtx (V4SImode);
2946 for (i = 6; i < 9; i++)
2947 operands[i] = gen_reg_rtx (V2DFmode);
2950 (define_expand "vec_unpacku_float_lo_v4si"
2954 (match_operand:V4SI 1 "nonimmediate_operand" "")
2955 (parallel [(const_int 0) (const_int 1)]))))
2957 (lt:V2DF (match_dup 5) (match_dup 3)))
2959 (and:V2DF (match_dup 6) (match_dup 4)))
2960 (set (match_operand:V2DF 0 "register_operand" "")
2961 (plus:V2DF (match_dup 5) (match_dup 7)))]
2964 REAL_VALUE_TYPE TWO32r;
2968 real_ldexp (&TWO32r, &dconst1, 32);
2969 x = const_double_from_real_value (TWO32r, DFmode);
2971 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2972 operands[4] = force_reg (V2DFmode,
2973 ix86_build_const_vector (V2DFmode, 1, x));
2975 for (i = 5; i < 8; i++)
2976 operands[i] = gen_reg_rtx (V2DFmode);
2979 (define_expand "vec_unpacku_float_hi_v8si"
2980 [(match_operand:V4DF 0 "register_operand" "")
2981 (match_operand:V8SI 1 "register_operand" "")]
2984 REAL_VALUE_TYPE TWO32r;
2988 real_ldexp (&TWO32r, &dconst1, 32);
2989 x = const_double_from_real_value (TWO32r, DFmode);
2991 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2992 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2993 tmp[5] = gen_reg_rtx (V4SImode);
2995 for (i = 2; i < 5; i++)
2996 tmp[i] = gen_reg_rtx (V4DFmode);
2997 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
2998 emit_insn (gen_avx_cvtdq2pd256 (tmp[2], tmp[5]));
2999 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3000 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3001 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3002 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
3006 (define_expand "vec_unpacku_float_lo_v8si"
3007 [(match_operand:V4DF 0 "register_operand" "")
3008 (match_operand:V8SI 1 "nonimmediate_operand" "")]
3011 REAL_VALUE_TYPE TWO32r;
3015 real_ldexp (&TWO32r, &dconst1, 32);
3016 x = const_double_from_real_value (TWO32r, DFmode);
3018 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3019 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3021 for (i = 2; i < 5; i++)
3022 tmp[i] = gen_reg_rtx (V4DFmode);
3023 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3024 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3025 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3026 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3027 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
3031 (define_expand "vec_pack_trunc_v4df"
3033 (float_truncate:V4SF
3034 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3036 (float_truncate:V4SF
3037 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3038 (set (match_operand:V8SF 0 "register_operand" "")
3044 operands[3] = gen_reg_rtx (V4SFmode);
3045 operands[4] = gen_reg_rtx (V4SFmode);
3048 (define_expand "vec_pack_trunc_v2df"
3049 [(match_operand:V4SF 0 "register_operand" "")
3050 (match_operand:V2DF 1 "nonimmediate_operand" "")
3051 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3056 r1 = gen_reg_rtx (V4SFmode);
3057 r2 = gen_reg_rtx (V4SFmode);
3059 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3060 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3061 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
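;; vec_pack_sfix_trunc_v4df converts each V4DF operand into the low half of
;; a V8SI register (high half zero) and then merges the two low 128-bit
;; lanes with vperm2f128 using selector 0x20.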
3065 (define_expand "vec_pack_sfix_trunc_v4df"
3066 [(match_operand:V8SI 0 "register_operand" "")
3067 (match_operand:V4DF 1 "nonimmediate_operand" "")
3068 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3073 r1 = gen_reg_rtx (V8SImode);
3074 r2 = gen_reg_rtx (V8SImode);
3076 emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1]));
3077 emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2]));
3078 emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
3082 (define_expand "vec_pack_sfix_trunc_v2df"
3083 [(match_operand:V4SI 0 "register_operand" "")
3084 (match_operand:V2DF 1 "nonimmediate_operand" "")
3085 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3090 r1 = gen_reg_rtx (V4SImode);
3091 r2 = gen_reg_rtx (V4SImode);
3093 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3094 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3095 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3096 gen_lowpart (V2DImode, r1),
3097 gen_lowpart (V2DImode, r2)));
3101 (define_expand "vec_pack_sfix_v4df"
3102 [(match_operand:V8SI 0 "register_operand" "")
3103 (match_operand:V4DF 1 "nonimmediate_operand" "")
3104 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3109 r1 = gen_reg_rtx (V8SImode);
3110 r2 = gen_reg_rtx (V8SImode);
3112 emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1]));
3113 emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2]));
3114 emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
3118 (define_expand "vec_pack_sfix_v2df"
3119 [(match_operand:V4SI 0 "register_operand" "")
3120 (match_operand:V2DF 1 "nonimmediate_operand" "")
3121 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3126 r1 = gen_reg_rtx (V4SImode);
3127 r2 = gen_reg_rtx (V4SImode);
3129 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3130 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3131 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3132 gen_lowpart (V2DImode, r1),
3133 gen_lowpart (V2DImode, r2)));
3137 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3139 ;; Parallel single-precision floating point element swizzling
3141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
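;; movhlps copies the high two SF elements of the source into the low two
;; elements of the destination (movlhps is the mirror image).  The *_exp
;; expanders run ix86_fixup_binary_operands first and copy the result back
;; if the destination had to be replaced by a register.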
3143 (define_expand "sse_movhlps_exp"
3144 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3147 (match_operand:V4SF 1 "nonimmediate_operand" "")
3148 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3149 (parallel [(const_int 6)
3155 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3157 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3159 /* Fix up the destination if needed. */
3160 if (dst != operands[0])
3161 emit_move_insn (operands[0], dst);
3166 (define_insn "sse_movhlps"
3167 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3170 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3171 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3172 (parallel [(const_int 6)
3176 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3178 movhlps\t{%2, %0|%0, %2}
3179 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3180 movlps\t{%H2, %0|%0, %H2}
3181 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3182 %vmovhps\t{%2, %0|%0, %2}"
3183 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3184 (set_attr "type" "ssemov")
3185 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3186 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3188 (define_expand "sse_movlhps_exp"
3189 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3192 (match_operand:V4SF 1 "nonimmediate_operand" "")
3193 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3194 (parallel [(const_int 0)
3200 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3202 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3204 /* Fix up the destination if needed. */
3205 if (dst != operands[0])
3206 emit_move_insn (operands[0], dst);
3211 (define_insn "sse_movlhps"
3212 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3215 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3216 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3217 (parallel [(const_int 0)
3221 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3223 movlhps\t{%2, %0|%0, %2}
3224 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3225 movhps\t{%2, %0|%0, %2}
3226 vmovhps\t{%2, %1, %0|%0, %1, %2}
3227 %vmovlps\t{%2, %H0|%H0, %2}"
3228 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3229 (set_attr "type" "ssemov")
3230 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3231 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3233 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3234 (define_insn "avx_unpckhps256"
3235 [(set (match_operand:V8SF 0 "register_operand" "=x")
3238 (match_operand:V8SF 1 "register_operand" "x")
3239 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3240 (parallel [(const_int 2) (const_int 10)
3241 (const_int 3) (const_int 11)
3242 (const_int 6) (const_int 14)
3243 (const_int 7) (const_int 15)])))]
3245 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3246 [(set_attr "type" "sselog")
3247 (set_attr "prefix" "vex")
3248 (set_attr "mode" "V8SF")])
3250 (define_expand "vec_interleave_highv8sf"
3254 (match_operand:V8SF 1 "register_operand" "x")
3255 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3256 (parallel [(const_int 0) (const_int 8)
3257 (const_int 1) (const_int 9)
3258 (const_int 4) (const_int 12)
3259 (const_int 5) (const_int 13)])))
3265 (parallel [(const_int 2) (const_int 10)
3266 (const_int 3) (const_int 11)
3267 (const_int 6) (const_int 14)
3268 (const_int 7) (const_int 15)])))
3269 (set (match_operand:V8SF 0 "register_operand" "")
3274 (parallel [(const_int 4) (const_int 5)
3275 (const_int 6) (const_int 7)
3276 (const_int 12) (const_int 13)
3277 (const_int 14) (const_int 15)])))]
3280 operands[3] = gen_reg_rtx (V8SFmode);
3281 operands[4] = gen_reg_rtx (V8SFmode);
3284 (define_insn "vec_interleave_highv4sf"
3285 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3288 (match_operand:V4SF 1 "register_operand" "0,x")
3289 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3290 (parallel [(const_int 2) (const_int 6)
3291 (const_int 3) (const_int 7)])))]
3294 unpckhps\t{%2, %0|%0, %2}
3295 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3296 [(set_attr "isa" "noavx,avx")
3297 (set_attr "type" "sselog")
3298 (set_attr "prefix" "orig,vex")
3299 (set_attr "mode" "V4SF")])
3301 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3302 (define_insn "avx_unpcklps256"
3303 [(set (match_operand:V8SF 0 "register_operand" "=x")
3306 (match_operand:V8SF 1 "register_operand" "x")
3307 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3308 (parallel [(const_int 0) (const_int 8)
3309 (const_int 1) (const_int 9)
3310 (const_int 4) (const_int 12)
3311 (const_int 5) (const_int 13)])))]
3313 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3314 [(set_attr "type" "sselog")
3315 (set_attr "prefix" "vex")
3316 (set_attr "mode" "V8SF")])
3318 (define_expand "vec_interleave_lowv8sf"
3322 (match_operand:V8SF 1 "register_operand" "x")
3323 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3324 (parallel [(const_int 0) (const_int 8)
3325 (const_int 1) (const_int 9)
3326 (const_int 4) (const_int 12)
3327 (const_int 5) (const_int 13)])))
3333 (parallel [(const_int 2) (const_int 10)
3334 (const_int 3) (const_int 11)
3335 (const_int 6) (const_int 14)
3336 (const_int 7) (const_int 15)])))
3337 (set (match_operand:V8SF 0 "register_operand" "")
3342 (parallel [(const_int 0) (const_int 1)
3343 (const_int 2) (const_int 3)
3344 (const_int 8) (const_int 9)
3345 (const_int 10) (const_int 11)])))]
3348 operands[3] = gen_reg_rtx (V8SFmode);
3349 operands[4] = gen_reg_rtx (V8SFmode);
3352 (define_insn "vec_interleave_lowv4sf"
3353 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3356 (match_operand:V4SF 1 "register_operand" "0,x")
3357 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3358 (parallel [(const_int 0) (const_int 4)
3359 (const_int 1) (const_int 5)])))]
3362 unpcklps\t{%2, %0|%0, %2}
3363 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3364 [(set_attr "isa" "noavx,avx")
3365 (set_attr "type" "sselog")
3366 (set_attr "prefix" "orig,vex")
3367 (set_attr "mode" "V4SF")])
3369 ;; These are modeled with the same vec_concat as the others so that we
3370 ;; capture users of shufps that can use the new instructions.
3371 (define_insn "avx_movshdup256"
3372 [(set (match_operand:V8SF 0 "register_operand" "=x")
3375 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3377 (parallel [(const_int 1) (const_int 1)
3378 (const_int 3) (const_int 3)
3379 (const_int 5) (const_int 5)
3380 (const_int 7) (const_int 7)])))]
3382 "vmovshdup\t{%1, %0|%0, %1}"
3383 [(set_attr "type" "sse")
3384 (set_attr "prefix" "vex")
3385 (set_attr "mode" "V8SF")])
3387 (define_insn "sse3_movshdup"
3388 [(set (match_operand:V4SF 0 "register_operand" "=x")
3391 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3393 (parallel [(const_int 1)
3398 "%vmovshdup\t{%1, %0|%0, %1}"
3399 [(set_attr "type" "sse")
3400 (set_attr "prefix_rep" "1")
3401 (set_attr "prefix" "maybe_vex")
3402 (set_attr "mode" "V4SF")])
3404 (define_insn "avx_movsldup256"
3405 [(set (match_operand:V8SF 0 "register_operand" "=x")
3408 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3410 (parallel [(const_int 0) (const_int 0)
3411 (const_int 2) (const_int 2)
3412 (const_int 4) (const_int 4)
3413 (const_int 6) (const_int 6)])))]
3415 "vmovsldup\t{%1, %0|%0, %1}"
3416 [(set_attr "type" "sse")
3417 (set_attr "prefix" "vex")
3418 (set_attr "mode" "V8SF")])
3420 (define_insn "sse3_movsldup"
3421 [(set (match_operand:V4SF 0 "register_operand" "=x")
3424 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3426 (parallel [(const_int 0)
3431 "%vmovsldup\t{%1, %0|%0, %1}"
3432 [(set_attr "type" "sse")
3433 (set_attr "prefix_rep" "1")
3434 (set_attr "prefix" "maybe_vex")
3435 (set_attr "mode" "V4SF")])
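;; The avx_shufps256 expander splits the 8-bit shufps immediate into eight
;; element selectors over the concatenated V16SF value: the low lane picks
;; two elements from operand 1 (mask bits as-is) and two from operand 2
;; (offset 8), and the high lane repeats the selection with offsets 4 and 12.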
3437 (define_expand "avx_shufps256"
3438 [(match_operand:V8SF 0 "register_operand" "")
3439 (match_operand:V8SF 1 "register_operand" "")
3440 (match_operand:V8SF 2 "nonimmediate_operand" "")
3441 (match_operand:SI 3 "const_int_operand" "")]
3444 int mask = INTVAL (operands[3]);
3445 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3446 GEN_INT ((mask >> 0) & 3),
3447 GEN_INT ((mask >> 2) & 3),
3448 GEN_INT (((mask >> 4) & 3) + 8),
3449 GEN_INT (((mask >> 6) & 3) + 8),
3450 GEN_INT (((mask >> 0) & 3) + 4),
3451 GEN_INT (((mask >> 2) & 3) + 4),
3452 GEN_INT (((mask >> 4) & 3) + 12),
3453 GEN_INT (((mask >> 6) & 3) + 12)));
3457 ;; One bit in the mask selects 2 elements.
3458 (define_insn "avx_shufps256_1"
3459 [(set (match_operand:V8SF 0 "register_operand" "=x")
3462 (match_operand:V8SF 1 "register_operand" "x")
3463 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3464 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3465 (match_operand 4 "const_0_to_3_operand" "")
3466 (match_operand 5 "const_8_to_11_operand" "")
3467 (match_operand 6 "const_8_to_11_operand" "")
3468 (match_operand 7 "const_4_to_7_operand" "")
3469 (match_operand 8 "const_4_to_7_operand" "")
3470 (match_operand 9 "const_12_to_15_operand" "")
3471 (match_operand 10 "const_12_to_15_operand" "")])))]
3473 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3474 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3475 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3476 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3479 mask = INTVAL (operands[3]);
3480 mask |= INTVAL (operands[4]) << 2;
3481 mask |= (INTVAL (operands[5]) - 8) << 4;
3482 mask |= (INTVAL (operands[6]) - 8) << 6;
3483 operands[3] = GEN_INT (mask);
3485 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3487 [(set_attr "type" "sselog")
3488 (set_attr "length_immediate" "1")
3489 (set_attr "prefix" "vex")
3490 (set_attr "mode" "V8SF")])
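;; sse_shufps takes the two low result elements from operand 1 and the two
;; high result elements from operand 2, so the selectors for the second
;; pair are offset by 4 into the concatenated double-width vector.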
3492 (define_expand "sse_shufps"
3493 [(match_operand:V4SF 0 "register_operand" "")
3494 (match_operand:V4SF 1 "register_operand" "")
3495 (match_operand:V4SF 2 "nonimmediate_operand" "")
3496 (match_operand:SI 3 "const_int_operand" "")]
3499 int mask = INTVAL (operands[3]);
3500 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3501 GEN_INT ((mask >> 0) & 3),
3502 GEN_INT ((mask >> 2) & 3),
3503 GEN_INT (((mask >> 4) & 3) + 4),
3504 GEN_INT (((mask >> 6) & 3) + 4)));
3508 (define_insn "sse_shufps_<mode>"
3509 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3510 (vec_select:VI4F_128
3511 (vec_concat:<ssedoublevecmode>
3512 (match_operand:VI4F_128 1 "register_operand" "0,x")
3513 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3514 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3515 (match_operand 4 "const_0_to_3_operand" "")
3516 (match_operand 5 "const_4_to_7_operand" "")
3517 (match_operand 6 "const_4_to_7_operand" "")])))]
3521 mask |= INTVAL (operands[3]) << 0;
3522 mask |= INTVAL (operands[4]) << 2;
3523 mask |= (INTVAL (operands[5]) - 4) << 4;
3524 mask |= (INTVAL (operands[6]) - 4) << 6;
3525 operands[3] = GEN_INT (mask);
3527 switch (which_alternative)
3530 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3532 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3537 [(set_attr "isa" "noavx,avx")
3538 (set_attr "type" "sselog")
3539 (set_attr "length_immediate" "1")
3540 (set_attr "prefix" "orig,vex")
3541 (set_attr "mode" "V4SF")])
3543 (define_insn "sse_storehps"
3544 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3546 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3547 (parallel [(const_int 2) (const_int 3)])))]
3550 %vmovhps\t{%1, %0|%0, %1}
3551 %vmovhlps\t{%1, %d0|%d0, %1}
3552 %vmovlps\t{%H1, %d0|%d0, %H1}"
3553 [(set_attr "type" "ssemov")
3554 (set_attr "prefix" "maybe_vex")
3555 (set_attr "mode" "V2SF,V4SF,V2SF")])
3557 (define_expand "sse_loadhps_exp"
3558 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3561 (match_operand:V4SF 1 "nonimmediate_operand" "")
3562 (parallel [(const_int 0) (const_int 1)]))
3563 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3566 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3568 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3570 /* Fix up the destination if needed. */
3571 if (dst != operands[0])
3572 emit_move_insn (operands[0], dst);
3577 (define_insn "sse_loadhps"
3578 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3581 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3582 (parallel [(const_int 0) (const_int 1)]))
3583 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3586 movhps\t{%2, %0|%0, %2}
3587 vmovhps\t{%2, %1, %0|%0, %1, %2}
3588 movlhps\t{%2, %0|%0, %2}
3589 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3590 %vmovlps\t{%2, %H0|%H0, %2}"
3591 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3592 (set_attr "type" "ssemov")
3593 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3594 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
3596 (define_insn "sse_storelps"
3597 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3599 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3600 (parallel [(const_int 0) (const_int 1)])))]
3603 %vmovlps\t{%1, %0|%0, %1}
3604 %vmovaps\t{%1, %0|%0, %1}
3605 %vmovlps\t{%1, %d0|%d0, %1}"
3606 [(set_attr "type" "ssemov")
3607 (set_attr "prefix" "maybe_vex")
3608 (set_attr "mode" "V2SF,V4SF,V2SF")])
3610 (define_expand "sse_loadlps_exp"
3611 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3613 (match_operand:V2SF 2 "nonimmediate_operand" "")
3615 (match_operand:V4SF 1 "nonimmediate_operand" "")
3616 (parallel [(const_int 2) (const_int 3)]))))]
3619 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3621 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3623 /* Fix up the destination if needed. */
3624 if (dst != operands[0])
3625 emit_move_insn (operands[0], dst);
3630 (define_insn "sse_loadlps"
3631 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3633 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3635 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3636 (parallel [(const_int 2) (const_int 3)]))))]
3639 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3640 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3641 movlps\t{%2, %0|%0, %2}
3642 vmovlps\t{%2, %1, %0|%0, %1, %2}
3643 %vmovlps\t{%2, %0|%0, %2}"
3644 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3645 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3646 (set_attr "length_immediate" "1,1,*,*,*")
3647 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3648 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3650 (define_insn "sse_movss"
3651 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3653 (match_operand:V4SF 2 "register_operand" " x,x")
3654 (match_operand:V4SF 1 "register_operand" " 0,x")
3658 movss\t{%2, %0|%0, %2}
3659 vmovss\t{%2, %1, %0|%0, %1, %2}"
3660 [(set_attr "isa" "noavx,avx")
3661 (set_attr "type" "ssemov")
3662 (set_attr "prefix" "orig,vex")
3663 (set_attr "mode" "SF")])
3665 (define_expand "vec_dupv4sf"
3666 [(set (match_operand:V4SF 0 "register_operand" "")
3668 (match_operand:SF 1 "nonimmediate_operand" "")))]
3672 operands[1] = force_reg (SFmode, operands[1]);
3675 (define_insn "avx2_vec_dupv4sf"
3676 [(set (match_operand:V4SF 0 "register_operand" "=x")
3679 (match_operand:V4SF 1 "register_operand" "x")
3680 (parallel [(const_int 0)]))))]
3682 "vbroadcastss\t{%1, %0|%0, %1}"
3683 [(set_attr "type" "sselog1")
3684 (set_attr "prefix" "vex")
3685 (set_attr "mode" "V4SF")])
3687 (define_insn "*vec_dupv4sf_avx"
3688 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3690 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3693 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3694 vbroadcastss\t{%1, %0|%0, %1}"
3695 [(set_attr "type" "sselog1,ssemov")
3696 (set_attr "length_immediate" "1,0")
3697 (set_attr "prefix_extra" "0,1")
3698 (set_attr "prefix" "vex")
3699 (set_attr "mode" "V4SF")])
3701 (define_insn "avx2_vec_dupv8sf"
3702 [(set (match_operand:V8SF 0 "register_operand" "=x")
3705 (match_operand:V4SF 1 "register_operand" "x")
3706 (parallel [(const_int 0)]))))]
3708 "vbroadcastss\t{%1, %0|%0, %1}"
3709 [(set_attr "type" "sselog1")
3710 (set_attr "prefix" "vex")
3711 (set_attr "mode" "V8SF")])
3713 (define_insn "*vec_dupv4sf"
3714 [(set (match_operand:V4SF 0 "register_operand" "=x")
3716 (match_operand:SF 1 "register_operand" "0")))]
3718 "shufps\t{$0, %0, %0|%0, %0, 0}"
3719 [(set_attr "type" "sselog1")
3720 (set_attr "length_immediate" "1")
3721 (set_attr "mode" "V4SF")])
3723 ;; Although insertps takes a register source, we prefer
3724 ;; unpcklps with a register source since it is shorter.
3725 (define_insn "*vec_concatv2sf_sse4_1"
3726 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3728 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3729 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3732 unpcklps\t{%2, %0|%0, %2}
3733 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3734 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3735 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3736 %vmovss\t{%1, %0|%0, %1}
3737 punpckldq\t{%2, %0|%0, %2}
3738 movd\t{%1, %0|%0, %1}"
3739 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3740 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3741 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3742 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3743 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3744 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3745 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3747 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3748 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3749 ;; alternatives pretty much forces the MMX alternative to be chosen.
3750 (define_insn "*vec_concatv2sf_sse"
3751 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3753 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3754 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3757 unpcklps\t{%2, %0|%0, %2}
3758 movss\t{%1, %0|%0, %1}
3759 punpckldq\t{%2, %0|%0, %2}
3760 movd\t{%1, %0|%0, %1}"
3761 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3762 (set_attr "mode" "V4SF,SF,DI,DI")])
3764 (define_insn "*vec_concatv4sf"
3765 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3767 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3768 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3771 movlhps\t{%2, %0|%0, %2}
3772 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3773 movhps\t{%2, %0|%0, %2}
3774 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3775 [(set_attr "isa" "noavx,avx,noavx,avx")
3776 (set_attr "type" "ssemov")
3777 (set_attr "prefix" "orig,vex,orig,vex")
3778 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
3780 (define_expand "vec_init<mode>"
3781 [(match_operand:V_128 0 "register_operand" "")
3782 (match_operand 1 "" "")]
3785 ix86_expand_vector_init (false, operands[0], operands[1]);
3789 ;; Avoid combining registers from different units in a single alternative;
3790 ;; see the comment above the inline_secondary_memory_needed function in i386.c.
3791 (define_insn "vec_set<mode>_0"
3792 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3793 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3795 (vec_duplicate:VI4F_128
3796 (match_operand:<ssescalarmode> 2 "general_operand"
3797 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3798 (match_operand:VI4F_128 1 "vector_move_operand"
3799 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3803 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3804 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3805 %vmovd\t{%2, %0|%0, %2}
3806 movss\t{%2, %0|%0, %2}
3807 movss\t{%2, %0|%0, %2}
3808 vmovss\t{%2, %1, %0|%0, %1, %2}
3809 pinsrd\t{$0, %2, %0|%0, %2, 0}
3810 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3814 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3816 (cond [(eq_attr "alternative" "0,6,7")
3817 (const_string "sselog")
3818 (eq_attr "alternative" "9")
3819 (const_string "fmov")
3820 (eq_attr "alternative" "10")
3821 (const_string "imov")
3823 (const_string "ssemov")))
3824 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3825 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3826 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3827 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3829 ;; A subset is vec_setv4sf.
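;; Operand 3 is a one-hot mask naming the element to replace; exact_log2 of
;; it, shifted into bits 5:4, forms the count_d field of the insertps
;; immediate.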
3830 (define_insn "*vec_setv4sf_sse4_1"
3831 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3834 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3835 (match_operand:V4SF 1 "register_operand" "0,x")
3836 (match_operand:SI 3 "const_int_operand" "")))]
3838 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3839 < GET_MODE_NUNITS (V4SFmode))"
3841 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3842 switch (which_alternative)
3845 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3847 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3852 [(set_attr "isa" "noavx,avx")
3853 (set_attr "type" "sselog")
3854 (set_attr "prefix_data16" "1,*")
3855 (set_attr "prefix_extra" "1")
3856 (set_attr "length_immediate" "1")
3857 (set_attr "prefix" "orig,vex")
3858 (set_attr "mode" "V4SF")])
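;; With a memory source insertps loads a single SF element, so the count_s
;; field (bits 7:6 of the immediate) is folded into the memory address and
;; cleared from the immediate before the insn is output.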
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
	(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
		      (match_operand:V4SF 1 "register_operand" "0,x")
		      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
		     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
{
  if (MEM_P (operands[2]))
    {
      unsigned count_s = INTVAL (operands[3]) >> 6;
      if (count_s)
	operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
    }
  switch (which_alternative)
    {
    case 0:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

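;; For reference (not part of the original file): the insertps immediate
;; packs three fields, bits [7:6] select the source element (COUNT_S),
;; bits [5:4] select the destination element (COUNT_D) and bits [3:0] are a
;; zero mask; the MEM_P case above folds COUNT_S into the address and then
;; clears it from the immediate.  A minimal sketch of the corresponding
;; intrinsic:
;;
;;   #include <smmintrin.h>
;;
;;   __m128
;;   insert_demo (__m128 a, __m128 b)
;;   {
;;     /* Copy element 2 of B into element 1 of A, zero element 3.  */
;;     return _mm_insert_ps (a, b, (2 << 6) | (1 << 4) | (1 << 3));
;;   }
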
(define_split
  [(set (match_operand:VI4F_128 0 "memory_operand" "")
	(vec_merge:VI4F_128
	  (vec_duplicate:VI4F_128
	    (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
	  (match_dup 0)
	  (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
		  operands[1]);
  DONE;
})

(define_expand "vec_set<mode>"
  [(match_operand:V 0 "register_operand" "")
   (match_operand:<ssescalarmode> 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
			  INTVAL (operands[2]));
  DONE;
})

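;; As an informal illustration (not part of the original file): the middle
;; end emits vec_set when a scalar is stored into one lane of a vector
;; value, for instance through GNU C vector subscripting.  A hedged sketch,
;; where the typedef name is hypothetical:
;;
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;
;;   v4si
;;   set_lane (v4si v, int x)
;;   {
;;     v[2] = x;   /* goes through the vec_setv4si expander */
;;     return v;
;;   }
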
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
	(vec_select:SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (SFmode, REGNO (op1));
  else
    op1 = gen_lowpart (SFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

(define_insn_and_split "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
	(vec_select:SF
	  (match_operand:V4SF 1 "register_operand" "x,0,x")
	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "@
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  "&& reload_completed && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
  switch (INTVAL (operands[2]))
    {
    case 1:
    case 3:
      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
				      operands[2], operands[2],
				      GEN_INT (INTVAL (operands[2]) + 4),
				      GEN_INT (INTVAL (operands[2]) + 4)));
      break;
    case 2:
      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
      break;
    default:
      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
      gcc_unreachable ();
    }
  DONE;
}
  [(set_attr "isa" "*,noavx,avx")
   (set_attr "type" "sselog,*,*")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1,*,*")
   (set_attr "length_immediate" "1,*,*")
   (set_attr "prefix" "maybe_vex,*,*")
   (set_attr "mode" "V4SF,*,*")])

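;; For reference (not part of the original file): extractps can store the
;; selected element straight to a GPR or to memory as a 32-bit value, which
;; is how the _mm_extract_ps intrinsic uses it.  A minimal sketch:
;;
;;   #include <smmintrin.h>
;;
;;   int
;;   extract_bits (__m128 v)
;;   {
;;     return _mm_extract_ps (v, 3);   /* bit pattern of element 3 */
;;   }
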
(define_insn_and_split "*vec_extract_v4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
	(vec_select:SF
	  (match_operand:V4SF 1 "memory_operand" "o,o,o")
	  (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i = INTVAL (operands[2]);

  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
  DONE;
})

(define_expand "avx_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
   (match_operand:V_256 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  rtx (*insn)(rtx, rtx);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_<mode>;
      break;
    case 1:
      insn = gen_vec_extract_hi_<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (operands[0], operands[1]));
  DONE;
})

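;; As an informal illustration (not part of the original file): this
;; expander is what the vextractf128 builtins, e.g. _mm256_extractf128_ps,
;; go through.  A minimal sketch:
;;
;;   #include <immintrin.h>
;;
;;   __m128
;;   upper_half (__m256 v)
;;   {
;;     return _mm256_extractf128_ps (v, 1);   /* elements 4..7 */
;;   }
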
(define_insn_and_split "vec_extract_lo_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
	(vec_select:<ssehalfvecmode>
	  (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
  else
    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

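;; For reference (not part of the original file): the low half of a 256-bit
;; register is the corresponding 128-bit register, so the split above
;; degenerates into an ordinary 128-bit move and no vextractf128 is needed.
;; The cast intrinsics rely on the same fact and emit no instruction.  A
;; minimal sketch:
;;
;;   #include <immintrin.h>
;;
;;   __m128d
;;   lower_half (__m256d v)
;;   {
;;     return _mm256_castpd256_pd128 (v);   /* elements 0..1, no insn */
;;   }
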
(define_insn "vec_extract_hi_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
	(vec_select:<ssehalfvecmode>
	  (match_operand:VI8F_256 1 "register_operand" "x,x")
	  (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_AVX"
  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")