1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V?TImode, used in move patterns.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 (V2TI "TARGET_AVX") V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; All vector modes usable in arithmetic patterns (same as V16 but without
;; the TImode vectors).  NOTE(review): the original comment at source
;; lines 30-31 was dropped from this listing (line numbers jump 29 -> 32).
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
;; NOTE(review): the mode list of VF_256 (source lines 67-68) is missing
;; from this listing -- the form below is unterminated as shown.
;; Presumably [V8SF V4DF], by analogy with VF/VF1/VF2 above; verify
;; against the upstream sse.md before building.
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
;; Same element widths, but the 256-bit forms require AVX2 (integer
;; 256-bit ops are not available with plain AVX).
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
102 ;; ??? We should probably use TImode instead.
103 (define_mode_iterator VIMAX_AVX2
104 [(V2TI "TARGET_AVX2") V1TI])
106 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
107 (define_mode_iterator SSESCALARMODE
108 [(V2TI "TARGET_AVX2") TI])
110 (define_mode_iterator VI12_AVX2
111 [(V32QI "TARGET_AVX2") V16QI
112 (V16HI "TARGET_AVX2") V8HI])
114 (define_mode_iterator VI24_AVX2
115 [(V16HI "TARGET_AVX2") V8HI
116 (V8SI "TARGET_AVX2") V4SI])
118 (define_mode_iterator VI124_AVX2
119 [(V32QI "TARGET_AVX2") V16QI
120 (V16HI "TARGET_AVX2") V8HI
121 (V8SI "TARGET_AVX2") V4SI])
123 (define_mode_iterator VI248_AVX2
124 [(V16HI "TARGET_AVX2") V8HI
125 (V8SI "TARGET_AVX2") V4SI
126 (V4DI "TARGET_AVX2") V2DI])
128 (define_mode_iterator VI48_AVX2
129 [(V8SI "TARGET_AVX2") V4SI
130 (V4DI "TARGET_AVX2") V2DI])
;; NOTE(review): the first entries of V48_AVX2's mode list (source lines
;; 133-134, presumably the V4SF/V8SF rows) are missing from this listing.
132 (define_mode_iterator V48_AVX2
135 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
136 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA-name attributes: each maps a vector mode to the ISA string that is
;; substituted into pattern names (128-bit form vs. its AVX2 256-bit form).
138 (define_mode_attr sse2_avx2
139 [(V16QI "sse2") (V32QI "avx2")
140 (V8HI "sse2") (V16HI "avx2")
141 (V4SI "sse2") (V8SI "avx2")
142 (V2DI "sse2") (V4DI "avx2")
143 (V1TI "sse2") (V2TI "avx2")])
145 (define_mode_attr ssse3_avx2
146 [(V16QI "ssse3") (V32QI "avx2")
147 (V8HI "ssse3") (V16HI "avx2")
148 (V4SI "ssse3") (V8SI "avx2")
149 (V2DI "ssse3") (V4DI "avx2")
150 (TI "ssse3") (V2TI "avx2")])
152 (define_mode_attr sse4_1_avx2
153 [(V16QI "sse4_1") (V32QI "avx2")
154 (V8HI "sse4_1") (V16HI "avx2")
155 (V4SI "sse4_1") (V8SI "avx2")
156 (V2DI "sse4_1") (V4DI "avx2")])
158 (define_mode_attr avx_avx2
159 [(V4SF "avx") (V2DF "avx")
160 (V8SF "avx") (V4DF "avx")
161 (V4SI "avx2") (V2DI "avx2")
162 (V8SI "avx2") (V4DI "avx2")])
164 (define_mode_attr vec_avx2
165 [(V16QI "vec") (V32QI "avx2")
166 (V8HI "vec") (V16HI "avx2")
167 (V4SI "vec") (V8SI "avx2")
168 (V2DI "vec") (V4DI "avx2")])
;; Mode of double element width with the same element count.
170 (define_mode_attr ssedoublemode
171 [(V16HI "V16SI") (V8HI "V8SI")])
;; Same-size vector mode with QImode elements.
173 (define_mode_attr ssebytemode
174 [(V4DI "V32QI") (V2DI "V16QI")])
176 ;; All 128bit vector integer modes
177 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
179 ;; All 256bit vector integer modes
180 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
182 ;; Random 128bit vector integer mode combinations
183 (define_mode_iterator VI12_128 [V16QI V8HI])
184 (define_mode_iterator VI14_128 [V16QI V4SI])
185 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
186 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
187 (define_mode_iterator VI24_128 [V8HI V4SI])
188 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
189 (define_mode_iterator VI48_128 [V4SI V2DI])
191 ;; Random 256bit vector integer mode combinations
192 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
193 (define_mode_iterator VI48_256 [V8SI V4DI])
195 ;; Int-float size matches
196 (define_mode_iterator VI4F_128 [V4SI V4SF])
197 (define_mode_iterator VI8F_128 [V2DI V2DF])
198 (define_mode_iterator VI4F_256 [V8SI V8SF])
199 (define_mode_iterator VI8F_256 [V4DI V4DF])
201 ;; Mapping from float mode to required SSE level
202 (define_mode_attr sse
203 [(SF "sse") (DF "sse2")
204 (V4SF "sse") (V2DF "sse2")
205 (V8SF "avx") (V4DF "avx")])
207 (define_mode_attr sse2
208 [(V16QI "sse2") (V32QI "avx")
209 (V2DI "sse2") (V4DI "avx")])
211 (define_mode_attr sse3
212 [(V16QI "sse3") (V32QI "avx")])
214 (define_mode_attr sse4_1
215 [(V4SF "sse4_1") (V2DF "sse4_1")
216 (V8SF "avx") (V4DF "avx")])
;; "256" for 256-bit modes, empty for 128-bit modes; appended to pattern
;; names so both widths share one template.
218 (define_mode_attr avxsizesuffix
219 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
220 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
221 (V8SF "256") (V4DF "256")
222 (V4SF "") (V2DF "")])
224 ;; SSE instruction mode
;; NOTE(review): the closing line(s) of sseinsnmode (source lines 230-231)
;; are missing from this listing -- the form below is unterminated as
;; shown.  Restore the final entries/closing "])" from upstream sse.md.
225 (define_mode_attr sseinsnmode
226 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
227 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
228 (V8SF "V8SF") (V4DF "V4DF")
229 (V4SF "V4SF") (V2DF "V2DF")
232 ;; Mapping of vector float modes to an integer mode of the same size
233 (define_mode_attr sseintvecmode
234 [(V8SF "V8SI") (V4DF "V4DI")
235 (V4SF "V4SI") (V2DF "V2DI")
236 (V8SI "V8SI") (V4DI "V4DI")
237 (V4SI "V4SI") (V2DI "V2DI")
238 (V16HI "V16HI") (V8HI "V8HI")
239 (V32QI "V32QI") (V16QI "V16QI")])
;; Lower-case variant of sseintvecmode, for use in pattern names.
241 (define_mode_attr sseintvecmodelower
242 [(V8SF "v8si") (V4DF "v4di")
243 (V4SF "v4si") (V2DF "v2di")
244 (V8SI "v8si") (V4DI "v4di")
245 (V4SI "v4si") (V2DI "v2di")
246 (V16HI "v16hi") (V8HI "v8hi")
247 (V32QI "v32qi") (V16QI "v16qi")])
249 ;; Mapping of vector modes to a vector mode of double size
250 (define_mode_attr ssedoublevecmode
251 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
252 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
253 (V8SF "V16SF") (V4DF "V8DF")
254 (V4SF "V8SF") (V2DF "V4DF")])
256 ;; Mapping of vector modes to a vector mode of half size
;; NOTE(review): the closing line(s) of ssehalfvecmode (source lines
;; 261-262) are missing from this listing -- the form is unterminated as
;; shown.  Restore the final entries/closing "])" from upstream sse.md.
257 (define_mode_attr ssehalfvecmode
258 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
259 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
260 (V8SF "V4SF") (V4DF "V2DF")
263 ;; Mapping of vector modes back to the scalar modes
264 (define_mode_attr ssescalarmode
265 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
266 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
267 (V8SF "SF") (V4DF "DF")
268 (V4SF "SF") (V2DF "DF")])
270 ;; Number of scalar elements in each vector type
271 (define_mode_attr ssescalarnum
272 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
273 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
274 (V8SF "8") (V4DF "4")
275 (V4SF "4") (V2DF "2")])
277 ;; SSE prefix for integer vector modes
;; NOTE(review): interior rows of sseintprefix (source lines 280-281,
;; presumably the V4SI/V4SF and V4DI/V4DF rows) are missing from this
;; listing.
278 (define_mode_attr sseintprefix
279 [(V2DI "p") (V2DF "")
282 (V8SI "p") (V8SF "")])
284 ;; SSE scalar suffix for vector modes
;; NOTE(review): ssescalarmodesuffix is missing its opening row (source
;; line 286) and its closing line(s) (source lines 290-291) -- the form is
;; unterminated as shown.  Restore from upstream sse.md.
285 (define_mode_attr ssescalarmodesuffix
287 (V8SF "ss") (V4DF "sd")
288 (V4SF "ss") (V2DF "sd")
289 (V8SI "ss") (V4DI "sd")
292 ;; Pack/unpack vector modes
293 (define_mode_attr sseunpackmode
294 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
295 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
297 (define_mode_attr ssepackmode
298 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
299 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
301 ;; Mapping of the max integer size for xop rotate immediate constraint
302 (define_mode_attr sserotatemax
303 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
305 ;; Mapping of mode to cast intrinsic name
306 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
308 ;; Instruction suffix for sign and zero extensions.
309 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
311 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
312 (define_mode_attr i128
313 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
314 (V8SI "%~128") (V4DI "%~128")])
;; NOTE(review): the comment at source line 316 (above AVX256MODE2P) was
;; dropped from this listing.
317 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
319 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
321 ;; Mapping of immediate bits for blend instructions
322 (define_mode_attr blendbits
323 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
325 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
331 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
333 ;; All of these patterns are enabled for SSE1 as well as SSE2.
334 ;; This is essential for maintaining stable calling conventions.
;; Vector move expander for all V16 modes; defers to the generic mover.
;; NOTE(review): source lines 339-340 (the insn condition and opening "{")
;; and 342-344 ("DONE;" and closing "}") are missing from this listing --
;; the expander is incomplete as shown.
336 (define_expand "mov<mode>"
337 [(set (match_operand:V16 0 "nonimmediate_operand" "")
338 (match_operand:V16 1 "nonimmediate_operand" ""))]
341 ix86_expand_vector_move (<MODE>mode, operands);
;; Vector move insn: alt 0 loads an SSE constant, alts 1/2 are reg<->mem
;; moves; the C body picks movaps/movapd/movdqa (or unaligned v-forms)
;; from get_attr_mode.  NOTE(review): many interior lines are missing from
;; this listing (source lines 348, 351, 353-354, 356-357, 359-362, 366,
;; 368-371, 377, 379-382, 388, 390-397, 400, 412, 414) -- the switch
;; cases, braces, and part of the mode-attr cond are incomplete as shown.
345 (define_insn "*mov<mode>_internal"
346 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
347 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
349 && (register_operand (operands[0], <MODE>mode)
350 || register_operand (operands[1], <MODE>mode))"
352 switch (which_alternative)
355 return standard_sse_constant_opcode (insn, operands[1]);
358 switch (get_attr_mode (insn))
363 && (misaligned_operand (operands[0], <MODE>mode)
364 || misaligned_operand (operands[1], <MODE>mode)))
365 return "vmovups\t{%1, %0|%0, %1}";
367 return "%vmovaps\t{%1, %0|%0, %1}";
372 && (misaligned_operand (operands[0], <MODE>mode)
373 || misaligned_operand (operands[1], <MODE>mode)))
374 return "vmovupd\t{%1, %0|%0, %1}";
375 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
376 return "%vmovaps\t{%1, %0|%0, %1}";
378 return "%vmovapd\t{%1, %0|%0, %1}";
383 && (misaligned_operand (operands[0], <MODE>mode)
384 || misaligned_operand (operands[1], <MODE>mode)))
385 return "vmovdqu\t{%1, %0|%0, %1}";
386 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
387 return "%vmovaps\t{%1, %0|%0, %1}";
389 return "%vmovdqa\t{%1, %0|%0, %1}";
398 [(set_attr "type" "sselog1,ssemov,ssemov")
399 (set_attr "prefix" "maybe_vex")
401 (cond [(match_test "TARGET_AVX")
402 (const_string "<sseinsnmode>")
403 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
404 (not (match_test "TARGET_SSE2")))
405 (and (eq_attr "alternative" "2")
406 (match_test "TARGET_SSE_TYPELESS_STORES")))
407 (const_string "V4SF")
408 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
409 (const_string "V4SF")
410 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
411 (const_string "V2DF")
413 (const_string "TI")))])
;; movq: copy low DImode element, zeroing the upper half.
;; NOTE(review): source lines 417-418 (the vec_concat/vec_select wrapper)
;; and 421-422 (zero operand and the insn condition) are missing from this
;; listing -- the pattern is incomplete as shown.
415 (define_insn "sse2_movq128"
416 [(set (match_operand:V2DI 0 "register_operand" "=x")
419 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
420 (parallel [(const_int 0)]))
423 "%vmovq\t{%1, %0|%0, %1}"
424 [(set_attr "type" "ssemov")
425 (set_attr "prefix" "maybe_vex")
426 (set_attr "mode" "TI")])
428 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
429 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
430 ;; from memory, we'd prefer to load the memory directly into the %xmm
431 ;; register. To facilitate this happy circumstance, this pattern won't
432 ;; split until after register allocation. If the 64-bit value didn't
433 ;; come from memory, this is the best we can do. This is much better
434 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; NOTE(review): several lines are missing from this listing (source
;; lines 435-436, 438, 443, 445-446, 448, 456-457, 461-464) -- the end of
;; the comment above, the split pattern, braces, gcc_unreachable/DONE
;; tail, and part of the register-operand arm are incomplete as shown.
437 (define_insn_and_split "movdi_to_sse"
439 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
440 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
441 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
442 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
444 "&& reload_completed"
447 if (register_operand (operands[1], DImode))
449 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
450 Assemble the 64-bit DImode value in an xmm register. */
451 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
452 gen_rtx_SUBREG (SImode, operands[1], 0)));
453 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
454 gen_rtx_SUBREG (SImode, operands[1], 4)));
455 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
458 else if (memory_operand (operands[1], DImode))
459 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
460 operands[1], const0_rtx))
;; Split a zero-extended scalar load of SF into (vec_merge (vec_duplicate
;; ...) zero) form.  NOTE(review): the "(define_split" opener (source line
;; 465) and interior lines 469-470, 472-474, 477-478 are missing from this
;; listing -- the replacement pattern is incomplete as shown.
466 [(set (match_operand:V4SF 0 "register_operand" "")
467 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
468 "TARGET_SSE && reload_completed"
471 (vec_duplicate:V4SF (match_dup 1))
475 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
476 operands[2] = CONST0_RTX (V4SFmode);
;; DF analogue of the split above.  NOTE(review): the "(define_split"
;; opener (source line 479) and lines 484, 487-488 are missing from this
;; listing.
480 [(set (match_operand:V2DF 0 "register_operand" "")
481 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
482 "TARGET_SSE2 && reload_completed"
483 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
485 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
486 operands[2] = CONST0_RTX (DFmode);
;; Push a whole vector register onto the stack.  NOTE(review): source
;; lines 491-492 (condition and "{") and 494-496 are missing -- the
;; expander is incomplete as shown.
489 (define_expand "push<mode>1"
490 [(match_operand:V16 0 "register_operand" "")]
493 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned vector move.  NOTE(review): source lines 500-501 and
;; 503-505 are missing -- the expander is incomplete as shown.
497 (define_expand "movmisalign<mode>"
498 [(set (match_operand:V16 0 "nonimmediate_operand" "")
499 (match_operand:V16 1 "nonimmediate_operand" ""))]
502 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned float load/store intrinsics (movups/movupd).  NOTE(review):
;; source lines 508, 510-512, 515-516 (the unspec wrapper, UNSPEC name,
;; condition, and closing braces) are missing -- incomplete as shown.
506 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
507 [(set (match_operand:VF 0 "nonimmediate_operand" "")
509 [(match_operand:VF 1 "nonimmediate_operand" "")]
513 if (MEM_P (operands[0]) && MEM_P (operands[1]))
514 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Matching insn; one memory operand at most (enforced by the condition).
;; NOTE(review): source lines 519 and 521 (unspec wrapper and UNSPEC name)
;; are missing from this listing.
517 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
518 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
520 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
522 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
523 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
524 [(set_attr "type" "ssemov")
525 (set_attr "movu" "1")
526 (set_attr "prefix" "maybe_vex")
527 (set_attr "mode" "<MODE>")])
;; Unaligned integer load/store intrinsic (movdqu).  NOTE(review): source
;; lines 532-534 and 537-538 are missing -- incomplete as shown.
529 (define_expand "<sse2>_movdqu<avxsizesuffix>"
530 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
531 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
535 if (MEM_P (operands[0]) && MEM_P (operands[1]))
536 operands[1] = force_reg (<MODE>mode, operands[1]);
;; NOTE(review): source lines 542, 548, 550-551 are missing from this
;; pattern (UNSPEC name and the if_then_else of the prefix_data16 attr).
539 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
540 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
541 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
543 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
544 "%vmovdqu\t{%1, %0|%0, %1}"
545 [(set_attr "type" "ssemov")
546 (set_attr "movu" "1")
547 (set (attr "prefix_data16")
549 (match_test "TARGET_AVX")
552 (set_attr "prefix" "maybe_vex")
553 (set_attr "mode" "<sseinsnmode>")])
;; lddqu: unaligned load that may read across a cache-line split.
;; NOTE(review): source lines 558-559, 564, 566-567, 569, 571-572 are
;; missing (UNSPEC name, condition, and attr if_then_else bodies).
555 (define_insn "<sse3>_lddqu<avxsizesuffix>"
556 [(set (match_operand:VI1 0 "register_operand" "=x")
557 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
560 "%vlddqu\t{%1, %0|%0, %1}"
561 [(set_attr "type" "ssemov")
562 (set_attr "movu" "1")
563 (set (attr "prefix_data16")
565 (match_test "TARGET_AVX")
568 (set (attr "prefix_rep")
570 (match_test "TARGET_AVX")
573 (set_attr "prefix" "maybe_vex")
574 (set_attr "mode" "<sseinsnmode>")])
;; Non-temporal SImode store (movnti).  NOTE(review): source lines
;; 579-580 (UNSPEC name and the insn condition) are missing from this
;; listing.
576 (define_insn "sse2_movntsi"
577 [(set (match_operand:SI 0 "memory_operand" "=m")
578 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
581 "movnti\t{%1, %0|%0, %1}"
582 [(set_attr "type" "ssemov")
583 (set_attr "prefix_data16" "0")
584 (set_attr "mode" "V2DF")])
;; Non-temporal float vector store (movntps/movntpd).  NOTE(review):
;; source lines 589-590 are missing (UNSPEC name and condition).
586 (define_insn "<sse>_movnt<mode>"
587 [(set (match_operand:VF 0 "memory_operand" "=m")
588 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
591 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
592 [(set_attr "type" "ssemov")
593 (set_attr "prefix" "maybe_vex")
594 (set_attr "mode" "<MODE>")])
;; Non-temporal integer vector store (movntdq).  NOTE(review): source
;; lines 599-600, 604, 606-607 are missing (UNSPEC name, condition, and
;; the prefix_data16 if_then_else body).
596 (define_insn "<sse2>_movnt<mode>"
597 [(set (match_operand:VI8 0 "memory_operand" "=m")
598 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
601 "%vmovntdq\t{%1, %0|%0, %1}"
602 [(set_attr "type" "ssecvt")
603 (set (attr "prefix_data16")
605 (match_test "TARGET_AVX")
608 (set_attr "prefix" "maybe_vex")
609 (set_attr "mode" "<sseinsnmode>")])
611 ; Expand patterns for non-temporal stores. At the moment, only those
612 ; that directly map to insns are defined; it would be possible to
613 ; define patterns for other modes that would expand to several insns.
615 ;; Modes handled by storent patterns.
;; NOTE(review): source line 618 (an interior row of this mode list) is
;; missing from this listing.
616 (define_mode_iterator STORENT_MODE
617 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
619 (V8SF "TARGET_AVX") V4SF
620 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; NOTE(review): source lines 624, 626-627 (unspec wrapper, UNSPEC name,
;; condition) are missing -- the expander is incomplete as shown.
622 (define_expand "storent<mode>"
623 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
625 [(match_operand:STORENT_MODE 1 "register_operand" "")]
629 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
631 ;; Parallel floating point arithmetic
633 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; abs/neg expander: delegates mask creation to the generic helper.
;; NOTE(review): source lines 637 (the abs/neg code wrapper) and 639 (the
;; condition) are missing from this listing.
635 (define_expand "<code><mode>2"
636 [(set (match_operand:VF 0 "register_operand" "")
638 (match_operand:VF 1 "register_operand" "")))]
640 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg implemented as AND/XOR with a sign-bit mask (operand 2); split
;; after reload into the raw logic insn.  NOTE(review): many interior
;; lines are missing (source lines 647-648, 650-651, 653-657, 660,
;; 662-665, 667-671, 675-677) -- the "#" template, locals, braces, and
;; emit/DONE tail are incomplete as shown.
642 (define_insn_and_split "*absneg<mode>2"
643 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
644 (match_operator:VF 3 "absneg_operator"
645 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
646 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
649 "&& reload_completed"
652 enum rtx_code absneg_op;
658 if (MEM_P (operands[1]))
659 op1 = operands[2], op2 = operands[1];
661 op1 = operands[1], op2 = operands[2];
666 if (rtx_equal_p (operands[0], operands[1]))
672 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
673 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
674 t = gen_rtx_SET (VOIDmode, operands[0], t);
678 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector add/sub expander.  NOTE(review): source lines 682 (the
;; plusminus code wrapper) and 685 (condition) are missing from this
;; listing.
680 (define_expand "<plusminus_insn><mode>3"
681 [(set (match_operand:VF 0 "register_operand" "")
683 (match_operand:VF 1 "nonimmediate_operand" "")
684 (match_operand:VF 2 "nonimmediate_operand" "")))]
686 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; add/sub insn: alt 0 legacy two-operand SSE, alt 1 three-operand AVX.
;; NOTE(review): source lines 690 and 694 (code wrapper and "@" template
;; opener) are missing from this listing.
688 (define_insn "*<plusminus_insn><mode>3"
689 [(set (match_operand:VF 0 "register_operand" "=x,x")
691 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
692 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
693 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
695 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
696 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
697 [(set_attr "isa" "noavx,avx")
698 (set_attr "type" "sseadd")
699 (set_attr "prefix" "orig,vex")
700 (set_attr "mode" "<MODE>")])
;; Scalar (vm) add/sub: operate on element 0, pass the rest through.
;; NOTE(review): source lines 704-705, 708-711 (vec_merge wrapper,
;; merge mask, condition, template opener) are missing from this listing.
702 (define_insn "<sse>_vm<plusminus_insn><mode>3"
703 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
706 (match_operand:VF_128 1 "register_operand" "0,x")
707 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
712 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
713 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
714 [(set_attr "isa" "noavx,avx")
715 (set_attr "type" "sseadd")
716 (set_attr "prefix" "orig,vex")
717 (set_attr "mode" "<ssescalarmode>")])
;; Vector multiply expander.  NOTE(review): source lines 721 and 724 are
;; missing from this listing.
719 (define_expand "mul<mode>3"
720 [(set (match_operand:VF 0 "register_operand" "")
722 (match_operand:VF 1 "nonimmediate_operand" "")
723 (match_operand:VF 2 "nonimmediate_operand" "")))]
725 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; NOTE(review): source lines 729 and 733 are missing from this listing.
727 (define_insn "*mul<mode>3"
728 [(set (match_operand:VF 0 "register_operand" "=x,x")
730 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
731 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
732 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
734 mul<ssemodesuffix>\t{%2, %0|%0, %2}
735 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
736 [(set_attr "isa" "noavx,avx")
737 (set_attr "type" "ssemul")
738 (set_attr "prefix" "orig,vex")
739 (set_attr "mode" "<MODE>")])
;; Scalar (vm) multiply.  NOTE(review): source lines 743-744, 747-750 are
;; missing from this listing.
741 (define_insn "<sse>_vmmul<mode>3"
742 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
745 (match_operand:VF_128 1 "register_operand" "0,x")
746 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
751 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
752 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
753 [(set_attr "isa" "noavx,avx")
754 (set_attr "type" "ssemul")
755 (set_attr "prefix" "orig,vex")
756 (set_attr "mode" "<ssescalarmode>")])
;; DF-vector divide: plain expander, no reciprocal approximation exists
;; for double precision.  NOTE(review): source line 762 (the condition)
;; is missing from this listing.
758 (define_expand "div<mode>3"
759 [(set (match_operand:VF2 0 "register_operand" "")
760 (div:VF2 (match_operand:VF2 1 "register_operand" "")
761 (match_operand:VF2 2 "nonimmediate_operand" "")))]
763 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; SF-vector divide: under the unsafe-math flag combination below, emit
;; the rcpps-based software reciprocal sequence instead of divps.
;; NOTE(review): source lines 769-770, 772-773, 778, 780-783 (condition,
;; braces, the start of the "if", and the DONE tail) are missing.
765 (define_expand "div<mode>3"
766 [(set (match_operand:VF1 0 "register_operand" "")
767 (div:VF1 (match_operand:VF1 1 "register_operand" "")
768 (match_operand:VF1 2 "nonimmediate_operand" "")))]
771 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
774 && TARGET_RECIP_VEC_DIV
775 && !optimize_insn_for_size_p ()
776 && flag_finite_math_only && !flag_trapping_math
777 && flag_unsafe_math_optimizations)
779 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Hardware divide insn.  NOTE(review): source lines 786, 789-790 are
;; missing from this listing.
784 (define_insn "<sse>_div<mode>3"
785 [(set (match_operand:VF 0 "register_operand" "=x,x")
787 (match_operand:VF 1 "register_operand" "0,x")
788 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
791 div<ssemodesuffix>\t{%2, %0|%0, %2}
792 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
793 [(set_attr "isa" "noavx,avx")
794 (set_attr "type" "ssediv")
795 (set_attr "prefix" "orig,vex")
796 (set_attr "mode" "<MODE>")])
;; Scalar (vm) divide.  NOTE(review): source lines 800-801, 804-807 are
;; missing from this listing.
798 (define_insn "<sse>_vmdiv<mode>3"
799 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
802 (match_operand:VF_128 1 "register_operand" "0,x")
803 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
808 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
809 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
810 [(set_attr "isa" "noavx,avx")
811 (set_attr "type" "ssediv")
812 (set_attr "prefix" "orig,vex")
813 (set_attr "mode" "<ssescalarmode>")])
;; Packed reciprocal approximation (rcpps).  NOTE(review): source lines
;; 817 and 819 (unspec opener and condition) are missing.
815 (define_insn "<sse>_rcp<mode>2"
816 [(set (match_operand:VF1 0 "register_operand" "=x")
818 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
820 "%vrcpps\t{%1, %0|%0, %1}"
821 [(set_attr "type" "sse")
822 (set_attr "atom_sse_attr" "rcp")
823 (set_attr "prefix" "maybe_vex")
824 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal approximation (rcpss).  NOTE(review): source lines
;; 828, 830, 832-834 (vec_merge wrapper, UNSPEC name, mask, condition,
;; template opener) are missing from this listing.
826 (define_insn "sse_vmrcpv4sf2"
827 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
829 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
831 (match_operand:V4SF 2 "register_operand" "0,x")
835 rcpss\t{%1, %0|%0, %1}
836 vrcpss\t{%1, %2, %0|%0, %2, %1}"
837 [(set_attr "isa" "noavx,avx")
838 (set_attr "type" "sse")
839 (set_attr "atom_sse_attr" "rcp")
840 (set_attr "prefix" "orig,vex")
841 (set_attr "mode" "SF")])
;; DF-vector sqrt expander.  NOTE(review): source lines 846-847
;; (condition and terminator) are missing from this listing.
843 (define_expand "sqrt<mode>2"
844 [(set (match_operand:VF2 0 "register_operand" "")
845 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; SF-vector sqrt: under the unsafe-math flags below, use the
;; rsqrtps-based software sequence instead of sqrtps.  NOTE(review):
;; source lines 851-853, 858, 860-863 (condition, braces, the start of
;; the "if", and the DONE tail) are missing from this listing.
848 (define_expand "sqrt<mode>2"
849 [(set (match_operand:VF1 0 "register_operand" "")
850 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
854 && TARGET_RECIP_VEC_SQRT
855 && !optimize_insn_for_size_p ()
856 && flag_finite_math_only && !flag_trapping_math
857 && flag_unsafe_math_optimizations)
859 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Hardware sqrt insn.  NOTE(review): source line 867 (the condition) is
;; missing from this listing.
864 (define_insn "<sse>_sqrt<mode>2"
865 [(set (match_operand:VF 0 "register_operand" "=x")
866 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
868 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "atom_sse_attr" "sqrt")
871 (set_attr "prefix" "maybe_vex")
872 (set_attr "mode" "<MODE>")])
;; Scalar (vm) sqrt.  NOTE(review): source lines 876-877, 880-882
;; (vec_merge wrapper, mask, condition, template opener) are missing.
874 (define_insn "<sse>_vmsqrt<mode>2"
875 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
878 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
879 (match_operand:VF_128 2 "register_operand" "0,x")
883 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
884 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
885 [(set_attr "isa" "noavx,avx")
886 (set_attr "type" "sse")
887 (set_attr "atom_sse_attr" "sqrt")
888 (set_attr "prefix" "orig,vex")
889 (set_attr "mode" "<ssescalarmode>")])
;; rsqrt expander: always emits the software Newton-Raphson sequence.
;; NOTE(review): source lines 893, 895-896, 898-900 are missing from this
;; listing.
891 (define_expand "rsqrt<mode>2"
892 [(set (match_operand:VF1 0 "register_operand" "")
894 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
897 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Raw rsqrtps insn.  NOTE(review): source lines 903 and 905 (unspec
;; opener and condition) are missing from this listing.
901 (define_insn "<sse>_rsqrt<mode>2"
902 [(set (match_operand:VF1 0 "register_operand" "=x")
904 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
906 "%vrsqrtps\t{%1, %0|%0, %1}"
907 [(set_attr "type" "sse")
908 (set_attr "prefix" "maybe_vex")
909 (set_attr "mode" "<MODE>")])
;; Scalar rsqrtss.  NOTE(review): source lines 913, 915, 917-919 are
;; missing from this listing.
911 (define_insn "sse_vmrsqrtv4sf2"
912 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
914 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
916 (match_operand:V4SF 2 "register_operand" "0,x")
920 rsqrtss\t{%1, %0|%0, %1}
921 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
922 [(set_attr "isa" "noavx,avx")
923 (set_attr "type" "sse")
924 (set_attr "prefix" "orig,vex")
925 (set_attr "mode" "SF")])
927 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
928 ;; isn't really correct, as those rtl operators aren't defined when
929 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; min/max expander: without finite-math, operand 1 must be a register so
;; the non-commutative insn below can preserve IEEE operand order.
;; NOTE(review): source lines 930, 933, 936-937, 941-942 (code wrapper,
;; condition, braces) are missing from this listing.
931 (define_expand "<code><mode>3"
932 [(set (match_operand:VF 0 "register_operand" "")
934 (match_operand:VF 1 "nonimmediate_operand" "")
935 (match_operand:VF 2 "nonimmediate_operand" "")))]
938 if (!flag_finite_math_only)
939 operands[1] = force_reg (<MODE>mode, operands[1]);
940 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Commutative form, valid only with finite-math.  NOTE(review): source
;; lines 945 and 950 are missing from this listing.
943 (define_insn "*<code><mode>3_finite"
944 [(set (match_operand:VF 0 "register_operand" "=x,x")
946 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
947 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
948 "TARGET_SSE && flag_finite_math_only
949 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
951 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
952 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
953 [(set_attr "isa" "noavx,avx")
954 (set_attr "type" "sseadd")
955 (set_attr "prefix" "orig,vex")
956 (set_attr "mode" "<MODE>")])
;; Non-commutative form (operand 1 constrained to a register).
;; NOTE(review): source lines 960 and 964 are missing from this listing.
958 (define_insn "*<code><mode>3"
959 [(set (match_operand:VF 0 "register_operand" "=x,x")
961 (match_operand:VF 1 "register_operand" "0,x")
962 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
963 "TARGET_SSE && !flag_finite_math_only"
965 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
966 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
967 [(set_attr "isa" "noavx,avx")
968 (set_attr "type" "sseadd")
969 (set_attr "prefix" "orig,vex")
970 (set_attr "mode" "<MODE>")])
;; Scalar (vm) min/max.  NOTE(review): source lines 974-975, 978-981 are
;; missing from this listing.
972 (define_insn "<sse>_vm<code><mode>3"
973 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
976 (match_operand:VF_128 1 "register_operand" "0,x")
977 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
982 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
983 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
984 [(set_attr "isa" "noavx,avx")
985 (set_attr "type" "sse")
986 (set_attr "prefix" "orig,vex")
987 (set_attr "mode" "<ssescalarmode>")])
989 ;; These versions of the min/max patterns implement exactly the operations
990 ;; min = (op1 < op2 ? op1 : op2)
991 ;; max = (!(op1 < op2) ? op1 : op2)
992 ;; Their operands are not commutative, and thus they may be used in the
993 ;; presence of -0.0 and NaN.
;; NOTE(review): source lines 997, 1000-1002 (unspec opener, UNSPEC name,
;; condition, template opener) are missing from this listing.
995 (define_insn "*ieee_smin<mode>3"
996 [(set (match_operand:VF 0 "register_operand" "=x,x")
998 [(match_operand:VF 1 "register_operand" "0,x")
999 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1003 min<ssemodesuffix>\t{%2, %0|%0, %2}
1004 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1005 [(set_attr "isa" "noavx,avx")
1006 (set_attr "type" "sseadd")
1007 (set_attr "prefix" "orig,vex")
1008 (set_attr "mode" "<MODE>")])
;; NOTE(review): source lines 1012, 1015-1017 are missing from this
;; listing (same shape as *ieee_smin above).
1010 (define_insn "*ieee_smax<mode>3"
1011 [(set (match_operand:VF 0 "register_operand" "=x,x")
1013 [(match_operand:VF 1 "register_operand" "0,x")
1014 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1018 max<ssemodesuffix>\t{%2, %0|%0, %2}
1019 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1020 [(set_attr "isa" "noavx,avx")
1021 (set_attr "type" "sseadd")
1022 (set_attr "prefix" "orig,vex")
1023 (set_attr "mode" "<MODE>")])
1025 (define_insn "avx_addsubv4df3"
1026 [(set (match_operand:V4DF 0 "register_operand" "=x")
1029 (match_operand:V4DF 1 "register_operand" "x")
1030 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1031 (minus:V4DF (match_dup 1) (match_dup 2))
1034 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1035 [(set_attr "type" "sseadd")
1036 (set_attr "prefix" "vex")
1037 (set_attr "mode" "V4DF")])
1039 (define_insn "sse3_addsubv2df3"
1040 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1043 (match_operand:V2DF 1 "register_operand" "0,x")
1044 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1045 (minus:V2DF (match_dup 1) (match_dup 2))
1049 addsubpd\t{%2, %0|%0, %2}
1050 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1051 [(set_attr "isa" "noavx,avx")
1052 (set_attr "type" "sseadd")
1053 (set_attr "atom_unit" "complex")
1054 (set_attr "prefix" "orig,vex")
1055 (set_attr "mode" "V2DF")])
1057 (define_insn "avx_addsubv8sf3"
1058 [(set (match_operand:V8SF 0 "register_operand" "=x")
1061 (match_operand:V8SF 1 "register_operand" "x")
1062 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1063 (minus:V8SF (match_dup 1) (match_dup 2))
1066 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1067 [(set_attr "type" "sseadd")
1068 (set_attr "prefix" "vex")
1069 (set_attr "mode" "V8SF")])
1071 (define_insn "sse3_addsubv4sf3"
1072 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1075 (match_operand:V4SF 1 "register_operand" "0,x")
1076 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1077 (minus:V4SF (match_dup 1) (match_dup 2))
1081 addsubps\t{%2, %0|%0, %2}
1082 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1083 [(set_attr "isa" "noavx,avx")
1084 (set_attr "type" "sseadd")
1085 (set_attr "prefix" "orig,vex")
1086 (set_attr "prefix_rep" "1,*")
1087 (set_attr "mode" "V4SF")])
;; AVX horizontal add/sub on V4DF (VHADDPD/VHSUBPD): pairs of adjacent
;; DF elements selected from operands 1 and 2 are combined with
;; <plusminus_insn> (plus or smin of the iterator; here plus/minus).
;; The vec_selects below pick elements 0..3 of each source operand.
1089 (define_insn "avx_h<plusminus_insn>v4df3"
1090 [(set (match_operand:V4DF 0 "register_operand" "=x")
1095 (match_operand:V4DF 1 "register_operand" "x")
1096 (parallel [(const_int 0)]))
1097 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1099 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1100 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1104 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1105 (parallel [(const_int 0)]))
1106 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1108 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1109 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1111 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1112 [(set_attr "type" "sseadd")
1113 (set_attr "prefix" "vex")
1114 (set_attr "mode" "V4DF")])
;; SSE3 horizontal add/sub on V2DF (HADDPD/HSUBPD): combines elements 0
;; and 1 of operand 1, and elements 0 and 1 of operand 2, into the two
;; result lanes.  Alternative 0 = legacy SSE3, alternative 1 = VEX form.
1116 (define_insn "sse3_h<plusminus_insn>v2df3"
1117 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1121 (match_operand:V2DF 1 "register_operand" "0,x")
1122 (parallel [(const_int 0)]))
1123 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1126 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1127 (parallel [(const_int 0)]))
1128 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1131 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1132 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1133 [(set_attr "isa" "noavx,avx")
1134 (set_attr "type" "sseadd")
1135 (set_attr "prefix" "orig,vex")
1136 (set_attr "mode" "V2DF")])
;; AVX horizontal add/sub on V8SF (VHADDPS/VHSUBPS): adjacent SF pairs
;; from operands 1 and 2 are combined per 128-bit lane — note the split
;; between elements 0..3 and elements 4..7 below, which mirrors the
;; hardware's lane-wise behavior.
1138 (define_insn "avx_h<plusminus_insn>v8sf3"
1139 [(set (match_operand:V8SF 0 "register_operand" "=x")
1145 (match_operand:V8SF 1 "register_operand" "x")
1146 (parallel [(const_int 0)]))
1147 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1149 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1150 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1154 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1155 (parallel [(const_int 0)]))
1156 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1158 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1159 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1163 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1164 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1166 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1167 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1170 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1171 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1173 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1174 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1176 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1177 [(set_attr "type" "sseadd")
1178 (set_attr "prefix" "vex")
1179 (set_attr "mode" "V8SF")])
;; SSE3 horizontal add/sub on V4SF (HADDPS/HSUBPS): combines adjacent SF
;; pairs of operand 1 (elements 0..3) and operand 2 (elements 0..3).
;; Alternative 0 = legacy SSE3 (F2 REP prefix), alternative 1 = VEX.
1181 (define_insn "sse3_h<plusminus_insn>v4sf3"
1182 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1187 (match_operand:V4SF 1 "register_operand" "0,x")
1188 (parallel [(const_int 0)]))
1189 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1191 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1192 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1196 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1197 (parallel [(const_int 0)]))
1198 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1200 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1201 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1204 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1205 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1206 [(set_attr "isa" "noavx,avx")
1207 (set_attr "type" "sseadd")
1208 (set_attr "atom_unit" "complex")
1209 (set_attr "prefix" "orig,vex")
1210 (set_attr "prefix_rep" "1,*")
1211 (set_attr "mode" "V4SF")])
;; Sum-reduction expanders (reduc_splus_*): reduce all elements of
;; operand 1 to a single sum, materialized via horizontal adds plus (for
;; 256-bit modes) a vperm2f128 lane swap and a final full-width add.
;; V4DF: hadd within lanes, swap 128-bit halves, add the two.
1213 (define_expand "reduc_splus_v4df"
1214 [(match_operand:V4DF 0 "register_operand" "")
1215 (match_operand:V4DF 1 "register_operand" "")]
1218 rtx tmp = gen_reg_rtx (V4DFmode);
1219 rtx tmp2 = gen_reg_rtx (V4DFmode);
1220 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1221 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1222 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; V2DF: a single haddpd of the operand with itself suffices.
1226 (define_expand "reduc_splus_v2df"
1227 [(match_operand:V2DF 0 "register_operand" "")
1228 (match_operand:V2DF 1 "register_operand" "")]
1231 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V8SF: two hadds collapse each 128-bit lane, then swap lanes and add.
1235 (define_expand "reduc_splus_v8sf"
1236 [(match_operand:V8SF 0 "register_operand" "")
1237 (match_operand:V8SF 1 "register_operand" "")]
1240 rtx tmp = gen_reg_rtx (V8SFmode);
1241 rtx tmp2 = gen_reg_rtx (V8SFmode);
1242 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1243 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1244 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1245 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; V4SF: two SSE3 hadds when available; otherwise fall back to the
;; generic shuffle-based reduction helper (condition lines not visible
;; in this chunk — presumably a TARGET_SSE3 test selects the branch).
1249 (define_expand "reduc_splus_v4sf"
1250 [(match_operand:V4SF 0 "register_operand" "")
1251 (match_operand:V4SF 1 "register_operand" "")]
1256 rtx tmp = gen_reg_rtx (V4SFmode);
1257 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1258 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1261 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1265 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Integer modes require AVX2; 256-bit float modes require AVX; V4SF
;; only needs base SSE.
1266 (define_mode_iterator REDUC_SMINMAX_MODE
1267 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1268 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1269 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1270 (V4SF "TARGET_SSE")])
;; Min/max reduction expanders: all defer to ix86_expand_reduc, which
;; builds the reduction from shuffles and the element-wise min/max insn.
;; Signed min/max over REDUC_SMINMAX_MODE.
1272 (define_expand "reduc_<code>_<mode>"
1273 [(smaxmin:REDUC_SMINMAX_MODE
1274 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1275 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1278 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; 256-bit integer variant (code iterator lines partly not visible here).
1282 (define_expand "reduc_<code>_<mode>"
1284 (match_operand:VI_256 0 "register_operand" "")
1285 (match_operand:VI_256 1 "register_operand" ""))]
1288 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-min reduction for V8HI (phminposuw-capable element type).
1292 (define_expand "reduc_umin_v8hi"
1294 (match_operand:V8HI 0 "register_operand" "")
1295 (match_operand:V8HI 1 "register_operand" ""))]
1298 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1302 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1304 ;; Parallel floating point comparisons
1306 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX VCMPPS/VCMPPD with an explicit 5-bit predicate immediate
;; (operand 3, 0..31) selecting the comparison; result is a full
;; element-wise mask in operand 0.
1308 (define_insn "avx_cmp<mode>3"
1309 [(set (match_operand:VF 0 "register_operand" "=x")
1311 [(match_operand:VF 1 "register_operand" "x")
1312 (match_operand:VF 2 "nonimmediate_operand" "xm")
1313 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1316 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1317 [(set_attr "type" "ssecmp")
1318 (set_attr "length_immediate" "1")
1319 (set_attr "prefix" "vex")
1320 (set_attr "mode" "<MODE>")])
;; AVX scalar VCMPSS/VCMPSD with predicate immediate: compares only the
;; low element; upper elements pass through (merge lines not visible in
;; this chunk).  Note mode attr is the scalar mode.
1322 (define_insn "avx_vmcmp<mode>3"
1323 [(set (match_operand:VF_128 0 "register_operand" "=x")
1326 [(match_operand:VF_128 1 "register_operand" "x")
1327 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1328 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1333 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1334 [(set_attr "type" "ssecmp")
1335 (set_attr "length_immediate" "1")
1336 (set_attr "prefix" "vex")
1337 (set_attr "mode" "<ssescalarmode>")])
;; CMPPS/CMPPD mask compare, commutative-comparison variant: the "%0,x"
;; constraint lets operands 1 and 2 swap, which is only valid when the
;; comparison code is commutative — enforced by the RTX_COMM_COMPARE
;; check in the condition.  %D3 prints the predicate suffix.
1339 (define_insn "*<sse>_maskcmp<mode>3_comm"
1340 [(set (match_operand:VF 0 "register_operand" "=x,x")
1341 (match_operator:VF 3 "sse_comparison_operator"
1342 [(match_operand:VF 1 "register_operand" "%0,x")
1343 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1345 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1347 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1348 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1349 [(set_attr "isa" "noavx,avx")
1350 (set_attr "type" "ssecmp")
1351 (set_attr "length_immediate" "1")
1352 (set_attr "prefix" "orig,vex")
1353 (set_attr "mode" "<MODE>")])
;; CMPPS/CMPPD mask compare, general (non-commutative-safe) variant:
;; operand order is fixed ("0,x" — no % commutativity marker).
1355 (define_insn "<sse>_maskcmp<mode>3"
1356 [(set (match_operand:VF 0 "register_operand" "=x,x")
1357 (match_operator:VF 3 "sse_comparison_operator"
1358 [(match_operand:VF 1 "register_operand" "0,x")
1359 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1362 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1363 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1364 [(set_attr "isa" "noavx,avx")
1365 (set_attr "type" "ssecmp")
1366 (set_attr "length_immediate" "1")
1367 (set_attr "prefix" "orig,vex")
1368 (set_attr "mode" "<MODE>")])
;; Scalar CMPSS/CMPSD mask compare: compares the low element only,
;; merging the upper elements from operand 1 (merge RTL lines not
;; visible in this chunk).
1370 (define_insn "<sse>_vmmaskcmp<mode>3"
1371 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1373 (match_operator:VF_128 3 "sse_comparison_operator"
1374 [(match_operand:VF_128 1 "register_operand" "0,x")
1375 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1380 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1381 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1382 [(set_attr "isa" "noavx,avx")
1383 (set_attr "type" "ssecmp")
1384 (set_attr "length_immediate" "1,*")
1385 (set_attr "prefix" "orig,vex")
1386 (set_attr "mode" "<ssescalarmode>")])
;; COMISS/COMISD: ordered scalar compare of the low elements of operands
;; 0 and 1, setting EFLAGS (CCFP).  Signals on QNaN (contrast ucomi).
;; prefix_data16 is set only for the DF (comisd) case.
1388 (define_insn "<sse>_comi"
1389 [(set (reg:CCFP FLAGS_REG)
1392 (match_operand:<ssevecmode> 0 "register_operand" "x")
1393 (parallel [(const_int 0)]))
1395 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1396 (parallel [(const_int 0)]))))]
1397 "SSE_FLOAT_MODE_P (<MODE>mode)"
1398 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1399 [(set_attr "type" "ssecomi")
1400 (set_attr "prefix" "maybe_vex")
1401 (set_attr "prefix_rep" "0")
1402 (set (attr "prefix_data16")
1403 (if_then_else (eq_attr "mode" "DF")
1405 (const_string "0")))
1406 (set_attr "mode" "<MODE>")])
;; UCOMISS/UCOMISD: unordered scalar compare of the low elements,
;; setting EFLAGS (CCFPU); quiet on QNaN.  Structure parallels
;; <sse>_comi above.
1408 (define_insn "<sse>_ucomi"
1409 [(set (reg:CCFPU FLAGS_REG)
1412 (match_operand:<ssevecmode> 0 "register_operand" "x")
1413 (parallel [(const_int 0)]))
1415 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1416 (parallel [(const_int 0)]))))]
1417 "SSE_FLOAT_MODE_P (<MODE>mode)"
1418 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1419 [(set_attr "type" "ssecomi")
1420 (set_attr "prefix" "maybe_vex")
1421 (set_attr "prefix_rep" "0")
1422 (set (attr "prefix_data16")
1423 (if_then_else (eq_attr "mode" "DF")
1425 (const_string "0")))
1426 (set_attr "mode" "<MODE>")])
;; vcond for 256-bit modes: vector conditional select driven by a float
;; comparison of operands 4/5; element counts of the data and comparison
;; modes must match.  Expansion is delegated to ix86_expand_fp_vcond.
1428 (define_expand "vcond<V_256:mode><VF_256:mode>"
1429 [(set (match_operand:V_256 0 "register_operand" "")
1431 (match_operator 3 ""
1432 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1433 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1434 (match_operand:V_256 1 "general_operand" "")
1435 (match_operand:V_256 2 "general_operand" "")))]
1437 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1438 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1440 bool ok = ix86_expand_fp_vcond (operands);
;; vcond for 128-bit modes: same scheme as the 256-bit expander above,
;; delegating to ix86_expand_fp_vcond.
1445 (define_expand "vcond<V_128:mode><VF_128:mode>"
1446 [(set (match_operand:V_128 0 "register_operand" "")
1448 (match_operator 3 ""
1449 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1450 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1451 (match_operand:V_128 1 "general_operand" "")
1452 (match_operand:V_128 2 "general_operand" "")))]
1454 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1455 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1457 bool ok = ix86_expand_fp_vcond (operands);
1462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1464 ;; Parallel floating point logical operations
1466 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ANDNPS/ANDNPD: (~op1) & op2.  The output template is built at run
;; time with snprintf so the "ps" suffix can be forced when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL (ps encodings are shorter).
1468 (define_insn "<sse>_andnot<mode>3"
1469 [(set (match_operand:VF 0 "register_operand" "=x,x")
1472 (match_operand:VF 1 "register_operand" "0,x"))
1473 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1476 static char buf[32];
1479 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1481 switch (which_alternative)
1484 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1487 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1493 snprintf (buf, sizeof (buf), insn, suffix);
1496 [(set_attr "isa" "noavx,avx")
1497 (set_attr "type" "sselog")
1498 (set_attr "prefix" "orig,vex")
1499 (set_attr "mode" "<MODE>")])
;; Expander for and/ior/xor on float vectors (<code> iterator); fixes up
;; operands so at most one is a memory reference.
1501 (define_expand "<code><mode>3"
1502 [(set (match_operand:VF 0 "register_operand" "")
1504 (match_operand:VF 1 "nonimmediate_operand" "")
1505 (match_operand:VF 2 "nonimmediate_operand" "")))]
1507 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; ANDPS/ORPS/XORPS (and pd forms) matcher for the expander above; same
;; snprintf suffix trick as <sse>_andnot<mode>3, with <logic> supplying
;; the mnemonic stem.
1509 (define_insn "*<code><mode>3"
1510 [(set (match_operand:VF 0 "register_operand" "=x,x")
1512 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1513 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1514 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1516 static char buf[32];
1519 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1521 switch (which_alternative)
1524 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1527 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1533 snprintf (buf, sizeof (buf), insn, suffix);
1536 [(set_attr "isa" "noavx,avx")
1537 (set_attr "type" "sselog")
1538 (set_attr "prefix" "orig,vex")
1539 (set_attr "mode" "<MODE>")])
;; copysign: op0 = (op1 & ~signmask) | (op2 & signmask), using the
;; sign-bit mask constant built by ix86_build_signbit_mask (operand 3)
;; and two fresh temporaries (operands 4 and 5).
1541 (define_expand "copysign<mode>3"
1544 (not:VF (match_dup 3))
1545 (match_operand:VF 1 "nonimmediate_operand" "")))
1547 (and:VF (match_dup 3)
1548 (match_operand:VF 2 "nonimmediate_operand" "")))
1549 (set (match_operand:VF 0 "register_operand" "")
1550 (ior:VF (match_dup 4) (match_dup 5)))]
1553 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1555 operands[4] = gen_reg_rtx (<MODE>mode);
1556 operands[5] = gen_reg_rtx (<MODE>mode);
1559 ;; Also define scalar versions. These are used for abs, neg, and
1560 ;; conditional move. Using subregs into vector modes causes register
1561 ;; allocation lossage. These patterns do not allow memory operands
1562 ;; because the native instructions read the full 128-bits.
;; Scalar (SF/DF in xmm) andnot, emitted with the full-width ANDNPS/PD
;; vector instruction; suffix selection mirrors the vector pattern.
1564 (define_insn "*andnot<mode>3"
1565 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1568 (match_operand:MODEF 1 "register_operand" "0,x"))
1569 (match_operand:MODEF 2 "register_operand" "x,x")))]
1570 "SSE_FLOAT_MODE_P (<MODE>mode)"
1572 static char buf[32];
1575 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1577 switch (which_alternative)
1580 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1583 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1589 snprintf (buf, sizeof (buf), insn, suffix);
1592 [(set_attr "isa" "noavx,avx")
1593 (set_attr "type" "sselog")
1594 (set_attr "prefix" "orig,vex")
1595 (set_attr "mode" "<ssevecmode>")])
;; Scalar (SF/DF in xmm) and/ior/xor, emitted as the full-width vector
;; logical instruction; register-only operands (see comment block above
;; the andnot pattern for why memory is disallowed).
1597 (define_insn "*<code><mode>3"
1598 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1600 (match_operand:MODEF 1 "register_operand" "%0,x")
1601 (match_operand:MODEF 2 "register_operand" "x,x")))]
1602 "SSE_FLOAT_MODE_P (<MODE>mode)"
1604 static char buf[32];
1607 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1609 switch (which_alternative)
1612 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1615 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1621 snprintf (buf, sizeof (buf), insn, suffix);
1624 [(set_attr "isa" "noavx,avx")
1625 (set_attr "type" "sselog")
1626 (set_attr "prefix" "orig,vex")
1627 (set_attr "mode" "<ssevecmode>")])
1629 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1631 ;; FMA4 floating point multiply/accumulate instructions. This
1632 ;; includes the scalar version of the instructions as well as the
1635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1637 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1638 ;; combine to generate a multiply/add with two memory references. We then
1639 ;; split this insn, into loading up the destination register with one of the
1640 ;; memory operations. If we don't manage to split the insn, reload will
1641 ;; generate the appropriate moves. The reason this is needed, is that combine
1642 ;; has already folded one of the memory references into both the multiply and
1643 ;; add insns, and it can't generate a new pseudo. I.e.:
1644 ;; (set (reg1) (mem (addr1)))
1645 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1646 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1648 ;; ??? This is historic, pre-dating the gimple fma transformation.
1649 ;; We could now properly represent that only one memory operand is
1650 ;; allowed and not be penalized during optimization.
1652 ;; Intrinsic FMA operations.
1654 ;; The standard names for fma is only available with SSE math enabled.
;; Standard-name FMA expanders: fma = a*b+c, fms = a*b-c, fnma = -a*b+c,
;; fnms = -a*b-c.  All gated on (FMA3 or FMA4) && SSE math.
1655 (define_expand "fma<mode>4"
1656 [(set (match_operand:FMAMODE 0 "register_operand")
1658 (match_operand:FMAMODE 1 "nonimmediate_operand")
1659 (match_operand:FMAMODE 2 "nonimmediate_operand")
1660 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1661 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1663 (define_expand "fms<mode>4"
1664 [(set (match_operand:FMAMODE 0 "register_operand")
1666 (match_operand:FMAMODE 1 "nonimmediate_operand")
1667 (match_operand:FMAMODE 2 "nonimmediate_operand")
1668 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1669 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1671 (define_expand "fnma<mode>4"
1672 [(set (match_operand:FMAMODE 0 "register_operand")
1674 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1675 (match_operand:FMAMODE 2 "nonimmediate_operand")
1676 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1677 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1679 (define_expand "fnms<mode>4"
1680 [(set (match_operand:FMAMODE 0 "register_operand")
1682 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1683 (match_operand:FMAMODE 2 "nonimmediate_operand")
1684 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1685 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1687 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Intrinsic-level fmadd expander: identical shape to fma<mode>4 but
;; only requires the FMA/FMA4 ISA, not -mfpmath=sse.
1688 (define_expand "fma4i_fmadd_<mode>"
1689 [(set (match_operand:FMAMODE 0 "register_operand")
1691 (match_operand:FMAMODE 1 "nonimmediate_operand")
1692 (match_operand:FMAMODE 2 "nonimmediate_operand")
1693 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1694 "TARGET_FMA || TARGET_FMA4")
;; FMA4 4-operand insns (vfmadd/vfmsub/vfnmadd/vfnmsub): non-destructive
;; destination; two alternatives place the single allowed memory operand
;; in either operand 2 or operand 3.  Operand 1 is commutative ("%x").
1696 (define_insn "*fma4i_fmadd_<mode>"
1697 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1699 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1700 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1701 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1703 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1704 [(set_attr "type" "ssemuladd")
1705 (set_attr "mode" "<MODE>")])
;; a*b - c
1707 (define_insn "*fma4i_fmsub_<mode>"
1708 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1710 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1711 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1713 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1715 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1716 [(set_attr "type" "ssemuladd")
1717 (set_attr "mode" "<MODE>")])
;; -a*b + c
1719 (define_insn "*fma4i_fnmadd_<mode>"
1720 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1723 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1724 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1725 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1727 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1728 [(set_attr "type" "ssemuladd")
1729 (set_attr "mode" "<MODE>")])
;; -a*b - c
1731 (define_insn "*fma4i_fnmsub_<mode>"
1732 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1735 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1736 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1738 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1740 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1741 [(set_attr "type" "ssemuladd")
1742 (set_attr "mode" "<MODE>")])
1744 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1745 ;; entire destination register, with the high-order elements zeroed.
;; FMA4 scalar fmadd: merges the fma result with a zero vector
;; (operand 4, set up in the preparation statement).
1747 (define_expand "fma4i_vmfmadd_<mode>"
1748 [(set (match_operand:VF_128 0 "register_operand")
1751 (match_operand:VF_128 1 "nonimmediate_operand")
1752 (match_operand:VF_128 2 "nonimmediate_operand")
1753 (match_operand:VF_128 3 "nonimmediate_operand"))
1758 operands[4] = CONST0_RTX (<MODE>mode);
;; FMA3 scalar fmadd expander; merge details not fully visible here.
1761 (define_expand "fmai_vmfmadd_<mode>"
1762 [(set (match_operand:VF_128 0 "register_operand")
1765 (match_operand:VF_128 1 "nonimmediate_operand")
1766 (match_operand:VF_128 2 "nonimmediate_operand")
1767 (match_operand:VF_128 3 "nonimmediate_operand"))
;; FMA3 scalar insns: three alternatives pick the 132/213/231 operand
;; form so the destructive destination can coincide with whichever input
;; is in a register; the memory operand moves accordingly.
1772 (define_insn "*fmai_fmadd_<mode>"
1773 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1776 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1777 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1778 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1783 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1784 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1785 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1786 [(set_attr "type" "ssemuladd")
1787 (set_attr "mode" "<MODE>")])
;; a*b - c (scalar)
1789 (define_insn "*fmai_fmsub_<mode>"
1790 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1793 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1794 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1796 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1801 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1802 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1803 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1804 [(set_attr "type" "ssemuladd")
1805 (set_attr "mode" "<MODE>")])
;; -a*b + c (scalar)
1807 (define_insn "*fmai_fnmadd_<mode>"
1808 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1812 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1813 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1814 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1819 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1820 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1821 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1822 [(set_attr "type" "ssemuladd")
1823 (set_attr "mode" "<MODE>")])
;; -a*b - c (scalar)
1825 (define_insn "*fmai_fnmsub_<mode>"
1826 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1830 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1831 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1833 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1838 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1839 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1840 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1841 [(set_attr "type" "ssemuladd")
1842 (set_attr "mode" "<MODE>")])
;; FMA4 scalar insns: 4-operand non-destructive forms; operand 4 is the
;; zero constant the fma result is merged with (upper elements zeroed,
;; per the comment above the expanders).
1844 (define_insn "*fma4i_vmfmadd_<mode>"
1845 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1848 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1849 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1850 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1851 (match_operand:VF_128 4 "const0_operand" "")
1854 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1855 [(set_attr "type" "ssemuladd")
1856 (set_attr "mode" "<MODE>")])
;; a*b - c (scalar, FMA4)
1858 (define_insn "*fma4i_vmfmsub_<mode>"
1859 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1862 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1863 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1865 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1866 (match_operand:VF_128 4 "const0_operand" "")
1869 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1870 [(set_attr "type" "ssemuladd")
1871 (set_attr "mode" "<MODE>")])
;; -a*b + c (scalar, FMA4)
1873 (define_insn "*fma4i_vmfnmadd_<mode>"
1874 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1878 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1879 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1880 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1881 (match_operand:VF_128 4 "const0_operand" "")
1884 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1885 [(set_attr "type" "ssemuladd")
1886 (set_attr "mode" "<MODE>")])
;; -a*b - c (scalar, FMA4)
1888 (define_insn "*fma4i_vmfnmsub_<mode>"
1889 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1893 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1894 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1896 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1897 (match_operand:VF_128 4 "const0_operand" "")
1900 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1901 [(set_attr "type" "ssemuladd")
1902 (set_attr "mode" "<MODE>")])
1904 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1906 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1908 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1910 ;; It would be possible to represent these without the UNSPEC as
1913 ;; (fma op1 op2 op3)
1914 ;; (fma op1 op2 (neg op3))
1917 ;; But this doesn't seem useful in practice.
;; UNSPEC-based expander for the alternating add/sub FMA operation
;; (vfmaddsub); matched by the FMA3 and FMA4 insns below.
1919 (define_expand "fmaddsub_<mode>"
1920 [(set (match_operand:VF 0 "register_operand")
1922 [(match_operand:VF 1 "nonimmediate_operand")
1923 (match_operand:VF 2 "nonimmediate_operand")
1924 (match_operand:VF 3 "nonimmediate_operand")]
1926 "TARGET_FMA || TARGET_FMA4")
;; FMA4 vfmaddsub: non-destructive 4-operand form; memory operand may be
;; operand 2 or operand 3 (two alternatives).
1928 (define_insn "*fma4_fmaddsub_<mode>"
1929 [(set (match_operand:VF 0 "register_operand" "=x,x")
1931 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1932 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1933 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1936 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1937 [(set_attr "type" "ssemuladd")
1938 (set_attr "mode" "<MODE>")])
;; FMA4 vfmsubadd: same shape with operand 3 negated in the UNSPEC body.
1940 (define_insn "*fma4_fmsubadd_<mode>"
1941 [(set (match_operand:VF 0 "register_operand" "=x,x")
1943 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1944 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1946 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1949 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1950 [(set_attr "type" "ssemuladd")
1951 (set_attr "mode" "<MODE>")])
1953 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1955 ;; FMA3 floating point multiply/accumulate instructions.
1957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 vfmadd: destructive 3-operand encoding; the three alternatives
;; select the 132/213/231 form based on which input shares a register
;; with the destination and where the memory operand sits.
1959 (define_insn "*fma_fmadd_<mode>"
1960 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1962 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1963 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1964 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1967 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1968 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1969 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1970 [(set_attr "type" "ssemuladd")
1971 (set_attr "mode" "<MODE>")])
;; FMA3 vfmsub (a*b - c); same 132/213/231 alternative scheme as
;; *fma_fmadd_<mode> above.
1973 (define_insn "*fma_fmsub_<mode>"
1974 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1976 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1977 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1979 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1982 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1983 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1984 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1985 [(set_attr "type" "ssemuladd")
1986 (set_attr "mode" "<MODE>")])
;; FMA3 vfnmadd (-a*b + c).
1988 (define_insn "*fma_fnmadd_<mode>"
1989 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1992 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1993 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1994 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1997 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1998 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1999 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2000 [(set_attr "type" "ssemuladd")
2001 (set_attr "mode" "<MODE>")])
;; FMA3 vfnmsub (-a*b - c).
2003 (define_insn "*fma_fnmsub_<mode>"
2004 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2007 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2008 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2010 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2013 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2014 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2015 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2016 [(set_attr "type" "ssemuladd")
2017 (set_attr "mode" "<MODE>")])
;; FMA3 vfmaddsub/vfmsubadd: UNSPEC-based alternating add/sub FMA,
;; 132/213/231 alternatives as in the plain FMA3 patterns.
2019 (define_insn "*fma_fmaddsub_<mode>"
2020 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2022 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2023 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2024 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2028 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2029 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2030 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2031 [(set_attr "type" "ssemuladd")
2032 (set_attr "mode" "<MODE>")])
;; vfmsubadd: operand 3 negated inside the UNSPEC.
2034 (define_insn "*fma_fmsubadd_<mode>"
2035 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2037 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2038 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2040 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2044 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2045 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2046 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2047 [(set_attr "type" "ssemuladd")
2048 (set_attr "mode" "<MODE>")])
2050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2052 ;; Parallel single-precision floating point conversion operations
2054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; CVTPI2PS: converts two MMX SI elements (operand 2) to SF and merges
;; them into the low half of V4SF operand 1.
2056 (define_insn "sse_cvtpi2ps"
2057 [(set (match_operand:V4SF 0 "register_operand" "=x")
2060 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2061 (match_operand:V4SF 1 "register_operand" "0")
2064 "cvtpi2ps\t{%2, %0|%0, %2}"
2065 [(set_attr "type" "ssecvt")
2066 (set_attr "mode" "V4SF")])
;; CVTPS2PI: rounds the low two SF elements of operand 1 to SI (via the
;; UNSPEC_FIX_NOTRUNC-style unspec) into an MMX register.
2068 (define_insn "sse_cvtps2pi"
2069 [(set (match_operand:V2SI 0 "register_operand" "=y")
2071 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2073 (parallel [(const_int 0) (const_int 1)])))]
2075 "cvtps2pi\t{%1, %0|%0, %1}"
2076 [(set_attr "type" "ssecvt")
2077 (set_attr "unit" "mmx")
2078 (set_attr "mode" "DI")])
;; CVTTPS2PI: truncating conversion (fix) of the low two SF elements of
;; operand 1 to SI into an MMX register.
2080 (define_insn "sse_cvttps2pi"
2081 [(set (match_operand:V2SI 0 "register_operand" "=y")
2083 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2084 (parallel [(const_int 0) (const_int 1)])))]
2086 "cvttps2pi\t{%1, %0|%0, %1}"
2087 [(set_attr "type" "ssecvt")
2088 (set_attr "unit" "mmx")
2089 (set_attr "prefix_rep" "0")
2090 (set_attr "mode" "SF")])
;; CVTSI2SS: converts a 32-bit integer (operand 2) to SF and merges it
;; into the low element of operand 1.  Reg/mem source split into two
;; legacy alternatives for decoder scheduling; third alternative is VEX.
2092 (define_insn "sse_cvtsi2ss"
2093 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2096 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2097 (match_operand:V4SF 1 "register_operand" "0,0,x")
2101 cvtsi2ss\t{%2, %0|%0, %2}
2102 cvtsi2ss\t{%2, %0|%0, %2}
2103 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2104 [(set_attr "isa" "noavx,noavx,avx")
2105 (set_attr "type" "sseicvt")
2106 (set_attr "athlon_decode" "vector,double,*")
2107 (set_attr "amdfam10_decode" "vector,double,*")
2108 (set_attr "bdver1_decode" "double,direct,*")
2109 (set_attr "prefix" "orig,orig,vex")
2110 (set_attr "mode" "SF")])
;; CVTSI2SSQ: 64-bit-integer-to-SF variant of sse_cvtsi2ss; requires
;; TARGET_64BIT (needs the REX.W / 4-byte-VEX encoding).
2112 (define_insn "sse_cvtsi2ssq"
2113 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2116 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2117 (match_operand:V4SF 1 "register_operand" "0,0,x")
2119 "TARGET_SSE && TARGET_64BIT"
2121 cvtsi2ssq\t{%2, %0|%0, %2}
2122 cvtsi2ssq\t{%2, %0|%0, %2}
2123 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2124 [(set_attr "isa" "noavx,noavx,avx")
2125 (set_attr "type" "sseicvt")
2126 (set_attr "athlon_decode" "vector,double,*")
2127 (set_attr "amdfam10_decode" "vector,double,*")
2128 (set_attr "bdver1_decode" "double,direct,*")
2129 (set_attr "length_vex" "*,*,4")
2130 (set_attr "prefix_rex" "1,1,*")
2131 (set_attr "prefix" "orig,orig,vex")
2132 (set_attr "mode" "SF")])
;; CVTSS2SI: rounds (not truncates — UNSPEC_FIX_NOTRUNC) the low SF
;; element of the V4SF operand to a 32-bit integer register.
2134 (define_insn "sse_cvtss2si"
2135 [(set (match_operand:SI 0 "register_operand" "=r,r")
2138 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2139 (parallel [(const_int 0)]))]
2140 UNSPEC_FIX_NOTRUNC))]
2142 "%vcvtss2si\t{%1, %0|%0, %1}"
2143 [(set_attr "type" "sseicvt")
2144 (set_attr "athlon_decode" "double,vector")
2145 (set_attr "bdver1_decode" "double,double")
2146 (set_attr "prefix_rep" "1")
2147 (set_attr "prefix" "maybe_vex")
2148 (set_attr "mode" "SI")])
2150 (define_insn "sse_cvtss2si_2"
2151 [(set (match_operand:SI 0 "register_operand" "=r,r")
2152 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2153 UNSPEC_FIX_NOTRUNC))]
2155 "%vcvtss2si\t{%1, %0|%0, %1}"
2156 [(set_attr "type" "sseicvt")
2157 (set_attr "athlon_decode" "double,vector")
2158 (set_attr "amdfam10_decode" "double,double")
2159 (set_attr "bdver1_decode" "double,double")
2160 (set_attr "prefix_rep" "1")
2161 (set_attr "prefix" "maybe_vex")
2162 (set_attr "mode" "SI")])
2164 (define_insn "sse_cvtss2siq"
2165 [(set (match_operand:DI 0 "register_operand" "=r,r")
2168 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2169 (parallel [(const_int 0)]))]
2170 UNSPEC_FIX_NOTRUNC))]
2171 "TARGET_SSE && TARGET_64BIT"
2172 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2173 [(set_attr "type" "sseicvt")
2174 (set_attr "athlon_decode" "double,vector")
2175 (set_attr "bdver1_decode" "double,double")
2176 (set_attr "prefix_rep" "1")
2177 (set_attr "prefix" "maybe_vex")
2178 (set_attr "mode" "DI")])
2180 (define_insn "sse_cvtss2siq_2"
2181 [(set (match_operand:DI 0 "register_operand" "=r,r")
2182 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2183 UNSPEC_FIX_NOTRUNC))]
2184 "TARGET_SSE && TARGET_64BIT"
2185 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2186 [(set_attr "type" "sseicvt")
2187 (set_attr "athlon_decode" "double,vector")
2188 (set_attr "amdfam10_decode" "double,double")
2189 (set_attr "bdver1_decode" "double,double")
2190 (set_attr "prefix_rep" "1")
2191 (set_attr "prefix" "maybe_vex")
2192 (set_attr "mode" "DI")])
2194 (define_insn "sse_cvttss2si"
2195 [(set (match_operand:SI 0 "register_operand" "=r,r")
2198 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2199 (parallel [(const_int 0)]))))]
2201 "%vcvttss2si\t{%1, %0|%0, %1}"
2202 [(set_attr "type" "sseicvt")
2203 (set_attr "athlon_decode" "double,vector")
2204 (set_attr "amdfam10_decode" "double,double")
2205 (set_attr "bdver1_decode" "double,double")
2206 (set_attr "prefix_rep" "1")
2207 (set_attr "prefix" "maybe_vex")
2208 (set_attr "mode" "SI")])
2210 (define_insn "sse_cvttss2siq"
2211 [(set (match_operand:DI 0 "register_operand" "=r,r")
2214 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2215 (parallel [(const_int 0)]))))]
2216 "TARGET_SSE && TARGET_64BIT"
2217 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2218 [(set_attr "type" "sseicvt")
2219 (set_attr "athlon_decode" "double,vector")
2220 (set_attr "amdfam10_decode" "double,double")
2221 (set_attr "bdver1_decode" "double,double")
2222 (set_attr "prefix_rep" "1")
2223 (set_attr "prefix" "maybe_vex")
2224 (set_attr "mode" "DI")])
;; NOTE(review): line-number gaps below indicate dropped interior lines
;; (conditions, if_then_else arms); reconcile with upstream sse.md.

;; cvtdq2ps: signed integer vector -> float vector for the VF1 (SFmode
;; vector) modes.
2226 (define_insn "float<sseintvecmodelower><mode>2"
2227 [(set (match_operand:VF1 0 "register_operand" "=x")
2229 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2231 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2232 [(set_attr "type" "ssecvt")
2233 (set_attr "prefix" "maybe_vex")
2234 (set_attr "mode" "<sseinsnmode>")])

;; Unsigned integer vector -> float vector, expanded through the helper
;; in i386.c rather than a single instruction.
2236 (define_expand "floatuns<sseintvecmodelower><mode>2"
2237 [(match_operand:VF1 0 "register_operand" "")
2238 (match_operand:<sseintvecmode> 1 "register_operand" "")]
2239 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2241 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);

;; vcvtps2dq (256-bit): V8SF -> V8SI using the current rounding mode.
2245 (define_insn "avx_cvtps2dq256"
2246 [(set (match_operand:V8SI 0 "register_operand" "=x")
2247 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2248 UNSPEC_FIX_NOTRUNC))]
2250 "vcvtps2dq\t{%1, %0|%0, %1}"
2251 [(set_attr "type" "ssecvt")
2252 (set_attr "prefix" "vex")
2253 (set_attr "mode" "OI")])

;; cvtps2dq: V4SF -> V4SI using the current rounding mode.
2255 (define_insn "sse2_cvtps2dq"
2256 [(set (match_operand:V4SI 0 "register_operand" "=x")
2257 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2258 UNSPEC_FIX_NOTRUNC))]
2260 "%vcvtps2dq\t{%1, %0|%0, %1}"
2261 [(set_attr "type" "ssecvt")
2262 (set (attr "prefix_data16")
2264 (match_test "TARGET_AVX")
2266 (const_string "1")))
2267 (set_attr "prefix" "maybe_vex")
2268 (set_attr "mode" "TI")])

;; vcvttps2dq (256-bit): truncating V8SF -> V8SI.
2270 (define_insn "fix_truncv8sfv8si2"
2271 [(set (match_operand:V8SI 0 "register_operand" "=x")
2272 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2274 "vcvttps2dq\t{%1, %0|%0, %1}"
2275 [(set_attr "type" "ssecvt")
2276 (set_attr "prefix" "vex")
2277 (set_attr "mode" "OI")])

;; cvttps2dq: truncating V4SF -> V4SI.
;; NOTE(review): prefix_data16 is set both conditionally (2290-2294) and
;; unconditionally (2295) below -- looks like a duplicate attribute
;; setting; confirm against upstream before fixing.
2279 (define_insn "fix_truncv4sfv4si2"
2280 [(set (match_operand:V4SI 0 "register_operand" "=x")
2281 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2283 "%vcvttps2dq\t{%1, %0|%0, %1}"
2284 [(set_attr "type" "ssecvt")
2285 (set (attr "prefix_rep")
2287 (match_test "TARGET_AVX")
2289 (const_string "1")))
2290 (set (attr "prefix_data16")
2292 (match_test "TARGET_AVX")
2294 (const_string "0")))
2295 (set_attr "prefix_data16" "0")
2296 (set_attr "prefix" "maybe_vex")
2297 (set_attr "mode" "TI")])

;; Unsigned truncating float->int: bias the input into signed range
;; (helper returns the adjusted value and a correction mask), do a
;; signed truncating convert, then xor the correction back in.
2299 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2300 [(match_operand:<sseintvecmode> 0 "register_operand" "")
2301 (match_operand:VF1 1 "register_operand" "")]
2305 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2306 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2307 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2308 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2314 ;; Parallel double-precision floating point conversion operations
2316 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NOTE(review): several patterns below are missing interior lines
;; (embedded numbering jumps); restore from upstream before building.

;; cvtpi2pd: V2SI (MMX register or memory) -> V2DF.
2318 (define_insn "sse2_cvtpi2pd"
2319 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2320 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2322 "cvtpi2pd\t{%1, %0|%0, %1}"
2323 [(set_attr "type" "ssecvt")
2324 (set_attr "unit" "mmx,*")
2325 (set_attr "prefix_data16" "1,*")
2326 (set_attr "mode" "V2DF")])

;; cvtpd2pi: V2DF -> V2SI in an MMX register, rounded.
2328 (define_insn "sse2_cvtpd2pi"
2329 [(set (match_operand:V2SI 0 "register_operand" "=y")
2330 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2331 UNSPEC_FIX_NOTRUNC))]
2333 "cvtpd2pi\t{%1, %0|%0, %1}"
2334 [(set_attr "type" "ssecvt")
2335 (set_attr "unit" "mmx")
2336 (set_attr "bdver1_decode" "double")
2337 (set_attr "prefix_data16" "1")
2338 (set_attr "mode" "DI")])

;; cvttpd2pi: truncating V2DF -> V2SI (MMX).
2340 (define_insn "sse2_cvttpd2pi"
2341 [(set (match_operand:V2SI 0 "register_operand" "=y")
2342 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2344 "cvttpd2pi\t{%1, %0|%0, %1}"
2345 [(set_attr "type" "ssecvt")
2346 (set_attr "unit" "mmx")
2347 (set_attr "bdver1_decode" "double")
2348 (set_attr "prefix_data16" "1")
2349 (set_attr "mode" "TI")])

;; cvtsi2sd: SImode integer -> scalar double, merged into operand 0.
2351 (define_insn "sse2_cvtsi2sd"
2352 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2355 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2356 (match_operand:V2DF 1 "register_operand" "0,0,x")
2360 cvtsi2sd\t{%2, %0|%0, %2}
2361 cvtsi2sd\t{%2, %0|%0, %2}
2362 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2363 [(set_attr "isa" "noavx,noavx,avx")
2364 (set_attr "type" "sseicvt")
2365 (set_attr "athlon_decode" "double,direct,*")
2366 (set_attr "amdfam10_decode" "vector,double,*")
2367 (set_attr "bdver1_decode" "double,direct,*")
2368 (set_attr "prefix" "orig,orig,vex")
2369 (set_attr "mode" "DF")])

;; cvtsi2sdq: DImode variant; 64-bit targets only.
2371 (define_insn "sse2_cvtsi2sdq"
2372 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2375 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2376 (match_operand:V2DF 1 "register_operand" "0,0,x")
2378 "TARGET_SSE2 && TARGET_64BIT"
2380 cvtsi2sdq\t{%2, %0|%0, %2}
2381 cvtsi2sdq\t{%2, %0|%0, %2}
2382 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2383 [(set_attr "isa" "noavx,noavx,avx")
2384 (set_attr "type" "sseicvt")
2385 (set_attr "athlon_decode" "double,direct,*")
2386 (set_attr "amdfam10_decode" "vector,double,*")
2387 (set_attr "bdver1_decode" "double,direct,*")
2388 (set_attr "length_vex" "*,*,4")
2389 (set_attr "prefix_rex" "1,1,*")
2390 (set_attr "prefix" "orig,orig,vex")
2391 (set_attr "mode" "DF")])

;; cvtsd2si: low double of a V2DF -> SImode, rounded.
2393 (define_insn "sse2_cvtsd2si"
2394 [(set (match_operand:SI 0 "register_operand" "=r,r")
2397 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2398 (parallel [(const_int 0)]))]
2399 UNSPEC_FIX_NOTRUNC))]
2401 "%vcvtsd2si\t{%1, %0|%0, %1}"
2402 [(set_attr "type" "sseicvt")
2403 (set_attr "athlon_decode" "double,vector")
2404 (set_attr "bdver1_decode" "double,double")
2405 (set_attr "prefix_rep" "1")
2406 (set_attr "prefix" "maybe_vex")
2407 (set_attr "mode" "SI")])

;; Same rounded conversion from a bare DFmode operand.
2409 (define_insn "sse2_cvtsd2si_2"
2410 [(set (match_operand:SI 0 "register_operand" "=r,r")
2411 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2412 UNSPEC_FIX_NOTRUNC))]
2414 "%vcvtsd2si\t{%1, %0|%0, %1}"
2415 [(set_attr "type" "sseicvt")
2416 (set_attr "athlon_decode" "double,vector")
2417 (set_attr "amdfam10_decode" "double,double")
2418 (set_attr "bdver1_decode" "double,double")
2419 (set_attr "prefix_rep" "1")
2420 (set_attr "prefix" "maybe_vex")
2421 (set_attr "mode" "SI")])

;; cvtsd2si{q}: low double -> DImode, rounded; 64-bit targets only.
2423 (define_insn "sse2_cvtsd2siq"
2424 [(set (match_operand:DI 0 "register_operand" "=r,r")
2427 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2428 (parallel [(const_int 0)]))]
2429 UNSPEC_FIX_NOTRUNC))]
2430 "TARGET_SSE2 && TARGET_64BIT"
2431 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2432 [(set_attr "type" "sseicvt")
2433 (set_attr "athlon_decode" "double,vector")
2434 (set_attr "bdver1_decode" "double,double")
2435 (set_attr "prefix_rep" "1")
2436 (set_attr "prefix" "maybe_vex")
2437 (set_attr "mode" "DI")])

;; DImode rounded conversion from a bare DFmode operand; 64-bit only.
2439 (define_insn "sse2_cvtsd2siq_2"
2440 [(set (match_operand:DI 0 "register_operand" "=r,r")
2441 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2442 UNSPEC_FIX_NOTRUNC))]
2443 "TARGET_SSE2 && TARGET_64BIT"
2444 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2445 [(set_attr "type" "sseicvt")
2446 (set_attr "athlon_decode" "double,vector")
2447 (set_attr "amdfam10_decode" "double,double")
2448 (set_attr "bdver1_decode" "double,double")
2449 (set_attr "prefix_rep" "1")
2450 (set_attr "prefix" "maybe_vex")
2451 (set_attr "mode" "DI")])

;; cvttsd2si: truncating low double -> SImode.
2453 (define_insn "sse2_cvttsd2si"
2454 [(set (match_operand:SI 0 "register_operand" "=r,r")
2457 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2458 (parallel [(const_int 0)]))))]
2460 "%vcvttsd2si\t{%1, %0|%0, %1}"
2461 [(set_attr "type" "sseicvt")
2462 (set_attr "athlon_decode" "double,vector")
2463 (set_attr "amdfam10_decode" "double,double")
2464 (set_attr "bdver1_decode" "double,double")
2465 (set_attr "prefix_rep" "1")
2466 (set_attr "prefix" "maybe_vex")
2467 (set_attr "mode" "SI")])

;; cvttsd2si{q}: truncating low double -> DImode; 64-bit targets only.
2469 (define_insn "sse2_cvttsd2siq"
2470 [(set (match_operand:DI 0 "register_operand" "=r,r")
2473 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2474 (parallel [(const_int 0)]))))]
2475 "TARGET_SSE2 && TARGET_64BIT"
2476 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2477 [(set_attr "type" "sseicvt")
2478 (set_attr "athlon_decode" "double,vector")
2479 (set_attr "amdfam10_decode" "double,double")
2480 (set_attr "bdver1_decode" "double,double")
2481 (set_attr "prefix_rep" "1")
2482 (set_attr "prefix" "maybe_vex")
2483 (set_attr "mode" "DI")])
;; NOTE(review): interior lines are missing from several patterns below
;; (embedded numbering jumps); restore from upstream before building.

;; vcvtdq2pd (256-bit): V4SI -> V4DF.
2485 (define_insn "floatv4siv4df2"
2486 [(set (match_operand:V4DF 0 "register_operand" "=x")
2487 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2489 "vcvtdq2pd\t{%1, %0|%0, %1}"
2490 [(set_attr "type" "ssecvt")
2491 (set_attr "prefix" "vex")
2492 (set_attr "mode" "V4DF")])

;; Same conversion taking the low half (%x1) of a V8SI source.
2494 (define_insn "avx_cvtdq2pd256_2"
2495 [(set (match_operand:V4DF 0 "register_operand" "=x")
2498 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2499 (parallel [(const_int 0) (const_int 1)
2500 (const_int 2) (const_int 3)]))))]
2502 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2503 [(set_attr "type" "ssecvt")
2504 (set_attr "prefix" "vex")
2505 (set_attr "mode" "V4DF")])

;; cvtdq2pd: low two elements of a V4SI -> V2DF.
2507 (define_insn "sse2_cvtdq2pd"
2508 [(set (match_operand:V2DF 0 "register_operand" "=x")
2511 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2512 (parallel [(const_int 0) (const_int 1)]))))]
2514 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2515 [(set_attr "type" "ssecvt")
2516 (set_attr "prefix" "maybe_vex")
2517 (set_attr "mode" "V2DF")])

;; vcvtpd2dqy: V4DF -> V4SI, rounded.
2519 (define_insn "avx_cvtpd2dq256"
2520 [(set (match_operand:V4SI 0 "register_operand" "=x")
2521 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2522 UNSPEC_FIX_NOTRUNC))]
2524 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2525 [(set_attr "type" "ssecvt")
2526 (set_attr "prefix" "vex")
2527 (set_attr "mode" "OI")])

;; Expander: V4DF -> V4SI with the upper half of the V8SI result zeroed.
2529 (define_expand "avx_cvtpd2dq256_2"
2530 [(set (match_operand:V8SI 0 "register_operand" "")
2532 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2536 "operands[2] = CONST0_RTX (V4SImode);")

;; Matching insn for the expander above.
2538 (define_insn "*avx_cvtpd2dq256_2"
2539 [(set (match_operand:V8SI 0 "register_operand" "=x")
2541 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2543 (match_operand:V4SI 2 "const0_operand" "")))]
2545 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2546 [(set_attr "type" "ssecvt")
2547 (set_attr "prefix" "vex")
2548 (set_attr "mode" "OI")])

;; Expander: V2DF -> two ints, upper half of the V4SI result zeroed.
2550 (define_expand "sse2_cvtpd2dq"
2551 [(set (match_operand:V4SI 0 "register_operand" "")
2553 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2557 "operands[2] = CONST0_RTX (V2SImode);")

;; Matching insn; AVX needs the explicit {x} suffix to pick the
;; 128-bit form, legacy SSE does not.
2559 (define_insn "*sse2_cvtpd2dq"
2560 [(set (match_operand:V4SI 0 "register_operand" "=x")
2562 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2564 (match_operand:V2SI 2 "const0_operand" "")))]
2568 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2570 return "cvtpd2dq\t{%1, %0|%0, %1}";
2572 [(set_attr "type" "ssecvt")
2573 (set_attr "prefix_rep" "1")
2574 (set_attr "prefix_data16" "0")
2575 (set_attr "prefix" "maybe_vex")
2576 (set_attr "mode" "TI")
2577 (set_attr "amdfam10_decode" "double")
2578 (set_attr "athlon_decode" "vector")
2579 (set_attr "bdver1_decode" "double")])

;; vcvttpd2dqy: truncating V4DF -> V4SI.
2581 (define_insn "fix_truncv4dfv4si2"
2582 [(set (match_operand:V4SI 0 "register_operand" "=x")
2583 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2585 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2586 [(set_attr "type" "ssecvt")
2587 (set_attr "prefix" "vex")
2588 (set_attr "mode" "OI")])

;; Expander: truncating V4DF -> V4SI with zeroed upper half (V8SI).
2590 (define_expand "avx_cvttpd2dq256_2"
2591 [(set (match_operand:V8SI 0 "register_operand" "")
2593 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2596 "operands[2] = CONST0_RTX (V4SImode);")

;; Matching insn for the expander above.
2598 (define_insn "*avx_cvttpd2dq256_2"
2599 [(set (match_operand:V8SI 0 "register_operand" "=x")
2601 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2602 (match_operand:V4SI 2 "const0_operand" "")))]
2604 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2605 [(set_attr "type" "ssecvt")
2606 (set_attr "prefix" "vex")
2607 (set_attr "mode" "OI")])

;; Expander: truncating V2DF -> two ints, upper half zeroed.
2609 (define_expand "sse2_cvttpd2dq"
2610 [(set (match_operand:V4SI 0 "register_operand" "")
2612 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2615 "operands[2] = CONST0_RTX (V2SImode);")

;; Matching insn; {x} suffix selects the 128-bit AVX encoding.
2617 (define_insn "*sse2_cvttpd2dq"
2618 [(set (match_operand:V4SI 0 "register_operand" "=x")
2620 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2621 (match_operand:V2SI 2 "const0_operand" "")))]
2625 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2627 return "cvttpd2dq\t{%1, %0|%0, %1}";
2629 [(set_attr "type" "ssecvt")
2630 (set_attr "amdfam10_decode" "double")
2631 (set_attr "athlon_decode" "vector")
2632 (set_attr "bdver1_decode" "double")
2633 (set_attr "prefix" "maybe_vex")
2634 (set_attr "mode" "TI")])
;; NOTE(review): interior lines are missing from several patterns below
;; (embedded numbering jumps); restore from upstream before building.

;; cvtsd2ss: low double -> low float, merged into operand 0.
2636 (define_insn "sse2_cvtsd2ss"
2637 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2640 (float_truncate:V2SF
2641 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2642 (match_operand:V4SF 1 "register_operand" "0,0,x")
2646 cvtsd2ss\t{%2, %0|%0, %2}
2647 cvtsd2ss\t{%2, %0|%0, %2}
2648 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2649 [(set_attr "isa" "noavx,noavx,avx")
2650 (set_attr "type" "ssecvt")
2651 (set_attr "athlon_decode" "vector,double,*")
2652 (set_attr "amdfam10_decode" "vector,double,*")
2653 (set_attr "bdver1_decode" "direct,direct,*")
2654 (set_attr "prefix" "orig,orig,vex")
2655 (set_attr "mode" "SF")])

;; cvtss2sd: low float -> low double, merged into operand 0.
2657 (define_insn "sse2_cvtss2sd"
2658 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2662 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2663 (parallel [(const_int 0) (const_int 1)])))
2664 (match_operand:V2DF 1 "register_operand" "0,0,x")
2668 cvtss2sd\t{%2, %0|%0, %2}
2669 cvtss2sd\t{%2, %0|%0, %2}
2670 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2671 [(set_attr "isa" "noavx,noavx,avx")
2672 (set_attr "type" "ssecvt")
2673 (set_attr "amdfam10_decode" "vector,double,*")
2674 (set_attr "athlon_decode" "direct,direct,*")
2675 (set_attr "bdver1_decode" "direct,direct,*")
2676 (set_attr "prefix" "orig,orig,vex")
2677 (set_attr "mode" "DF")])

;; vcvtpd2psy: V4DF -> V4SF (narrowing).
2679 (define_insn "avx_cvtpd2ps256"
2680 [(set (match_operand:V4SF 0 "register_operand" "=x")
2681 (float_truncate:V4SF
2682 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2684 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2685 [(set_attr "type" "ssecvt")
2686 (set_attr "prefix" "vex")
2687 (set_attr "mode" "V4SF")])

;; Expander: V2DF -> two floats with the high half of the V4SF zeroed.
2689 (define_expand "sse2_cvtpd2ps"
2690 [(set (match_operand:V4SF 0 "register_operand" "")
2692 (float_truncate:V2SF
2693 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2696 "operands[2] = CONST0_RTX (V2SFmode);")

;; Matching insn; {x} suffix selects the 128-bit AVX encoding.
2698 (define_insn "*sse2_cvtpd2ps"
2699 [(set (match_operand:V4SF 0 "register_operand" "=x")
2701 (float_truncate:V2SF
2702 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2703 (match_operand:V2SF 2 "const0_operand" "")))]
2707 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2709 return "cvtpd2ps\t{%1, %0|%0, %1}";
2711 [(set_attr "type" "ssecvt")
2712 (set_attr "amdfam10_decode" "double")
2713 (set_attr "athlon_decode" "vector")
2714 (set_attr "bdver1_decode" "double")
2715 (set_attr "prefix_data16" "1")
2716 (set_attr "prefix" "maybe_vex")
2717 (set_attr "mode" "V4SF")])

;; vcvtps2pd (256-bit): V4SF -> V4DF (widening).
2719 (define_insn "avx_cvtps2pd256"
2720 [(set (match_operand:V4DF 0 "register_operand" "=x")
2722 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2724 "vcvtps2pd\t{%1, %0|%0, %1}"
2725 [(set_attr "type" "ssecvt")
2726 (set_attr "prefix" "vex")
2727 (set_attr "mode" "V4DF")])

;; Same widening taking the low half (%x1) of a V8SF source.
2729 (define_insn "*avx_cvtps2pd256_2"
2730 [(set (match_operand:V4DF 0 "register_operand" "=x")
2733 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2734 (parallel [(const_int 0) (const_int 1)
2735 (const_int 2) (const_int 3)]))))]
2737 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2738 [(set_attr "type" "ssecvt")
2739 (set_attr "prefix" "vex")
2740 (set_attr "mode" "V4DF")])

;; cvtps2pd: low two floats of a V4SF -> V2DF.
2742 (define_insn "sse2_cvtps2pd"
2743 [(set (match_operand:V2DF 0 "register_operand" "=x")
2746 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2747 (parallel [(const_int 0) (const_int 1)]))))]
2749 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2750 [(set_attr "type" "ssecvt")
2751 (set_attr "amdfam10_decode" "direct")
2752 (set_attr "athlon_decode" "double")
2753 (set_attr "bdver1_decode" "double")
2754 (set_attr "prefix_data16" "0")
2755 (set_attr "prefix" "maybe_vex")
2756 (set_attr "mode" "V2DF")])
;; NOTE(review): several expanders below are missing interior lines
;; (embedded numbering jumps); restore from upstream before building.

;; Widen the high half of a V4SF to V2DF: shuffle the high elements
;; into the low positions of a scratch (operand 2), then convert.
2758 (define_expand "vec_unpacks_hi_v4sf"
2763 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2764 (parallel [(const_int 6) (const_int 7)
2765 (const_int 2) (const_int 3)])))
2766 (set (match_operand:V2DF 0 "register_operand" "")
2770 (parallel [(const_int 0) (const_int 1)]))))]
2772 "operands[2] = gen_reg_rtx (V4SFmode);")

;; Widen the high half of a V8SF to V4DF via a V4SF scratch.
2774 (define_expand "vec_unpacks_hi_v8sf"
2777 (match_operand:V8SF 1 "nonimmediate_operand" "")
2778 (parallel [(const_int 4) (const_int 5)
2779 (const_int 6) (const_int 7)])))
2780 (set (match_operand:V4DF 0 "register_operand" "")
2784 "operands[2] = gen_reg_rtx (V4SFmode);")

;; Widen the low half of a V4SF to V2DF (direct cvtps2pd, no scratch).
2786 (define_expand "vec_unpacks_lo_v4sf"
2787 [(set (match_operand:V2DF 0 "register_operand" "")
2790 (match_operand:V4SF 1 "nonimmediate_operand" "")
2791 (parallel [(const_int 0) (const_int 1)]))))]

;; Widen the low half of a V8SF to V4DF.
2794 (define_expand "vec_unpacks_lo_v8sf"
2795 [(set (match_operand:V4DF 0 "register_operand" "")
2798 (match_operand:V8SF 1 "nonimmediate_operand" "")
2799 (parallel [(const_int 0) (const_int 1)
2800 (const_int 2) (const_int 3)]))))]

;; Map an integer vector mode to the float mode its unpacked halves
;; convert to (e.g. V8HI half -> V4SF).
2803 (define_mode_attr sseunpackfltmode
2804 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])

;; Signed unpack-high then int->float convert, via a scratch of the
;; widened integer mode.
2806 (define_expand "vec_unpacks_float_hi_<mode>"
2807 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2808 (match_operand:VI2_AVX2 1 "register_operand" "")]
2811 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2813 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2814 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2815 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));

;; Signed unpack-low then int->float convert.
2819 (define_expand "vec_unpacks_float_lo_<mode>"
2820 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2821 (match_operand:VI2_AVX2 1 "register_operand" "")]
2824 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2826 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2827 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2828 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));

;; Unsigned unpack-high then int->float convert.
2832 (define_expand "vec_unpacku_float_hi_<mode>"
2833 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2834 (match_operand:VI2_AVX2 1 "register_operand" "")]
2837 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2839 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2840 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2841 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));

;; Unsigned unpack-low then int->float convert.
2845 (define_expand "vec_unpacku_float_lo_<mode>"
2846 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2847 (match_operand:VI2_AVX2 1 "register_operand" "")]
2850 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2852 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2853 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2854 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; NOTE(review): several expanders below are missing interior lines
;; (embedded numbering jumps); restore from upstream before building.

;; Signed V4SI high half -> V2DF: shuffle elements 2,3 down into a
;; scratch (operand 2), then convert the low pair.
2858 (define_expand "vec_unpacks_float_hi_v4si"
2861 (match_operand:V4SI 1 "nonimmediate_operand" "")
2862 (parallel [(const_int 2) (const_int 3)
2863 (const_int 2) (const_int 3)])))
2864 (set (match_operand:V2DF 0 "register_operand" "")
2868 (parallel [(const_int 0) (const_int 1)]))))]
2870 "operands[2] = gen_reg_rtx (V4SImode);")

;; Signed V4SI low half -> V2DF.
2872 (define_expand "vec_unpacks_float_lo_v4si"
2873 [(set (match_operand:V2DF 0 "register_operand" "")
2876 (match_operand:V4SI 1 "nonimmediate_operand" "")
2877 (parallel [(const_int 0) (const_int 1)]))))]

;; Signed V8SI high half -> V4DF via a V4SI scratch.
2880 (define_expand "vec_unpacks_float_hi_v8si"
2883 (match_operand:V8SI 1 "nonimmediate_operand" "")
2884 (parallel [(const_int 4) (const_int 5)
2885 (const_int 6) (const_int 7)])))
2886 (set (match_operand:V4DF 0 "register_operand" "")
2890 "operands[2] = gen_reg_rtx (V4SImode);")

;; Signed V8SI low half -> V4DF.
2892 (define_expand "vec_unpacks_float_lo_v8si"
2893 [(set (match_operand:V4DF 0 "register_operand" "")
2896 (match_operand:V8SI 1 "nonimmediate_operand" "")
2897 (parallel [(const_int 0) (const_int 1)
2898 (const_int 2) (const_int 3)]))))]

;; Unsigned V4SI high half -> V2DF: convert as signed, then where the
;; result went negative (lt against zero, operand 3) add back 2^32
;; (operand 4, built below with real_ldexp).
2901 (define_expand "vec_unpacku_float_hi_v4si"
2904 (match_operand:V4SI 1 "nonimmediate_operand" "")
2905 (parallel [(const_int 2) (const_int 3)
2906 (const_int 2) (const_int 3)])))
2911 (parallel [(const_int 0) (const_int 1)]))))
2913 (lt:V2DF (match_dup 6) (match_dup 3)))
2915 (and:V2DF (match_dup 7) (match_dup 4)))
2916 (set (match_operand:V2DF 0 "register_operand" "")
2917 (plus:V2DF (match_dup 6) (match_dup 8)))]
2920 REAL_VALUE_TYPE TWO32r;
2924 real_ldexp (&TWO32r, &dconst1, 32);
2925 x = const_double_from_real_value (TWO32r, DFmode);
2927 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2928 operands[4] = force_reg (V2DFmode,
2929 ix86_build_const_vector (V2DFmode, 1, x));
2931 operands[5] = gen_reg_rtx (V4SImode);
2933 for (i = 6; i < 9; i++)
2934 operands[i] = gen_reg_rtx (V2DFmode);

;; Unsigned V4SI low half -> V2DF; same 2^32 correction as above.
2937 (define_expand "vec_unpacku_float_lo_v4si"
2941 (match_operand:V4SI 1 "nonimmediate_operand" "")
2942 (parallel [(const_int 0) (const_int 1)]))))
2944 (lt:V2DF (match_dup 5) (match_dup 3)))
2946 (and:V2DF (match_dup 6) (match_dup 4)))
2947 (set (match_operand:V2DF 0 "register_operand" "")
2948 (plus:V2DF (match_dup 5) (match_dup 7)))]
2951 REAL_VALUE_TYPE TWO32r;
2955 real_ldexp (&TWO32r, &dconst1, 32);
2956 x = const_double_from_real_value (TWO32r, DFmode);
2958 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2959 operands[4] = force_reg (V2DFmode,
2960 ix86_build_const_vector (V2DFmode, 1, x));
2962 for (i = 5; i < 8; i++)
2963 operands[i] = gen_reg_rtx (V2DFmode);

;; Unsigned V8SI high half -> V4DF: extract the high V4SI, convert as
;; signed, then add the masked 2^32 correction, all emitted directly.
2966 (define_expand "vec_unpacku_float_hi_v8si"
2967 [(match_operand:V4DF 0 "register_operand" "")
2968 (match_operand:V8SI 1 "register_operand" "")]
2971 REAL_VALUE_TYPE TWO32r;
2975 real_ldexp (&TWO32r, &dconst1, 32);
2976 x = const_double_from_real_value (TWO32r, DFmode);
2978 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2979 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2980 tmp[5] = gen_reg_rtx (V4SImode);
2982 for (i = 2; i < 5; i++)
2983 tmp[i] = gen_reg_rtx (V4DFmode);
2984 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
2985 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
2986 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2987 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2988 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2989 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));

;; Unsigned V8SI low half -> V4DF; converts the low half with
;; avx_cvtdq2pd256_2 and applies the same 2^32 correction.
2993 (define_expand "vec_unpacku_float_lo_v8si"
2994 [(match_operand:V4DF 0 "register_operand" "")
2995 (match_operand:V8SI 1 "nonimmediate_operand" "")]
2998 REAL_VALUE_TYPE TWO32r;
3002 real_ldexp (&TWO32r, &dconst1, 32);
3003 x = const_double_from_real_value (TWO32r, DFmode);
3005 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3006 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3008 for (i = 2; i < 5; i++)
3009 tmp[i] = gen_reg_rtx (V4DFmode);
3010 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3011 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3012 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3013 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3014 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; NOTE(review): several expanders below are missing interior lines
;; (embedded numbering jumps); restore from upstream before building.

;; Narrow two V4DF operands to one V8SF: convert each half into a V4SF
;; scratch (operands 3 and 4), then concatenate.
3018 (define_expand "vec_pack_trunc_v4df"
3020 (float_truncate:V4SF
3021 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3023 (float_truncate:V4SF
3024 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3025 (set (match_operand:V8SF 0 "register_operand" "")
3031 operands[3] = gen_reg_rtx (V4SFmode);
3032 operands[4] = gen_reg_rtx (V4SFmode);

;; Narrow two V2DF to one V4SF.  AVX path: concat to V4DF, one
;; vcvtpd2psy.  SSE path: convert each half, combine with movlhps.
3035 (define_expand "vec_pack_trunc_v2df"
3036 [(match_operand:V4SF 0 "register_operand" "")
3037 (match_operand:V2DF 1 "nonimmediate_operand" "")
3038 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3043 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3045 tmp0 = gen_reg_rtx (V4DFmode);
3046 tmp1 = force_reg (V2DFmode, operands[1]);
3048 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3049 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3053 tmp0 = gen_reg_rtx (V4SFmode);
3054 tmp1 = gen_reg_rtx (V4SFmode);
3056 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3057 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3058 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));

;; Truncating-convert two V4DF to one V8SI: two vcvttpd2dqy + concat.
3063 (define_expand "vec_pack_sfix_trunc_v4df"
3064 [(match_operand:V8SI 0 "register_operand" "")
3065 (match_operand:V4DF 1 "nonimmediate_operand" "")
3066 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3071 r1 = gen_reg_rtx (V4SImode);
3072 r2 = gen_reg_rtx (V4SImode);
3074 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3075 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3076 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));

;; Truncating-convert two V2DF to one V4SI.  AVX path concatenates
;; first; SSE path converts halves then interleaves as V2DI.
3080 (define_expand "vec_pack_sfix_trunc_v2df"
3081 [(match_operand:V4SI 0 "register_operand" "")
3082 (match_operand:V2DF 1 "nonimmediate_operand" "")
3083 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3088 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3090 tmp0 = gen_reg_rtx (V4DFmode);
3091 tmp1 = force_reg (V2DFmode, operands[1]);
3093 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3094 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3098 tmp0 = gen_reg_rtx (V4SImode);
3099 tmp1 = gen_reg_rtx (V4SImode);
3101 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3102 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3104 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3105 gen_lowpart (V2DImode, tmp0),
3106 gen_lowpart (V2DImode, tmp1)));

;; Map a DFmode vector mode to the SImode vector its pack produces.
3111 (define_mode_attr ssepackfltmode
3112 [(V4DF "V8SI") (V2DF "V4SI")])

;; Unsigned pack: adjust both inputs into signed range (helpers return
;; correction masks in tmp[2]/tmp[3]), do the signed pack, then xor the
;; interleaved corrections back into the result.
3114 (define_expand "vec_pack_ufix_trunc_<mode>"
3115 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
3116 (match_operand:VF2 1 "register_operand" "")
3117 (match_operand:VF2 2 "register_operand" "")]
3121 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3122 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3123 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3124 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3125 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3127 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3128 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3132 tmp[5] = gen_reg_rtx (V8SFmode);
3133 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3134 gen_lowpart (V8SFmode, tmp[3]), 0);
3135 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3137 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3138 operands[0], 0, OPTAB_DIRECT);
3139 if (tmp[6] != operands[0])
3140 emit_move_insn (operands[0], tmp[6]);

;; Rounded-convert two V4DF to one V8SI: two vcvtpd2dqy + concat.
3144 (define_expand "vec_pack_sfix_v4df"
3145 [(match_operand:V8SI 0 "register_operand" "")
3146 (match_operand:V4DF 1 "nonimmediate_operand" "")
3147 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3152 r1 = gen_reg_rtx (V4SImode);
3153 r2 = gen_reg_rtx (V4SImode);
3155 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3156 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3157 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));

;; Rounded-convert two V2DF to one V4SI; AVX concat path or SSE
;; convert-then-interleave path, as in the truncating variant above.
3161 (define_expand "vec_pack_sfix_v2df"
3162 [(match_operand:V4SI 0 "register_operand" "")
3163 (match_operand:V2DF 1 "nonimmediate_operand" "")
3164 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3169 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3171 tmp0 = gen_reg_rtx (V4DFmode);
3172 tmp1 = force_reg (V2DFmode, operands[1]);
3174 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3175 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3179 tmp0 = gen_reg_rtx (V4SImode);
3180 tmp1 = gen_reg_rtx (V4SImode);
3182 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3183 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3185 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3186 gen_lowpart (V2DImode, tmp0),
3187 gen_lowpart (V2DImode, tmp1)));
3192 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3194 ;; Parallel single-precision floating point element swizzling
3196 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3198 (define_expand "sse_movhlps_exp"
3199 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3202 (match_operand:V4SF 1 "nonimmediate_operand" "")
3203 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3204 (parallel [(const_int 6)
3210 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3212 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3214 /* Fix up the destination if needed. */
3215 if (dst != operands[0])
3216 emit_move_insn (operands[0], dst);
3221 (define_insn "sse_movhlps"
3222 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3225 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3226 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3227 (parallel [(const_int 6)
3231 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3233 movhlps\t{%2, %0|%0, %2}
3234 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3235 movlps\t{%H2, %0|%0, %H2}
3236 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3237 %vmovhps\t{%2, %0|%0, %2}"
3238 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3239 (set_attr "type" "ssemov")
3240 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3241 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander for the SSE MOVLHPS operation on V4SF; mirror image of
;; sse_movlhps_exp's high-half counterpart.  A scratch destination from
;; ix86_fixup_binary_operands is copied back to operands[0] if needed.
3243 (define_expand "sse_movlhps_exp"
3244 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3247 (match_operand:V4SF 1 "nonimmediate_operand" "")
3248 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3249 (parallel [(const_int 0)
3255 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3257 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3259 /* Fix up the destination if needed.  */
3260 if (dst != operands[0])
3261 emit_move_insn (operands[0], dst);
;; MOVLHPS: vec_select from the concatenation of operands 1 and 2,
;; starting at element 0, i.e. the low halves of both inputs.
;; Memory-touching alternatives use movhps/movlps encodings instead.
3266 (define_insn "sse_movlhps"
3267 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3270 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3271 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3272 (parallel [(const_int 0)
3276 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3278 movlhps\t{%2, %0|%0, %2}
3279 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3280 movhps\t{%2, %0|%0, %2}
3281 vmovhps\t{%2, %1, %0|%0, %1, %2}
3282 %vmovlps\t{%2, %H0|%H0, %2}"
3283 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3284 (set_attr "type" "ssemov")
3285 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3286 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3288 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit VUNPCKHPS.  The selection indices (2,10,3,11 / 6,14,7,15)
;; show the in-lane behavior: elements are interleaved only within each
;; 128-bit lane, not across lanes.
3289 (define_insn "avx_unpckhps256"
3290 [(set (match_operand:V8SF 0 "register_operand" "=x")
3293 (match_operand:V8SF 1 "register_operand" "x")
3294 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3295 (parallel [(const_int 2) (const_int 10)
3296 (const_int 3) (const_int 11)
3297 (const_int 6) (const_int 14)
3298 (const_int 7) (const_int 15)])))]
3300 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3301 [(set_attr "type" "sselog")
3302 (set_attr "prefix" "vex")
3303 (set_attr "mode" "V8SF")])
;; True cross-lane high interleave for V8SF.  Because the AVX unpack
;; insns work only within lanes, this expander builds the result from
;; two in-lane unpacks into fresh temporaries (operands[3]/[4], created
;; in the preparation code) followed by a final lane-combining
;; vec_select into operand 0.
3305 (define_expand "vec_interleave_highv8sf"
3309 (match_operand:V8SF 1 "register_operand" "x")
3310 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3311 (parallel [(const_int 0) (const_int 8)
3312 (const_int 1) (const_int 9)
3313 (const_int 4) (const_int 12)
3314 (const_int 5) (const_int 13)])))
3320 (parallel [(const_int 2) (const_int 10)
3321 (const_int 3) (const_int 11)
3322 (const_int 6) (const_int 14)
3323 (const_int 7) (const_int 15)])))
3324 (set (match_operand:V8SF 0 "register_operand" "")
3329 (parallel [(const_int 4) (const_int 5)
3330 (const_int 6) (const_int 7)
3331 (const_int 12) (const_int 13)
3332 (const_int 14) (const_int 15)])))]
3335 operands[3] = gen_reg_rtx (V8SFmode);
3336 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave: UNPCKHPS, selecting elements 2,6,3,7 of the
;; concatenated inputs.  Non-AVX and AVX alternatives.
3339 (define_insn "vec_interleave_highv4sf"
3340 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3343 (match_operand:V4SF 1 "register_operand" "0,x")
3344 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3345 (parallel [(const_int 2) (const_int 6)
3346 (const_int 3) (const_int 7)])))]
3349 unpckhps\t{%2, %0|%0, %2}
3350 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3351 [(set_attr "isa" "noavx,avx")
3352 (set_attr "type" "sselog")
3353 (set_attr "prefix" "orig,vex")
3354 (set_attr "mode" "V4SF")])
3356 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit VUNPCKLPS.  Like avx_unpckhps256, the indices
;; (0,8,1,9 / 4,12,5,13) interleave only within each 128-bit lane.
3357 (define_insn "avx_unpcklps256"
3358 [(set (match_operand:V8SF 0 "register_operand" "=x")
3361 (match_operand:V8SF 1 "register_operand" "x")
3362 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3363 (parallel [(const_int 0) (const_int 8)
3364 (const_int 1) (const_int 9)
3365 (const_int 4) (const_int 12)
3366 (const_int 5) (const_int 13)])))]
3368 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3369 [(set_attr "type" "sselog")
3370 (set_attr "prefix" "vex")
3371 (set_attr "mode" "V8SF")])
;; True cross-lane low interleave for V8SF, built the same way as
;; vec_interleave_highv8sf: two in-lane unpacks into fresh temporaries
;; (operands[3]/[4]) and a final lane-combining vec_select, since the
;; AVX unpack insns cannot cross 128-bit lanes by themselves.
3373 (define_expand "vec_interleave_lowv8sf"
3377 (match_operand:V8SF 1 "register_operand" "x")
3378 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3379 (parallel [(const_int 0) (const_int 8)
3380 (const_int 1) (const_int 9)
3381 (const_int 4) (const_int 12)
3382 (const_int 5) (const_int 13)])))
3388 (parallel [(const_int 2) (const_int 10)
3389 (const_int 3) (const_int 11)
3390 (const_int 6) (const_int 14)
3391 (const_int 7) (const_int 15)])))
3392 (set (match_operand:V8SF 0 "register_operand" "")
3397 (parallel [(const_int 0) (const_int 1)
3398 (const_int 2) (const_int 3)
3399 (const_int 8) (const_int 9)
3400 (const_int 10) (const_int 11)])))]
3403 operands[3] = gen_reg_rtx (V8SFmode);
3404 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit low interleave: UNPCKLPS, selecting elements 0,4,1,5 of the
;; concatenated inputs.  Non-AVX and AVX alternatives.
3407 (define_insn "vec_interleave_lowv4sf"
3408 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3411 (match_operand:V4SF 1 "register_operand" "0,x")
3412 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3413 (parallel [(const_int 0) (const_int 4)
3414 (const_int 1) (const_int 5)])))]
3417 unpcklps\t{%2, %0|%0, %2}
3418 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3419 [(set_attr "isa" "noavx,avx")
3420 (set_attr "type" "sselog")
3421 (set_attr "prefix" "orig,vex")
3422 (set_attr "mode" "V4SF")])
3424 ;; These are modeled with the same vec_concat as the others so that we
3425 ;; capture users of shufps that can use the new instructions
;; 256-bit VMOVSHDUP: each odd-indexed element (1,3,5,7) of the source
;; is duplicated into the adjacent even slot as well.
3426 (define_insn "avx_movshdup256"
3427 [(set (match_operand:V8SF 0 "register_operand" "=x")
3430 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3432 (parallel [(const_int 1) (const_int 1)
3433 (const_int 3) (const_int 3)
3434 (const_int 5) (const_int 5)
3435 (const_int 7) (const_int 7)])))]
3437 "vmovshdup\t{%1, %0|%0, %1}"
3438 [(set_attr "type" "sse")
3439 (set_attr "prefix" "vex")
3440 (set_attr "mode" "V8SF")])
;; SSE3 MOVSHDUP on V4SF (duplicate odd elements into even slots);
;; "%v" in the template emits the VEX-prefixed form when available.
3442 (define_insn "sse3_movshdup"
3443 [(set (match_operand:V4SF 0 "register_operand" "=x")
3446 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3448 (parallel [(const_int 1)
3453 "%vmovshdup\t{%1, %0|%0, %1}"
3454 [(set_attr "type" "sse")
3455 (set_attr "prefix_rep" "1")
3456 (set_attr "prefix" "maybe_vex")
3457 (set_attr "mode" "V4SF")])
;; 256-bit VMOVSLDUP: each even-indexed element (0,2,4,6) of the source
;; is duplicated into the adjacent odd slot as well.
3459 (define_insn "avx_movsldup256"
3460 [(set (match_operand:V8SF 0 "register_operand" "=x")
3463 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3465 (parallel [(const_int 0) (const_int 0)
3466 (const_int 2) (const_int 2)
3467 (const_int 4) (const_int 4)
3468 (const_int 6) (const_int 6)])))]
3470 "vmovsldup\t{%1, %0|%0, %1}"
3471 [(set_attr "type" "sse")
3472 (set_attr "prefix" "vex")
3473 (set_attr "mode" "V8SF")])
;; SSE3 MOVSLDUP on V4SF (duplicate even elements into odd slots);
;; "%v" emits the VEX-prefixed form when available.
3475 (define_insn "sse3_movsldup"
3476 [(set (match_operand:V4SF 0 "register_operand" "=x")
3479 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3481 (parallel [(const_int 0)
3486 "%vmovsldup\t{%1, %0|%0, %1}"
3487 [(set_attr "type" "sse")
3488 (set_attr "prefix_rep" "1")
3489 (set_attr "prefix" "maybe_vex")
3490 (set_attr "mode" "V4SF")])
;; Expander for the 8-bit-immediate VSHUFPS on V8SF.  Decomposes the
;; immediate into eight explicit element selectors for avx_shufps256_1:
;; the same 2-bit fields drive both 128-bit lanes, hence the +4/+8/+12
;; offsets for the second source and the high lane.
3492 (define_expand "avx_shufps256"
3493 [(match_operand:V8SF 0 "register_operand" "")
3494 (match_operand:V8SF 1 "register_operand" "")
3495 (match_operand:V8SF 2 "nonimmediate_operand" "")
3496 (match_operand:SI 3 "const_int_operand" "")]
3499 int mask = INTVAL (operands[3]);
3500 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3501 GEN_INT ((mask >> 0) & 3),
3502 GEN_INT ((mask >> 2) & 3),
3503 GEN_INT (((mask >> 4) & 3) + 8),
3504 GEN_INT (((mask >> 6) & 3) + 8),
3505 GEN_INT (((mask >> 0) & 3) + 4),
3506 GEN_INT (((mask >> 2) & 3) + 4),
3507 GEN_INT (((mask >> 4) & 3) + 12),
3508 GEN_INT (((mask >> 6) & 3) + 12)));
3512 ;; One bit in mask selects 2 elements.
;; Explicit-selector form of 256-bit VSHUFPS.  The insn condition
;; requires the high-lane selectors (operands 7-10) to equal the
;; low-lane ones (operands 3-6) shifted by 4, matching what the single
;; hardware immediate can express; the output code rebuilds that 8-bit
;; immediate from the low-lane selectors.
3513 (define_insn "avx_shufps256_1"
3514 [(set (match_operand:V8SF 0 "register_operand" "=x")
3517 (match_operand:V8SF 1 "register_operand" "x")
3518 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3519 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3520 (match_operand 4 "const_0_to_3_operand" "")
3521 (match_operand 5 "const_8_to_11_operand" "")
3522 (match_operand 6 "const_8_to_11_operand" "")
3523 (match_operand 7 "const_4_to_7_operand" "")
3524 (match_operand 8 "const_4_to_7_operand" "")
3525 (match_operand 9 "const_12_to_15_operand" "")
3526 (match_operand 10 "const_12_to_15_operand" "")])))]
3528 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3529 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3530 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3531 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3534 mask = INTVAL (operands[3]);
3535 mask |= INTVAL (operands[4]) << 2;
3536 mask |= (INTVAL (operands[5]) - 8) << 4;
3537 mask |= (INTVAL (operands[6]) - 8) << 6;
3538 operands[3] = GEN_INT (mask);
3540 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3542 [(set_attr "type" "sselog")
3543 (set_attr "length_immediate" "1")
3544 (set_attr "prefix" "vex")
3545 (set_attr "mode" "V8SF")])
;; Expander for the 8-bit-immediate SHUFPS on V4SF.  Decomposes the
;; immediate into four explicit selectors for sse_shufps_v4sf; the two
;; high selectors index into the second source (+4 offset).
3547 (define_expand "sse_shufps"
3548 [(match_operand:V4SF 0 "register_operand" "")
3549 (match_operand:V4SF 1 "register_operand" "")
3550 (match_operand:V4SF 2 "nonimmediate_operand" "")
3551 (match_operand:SI 3 "const_int_operand" "")]
3554 int mask = INTVAL (operands[3]);
3555 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3556 GEN_INT ((mask >> 0) & 3),
3557 GEN_INT ((mask >> 2) & 3),
3558 GEN_INT (((mask >> 4) & 3) + 4),
3559 GEN_INT (((mask >> 6) & 3) + 4)));
;; Explicit-selector SHUFPS for 128-bit 4-element modes (VI4F_128).
;; The output code packs the four selectors back into the hardware's
;; 8-bit immediate (subtracting the +4 bias of the second-source
;; selectors), then emits shufps (non-AVX) or vshufps (AVX).
3563 (define_insn "sse_shufps_<mode>"
3564 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3565 (vec_select:VI4F_128
3566 (vec_concat:<ssedoublevecmode>
3567 (match_operand:VI4F_128 1 "register_operand" "0,x")
3568 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3569 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3570 (match_operand 4 "const_0_to_3_operand" "")
3571 (match_operand 5 "const_4_to_7_operand" "")
3572 (match_operand 6 "const_4_to_7_operand" "")])))]
3576 mask |= INTVAL (operands[3]) << 0;
3577 mask |= INTVAL (operands[4]) << 2;
3578 mask |= (INTVAL (operands[5]) - 4) << 4;
3579 mask |= (INTVAL (operands[6]) - 4) << 6;
3580 operands[3] = GEN_INT (mask);
3582 switch (which_alternative)
3585 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3587 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3592 [(set_attr "isa" "noavx,avx")
3593 (set_attr "type" "sselog")
3594 (set_attr "length_immediate" "1")
3595 (set_attr "prefix" "orig,vex")
3596 (set_attr "mode" "V4SF")])
;; Extract the high two SF elements (indices 2,3) of a V4SF into a
;; V2SF destination: movhps to memory, movhlps reg-to-reg, or movlps
;; from the high half of a memory source (%H1).
3598 (define_insn "sse_storehps"
3599 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3601 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3602 (parallel [(const_int 2) (const_int 3)])))]
3605 %vmovhps\t{%1, %0|%0, %1}
3606 %vmovhlps\t{%1, %d0|%d0, %1}
3607 %vmovlps\t{%H1, %d0|%d0, %H1}"
3608 [(set_attr "type" "ssemov")
3609 (set_attr "prefix" "maybe_vex")
3610 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for MOVHPS-style load: keep the low two elements of
;; operand 1, replace the high two with the V2SF operand 2.  A scratch
;; destination from ix86_fixup_binary_operands is copied back if used.
3612 (define_expand "sse_loadhps_exp"
3613 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3616 (match_operand:V4SF 1 "nonimmediate_operand" "")
3617 (parallel [(const_int 0) (const_int 1)]))
3618 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3621 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3623 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3625 /* Fix up the destination if needed.  */
3626 if (dst != operands[0])
3627 emit_move_insn (operands[0], dst);
;; MOVHPS load: low half of operand 1 concatenated with V2SF operand 2.
;; Reg/reg alternatives map to movlhps; the last alternative stores
;; operand 2 into the high half of a memory destination (%H0).
3632 (define_insn "sse_loadhps"
3633 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3636 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3637 (parallel [(const_int 0) (const_int 1)]))
3638 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3641 movhps\t{%2, %0|%0, %2}
3642 vmovhps\t{%2, %1, %0|%0, %1, %2}
3643 movlhps\t{%2, %0|%0, %2}
3644 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3645 %vmovlps\t{%2, %H0|%H0, %2}"
3646 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3647 (set_attr "type" "ssemov")
3648 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3649 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract the low two SF elements (indices 0,1) of a V4SF into a
;; V2SF destination: movlps to memory, movaps reg-to-reg (the extra
;; high bits are dead), or movlps from memory.
3651 (define_insn "sse_storelps"
3652 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3654 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3655 (parallel [(const_int 0) (const_int 1)])))]
3658 %vmovlps\t{%1, %0|%0, %1}
3659 %vmovaps\t{%1, %0|%0, %1}
3660 %vmovlps\t{%1, %d0|%d0, %1}"
3661 [(set_attr "type" "ssemov")
3662 (set_attr "prefix" "maybe_vex")
3663 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for MOVLPS-style load: replace the low two elements of
;; operand 1 with V2SF operand 2, keeping the high two (indices 2,3).
;; A scratch destination from ix86_fixup_binary_operands is copied
;; back to operands[0] if needed.
3665 (define_expand "sse_loadlps_exp"
3666 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3668 (match_operand:V2SF 2 "nonimmediate_operand" "")
3670 (match_operand:V4SF 1 "nonimmediate_operand" "")
3671 (parallel [(const_int 2) (const_int 3)]))))]
3674 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3676 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3678 /* Fix up the destination if needed.  */
3679 if (dst != operands[0])
3680 emit_move_insn (operands[0], dst);
;; MOVLPS load: V2SF operand 2 concatenated with the high half of
;; operand 1.  Reg/reg alternatives use shufps with immediate 0xe4
;; (identity selection of the destination's own high half).
3685 (define_insn "sse_loadlps"
3686 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3688 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3690 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3691 (parallel [(const_int 2) (const_int 3)]))))]
3694 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3695 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3696 movlps\t{%2, %0|%0, %2}
3697 vmovlps\t{%2, %1, %0|%0, %1, %2}
3698 %vmovlps\t{%2, %0|%0, %2}"
3699 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3700 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3701 (set_attr "length_immediate" "1,1,*,*,*")
3702 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3703 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; MOVSS: merge the lowest SF element of operand 2 into operand 1.
;; Non-AVX 2-operand and AVX 3-operand register forms.
3705 (define_insn "sse_movss"
3706 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3708 (match_operand:V4SF 2 "register_operand" " x,x")
3709 (match_operand:V4SF 1 "register_operand" " 0,x")
3713 movss\t{%2, %0|%0, %2}
3714 vmovss\t{%2, %1, %0|%0, %1, %2}"
3715 [(set_attr "isa" "noavx,avx")
3716 (set_attr "type" "ssemov")
3717 (set_attr "prefix" "orig,vex")
3718 (set_attr "mode" "SF")])
;; AVX2 register-source VBROADCASTSS: broadcast element 0 of a V4SF
;; register to every element of an SF-vector mode (VF1).
3720 (define_insn "avx2_vec_dup<mode>"
3721 [(set (match_operand:VF1 0 "register_operand" "=x")
3724 (match_operand:V4SF 1 "register_operand" "x")
3725 (parallel [(const_int 0)]))))]
3727 "vbroadcastss\t{%1, %0|%0, %1}"
3728 [(set_attr "type" "sselog1")
3729 (set_attr "prefix" "vex")
3730 (set_attr "mode" "<MODE>")])
;; Duplicate a scalar SF into all four V4SF elements: vshufps with
;; immediate 0 (AVX reg source), vbroadcastss (AVX mem source), or
;; shufps of the destination with itself (non-AVX, input in %0).
3732 (define_insn "vec_dupv4sf"
3733 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
3735 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
3738 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3739 vbroadcastss\t{%1, %0|%0, %1}
3740 shufps\t{$0, %0, %0|%0, %0, 0}"
3741 [(set_attr "isa" "avx,avx,noavx")
3742 (set_attr "type" "sselog1,ssemov,sselog1")
3743 (set_attr "length_immediate" "1,0,1")
3744 (set_attr "prefix_extra" "0,1,*")
3745 (set_attr "prefix" "vex,vex,orig")
3746 (set_attr "mode" "V4SF")])
3748 ;; Although insertps takes register source, we prefer
3749 ;; unpcklps with register source since it is shorter.
;; Concatenate two SF scalars into a V2SF (SSE4.1 variant).  Prefers
;; unpcklps for register sources (shorter than insertps); insertps
;; imm 0x10 places a memory source into element 1; movss/movd handle
;; the zero-high-element (C constraint) and MMX (*y) alternatives.
3750 (define_insn "*vec_concatv2sf_sse4_1"
3751 [(set (match_operand:V2SF 0 "register_operand"     "=x,x,x,x,x,*y ,*y")
3753 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3754 (match_operand:SF 2 "vector_move_operand"  " x,x,m,m,C,*ym, C")))]
3757 unpcklps\t{%2, %0|%0, %2}
3758 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3759 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3760 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3761 %vmovss\t{%1, %0|%0, %1}
3762 punpckldq\t{%2, %0|%0, %2}
3763 movd\t{%1, %0|%0, %1}"
3764 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3765 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3766 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3767 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3768 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3769 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3770 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3772 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3773 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3774 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Concatenate two SF scalars into a V2SF (pre-SSE4.1 variant).  Only
;; register or zero (C) second operands; memory for operand 2 is
;; deliberately not allowed here — see the comment above.
3775 (define_insn "*vec_concatv2sf_sse"
3776 [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
3778 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3779 (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
3782 unpcklps\t{%2, %0|%0, %2}
3783 movss\t{%1, %0|%0, %1}
3784 punpckldq\t{%2, %0|%0, %2}
3785 movd\t{%1, %0|%0, %1}"
3786 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3787 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Concatenate two V2SF halves into a V4SF: movlhps for register
;; second halves, movhps for memory second halves (non-AVX/AVX forms).
3789 (define_insn "*vec_concatv4sf"
3790 [(set (match_operand:V4SF 0 "register_operand"       "=x,x,x,x")
3792 (match_operand:V2SF 1 "register_operand"     " 0,x,0,x")
3793 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3796 movlhps\t{%2, %0|%0, %2}
3797 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3798 movhps\t{%2, %0|%0, %2}
3799 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3800 [(set_attr "isa" "noavx,avx,noavx,avx")
3801 (set_attr "type" "ssemov")
3802 (set_attr "prefix" "orig,vex,orig,vex")
3803 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Build a 128-bit vector from individual elements; all the work is
;; done in C by ix86_expand_vector_init.
3805 (define_expand "vec_init<mode>"
3806 [(match_operand:V_128 0 "register_operand" "")
3807 (match_operand 1 "" "")]
3810 ix86_expand_vector_init (false, operands[0], operands[1]);
3814 ;; Avoid combining registers from different units in a single alternative,
3815 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a 128-bit 4-element vector (VI4F_128), merging a
;; duplicated scalar into operand 1.  Alternatives span insertps,
;; scalar moves, movss/vmovss, pinsrd/vpinsrd and plain stores; the
;; "type" attribute is computed per-alternative via the cond below.
;; Register alternatives avoid mixing units — see comment above.
3816 (define_insn "vec_set<mode>_0"
3817 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3818 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3820 (vec_duplicate:VI4F_128
3821 (match_operand:<ssescalarmode> 2 "general_operand"
3822 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3823 (match_operand:VI4F_128 1 "vector_move_operand"
3824 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3828 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3829 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3830 %vmovd\t{%2, %0|%0, %2}
3831 movss\t{%2, %0|%0, %2}
3832 movss\t{%2, %0|%0, %2}
3833 vmovss\t{%2, %1, %0|%0, %1, %2}
3834 pinsrd\t{$0, %2, %0|%0, %2, 0}
3835 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3839 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3841 (cond [(eq_attr "alternative" "0,6,7")
3842 (const_string "sselog")
3843 (eq_attr "alternative" "9")
3844 (const_string "fmov")
3845 (eq_attr "alternative" "10")
3846 (const_string "imov")
3848 (const_string "ssemov")))
3849 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3850 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3851 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3852 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3854 ;; A subset is vec_setv4sf.
;; SSE4.1 set of a single V4SF element via INSERTPS.  operands[3] is a
;; one-hot merge mask; the condition requires exactly one bit set, and
;; the output code converts log2(mask) into the insertps count_d field
;; (bits 5:4 of the immediate).
3855 (define_insn "*vec_setv4sf_sse4_1"
3856 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3859 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3860 (match_operand:V4SF 1 "register_operand" "0,x")
3861 (match_operand:SI 3 "const_int_operand" "")))]
3863 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3864 < GET_MODE_NUNITS (V4SFmode))"
3866 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3867 switch (which_alternative)
3870 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3872 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3877 [(set_attr "isa" "noavx,avx")
3878 (set_attr "type" "sselog")
3879 (set_attr "prefix_data16" "1,*")
3880 (set_attr "prefix_extra" "1")
3881 (set_attr "length_immediate" "1")
3882 (set_attr "prefix" "orig,vex")
3883 (set_attr "mode" "V4SF")])
;; Direct INSERTPS intrinsic pattern (unspec).  With a memory source
;; the hardware ignores the count_s field (immediate bits 7:6), so the
;; output code folds that element offset into the address instead and
;; clears it from the immediate.
3886 (define_insn "sse4_1_insertps"
3887 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3888 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3889 (match_operand:V4SF 1 "register_operand" "0,x")
3890 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3893 if (MEM_P (operands[2]))
3895 unsigned count_s = INTVAL (operands[3]) >> 6;
3897 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3898 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3900 switch (which_alternative)
3903 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3905 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3910 [(set_attr "isa" "noavx,avx")
3911 (set_attr "type" "sselog")
3912 (set_attr "prefix_data16" "1,*")
3913 (set_attr "prefix_extra" "1")
3914 (set_attr "length_immediate" "1")
3915 (set_attr "prefix" "orig,vex")
3916 (set_attr "mode" "V4SF")])
;; Body of a define_split (its opening line is not visible in this
;; view): after reload, a vec_merge of a duplicated scalar into a
;; memory destination is reduced to a plain scalar store at offset 0.
;; NOTE(review): exact merge-mask condition is on elided lines; confirm
;; against the full file.
3919 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3921 (vec_duplicate:VI4F_128
3922 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3925 "TARGET_SSE && reload_completed"
3928 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Generic element insertion for all vector modes (V); the work is done
;; in C by ix86_expand_vector_set.
3933 (define_expand "vec_set<mode>"
3934 [(match_operand:V 0 "register_operand" "")
3935 (match_operand:<ssescalarmode> 1 "register_operand" "")
3936 (match_operand 2 "const_int_operand" "")]
3939 ix86_expand_vector_set (false, operands[0], operands[1],
3940 INTVAL (operands[2]));
;; Extract element 0 of a V4SF.  After reload, this splits into a plain
;; SF move: a register source is re-viewed as an SF register, other
;; sources go through gen_lowpart.
3944 (define_insn_and_split "*vec_extractv4sf_0"
3945 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3947 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3948 (parallel [(const_int 0)])))]
3949 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3951 "&& reload_completed"
3954 rtx op1 = operands[1];
3956 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3958 op1 = gen_lowpart (SFmode, op1);
3959 emit_move_insn (operands[0], op1);
;; SSE4.1 EXTRACTPS of element 1-3 (element 0 is handled by
;; *vec_extractv4sf_0 above).  When the destination turns out to be an
;; SSE register after reload, split into shufps (bringing the wanted
;; element to slot 0) or unpckhps instead of a round trip through
;; extractps.
3963 (define_insn_and_split "*sse4_1_extractps"
3964 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3966 (match_operand:V4SF 1 "register_operand" "x,0,x")
3967 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
3970 %vextractps\t{%2, %1, %0|%0, %1, %2}
3973 "&& reload_completed && SSE_REG_P (operands[0])"
3976 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
3977 switch (INTVAL (operands[2]))
3981 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
3982 operands[2], operands[2],
3983 GEN_INT (INTVAL (operands[2]) + 4),
3984 GEN_INT (INTVAL (operands[2]) + 4)));
3987 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
3990 /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
3995 [(set_attr "isa" "*,noavx,avx")
3996 (set_attr "type" "sselog,*,*")
3997 (set_attr "prefix_data16" "1,*,*")
3998 (set_attr "prefix_extra" "1,*,*")
3999 (set_attr "length_immediate" "1,*,*")
4000 (set_attr "prefix" "maybe_vex,*,*")
4001 (set_attr "mode" "V4SF,*,*")])
;; Extract any element of a V4SF that lives in memory: after reload,
;; split into a plain SF load at byte offset element-index * 4.
4003 (define_insn_and_split "*vec_extract_v4sf_mem"
4004 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
4006 (match_operand:V4SF 1 "memory_operand" "o,o,o")
4007 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
4010 "&& reload_completed"
4013 int i = INTVAL (operands[2]);
4015 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; VEXTRACTF128 builtin expander: dispatch on the 0/1 immediate to the
;; low- or high-half extraction pattern for the 256-bit mode.
4019 (define_expand "avx_vextractf128<mode>"
4020 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
4021 (match_operand:V_256 1 "register_operand" "")
4022 (match_operand:SI 2 "const_0_to_1_operand" "")]
4025 rtx (*insn)(rtx, rtx);
4027 switch (INTVAL (operands[2]))
4030 insn = gen_vec_extract_lo_<mode>;
4033 insn = gen_vec_extract_hi_<mode>;
4039 emit_insn (insn (operands[0], operands[1]));
4043 (define_insn_and_split "vec_extract_lo_<mode>"
4044 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4045 (vec_select:<ssehalfvecmode>
4046 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4047 (parallel [(const_int 0) (const_int