1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_c_enum "unspec" [
56 UNSPEC_XOP_UNSIGNED_CMP
67 UNSPEC_AESKEYGENASSIST
91 (define_c_enum "unspecv" [
101 ;; All vector modes including V?TImode, used in move patterns.
102 (define_mode_iterator V16
103 [(V32QI "TARGET_AVX") V16QI
104 (V16HI "TARGET_AVX") V8HI
105 (V8SI "TARGET_AVX") V4SI
106 (V4DI "TARGET_AVX") V2DI
107 (V2TI "TARGET_AVX") V1TI
108 (V8SF "TARGET_AVX") V4SF
109 (V4DF "TARGET_AVX") V2DF])
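;; Roughly speaking, a mode iterator replicates every pattern written with it
;; once per listed mode, folding the per-mode string (e.g. "TARGET_AVX") into
;; that instance's condition; so "mov<mode>" over V16 stands for movv16qi,
;; movv8hi, ..., movv2df, plus the AVX-only movv32qi, movv16hi, etc.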
112 (define_mode_iterator V
113 [(V32QI "TARGET_AVX") V16QI
114 (V16HI "TARGET_AVX") V8HI
115 (V8SI "TARGET_AVX") V4SI
116 (V4DI "TARGET_AVX") V2DI
117 (V8SF "TARGET_AVX") V4SF
118 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
120 ;; All 128bit vector modes
121 (define_mode_iterator V_128
122 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
124 ;; All 256bit vector modes
125 (define_mode_iterator V_256
126 [V32QI V16HI V8SI V4DI V8SF V4DF])
128 ;; All vector float modes
129 (define_mode_iterator VF
130 [(V8SF "TARGET_AVX") V4SF
131 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
133 ;; All SFmode vector float modes
134 (define_mode_iterator VF1
135 [(V8SF "TARGET_AVX") V4SF])
137 ;; All DFmode vector float modes
138 (define_mode_iterator VF2
139 [(V4DF "TARGET_AVX") V2DF])
141 ;; All 128bit vector float modes
142 (define_mode_iterator VF_128
143 [V4SF (V2DF "TARGET_SSE2")])
145 ;; All 256bit vector float modes
146 (define_mode_iterator VF_256
149 ;; All vector integer modes
150 (define_mode_iterator VI
151 [(V32QI "TARGET_AVX") V16QI
152 (V16HI "TARGET_AVX") V8HI
153 (V8SI "TARGET_AVX") V4SI
154 (V4DI "TARGET_AVX") V2DI])
156 (define_mode_iterator VI_AVX2
157 [(V32QI "TARGET_AVX2") V16QI
158 (V16HI "TARGET_AVX2") V8HI
159 (V8SI "TARGET_AVX2") V4SI
160 (V4DI "TARGET_AVX2") V2DI])
162 ;; All QImode vector integer modes
163 (define_mode_iterator VI1
164 [(V32QI "TARGET_AVX") V16QI])
166 ;; All DImode vector integer modes
167 (define_mode_iterator VI8
168 [(V4DI "TARGET_AVX") V2DI])
170 (define_mode_iterator VI1_AVX2
171 [(V32QI "TARGET_AVX2") V16QI])
173 (define_mode_iterator VI2_AVX2
174 [(V16HI "TARGET_AVX2") V8HI])
176 (define_mode_iterator VI4_AVX2
177 [(V8SI "TARGET_AVX2") V4SI])
179 (define_mode_iterator VI8_AVX2
180 [(V4DI "TARGET_AVX2") V2DI])
182 ;; ??? We should probably use TImode instead.
183 (define_mode_iterator VIMAX_AVX2
184 [(V2TI "TARGET_AVX2") V1TI])
186 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
187 (define_mode_iterator SSESCALARMODE
188 [(V2TI "TARGET_AVX2") TI])
190 (define_mode_iterator VI12_AVX2
191 [(V32QI "TARGET_AVX2") V16QI
192 (V16HI "TARGET_AVX2") V8HI])
194 (define_mode_iterator VI24_AVX2
195 [(V16HI "TARGET_AVX2") V8HI
196 (V8SI "TARGET_AVX2") V4SI])
198 (define_mode_iterator VI124_AVX2
199 [(V32QI "TARGET_AVX2") V16QI
200 (V16HI "TARGET_AVX2") V8HI
201 (V8SI "TARGET_AVX2") V4SI])
203 (define_mode_iterator VI248_AVX2
204 [(V16HI "TARGET_AVX2") V8HI
205 (V8SI "TARGET_AVX2") V4SI
206 (V4DI "TARGET_AVX2") V2DI])
208 (define_mode_iterator VI48_AVX2
209 [(V8SI "TARGET_AVX2") V4SI
210 (V4DI "TARGET_AVX2") V2DI])
212 (define_mode_iterator V48_AVX2
215 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
216 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
218 (define_mode_attr sse2_avx2
219 [(V16QI "sse2") (V32QI "avx2")
220 (V8HI "sse2") (V16HI "avx2")
221 (V4SI "sse2") (V8SI "avx2")
222 (V2DI "sse2") (V4DI "avx2")
223 (V1TI "sse2") (V2TI "avx2")])
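;; A mode attribute substitutes per-mode text wherever <attrname> appears in
;; a pattern using the corresponding iterator; e.g. an insn named
;; "<sse2_avx2>_something" (illustrative name) comes out as sse2_something
;; for the 128-bit modes and avx2_something for the 256-bit ones.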
225 (define_mode_attr ssse3_avx2
226 [(V16QI "ssse3") (V32QI "avx2")
227 (V8HI "ssse3") (V16HI "avx2")
228 (V4SI "ssse3") (V8SI "avx2")
229 (V2DI "ssse3") (V4DI "avx2")
230 (TI "ssse3") (V2TI "avx2")])
232 (define_mode_attr sse4_1_avx2
233 [(V16QI "sse4_1") (V32QI "avx2")
234 (V8HI "sse4_1") (V16HI "avx2")
235 (V4SI "sse4_1") (V8SI "avx2")
236 (V2DI "sse4_1") (V4DI "avx2")])
238 (define_mode_attr avx_avx2
239 [(V4SF "avx") (V2DF "avx")
240 (V8SF "avx") (V4DF "avx")
241 (V4SI "avx2") (V2DI "avx2")
242 (V8SI "avx2") (V4DI "avx2")])
244 (define_mode_attr vec_avx2
245 [(V16QI "vec") (V32QI "avx2")
246 (V8HI "vec") (V16HI "avx2")
247 (V4SI "vec") (V8SI "avx2")
248 (V2DI "vec") (V4DI "avx2")])
250 (define_mode_attr ssedoublemode
251 [(V16HI "V16SI") (V8HI "V8SI")])
253 (define_mode_attr ssebytemode
254 [(V4DI "V32QI") (V2DI "V16QI")])
256 ;; All 128bit vector integer modes
257 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
259 ;; All 256bit vector integer modes
260 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
262 ;; Random 128bit vector integer mode combinations
263 (define_mode_iterator VI12_128 [V16QI V8HI])
264 (define_mode_iterator VI14_128 [V16QI V4SI])
265 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
266 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
267 (define_mode_iterator VI24_128 [V8HI V4SI])
268 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
269 (define_mode_iterator VI48_128 [V4SI V2DI])
271 ;; Random 256bit vector integer mode combinations
272 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
273 (define_mode_iterator VI48_256 [V8SI V4DI])
275 ;; Int-float size matches
276 (define_mode_iterator VI4F_128 [V4SI V4SF])
277 (define_mode_iterator VI8F_128 [V2DI V2DF])
278 (define_mode_iterator VI4F_256 [V8SI V8SF])
279 (define_mode_iterator VI8F_256 [V4DI V4DF])
281 ;; Mapping from float mode to required SSE level
282 (define_mode_attr sse
283 [(SF "sse") (DF "sse2")
284 (V4SF "sse") (V2DF "sse2")
285 (V8SF "avx") (V4DF "avx")])
287 (define_mode_attr sse2
288 [(V16QI "sse2") (V32QI "avx")
289 (V2DI "sse2") (V4DI "avx")])
291 (define_mode_attr sse3
292 [(V16QI "sse3") (V32QI "avx")])
294 (define_mode_attr sse4_1
295 [(V4SF "sse4_1") (V2DF "sse4_1")
296 (V8SF "avx") (V4DF "avx")])
298 (define_mode_attr avxsizesuffix
299 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
300 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
301 (V8SF "256") (V4DF "256")
302 (V4SF "") (V2DF "")])
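;; For example, assuming the usual <ssemodesuffix> mapping defined elsewhere
;; in this port, "<sse>_loadu<ssemodesuffix><avxsizesuffix>" below yields
;; names such as sse_loadups (V4SF), sse2_loadupd (V2DF) and
;; avx_loadups256 (V8SF).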
304 ;; SSE instruction mode
305 (define_mode_attr sseinsnmode
306 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
307 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
308 (V8SF "V8SF") (V4DF "V4DF")
309 (V4SF "V4SF") (V2DF "V2DF")
312 ;; Mapping of vector float modes to an integer mode of the same size
313 (define_mode_attr sseintvecmode
314 [(V8SF "V8SI") (V4DF "V4DI")
315 (V4SF "V4SI") (V2DF "V2DI")
316 (V8SI "V8SI") (V4DI "V4DI")
317 (V4SI "V4SI") (V2DI "V2DI")
318 (V16HI "V16HI") (V8HI "V8HI")
319 (V32QI "V32QI") (V16QI "V16QI")])
321 (define_mode_attr sseintvecmodelower
322 [(V8SF "v8si") (V4DF "v4di")
323 (V4SF "v4si") (V2DF "v2di")
324 (V8SI "v8si") (V4DI "v4di")
325 (V4SI "v4si") (V2DI "v2di")
326 (V16HI "v16hi") (V8HI "v8hi")
327 (V32QI "v32qi") (V16QI "v16qi")])
329 ;; Mapping of vector modes to a vector mode of double size
330 (define_mode_attr ssedoublevecmode
331 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
332 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
333 (V8SF "V16SF") (V4DF "V8DF")
334 (V4SF "V8SF") (V2DF "V4DF")])
336 ;; Mapping of vector modes to a vector mode of half size
337 (define_mode_attr ssehalfvecmode
338 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
339 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
340 (V8SF "V4SF") (V4DF "V2DF")
343 ;; Mapping of vector modes back to the scalar modes
344 (define_mode_attr ssescalarmode
345 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
346 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
347 (V8SF "SF") (V4DF "DF")
348 (V4SF "SF") (V2DF "DF")])
350 ;; Number of scalar elements in each vector type
351 (define_mode_attr ssescalarnum
352 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
353 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
354 (V8SF "8") (V4DF "4")
355 (V4SF "4") (V2DF "2")])
357 ;; SSE prefix for integer vector modes
358 (define_mode_attr sseintprefix
359 [(V2DI "p") (V2DF "")
362 (V8SI "p") (V8SF "")])
364 ;; SSE scalar suffix for vector modes
365 (define_mode_attr ssescalarmodesuffix
367 (V8SF "ss") (V4DF "sd")
368 (V4SF "ss") (V2DF "sd")
369 (V8SI "ss") (V4DI "sd")
372 ;; Pack/unpack vector modes
373 (define_mode_attr sseunpackmode
374 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
375 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
377 (define_mode_attr ssepackmode
378 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
379 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
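;; I.e. unpacking maps a vector to one with elements twice as wide and half
;; as many (V16QI -> V8HI), and packing is the inverse narrowing map.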
381 ;; Mapping of vector modes to the maximum rotate count (element size in
;; bits minus one) accepted by the XOP rotate immediate constraint
382 (define_mode_attr sserotatemax
383 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
385 ;; Mapping of mode to cast intrinsic name
386 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
388 ;; Instruction suffix for sign and zero extensions.
389 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
391 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
392 (define_mode_attr i128
393 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
394 (V8SI "%~128") (V4DI "%~128")])
397 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
399 ;; Mapping of immediate bits for blend instructions
400 (define_mode_attr blendbits
401 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
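;; The blend immediate carries one selector bit per element, so the largest
;; meaningful value is (1 << nelts) - 1: 255 for V8SF, 15 for V4SF/V4DF,
;; 3 for V2DF.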
403 ;; Patterns whose names begin with "sse{,2,3}_" are invoked by intrinsics.
405 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
409 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
411 ;; All of these patterns are enabled for SSE1 as well as SSE2.
412 ;; This is essential for maintaining stable calling conventions.
414 (define_expand "mov<mode>"
415 [(set (match_operand:V16 0 "nonimmediate_operand" "")
416 (match_operand:V16 1 "nonimmediate_operand" ""))]
419 ix86_expand_vector_move (<MODE>mode, operands);
423 (define_insn "*mov<mode>_internal"
424 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
425 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
427 && (register_operand (operands[0], <MODE>mode)
428 || register_operand (operands[1], <MODE>mode))"
430 switch (which_alternative)
433 return standard_sse_constant_opcode (insn, operands[1]);
436 switch (get_attr_mode (insn))
441 && (misaligned_operand (operands[0], <MODE>mode)
442 || misaligned_operand (operands[1], <MODE>mode)))
443 return "vmovups\t{%1, %0|%0, %1}";
445 return "%vmovaps\t{%1, %0|%0, %1}";
450 && (misaligned_operand (operands[0], <MODE>mode)
451 || misaligned_operand (operands[1], <MODE>mode)))
452 return "vmovupd\t{%1, %0|%0, %1}";
453 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
454 return "%vmovaps\t{%1, %0|%0, %1}";
456 return "%vmovapd\t{%1, %0|%0, %1}";
461 && (misaligned_operand (operands[0], <MODE>mode)
462 || misaligned_operand (operands[1], <MODE>mode)))
463 return "vmovdqu\t{%1, %0|%0, %1}";
464 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
465 return "%vmovaps\t{%1, %0|%0, %1}";
467 return "%vmovdqa\t{%1, %0|%0, %1}";
476 [(set_attr "type" "sselog1,ssemov,ssemov")
477 (set_attr "prefix" "maybe_vex")
479 (cond [(match_test "TARGET_AVX")
480 (const_string "<sseinsnmode>")
481 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
482 (not (match_test "TARGET_SSE2")))
483 (and (eq_attr "alternative" "2")
484 (match_test "TARGET_SSE_TYPELESS_STORES")))
485 (const_string "V4SF")
486 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
487 (const_string "V4SF")
488 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
489 (const_string "V2DF")
491 (const_string "TI")))])
493 (define_insn "sse2_movq128"
494 [(set (match_operand:V2DI 0 "register_operand" "=x")
497 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
498 (parallel [(const_int 0)]))
501 "%vmovq\t{%1, %0|%0, %1}"
502 [(set_attr "type" "ssemov")
503 (set_attr "prefix" "maybe_vex")
504 (set_attr "mode" "TI")])
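;; (movq here copies the low quadword of the source and zeroes the upper
;; quadword of the destination, giving a zero-extended 64-bit load.)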
506 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
507 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
508 ;; from memory, we'd prefer to load the memory directly into the %xmm
509 ;; register. To facilitate this happy circumstance, this pattern won't
510 ;; split until after register allocation. If the 64-bit value didn't
511 ;; come from memory, this is the best we can do. This is much better
512 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
515 (define_insn_and_split "movdi_to_sse"
517 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
518 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
519 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
520 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
522 "&& reload_completed"
525 if (register_operand (operands[1], DImode))
527 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
528 Assemble the 64-bit DImode value in an xmm register. */
529 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
530 gen_rtx_SUBREG (SImode, operands[1], 0)));
531 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
532 gen_rtx_SUBREG (SImode, operands[1], 4)));
533 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
536 else if (memory_operand (operands[1], DImode))
537 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
538 operands[1], const0_rtx));
544 [(set (match_operand:V4SF 0 "register_operand" "")
545 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
546 "TARGET_SSE && reload_completed"
549 (vec_duplicate:V4SF (match_dup 1))
553 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
554 operands[2] = CONST0_RTX (V4SFmode);
558 [(set (match_operand:V2DF 0 "register_operand" "")
559 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
560 "TARGET_SSE2 && reload_completed"
561 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
563 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
564 operands[2] = CONST0_RTX (DFmode);
567 (define_expand "push<mode>1"
568 [(match_operand:V16 0 "register_operand" "")]
571 ix86_expand_push (<MODE>mode, operands[0]);
575 (define_expand "movmisalign<mode>"
576 [(set (match_operand:V16 0 "nonimmediate_operand" "")
577 (match_operand:V16 1 "nonimmediate_operand" ""))]
580 ix86_expand_vector_move_misalign (<MODE>mode, operands);
584 (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
585 [(set (match_operand:VF 0 "register_operand" "=x")
587 [(match_operand:VF 1 "memory_operand" "m")]
590 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
591 [(set_attr "type" "ssemov")
592 (set_attr "movu" "1")
593 (set_attr "prefix" "maybe_vex")
594 (set_attr "mode" "<MODE>")])
596 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
597 [(set (match_operand:VF 0 "memory_operand" "=m")
599 [(match_operand:VF 1 "register_operand" "x")]
602 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
603 [(set_attr "type" "ssemov")
604 (set_attr "movu" "1")
605 (set_attr "prefix" "maybe_vex")
606 (set_attr "mode" "<MODE>")])
608 (define_insn "<sse2>_loaddqu<avxsizesuffix>"
609 [(set (match_operand:VI1 0 "register_operand" "=x")
610 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
613 "%vmovdqu\t{%1, %0|%0, %1}"
614 [(set_attr "type" "ssemov")
615 (set_attr "movu" "1")
616 (set (attr "prefix_data16")
618 (match_test "TARGET_AVX")
621 (set_attr "prefix" "maybe_vex")
622 (set_attr "mode" "<sseinsnmode>")])
624 (define_insn "<sse2>_storedqu<avxsizesuffix>"
625 [(set (match_operand:VI1 0 "memory_operand" "=m")
626 (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
629 "%vmovdqu\t{%1, %0|%0, %1}"
630 [(set_attr "type" "ssemov")
631 (set_attr "movu" "1")
632 (set (attr "prefix_data16")
634 (match_test "TARGET_AVX")
637 (set_attr "prefix" "maybe_vex")
638 (set_attr "mode" "<sseinsnmode>")])
640 (define_insn "<sse3>_lddqu<avxsizesuffix>"
641 [(set (match_operand:VI1 0 "register_operand" "=x")
642 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
645 "%vlddqu\t{%1, %0|%0, %1}"
646 [(set_attr "type" "ssemov")
647 (set_attr "movu" "1")
648 (set (attr "prefix_data16")
650 (match_test "TARGET_AVX")
653 (set (attr "prefix_rep")
655 (match_test "TARGET_AVX")
658 (set_attr "prefix" "maybe_vex")
659 (set_attr "mode" "<sseinsnmode>")])
661 (define_insn "sse2_movnti<mode>"
662 [(set (match_operand:SWI48 0 "memory_operand" "=m")
663 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
666 "movnti\t{%1, %0|%0, %1}"
667 [(set_attr "type" "ssemov")
668 (set_attr "prefix_data16" "0")
669 (set_attr "mode" "<MODE>")])
671 (define_insn "<sse>_movnt<mode>"
672 [(set (match_operand:VF 0 "memory_operand" "=m")
673 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
676 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
677 [(set_attr "type" "ssemov")
678 (set_attr "prefix" "maybe_vex")
679 (set_attr "mode" "<MODE>")])
681 (define_insn "<sse2>_movnt<mode>"
682 [(set (match_operand:VI8 0 "memory_operand" "=m")
683 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
686 "%vmovntdq\t{%1, %0|%0, %1}"
687 [(set_attr "type" "ssecvt")
688 (set (attr "prefix_data16")
690 (match_test "TARGET_AVX")
693 (set_attr "prefix" "maybe_vex")
694 (set_attr "mode" "<sseinsnmode>")])
696 ; Expand patterns for non-temporal stores. At the moment, only those
697 ; that directly map to insns are defined; it would be possible to
698 ; define patterns for other modes that would expand to several insns.
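; (The storent<mode> expander below just wraps the source in a non-temporal
; unspec; the resulting set is then matched by the movnti, movntps/movntpd,
; movntdq and, for SSE4A scalars, movntss/movntsd patterns.)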
700 ;; Modes handled by storent patterns.
701 (define_mode_iterator STORENT_MODE
702 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
703 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
704 (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
705 (V8SF "TARGET_AVX") V4SF
706 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
708 (define_expand "storent<mode>"
709 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
711 [(match_operand:STORENT_MODE 1 "register_operand" "")]
715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
717 ;; Parallel floating point arithmetic
719 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
721 (define_expand "<code><mode>2"
722 [(set (match_operand:VF 0 "register_operand" "")
724 (match_operand:VF 1 "register_operand" "")))]
726 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
728 (define_insn_and_split "*absneg<mode>2"
729 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
730 (match_operator:VF 3 "absneg_operator"
731 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
732 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
735 "&& reload_completed"
738 enum rtx_code absneg_op;
744 if (MEM_P (operands[1]))
745 op1 = operands[2], op2 = operands[1];
747 op1 = operands[1], op2 = operands[2];
752 if (rtx_equal_p (operands[0], operands[1]))
758 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
759 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
760 t = gen_rtx_SET (VOIDmode, operands[0], t);
764 [(set_attr "isa" "noavx,noavx,avx,avx")])
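;; Negation is implemented as XOR with a sign-bit mask and absolute value as
;; AND with the complementary mask; the mask arrives in operand 2, so the
;; splitter above only has to choose between XOR and AND.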
766 (define_expand "<plusminus_insn><mode>3"
767 [(set (match_operand:VF 0 "register_operand" "")
769 (match_operand:VF 1 "nonimmediate_operand" "")
770 (match_operand:VF 2 "nonimmediate_operand" "")))]
772 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
774 (define_insn "*<plusminus_insn><mode>3"
775 [(set (match_operand:VF 0 "register_operand" "=x,x")
777 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
778 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
779 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
781 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
782 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
783 [(set_attr "isa" "noavx,avx")
784 (set_attr "type" "sseadd")
785 (set_attr "prefix" "orig,vex")
786 (set_attr "mode" "<MODE>")])
788 (define_insn "<sse>_vm<plusminus_insn><mode>3"
789 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
792 (match_operand:VF_128 1 "register_operand" "0,x")
793 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
798 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
799 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
800 [(set_attr "isa" "noavx,avx")
801 (set_attr "type" "sseadd")
802 (set_attr "prefix" "orig,vex")
803 (set_attr "mode" "<ssescalarmode>")])
805 (define_expand "mul<mode>3"
806 [(set (match_operand:VF 0 "register_operand" "")
808 (match_operand:VF 1 "nonimmediate_operand" "")
809 (match_operand:VF 2 "nonimmediate_operand" "")))]
811 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
813 (define_insn "*mul<mode>3"
814 [(set (match_operand:VF 0 "register_operand" "=x,x")
816 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
817 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
818 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
820 mul<ssemodesuffix>\t{%2, %0|%0, %2}
821 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
822 [(set_attr "isa" "noavx,avx")
823 (set_attr "type" "ssemul")
824 (set_attr "prefix" "orig,vex")
825 (set_attr "mode" "<MODE>")])
827 (define_insn "<sse>_vmmul<mode>3"
828 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
831 (match_operand:VF_128 1 "register_operand" "0,x")
832 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
837 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
838 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
839 [(set_attr "isa" "noavx,avx")
840 (set_attr "type" "ssemul")
841 (set_attr "prefix" "orig,vex")
842 (set_attr "mode" "<ssescalarmode>")])
844 (define_expand "div<mode>3"
845 [(set (match_operand:VF2 0 "register_operand" "")
846 (div:VF2 (match_operand:VF2 1 "register_operand" "")
847 (match_operand:VF2 2 "nonimmediate_operand" "")))]
849 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
851 (define_expand "div<mode>3"
852 [(set (match_operand:VF1 0 "register_operand" "")
853 (div:VF1 (match_operand:VF1 1 "register_operand" "")
854 (match_operand:VF1 2 "nonimmediate_operand" "")))]
857 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
860 && TARGET_RECIP_VEC_DIV
861 && !optimize_insn_for_size_p ()
862 && flag_finite_math_only && !flag_trapping_math
863 && flag_unsafe_math_optimizations)
865 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
870 (define_insn "<sse>_div<mode>3"
871 [(set (match_operand:VF 0 "register_operand" "=x,x")
873 (match_operand:VF 1 "register_operand" "0,x")
874 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
877 div<ssemodesuffix>\t{%2, %0|%0, %2}
878 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
879 [(set_attr "isa" "noavx,avx")
880 (set_attr "type" "ssediv")
881 (set_attr "prefix" "orig,vex")
882 (set_attr "mode" "<MODE>")])
884 (define_insn "<sse>_vmdiv<mode>3"
885 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
888 (match_operand:VF_128 1 "register_operand" "0,x")
889 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
894 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
895 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
896 [(set_attr "isa" "noavx,avx")
897 (set_attr "type" "ssediv")
898 (set_attr "prefix" "orig,vex")
899 (set_attr "mode" "<ssescalarmode>")])
901 (define_insn "<sse>_rcp<mode>2"
902 [(set (match_operand:VF1 0 "register_operand" "=x")
904 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
906 "%vrcpps\t{%1, %0|%0, %1}"
907 [(set_attr "type" "sse")
908 (set_attr "atom_sse_attr" "rcp")
909 (set_attr "prefix" "maybe_vex")
910 (set_attr "mode" "<MODE>")])
912 (define_insn "sse_vmrcpv4sf2"
913 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
915 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
917 (match_operand:V4SF 2 "register_operand" "0,x")
921 rcpss\t{%1, %0|%0, %1}
922 vrcpss\t{%1, %2, %0|%0, %2, %1}"
923 [(set_attr "isa" "noavx,avx")
924 (set_attr "type" "sse")
925 (set_attr "atom_sse_attr" "rcp")
926 (set_attr "prefix" "orig,vex")
927 (set_attr "mode" "SF")])
929 (define_expand "sqrt<mode>2"
930 [(set (match_operand:VF2 0 "register_operand" "")
931 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
934 (define_expand "sqrt<mode>2"
935 [(set (match_operand:VF1 0 "register_operand" "")
936 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
940 && TARGET_RECIP_VEC_SQRT
941 && !optimize_insn_for_size_p ()
942 && flag_finite_math_only && !flag_trapping_math
943 && flag_unsafe_math_optimizations)
945 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
950 (define_insn "<sse>_sqrt<mode>2"
951 [(set (match_operand:VF 0 "register_operand" "=x")
952 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
954 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
955 [(set_attr "type" "sse")
956 (set_attr "atom_sse_attr" "sqrt")
957 (set_attr "prefix" "maybe_vex")
958 (set_attr "mode" "<MODE>")])
960 (define_insn "<sse>_vmsqrt<mode>2"
961 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
964 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
965 (match_operand:VF_128 2 "register_operand" "0,x")
969 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
970 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
971 [(set_attr "isa" "noavx,avx")
972 (set_attr "type" "sse")
973 (set_attr "atom_sse_attr" "sqrt")
974 (set_attr "prefix" "orig,vex")
975 (set_attr "mode" "<ssescalarmode>")])
977 (define_expand "rsqrt<mode>2"
978 [(set (match_operand:VF1 0 "register_operand" "")
980 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
983 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
987 (define_insn "<sse>_rsqrt<mode>2"
988 [(set (match_operand:VF1 0 "register_operand" "=x")
990 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
992 "%vrsqrtps\t{%1, %0|%0, %1}"
993 [(set_attr "type" "sse")
994 (set_attr "prefix" "maybe_vex")
995 (set_attr "mode" "<MODE>")])
997 (define_insn "sse_vmrsqrtv4sf2"
998 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1000 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1002 (match_operand:V4SF 2 "register_operand" "0,x")
1006 rsqrtss\t{%1, %0|%0, %1}
1007 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1008 [(set_attr "isa" "noavx,avx")
1009 (set_attr "type" "sse")
1010 (set_attr "prefix" "orig,vex")
1011 (set_attr "mode" "SF")])
1013 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1014 ;; isn't really correct, as those rtl operators aren't defined when
1015 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1017 (define_expand "<code><mode>3"
1018 [(set (match_operand:VF 0 "register_operand" "")
1020 (match_operand:VF 1 "nonimmediate_operand" "")
1021 (match_operand:VF 2 "nonimmediate_operand" "")))]
1024 if (!flag_finite_math_only)
1025 operands[1] = force_reg (<MODE>mode, operands[1]);
1026 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1029 (define_insn "*<code><mode>3_finite"
1030 [(set (match_operand:VF 0 "register_operand" "=x,x")
1032 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1033 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1034 "TARGET_SSE && flag_finite_math_only
1035 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1037 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1038 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1039 [(set_attr "isa" "noavx,avx")
1040 (set_attr "type" "sseadd")
1041 (set_attr "prefix" "orig,vex")
1042 (set_attr "mode" "<MODE>")])
1044 (define_insn "*<code><mode>3"
1045 [(set (match_operand:VF 0 "register_operand" "=x,x")
1047 (match_operand:VF 1 "register_operand" "0,x")
1048 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1049 "TARGET_SSE && !flag_finite_math_only"
1051 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1052 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1053 [(set_attr "isa" "noavx,avx")
1054 (set_attr "type" "sseadd")
1055 (set_attr "prefix" "orig,vex")
1056 (set_attr "mode" "<MODE>")])
1058 (define_insn "<sse>_vm<code><mode>3"
1059 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1062 (match_operand:VF_128 1 "register_operand" "0,x")
1063 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1068 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1069 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1070 [(set_attr "isa" "noavx,avx")
1071 (set_attr "type" "sse")
1072 (set_attr "prefix" "orig,vex")
1073 (set_attr "mode" "<ssescalarmode>")])
1075 ;; These versions of the min/max patterns implement exactly the operations
1076 ;; min = (op1 < op2 ? op1 : op2)
1077 ;; max = (!(op1 < op2) ? op1 : op2)
1078 ;; Their operands are not commutative, and thus they may be used in the
1079 ;; presence of -0.0 and NaN.
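;; For instance, min(-0.0, +0.0) returns the second operand (+0.0) because
;; the compare is false, and any compare involving a NaN is false, so a NaN
;; in operand 1 likewise returns operand 2.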
1081 (define_insn "*ieee_smin<mode>3"
1082 [(set (match_operand:VF 0 "register_operand" "=x,x")
1084 [(match_operand:VF 1 "register_operand" "0,x")
1085 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1089 min<ssemodesuffix>\t{%2, %0|%0, %2}
1090 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1091 [(set_attr "isa" "noavx,avx")
1092 (set_attr "type" "sseadd")
1093 (set_attr "prefix" "orig,vex")
1094 (set_attr "mode" "<MODE>")])
1096 (define_insn "*ieee_smax<mode>3"
1097 [(set (match_operand:VF 0 "register_operand" "=x,x")
1099 [(match_operand:VF 1 "register_operand" "0,x")
1100 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1104 max<ssemodesuffix>\t{%2, %0|%0, %2}
1105 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1106 [(set_attr "isa" "noavx,avx")
1107 (set_attr "type" "sseadd")
1108 (set_attr "prefix" "orig,vex")
1109 (set_attr "mode" "<MODE>")])
1111 (define_insn "avx_addsubv4df3"
1112 [(set (match_operand:V4DF 0 "register_operand" "=x")
1115 (match_operand:V4DF 1 "register_operand" "x")
1116 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1117 (minus:V4DF (match_dup 1) (match_dup 2))
1120 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1121 [(set_attr "type" "sseadd")
1122 (set_attr "prefix" "vex")
1123 (set_attr "mode" "V4DF")])
1125 (define_insn "sse3_addsubv2df3"
1126 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1129 (match_operand:V2DF 1 "register_operand" "0,x")
1130 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1131 (minus:V2DF (match_dup 1) (match_dup 2))
1135 addsubpd\t{%2, %0|%0, %2}
1136 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1137 [(set_attr "isa" "noavx,avx")
1138 (set_attr "type" "sseadd")
1139 (set_attr "atom_unit" "complex")
1140 (set_attr "prefix" "orig,vex")
1141 (set_attr "mode" "V2DF")])
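;; addsubpd/addsubps subtract in the even-numbered elements and add in the
;; odd-numbered ones (elements counted from zero).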
1143 (define_insn "avx_addsubv8sf3"
1144 [(set (match_operand:V8SF 0 "register_operand" "=x")
1147 (match_operand:V8SF 1 "register_operand" "x")
1148 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1149 (minus:V8SF (match_dup 1) (match_dup 2))
1152 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1153 [(set_attr "type" "sseadd")
1154 (set_attr "prefix" "vex")
1155 (set_attr "mode" "V8SF")])
1157 (define_insn "sse3_addsubv4sf3"
1158 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1161 (match_operand:V4SF 1 "register_operand" "0,x")
1162 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1163 (minus:V4SF (match_dup 1) (match_dup 2))
1167 addsubps\t{%2, %0|%0, %2}
1168 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1169 [(set_attr "isa" "noavx,avx")
1170 (set_attr "type" "sseadd")
1171 (set_attr "prefix" "orig,vex")
1172 (set_attr "prefix_rep" "1,*")
1173 (set_attr "mode" "V4SF")])
1175 (define_insn "avx_h<plusminus_insn>v4df3"
1176 [(set (match_operand:V4DF 0 "register_operand" "=x")
1181 (match_operand:V4DF 1 "register_operand" "x")
1182 (parallel [(const_int 0)]))
1183 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1186 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1187 (parallel [(const_int 0)]))
1188 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1191 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1192 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1194 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1195 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1197 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1198 [(set_attr "type" "sseadd")
1199 (set_attr "prefix" "vex")
1200 (set_attr "mode" "V4DF")])
1202 (define_insn "sse3_h<plusminus_insn>v2df3"
1203 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1207 (match_operand:V2DF 1 "register_operand" "0,x")
1208 (parallel [(const_int 0)]))
1209 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1212 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1213 (parallel [(const_int 0)]))
1214 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1217 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1218 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1219 [(set_attr "isa" "noavx,avx")
1220 (set_attr "type" "sseadd")
1221 (set_attr "prefix" "orig,vex")
1222 (set_attr "mode" "V2DF")])
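;; I.e. haddpd computes { op1[0]+op1[1], op2[0]+op2[1] } and hsubpd the
;; corresponding differences; the 256-bit AVX forms apply the same operation
;; independently within each 128-bit lane.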
1224 (define_insn "avx_h<plusminus_insn>v8sf3"
1225 [(set (match_operand:V8SF 0 "register_operand" "=x")
1231 (match_operand:V8SF 1 "register_operand" "x")
1232 (parallel [(const_int 0)]))
1233 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1235 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1236 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1240 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1241 (parallel [(const_int 0)]))
1242 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1244 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1245 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1249 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1250 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1252 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1253 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1256 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1257 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1259 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1260 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1262 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1263 [(set_attr "type" "sseadd")
1264 (set_attr "prefix" "vex")
1265 (set_attr "mode" "V8SF")])
1267 (define_insn "sse3_h<plusminus_insn>v4sf3"
1268 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1273 (match_operand:V4SF 1 "register_operand" "0,x")
1274 (parallel [(const_int 0)]))
1275 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1277 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1278 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1282 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1283 (parallel [(const_int 0)]))
1284 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1286 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1287 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1290 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1291 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1292 [(set_attr "isa" "noavx,avx")
1293 (set_attr "type" "sseadd")
1294 (set_attr "atom_unit" "complex")
1295 (set_attr "prefix" "orig,vex")
1296 (set_attr "prefix_rep" "1,*")
1297 (set_attr "mode" "V4SF")])
1299 (define_expand "reduc_splus_v4df"
1300 [(match_operand:V4DF 0 "register_operand" "")
1301 (match_operand:V4DF 1 "register_operand" "")]
1304 rtx tmp = gen_reg_rtx (V4DFmode);
1305 rtx tmp2 = gen_reg_rtx (V4DFmode);
1306 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1307 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1308 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
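;; (The 256-bit reductions here use horizontal adds within each 128-bit
;; lane, then a vperm2f128 lane swap and a final add, leaving the full sum
;; replicated in every element.)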
1312 (define_expand "reduc_splus_v2df"
1313 [(match_operand:V2DF 0 "register_operand" "")
1314 (match_operand:V2DF 1 "register_operand" "")]
1317 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1321 (define_expand "reduc_splus_v8sf"
1322 [(match_operand:V8SF 0 "register_operand" "")
1323 (match_operand:V8SF 1 "register_operand" "")]
1326 rtx tmp = gen_reg_rtx (V8SFmode);
1327 rtx tmp2 = gen_reg_rtx (V8SFmode);
1328 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1329 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1330 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1331 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1335 (define_expand "reduc_splus_v4sf"
1336 [(match_operand:V4SF 0 "register_operand" "")
1337 (match_operand:V4SF 1 "register_operand" "")]
1342 rtx tmp = gen_reg_rtx (V4SFmode);
1343 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1344 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1347 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1351 ;; Modes handled by reduc_sm{in,ax}* patterns.
1352 (define_mode_iterator REDUC_SMINMAX_MODE
1353 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1354 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1355 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1356 (V4SF "TARGET_SSE")])
1358 (define_expand "reduc_<code>_<mode>"
1359 [(smaxmin:REDUC_SMINMAX_MODE
1360 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1361 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1364 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1368 (define_expand "reduc_<code>_<mode>"
1370 (match_operand:VI_256 0 "register_operand" "")
1371 (match_operand:VI_256 1 "register_operand" ""))]
1374 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1378 (define_expand "reduc_umin_v8hi"
1380 (match_operand:V8HI 0 "register_operand" "")
1381 (match_operand:V8HI 1 "register_operand" ""))]
1384 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1390 ;; Parallel floating point comparisons
1392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1394 (define_insn "avx_cmp<mode>3"
1395 [(set (match_operand:VF 0 "register_operand" "=x")
1397 [(match_operand:VF 1 "register_operand" "x")
1398 (match_operand:VF 2 "nonimmediate_operand" "xm")
1399 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1402 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1403 [(set_attr "type" "ssecmp")
1404 (set_attr "length_immediate" "1")
1405 (set_attr "prefix" "vex")
1406 (set_attr "mode" "<MODE>")])
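;; AVX widens the comparison-predicate immediate to five bits (32
;; predicates), hence const_0_to_31_operand above; the original SSE
;; cmpps/cmppd forms only accept predicates 0-7.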
1408 (define_insn "avx_vmcmp<mode>3"
1409 [(set (match_operand:VF_128 0 "register_operand" "=x")
1412 [(match_operand:VF_128 1 "register_operand" "x")
1413 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1414 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1419 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1420 [(set_attr "type" "ssecmp")
1421 (set_attr "length_immediate" "1")
1422 (set_attr "prefix" "vex")
1423 (set_attr "mode" "<ssescalarmode>")])
1425 (define_insn "*<sse>_maskcmp<mode>3_comm"
1426 [(set (match_operand:VF 0 "register_operand" "=x,x")
1427 (match_operator:VF 3 "sse_comparison_operator"
1428 [(match_operand:VF 1 "register_operand" "%0,x")
1429 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1431 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1433 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1434 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1435 [(set_attr "isa" "noavx,avx")
1436 (set_attr "type" "ssecmp")
1437 (set_attr "length_immediate" "1")
1438 (set_attr "prefix" "orig,vex")
1439 (set_attr "mode" "<MODE>")])
1441 (define_insn "<sse>_maskcmp<mode>3"
1442 [(set (match_operand:VF 0 "register_operand" "=x,x")
1443 (match_operator:VF 3 "sse_comparison_operator"
1444 [(match_operand:VF 1 "register_operand" "0,x")
1445 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1448 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1449 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1450 [(set_attr "isa" "noavx,avx")
1451 (set_attr "type" "ssecmp")
1452 (set_attr "length_immediate" "1")
1453 (set_attr "prefix" "orig,vex")
1454 (set_attr "mode" "<MODE>")])
1456 (define_insn "<sse>_vmmaskcmp<mode>3"
1457 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1459 (match_operator:VF_128 3 "sse_comparison_operator"
1460 [(match_operand:VF_128 1 "register_operand" "0,x")
1461 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1466 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1467 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1468 [(set_attr "isa" "noavx,avx")
1469 (set_attr "type" "ssecmp")
1470 (set_attr "length_immediate" "1,*")
1471 (set_attr "prefix" "orig,vex")
1472 (set_attr "mode" "<ssescalarmode>")])
1474 (define_insn "<sse>_comi"
1475 [(set (reg:CCFP FLAGS_REG)
1478 (match_operand:<ssevecmode> 0 "register_operand" "x")
1479 (parallel [(const_int 0)]))
1481 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1482 (parallel [(const_int 0)]))))]
1483 "SSE_FLOAT_MODE_P (<MODE>mode)"
1484 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1485 [(set_attr "type" "ssecomi")
1486 (set_attr "prefix" "maybe_vex")
1487 (set_attr "prefix_rep" "0")
1488 (set (attr "prefix_data16")
1489 (if_then_else (eq_attr "mode" "DF")
1491 (const_string "0")))
1492 (set_attr "mode" "<MODE>")])
1494 (define_insn "<sse>_ucomi"
1495 [(set (reg:CCFPU FLAGS_REG)
1498 (match_operand:<ssevecmode> 0 "register_operand" "x")
1499 (parallel [(const_int 0)]))
1501 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1502 (parallel [(const_int 0)]))))]
1503 "SSE_FLOAT_MODE_P (<MODE>mode)"
1504 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1505 [(set_attr "type" "ssecomi")
1506 (set_attr "prefix" "maybe_vex")
1507 (set_attr "prefix_rep" "0")
1508 (set (attr "prefix_data16")
1509 (if_then_else (eq_attr "mode" "DF")
1511 (const_string "0")))
1512 (set_attr "mode" "<MODE>")])
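;; comi raises the invalid-operation exception for QNaN operands as well,
;; while ucomi raises it only for SNaNs; the two patterns use the distinct
;; CCFP/CCFPU flag modes to record that difference.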
1514 (define_expand "vcond<V_256:mode><VF_256:mode>"
1515 [(set (match_operand:V_256 0 "register_operand" "")
1517 (match_operator 3 ""
1518 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1519 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1520 (match_operand:V_256 1 "general_operand" "")
1521 (match_operand:V_256 2 "general_operand" "")))]
1523 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1524 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1526 bool ok = ix86_expand_fp_vcond (operands);
1531 (define_expand "vcond<V_128:mode><VF_128:mode>"
1532 [(set (match_operand:V_128 0 "register_operand" "")
1534 (match_operator 3 ""
1535 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1536 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1537 (match_operand:V_128 1 "general_operand" "")
1538 (match_operand:V_128 2 "general_operand" "")))]
1540 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1541 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1543 bool ok = ix86_expand_fp_vcond (operands);
1548 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1550 ;; Parallel floating point logical operations
1552 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1554 (define_insn "<sse>_andnot<mode>3"
1555 [(set (match_operand:VF 0 "register_operand" "=x,x")
1558 (match_operand:VF 1 "register_operand" "0,x"))
1559 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1562 static char buf[32];
1565 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1567 switch (which_alternative)
1570 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1573 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1579 snprintf (buf, sizeof (buf), insn, suffix);
1582 [(set_attr "isa" "noavx,avx")
1583 (set_attr "type" "sselog")
1584 (set_attr "prefix" "orig,vex")
1585 (set_attr "mode" "<MODE>")])
1587 (define_expand "<code><mode>3"
1588 [(set (match_operand:VF 0 "register_operand" "")
1590 (match_operand:VF 1 "nonimmediate_operand" "")
1591 (match_operand:VF 2 "nonimmediate_operand" "")))]
1593 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1595 (define_insn "*<code><mode>3"
1596 [(set (match_operand:VF 0 "register_operand" "=x,x")
1598 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1599 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1600 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1602 static char buf[32];
1605 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1607 switch (which_alternative)
1610 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1613 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1619 snprintf (buf, sizeof (buf), insn, suffix);
1622 [(set_attr "isa" "noavx,avx")
1623 (set_attr "type" "sselog")
1624 (set_attr "prefix" "orig,vex")
1625 (set_attr "mode" "<MODE>")])
1627 (define_expand "copysign<mode>3"
1630 (not:VF (match_dup 3))
1631 (match_operand:VF 1 "nonimmediate_operand" "")))
1633 (and:VF (match_dup 3)
1634 (match_operand:VF 2 "nonimmediate_operand" "")))
1635 (set (match_operand:VF 0 "register_operand" "")
1636 (ior:VF (match_dup 4) (match_dup 5)))]
1639 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1641 operands[4] = gen_reg_rtx (<MODE>mode);
1642 operands[5] = gen_reg_rtx (<MODE>mode);
1645 ;; Also define scalar versions. These are used for abs, neg, and
1646 ;; conditional move. Using subregs into vector modes causes register
1647 ;; allocation lossage. These patterns do not allow memory operands
1648 ;; because the native instructions read the full 128 bits.
1650 (define_insn "*andnot<mode>3"
1651 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1654 (match_operand:MODEF 1 "register_operand" "0,x"))
1655 (match_operand:MODEF 2 "register_operand" "x,x")))]
1656 "SSE_FLOAT_MODE_P (<MODE>mode)"
1658 static char buf[32];
1661 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1663 switch (which_alternative)
1666 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1669 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1675 snprintf (buf, sizeof (buf), insn, suffix);
1678 [(set_attr "isa" "noavx,avx")
1679 (set_attr "type" "sselog")
1680 (set_attr "prefix" "orig,vex")
1681 (set_attr "mode" "<ssevecmode>")])
1683 (define_insn "*<code><mode>3"
1684 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1686 (match_operand:MODEF 1 "register_operand" "%0,x")
1687 (match_operand:MODEF 2 "register_operand" "x,x")))]
1688 "SSE_FLOAT_MODE_P (<MODE>mode)"
1690 static char buf[32];
1693 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1695 switch (which_alternative)
1698 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1701 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1707 snprintf (buf, sizeof (buf), insn, suffix);
1710 [(set_attr "isa" "noavx,avx")
1711 (set_attr "type" "sselog")
1712 (set_attr "prefix" "orig,vex")
1713 (set_attr "mode" "<ssevecmode>")])
1715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1717 ;; FMA floating point multiply/accumulate instructions. These include
1718 ;; scalar versions of the instructions as well as vector versions.
1720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1722 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
1724 ;; The standard names for fma are only available with SSE math enabled.
1725 (define_expand "fma<mode>4"
1726 [(set (match_operand:FMAMODE 0 "register_operand")
1728 (match_operand:FMAMODE 1 "nonimmediate_operand")
1729 (match_operand:FMAMODE 2 "nonimmediate_operand")
1730 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1731 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1733 (define_expand "fms<mode>4"
1734 [(set (match_operand:FMAMODE 0 "register_operand")
1736 (match_operand:FMAMODE 1 "nonimmediate_operand")
1737 (match_operand:FMAMODE 2 "nonimmediate_operand")
1738 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1739 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1741 (define_expand "fnma<mode>4"
1742 [(set (match_operand:FMAMODE 0 "register_operand")
1744 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1745 (match_operand:FMAMODE 2 "nonimmediate_operand")
1746 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1747 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1749 (define_expand "fnms<mode>4"
1750 [(set (match_operand:FMAMODE 0 "register_operand")
1752 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1753 (match_operand:FMAMODE 2 "nonimmediate_operand")
1754 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1755 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1757 ;; The builtins for intrinsics are not constrained by SSE math being enabled.
1758 (define_expand "fma4i_fmadd_<mode>"
1759 [(set (match_operand:FMAMODE 0 "register_operand")
1761 (match_operand:FMAMODE 1 "nonimmediate_operand")
1762 (match_operand:FMAMODE 2 "nonimmediate_operand")
1763 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1764 "TARGET_FMA || TARGET_FMA4")
1766 (define_insn "*fma_fmadd_<mode>"
1767 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1769 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
1770 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1771 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1772 "TARGET_FMA || TARGET_FMA4"
1774 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1775 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1776 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1777 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1778 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1779 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1780 (set_attr "type" "ssemuladd")
1781 (set_attr "mode" "<MODE>")])
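;; The FMA3 132/213/231 suffixes encode which of the three inputs shares the
;; destination register and which one may be a memory operand; the
;; four-operand FMA4 encoding has a separate destination and needs no such
;; numbered variants.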
1783 (define_insn "*fma_fmsub_<mode>"
1784 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1786 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
1787 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1789 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1790 "TARGET_FMA || TARGET_FMA4"
1792 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1793 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1794 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1795 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1796 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1797 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1798 (set_attr "type" "ssemuladd")
1799 (set_attr "mode" "<MODE>")])
1801 (define_insn "*fma_fnmadd_<mode>"
1802 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1805 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1806 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1807 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1808 "TARGET_FMA || TARGET_FMA4"
1810 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1811 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1812 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1813 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1814 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1815 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1816 (set_attr "type" "ssemuladd")
1817 (set_attr "mode" "<MODE>")])
1819 (define_insn "*fma_fnmsub_<mode>"
1820 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1823 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1824 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1826 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1827 "TARGET_FMA || TARGET_FMA4"
1829 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1830 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1831 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1832 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1833 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1834 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1835 (set_attr "type" "ssemuladd")
1836 (set_attr "mode" "<MODE>")])
1838 ;; FMA parallel floating point multiply addsub and subadd operations.
1840 ;; It would be possible to represent these without the UNSPEC as
1843 ;; (fma op1 op2 op3)
1844 ;; (fma op1 op2 (neg op3))
1847 ;; But this doesn't seem useful in practice.
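;; (vfmaddsub adds the third operand in the odd-numbered elements and
;; subtracts it in the even-numbered ones, matching addsubps/addsubpd;
;; vfmsubadd is the other way around.)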
1849 (define_expand "fmaddsub_<mode>"
1850 [(set (match_operand:VF 0 "register_operand")
1852 [(match_operand:VF 1 "nonimmediate_operand")
1853 (match_operand:VF 2 "nonimmediate_operand")
1854 (match_operand:VF 3 "nonimmediate_operand")]
1856 "TARGET_FMA || TARGET_FMA4")
1858 (define_insn "*fma_fmaddsub_<mode>"
1859 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1861 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
1862 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
1863 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
1865 "TARGET_FMA || TARGET_FMA4"
1867 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1868 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1869 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1870 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1871 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1872 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1873 (set_attr "type" "ssemuladd")
1874 (set_attr "mode" "<MODE>")])
1876 (define_insn "*fma_fmsubadd_<mode>"
1877 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1879 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
1880 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
1882 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
1884 "TARGET_FMA || TARGET_FMA4"
1886 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1887 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1888 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1889 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1890 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1891 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1892 (set_attr "type" "ssemuladd")
1893 (set_attr "mode" "<MODE>")])
1895 ;; FMA3 floating point scalar intrinsics. These merge the result with
1896 ;; the high-order elements of the destination register.
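;; I.e. only element 0 of the destination receives op1 * op2 + op3; the
;; remaining elements are copied unchanged from operand 1.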
1898 (define_expand "fmai_vmfmadd_<mode>"
1899 [(set (match_operand:VF_128 0 "register_operand")
1902 (match_operand:VF_128 1 "nonimmediate_operand")
1903 (match_operand:VF_128 2 "nonimmediate_operand")
1904 (match_operand:VF_128 3 "nonimmediate_operand"))
1909 (define_insn "*fmai_fmadd_<mode>"
1910 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1913 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
1914 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
1915 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
1920 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1921 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1922 [(set_attr "type" "ssemuladd")
1923 (set_attr "mode" "<MODE>")])
1925 (define_insn "*fmai_fmsub_<mode>"
1926 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1929 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
1930 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
1932 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
1937 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1938 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1939 [(set_attr "type" "ssemuladd")
1940 (set_attr "mode" "<MODE>")])
1942 (define_insn "*fmai_fnmadd_<mode>"
1943 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1947 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
1948 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
1949 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
1954 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1955 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1956 [(set_attr "type" "ssemuladd")
1957 (set_attr "mode" "<MODE>")])
1959 (define_insn "*fmai_fnmsub_<mode>"
1960 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1964 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
1965 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
1967 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
1972 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1973 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1974 [(set_attr "type" "ssemuladd")
1975 (set_attr "mode" "<MODE>")])
1977 ;; FMA4 floating point scalar intrinsics. These write the
1978 ;; entire destination register, with the high-order elements zeroed.
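;; I.e. element 0 of the destination receives the fma result and all other
;; elements are cleared (the patterns below merge with a constant-zero
;; vector).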
1980 (define_expand "fma4i_vmfmadd_<mode>"
1981 [(set (match_operand:VF_128 0 "register_operand")
1984 (match_operand:VF_128 1 "nonimmediate_operand")
1985 (match_operand:VF_128 2 "nonimmediate_operand")
1986 (match_operand:VF_128 3 "nonimmediate_operand"))
1991 operands[4] = CONST0_RTX (<MODE>mode);
1994 (define_insn "*fma4i_vmfmadd_<mode>"
1995 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1998 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1999 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2000 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2001 (match_operand:VF_128 4 "const0_operand" "")
2004 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2005 [(set_attr "type" "ssemuladd")
2006 (set_attr "mode" "<MODE>")])
2008 (define_insn "*fma4i_vmfmsub_<mode>"
2009 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2012 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2013 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2015 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2016 (match_operand:VF_128 4 "const0_operand" "")
2019 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2020 [(set_attr "type" "ssemuladd")
2021 (set_attr "mode" "<MODE>")])
2023 (define_insn "*fma4i_vmfnmadd_<mode>"
2024 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2028 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2029 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2030 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2031 (match_operand:VF_128 4 "const0_operand" "")
2034 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2035 [(set_attr "type" "ssemuladd")
2036 (set_attr "mode" "<MODE>")])
2038 (define_insn "*fma4i_vmfnmsub_<mode>"
2039 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2043 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2044 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2046 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2047 (match_operand:VF_128 4 "const0_operand" "")
2050 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2051 [(set_attr "type" "ssemuladd")
2052 (set_attr "mode" "<MODE>")])
2054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2056 ;; Parallel single-precision floating point conversion operations
2058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2060 (define_insn "sse_cvtpi2ps"
2061 [(set (match_operand:V4SF 0 "register_operand" "=x")
2064 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2065 (match_operand:V4SF 1 "register_operand" "0")
2068 "cvtpi2ps\t{%2, %0|%0, %2}"
2069 [(set_attr "type" "ssecvt")
2070 (set_attr "mode" "V4SF")])
2072 (define_insn "sse_cvtps2pi"
2073 [(set (match_operand:V2SI 0 "register_operand" "=y")
2075 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2077 (parallel [(const_int 0) (const_int 1)])))]
2079 "cvtps2pi\t{%1, %0|%0, %1}"
2080 [(set_attr "type" "ssecvt")
2081 (set_attr "unit" "mmx")
2082 (set_attr "mode" "DI")])
2084 (define_insn "sse_cvttps2pi"
2085 [(set (match_operand:V2SI 0 "register_operand" "=y")
2087 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2088 (parallel [(const_int 0) (const_int 1)])))]
2090 "cvttps2pi\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "ssecvt")
2092 (set_attr "unit" "mmx")
2093 (set_attr "prefix_rep" "0")
2094 (set_attr "mode" "SF")])
2096 (define_insn "sse_cvtsi2ss"
2097 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2100 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2101 (match_operand:V4SF 1 "register_operand" "0,0,x")
2105 cvtsi2ss\t{%2, %0|%0, %2}
2106 cvtsi2ss\t{%2, %0|%0, %2}
2107 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2108 [(set_attr "isa" "noavx,noavx,avx")
2109 (set_attr "type" "sseicvt")
2110 (set_attr "athlon_decode" "vector,double,*")
2111 (set_attr "amdfam10_decode" "vector,double,*")
2112 (set_attr "bdver1_decode" "double,direct,*")
2113 (set_attr "prefix" "orig,orig,vex")
2114 (set_attr "mode" "SF")])
2116 (define_insn "sse_cvtsi2ssq"
2117 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2120 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2121 (match_operand:V4SF 1 "register_operand" "0,0,x")
2123 "TARGET_SSE && TARGET_64BIT"
2125 cvtsi2ssq\t{%2, %0|%0, %2}
2126 cvtsi2ssq\t{%2, %0|%0, %2}
2127 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2128 [(set_attr "isa" "noavx,noavx,avx")
2129 (set_attr "type" "sseicvt")
2130 (set_attr "athlon_decode" "vector,double,*")
2131 (set_attr "amdfam10_decode" "vector,double,*")
2132 (set_attr "bdver1_decode" "double,direct,*")
2133 (set_attr "length_vex" "*,*,4")
2134 (set_attr "prefix_rex" "1,1,*")
2135 (set_attr "prefix" "orig,orig,vex")
2136 (set_attr "mode" "SF")])
2138 (define_insn "sse_cvtss2si"
2139 [(set (match_operand:SI 0 "register_operand" "=r,r")
2142 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2143 (parallel [(const_int 0)]))]
2144 UNSPEC_FIX_NOTRUNC))]
2146 "%vcvtss2si\t{%1, %0|%0, %1}"
2147 [(set_attr "type" "sseicvt")
2148 (set_attr "athlon_decode" "double,vector")
2149 (set_attr "bdver1_decode" "double,double")
2150 (set_attr "prefix_rep" "1")
2151 (set_attr "prefix" "maybe_vex")
2152 (set_attr "mode" "SI")])
2154 (define_insn "sse_cvtss2si_2"
2155 [(set (match_operand:SI 0 "register_operand" "=r,r")
2156 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2157 UNSPEC_FIX_NOTRUNC))]
2159 "%vcvtss2si\t{%1, %0|%0, %1}"
2160 [(set_attr "type" "sseicvt")
2161 (set_attr "athlon_decode" "double,vector")
2162 (set_attr "amdfam10_decode" "double,double")
2163 (set_attr "bdver1_decode" "double,double")
2164 (set_attr "prefix_rep" "1")
2165 (set_attr "prefix" "maybe_vex")
2166 (set_attr "mode" "SI")])
2168 (define_insn "sse_cvtss2siq"
2169 [(set (match_operand:DI 0 "register_operand" "=r,r")
2172 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2173 (parallel [(const_int 0)]))]
2174 UNSPEC_FIX_NOTRUNC))]
2175 "TARGET_SSE && TARGET_64BIT"
2176 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2177 [(set_attr "type" "sseicvt")
2178 (set_attr "athlon_decode" "double,vector")
2179 (set_attr "bdver1_decode" "double,double")
2180 (set_attr "prefix_rep" "1")
2181 (set_attr "prefix" "maybe_vex")
2182 (set_attr "mode" "DI")])
2184 (define_insn "sse_cvtss2siq_2"
2185 [(set (match_operand:DI 0 "register_operand" "=r,r")
2186 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2187 UNSPEC_FIX_NOTRUNC))]
2188 "TARGET_SSE && TARGET_64BIT"
2189 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2190 [(set_attr "type" "sseicvt")
2191 (set_attr "athlon_decode" "double,vector")
2192 (set_attr "amdfam10_decode" "double,double")
2193 (set_attr "bdver1_decode" "double,double")
2194 (set_attr "prefix_rep" "1")
2195 (set_attr "prefix" "maybe_vex")
2196 (set_attr "mode" "DI")])
2198 (define_insn "sse_cvttss2si"
2199 [(set (match_operand:SI 0 "register_operand" "=r,r")
2202 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2203 (parallel [(const_int 0)]))))]
2205 "%vcvttss2si\t{%1, %0|%0, %1}"
2206 [(set_attr "type" "sseicvt")
2207 (set_attr "athlon_decode" "double,vector")
2208 (set_attr "amdfam10_decode" "double,double")
2209 (set_attr "bdver1_decode" "double,double")
2210 (set_attr "prefix_rep" "1")
2211 (set_attr "prefix" "maybe_vex")
2212 (set_attr "mode" "SI")])
2214 (define_insn "sse_cvttss2siq"
2215 [(set (match_operand:DI 0 "register_operand" "=r,r")
2218 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2219 (parallel [(const_int 0)]))))]
2220 "TARGET_SSE && TARGET_64BIT"
2221 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2222 [(set_attr "type" "sseicvt")
2223 (set_attr "athlon_decode" "double,vector")
2224 (set_attr "amdfam10_decode" "double,double")
2225 (set_attr "bdver1_decode" "double,double")
2226 (set_attr "prefix_rep" "1")
2227 (set_attr "prefix" "maybe_vex")
2228 (set_attr "mode" "DI")])
2230 (define_insn "float<sseintvecmodelower><mode>2"
2231 [(set (match_operand:VF1 0 "register_operand" "=x")
2233 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2235 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2236 [(set_attr "type" "ssecvt")
2237 (set_attr "prefix" "maybe_vex")
2238 (set_attr "mode" "<sseinsnmode>")])
2240 (define_expand "floatuns<sseintvecmodelower><mode>2"
2241 [(match_operand:VF1 0 "register_operand" "")
2242 (match_operand:<sseintvecmode> 1 "register_operand" "")]
2243 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2245 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
2249 (define_insn "avx_cvtps2dq256"
2250 [(set (match_operand:V8SI 0 "register_operand" "=x")
2251 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2252 UNSPEC_FIX_NOTRUNC))]
2254 "vcvtps2dq\t{%1, %0|%0, %1}"
2255 [(set_attr "type" "ssecvt")
2256 (set_attr "prefix" "vex")
2257 (set_attr "mode" "OI")])
2259 (define_insn "sse2_cvtps2dq"
2260 [(set (match_operand:V4SI 0 "register_operand" "=x")
2261 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2262 UNSPEC_FIX_NOTRUNC))]
2264 "%vcvtps2dq\t{%1, %0|%0, %1}"
2265 [(set_attr "type" "ssecvt")
2266 (set (attr "prefix_data16")
2268 (match_test "TARGET_AVX")
2270 (const_string "1")))
2271 (set_attr "prefix" "maybe_vex")
2272 (set_attr "mode" "TI")])
2274 (define_insn "fix_truncv8sfv8si2"
2275 [(set (match_operand:V8SI 0 "register_operand" "=x")
2276 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2278 "vcvttps2dq\t{%1, %0|%0, %1}"
2279 [(set_attr "type" "ssecvt")
2280 (set_attr "prefix" "vex")
2281 (set_attr "mode" "OI")])
2283 (define_insn "fix_truncv4sfv4si2"
2284 [(set (match_operand:V4SI 0 "register_operand" "=x")
2285 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2287 "%vcvttps2dq\t{%1, %0|%0, %1}"
2288 [(set_attr "type" "ssecvt")
2289 (set (attr "prefix_rep")
2291 (match_test "TARGET_AVX")
2293 (const_string "1")))
2294 (set (attr "prefix_data16")
2296 (match_test "TARGET_AVX")
2298 (const_string "0")))
2300 (set_attr "prefix" "maybe_vex")
2301 (set_attr "mode" "TI")])
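;; There is no unsigned form of cvttps2dq, so the expander below derives
;; it from the signed truncation: ix86_expand_adjust_ufix_to_sfix_si
;; adjusts the input and returns (through its second argument) an XOR
;; mask, and XORing that mask into the signed result yields the unsigned
;; truncation.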
2303 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2304 [(match_operand:<sseintvecmode> 0 "register_operand" "")
2305 (match_operand:VF1 1 "register_operand" "")]
2309 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2310 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2311 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2312 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2316 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2318 ;; Parallel double-precision floating point conversion operations
2320 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2322 (define_insn "sse2_cvtpi2pd"
2323 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2324 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2326 "cvtpi2pd\t{%1, %0|%0, %1}"
2327 [(set_attr "type" "ssecvt")
2328 (set_attr "unit" "mmx,*")
2329 (set_attr "prefix_data16" "1,*")
2330 (set_attr "mode" "V2DF")])
2332 (define_insn "sse2_cvtpd2pi"
2333 [(set (match_operand:V2SI 0 "register_operand" "=y")
2334 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2335 UNSPEC_FIX_NOTRUNC))]
2337 "cvtpd2pi\t{%1, %0|%0, %1}"
2338 [(set_attr "type" "ssecvt")
2339 (set_attr "unit" "mmx")
2340 (set_attr "bdver1_decode" "double")
2341 (set_attr "prefix_data16" "1")
2342 (set_attr "mode" "DI")])
2344 (define_insn "sse2_cvttpd2pi"
2345 [(set (match_operand:V2SI 0 "register_operand" "=y")
2346 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2348 "cvttpd2pi\t{%1, %0|%0, %1}"
2349 [(set_attr "type" "ssecvt")
2350 (set_attr "unit" "mmx")
2351 (set_attr "bdver1_decode" "double")
2352 (set_attr "prefix_data16" "1")
2353 (set_attr "mode" "TI")])
2355 (define_insn "sse2_cvtsi2sd"
2356 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2359 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2360 (match_operand:V2DF 1 "register_operand" "0,0,x")
2364 cvtsi2sd\t{%2, %0|%0, %2}
2365 cvtsi2sd\t{%2, %0|%0, %2}
2366 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2367 [(set_attr "isa" "noavx,noavx,avx")
2368 (set_attr "type" "sseicvt")
2369 (set_attr "athlon_decode" "double,direct,*")
2370 (set_attr "amdfam10_decode" "vector,double,*")
2371 (set_attr "bdver1_decode" "double,direct,*")
2372 (set_attr "prefix" "orig,orig,vex")
2373 (set_attr "mode" "DF")])
2375 (define_insn "sse2_cvtsi2sdq"
2376 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2379 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2380 (match_operand:V2DF 1 "register_operand" "0,0,x")
2382 "TARGET_SSE2 && TARGET_64BIT"
2384 cvtsi2sdq\t{%2, %0|%0, %2}
2385 cvtsi2sdq\t{%2, %0|%0, %2}
2386 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2387 [(set_attr "isa" "noavx,noavx,avx")
2388 (set_attr "type" "sseicvt")
2389 (set_attr "athlon_decode" "double,direct,*")
2390 (set_attr "amdfam10_decode" "vector,double,*")
2391 (set_attr "bdver1_decode" "double,direct,*")
2392 (set_attr "length_vex" "*,*,4")
2393 (set_attr "prefix_rex" "1,1,*")
2394 (set_attr "prefix" "orig,orig,vex")
2395 (set_attr "mode" "DF")])
2397 (define_insn "sse2_cvtsd2si"
2398 [(set (match_operand:SI 0 "register_operand" "=r,r")
2401 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2402 (parallel [(const_int 0)]))]
2403 UNSPEC_FIX_NOTRUNC))]
2405 "%vcvtsd2si\t{%1, %0|%0, %1}"
2406 [(set_attr "type" "sseicvt")
2407 (set_attr "athlon_decode" "double,vector")
2408 (set_attr "bdver1_decode" "double,double")
2409 (set_attr "prefix_rep" "1")
2410 (set_attr "prefix" "maybe_vex")
2411 (set_attr "mode" "SI")])
2413 (define_insn "sse2_cvtsd2si_2"
2414 [(set (match_operand:SI 0 "register_operand" "=r,r")
2415 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2416 UNSPEC_FIX_NOTRUNC))]
2418 "%vcvtsd2si\t{%1, %0|%0, %1}"
2419 [(set_attr "type" "sseicvt")
2420 (set_attr "athlon_decode" "double,vector")
2421 (set_attr "amdfam10_decode" "double,double")
2422 (set_attr "bdver1_decode" "double,double")
2423 (set_attr "prefix_rep" "1")
2424 (set_attr "prefix" "maybe_vex")
2425 (set_attr "mode" "SI")])
2427 (define_insn "sse2_cvtsd2siq"
2428 [(set (match_operand:DI 0 "register_operand" "=r,r")
2431 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2432 (parallel [(const_int 0)]))]
2433 UNSPEC_FIX_NOTRUNC))]
2434 "TARGET_SSE2 && TARGET_64BIT"
2435 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2436 [(set_attr "type" "sseicvt")
2437 (set_attr "athlon_decode" "double,vector")
2438 (set_attr "bdver1_decode" "double,double")
2439 (set_attr "prefix_rep" "1")
2440 (set_attr "prefix" "maybe_vex")
2441 (set_attr "mode" "DI")])
2443 (define_insn "sse2_cvtsd2siq_2"
2444 [(set (match_operand:DI 0 "register_operand" "=r,r")
2445 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2446 UNSPEC_FIX_NOTRUNC))]
2447 "TARGET_SSE2 && TARGET_64BIT"
2448 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2449 [(set_attr "type" "sseicvt")
2450 (set_attr "athlon_decode" "double,vector")
2451 (set_attr "amdfam10_decode" "double,double")
2452 (set_attr "bdver1_decode" "double,double")
2453 (set_attr "prefix_rep" "1")
2454 (set_attr "prefix" "maybe_vex")
2455 (set_attr "mode" "DI")])
2457 (define_insn "sse2_cvttsd2si"
2458 [(set (match_operand:SI 0 "register_operand" "=r,r")
2461 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2462 (parallel [(const_int 0)]))))]
2464 "%vcvttsd2si\t{%1, %0|%0, %1}"
2465 [(set_attr "type" "sseicvt")
2466 (set_attr "athlon_decode" "double,vector")
2467 (set_attr "amdfam10_decode" "double,double")
2468 (set_attr "bdver1_decode" "double,double")
2469 (set_attr "prefix_rep" "1")
2470 (set_attr "prefix" "maybe_vex")
2471 (set_attr "mode" "SI")])
2473 (define_insn "sse2_cvttsd2siq"
2474 [(set (match_operand:DI 0 "register_operand" "=r,r")
2477 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2478 (parallel [(const_int 0)]))))]
2479 "TARGET_SSE2 && TARGET_64BIT"
2480 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2481 [(set_attr "type" "sseicvt")
2482 (set_attr "athlon_decode" "double,vector")
2483 (set_attr "amdfam10_decode" "double,double")
2484 (set_attr "bdver1_decode" "double,double")
2485 (set_attr "prefix_rep" "1")
2486 (set_attr "prefix" "maybe_vex")
2487 (set_attr "mode" "DI")])
2489 (define_insn "floatv4siv4df2"
2490 [(set (match_operand:V4DF 0 "register_operand" "=x")
2491 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2493 "vcvtdq2pd\t{%1, %0|%0, %1}"
2494 [(set_attr "type" "ssecvt")
2495 (set_attr "prefix" "vex")
2496 (set_attr "mode" "V4DF")])
2498 (define_insn "avx_cvtdq2pd256_2"
2499 [(set (match_operand:V4DF 0 "register_operand" "=x")
2502 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2503 (parallel [(const_int 0) (const_int 1)
2504 (const_int 2) (const_int 3)]))))]
2506 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2507 [(set_attr "type" "ssecvt")
2508 (set_attr "prefix" "vex")
2509 (set_attr "mode" "V4DF")])
2511 (define_insn "sse2_cvtdq2pd"
2512 [(set (match_operand:V2DF 0 "register_operand" "=x")
2515 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2516 (parallel [(const_int 0) (const_int 1)]))))]
2518 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2519 [(set_attr "type" "ssecvt")
2520 (set_attr "prefix" "maybe_vex")
2521 (set_attr "mode" "V2DF")])
2523 (define_insn "avx_cvtpd2dq256"
2524 [(set (match_operand:V4SI 0 "register_operand" "=x")
2525 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2526 UNSPEC_FIX_NOTRUNC))]
2528 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2529 [(set_attr "type" "ssecvt")
2530 (set_attr "prefix" "vex")
2531 (set_attr "mode" "OI")])
2533 (define_expand "avx_cvtpd2dq256_2"
2534 [(set (match_operand:V8SI 0 "register_operand" "")
2536 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2540 "operands[2] = CONST0_RTX (V4SImode);")
2542 (define_insn "*avx_cvtpd2dq256_2"
2543 [(set (match_operand:V8SI 0 "register_operand" "=x")
2545 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2547 (match_operand:V4SI 2 "const0_operand" "")))]
2549 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2550 [(set_attr "type" "ssecvt")
2551 (set_attr "prefix" "vex")
2552 (set_attr "mode" "OI")])
2554 (define_expand "sse2_cvtpd2dq"
2555 [(set (match_operand:V4SI 0 "register_operand" "")
2557 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2561 "operands[2] = CONST0_RTX (V2SImode);")
2563 (define_insn "*sse2_cvtpd2dq"
2564 [(set (match_operand:V4SI 0 "register_operand" "=x")
2566 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2568 (match_operand:V2SI 2 "const0_operand" "")))]
2572 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2574 return "cvtpd2dq\t{%1, %0|%0, %1}";
2576 [(set_attr "type" "ssecvt")
2577 (set_attr "prefix_rep" "1")
2578 (set_attr "prefix_data16" "0")
2579 (set_attr "prefix" "maybe_vex")
2580 (set_attr "mode" "TI")
2581 (set_attr "amdfam10_decode" "double")
2582 (set_attr "athlon_decode" "vector")
2583 (set_attr "bdver1_decode" "double")])
2585 (define_insn "fix_truncv4dfv4si2"
2586 [(set (match_operand:V4SI 0 "register_operand" "=x")
2587 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2589 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2590 [(set_attr "type" "ssecvt")
2591 (set_attr "prefix" "vex")
2592 (set_attr "mode" "OI")])
2594 (define_expand "avx_cvttpd2dq256_2"
2595 [(set (match_operand:V8SI 0 "register_operand" "")
2597 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2600 "operands[2] = CONST0_RTX (V4SImode);")
2602 (define_insn "*avx_cvttpd2dq256_2"
2603 [(set (match_operand:V8SI 0 "register_operand" "=x")
2605 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2606 (match_operand:V4SI 2 "const0_operand" "")))]
2608 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2609 [(set_attr "type" "ssecvt")
2610 (set_attr "prefix" "vex")
2611 (set_attr "mode" "OI")])
2613 (define_expand "sse2_cvttpd2dq"
2614 [(set (match_operand:V4SI 0 "register_operand" "")
2616 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2619 "operands[2] = CONST0_RTX (V2SImode);")
2621 (define_insn "*sse2_cvttpd2dq"
2622 [(set (match_operand:V4SI 0 "register_operand" "=x")
2624 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2625 (match_operand:V2SI 2 "const0_operand" "")))]
2629 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2631 return "cvttpd2dq\t{%1, %0|%0, %1}";
2633 [(set_attr "type" "ssecvt")
2634 (set_attr "amdfam10_decode" "double")
2635 (set_attr "athlon_decode" "vector")
2636 (set_attr "bdver1_decode" "double")
2637 (set_attr "prefix" "maybe_vex")
2638 (set_attr "mode" "TI")])
2640 (define_insn "sse2_cvtsd2ss"
2641 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2644 (float_truncate:V2SF
2645 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2646 (match_operand:V4SF 1 "register_operand" "0,0,x")
2650 cvtsd2ss\t{%2, %0|%0, %2}
2651 cvtsd2ss\t{%2, %0|%0, %2}
2652 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2653 [(set_attr "isa" "noavx,noavx,avx")
2654 (set_attr "type" "ssecvt")
2655 (set_attr "athlon_decode" "vector,double,*")
2656 (set_attr "amdfam10_decode" "vector,double,*")
2657 (set_attr "bdver1_decode" "direct,direct,*")
2658 (set_attr "prefix" "orig,orig,vex")
2659 (set_attr "mode" "SF")])
2661 (define_insn "sse2_cvtss2sd"
2662 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2666 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2667 (parallel [(const_int 0) (const_int 1)])))
2668 (match_operand:V2DF 1 "register_operand" "0,0,x")
2672 cvtss2sd\t{%2, %0|%0, %2}
2673 cvtss2sd\t{%2, %0|%0, %2}
2674 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2675 [(set_attr "isa" "noavx,noavx,avx")
2676 (set_attr "type" "ssecvt")
2677 (set_attr "amdfam10_decode" "vector,double,*")
2678 (set_attr "athlon_decode" "direct,direct,*")
2679 (set_attr "bdver1_decode" "direct,direct,*")
2680 (set_attr "prefix" "orig,orig,vex")
2681 (set_attr "mode" "DF")])
2683 (define_insn "avx_cvtpd2ps256"
2684 [(set (match_operand:V4SF 0 "register_operand" "=x")
2685 (float_truncate:V4SF
2686 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2688 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2689 [(set_attr "type" "ssecvt")
2690 (set_attr "prefix" "vex")
2691 (set_attr "mode" "V4SF")])
2693 (define_expand "sse2_cvtpd2ps"
2694 [(set (match_operand:V4SF 0 "register_operand" "")
2696 (float_truncate:V2SF
2697 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2700 "operands[2] = CONST0_RTX (V2SFmode);")
2702 (define_insn "*sse2_cvtpd2ps"
2703 [(set (match_operand:V4SF 0 "register_operand" "=x")
2705 (float_truncate:V2SF
2706 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2707 (match_operand:V2SF 2 "const0_operand" "")))]
2711 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2713 return "cvtpd2ps\t{%1, %0|%0, %1}";
2715 [(set_attr "type" "ssecvt")
2716 (set_attr "amdfam10_decode" "double")
2717 (set_attr "athlon_decode" "vector")
2718 (set_attr "bdver1_decode" "double")
2719 (set_attr "prefix_data16" "1")
2720 (set_attr "prefix" "maybe_vex")
2721 (set_attr "mode" "V4SF")])
2723 (define_insn "avx_cvtps2pd256"
2724 [(set (match_operand:V4DF 0 "register_operand" "=x")
2726 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2728 "vcvtps2pd\t{%1, %0|%0, %1}"
2729 [(set_attr "type" "ssecvt")
2730 (set_attr "prefix" "vex")
2731 (set_attr "mode" "V4DF")])
2733 (define_insn "*avx_cvtps2pd256_2"
2734 [(set (match_operand:V4DF 0 "register_operand" "=x")
2737 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2738 (parallel [(const_int 0) (const_int 1)
2739 (const_int 2) (const_int 3)]))))]
2741 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2742 [(set_attr "type" "ssecvt")
2743 (set_attr "prefix" "vex")
2744 (set_attr "mode" "V4DF")])
2746 (define_insn "sse2_cvtps2pd"
2747 [(set (match_operand:V2DF 0 "register_operand" "=x")
2750 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2751 (parallel [(const_int 0) (const_int 1)]))))]
2753 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2754 [(set_attr "type" "ssecvt")
2755 (set_attr "amdfam10_decode" "direct")
2756 (set_attr "athlon_decode" "double")
2757 (set_attr "bdver1_decode" "double")
2758 (set_attr "prefix_data16" "0")
2759 (set_attr "prefix" "maybe_vex")
2760 (set_attr "mode" "V2DF")])
2762 (define_expand "vec_unpacks_hi_v4sf"
2767 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2768 (parallel [(const_int 6) (const_int 7)
2769 (const_int 2) (const_int 3)])))
2770 (set (match_operand:V2DF 0 "register_operand" "")
2774 (parallel [(const_int 0) (const_int 1)]))))]
2776 "operands[2] = gen_reg_rtx (V4SFmode);")
2778 (define_expand "vec_unpacks_hi_v8sf"
2781 (match_operand:V8SF 1 "nonimmediate_operand" "")
2782 (parallel [(const_int 4) (const_int 5)
2783 (const_int 6) (const_int 7)])))
2784 (set (match_operand:V4DF 0 "register_operand" "")
2788 "operands[2] = gen_reg_rtx (V4SFmode);")
2790 (define_expand "vec_unpacks_lo_v4sf"
2791 [(set (match_operand:V2DF 0 "register_operand" "")
2794 (match_operand:V4SF 1 "nonimmediate_operand" "")
2795 (parallel [(const_int 0) (const_int 1)]))))]
2798 (define_expand "vec_unpacks_lo_v8sf"
2799 [(set (match_operand:V4DF 0 "register_operand" "")
2802 (match_operand:V8SF 1 "nonimmediate_operand" "")
2803 (parallel [(const_int 0) (const_int 1)
2804 (const_int 2) (const_int 3)]))))]
2807 (define_mode_attr sseunpackfltmode
2808 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
2810 (define_expand "vec_unpacks_float_hi_<mode>"
2811 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2812 (match_operand:VI2_AVX2 1 "register_operand" "")]
2815 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2817 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2818 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2819 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2823 (define_expand "vec_unpacks_float_lo_<mode>"
2824 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2825 (match_operand:VI2_AVX2 1 "register_operand" "")]
2828 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2830 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2831 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2832 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2836 (define_expand "vec_unpacku_float_hi_<mode>"
2837 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2838 (match_operand:VI2_AVX2 1 "register_operand" "")]
2841 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2843 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2844 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2845 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2849 (define_expand "vec_unpacku_float_lo_<mode>"
2850 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2851 (match_operand:VI2_AVX2 1 "register_operand" "")]
2854 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2856 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2857 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2858 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2862 (define_expand "vec_unpacks_float_hi_v4si"
2865 (match_operand:V4SI 1 "nonimmediate_operand" "")
2866 (parallel [(const_int 2) (const_int 3)
2867 (const_int 2) (const_int 3)])))
2868 (set (match_operand:V2DF 0 "register_operand" "")
2872 (parallel [(const_int 0) (const_int 1)]))))]
2874 "operands[2] = gen_reg_rtx (V4SImode);")
2876 (define_expand "vec_unpacks_float_lo_v4si"
2877 [(set (match_operand:V2DF 0 "register_operand" "")
2880 (match_operand:V4SI 1 "nonimmediate_operand" "")
2881 (parallel [(const_int 0) (const_int 1)]))))]
2884 (define_expand "vec_unpacks_float_hi_v8si"
2887 (match_operand:V8SI 1 "nonimmediate_operand" "")
2888 (parallel [(const_int 4) (const_int 5)
2889 (const_int 6) (const_int 7)])))
2890 (set (match_operand:V4DF 0 "register_operand" "")
2894 "operands[2] = gen_reg_rtx (V4SImode);")
2896 (define_expand "vec_unpacks_float_lo_v8si"
2897 [(set (match_operand:V4DF 0 "register_operand" "")
2900 (match_operand:V8SI 1 "nonimmediate_operand" "")
2901 (parallel [(const_int 0) (const_int 1)
2902 (const_int 2) (const_int 3)]))))]
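;; Unsigned SImode -> DFmode conversion is likewise synthesised from the
;; signed cvtdq2pd: inputs with the sign bit set convert 2^32 too low,
;; so the expanders below compare the converted value against zero and,
;; where the compare is true, add back the 2^32 constant built with
;; real_ldexp.  E.g. 0xffffffff converts to -1.0 as signed; adding 2^32
;; gives the expected 4294967295.0.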
2905 (define_expand "vec_unpacku_float_hi_v4si"
2908 (match_operand:V4SI 1 "nonimmediate_operand" "")
2909 (parallel [(const_int 2) (const_int 3)
2910 (const_int 2) (const_int 3)])))
2915 (parallel [(const_int 0) (const_int 1)]))))
2917 (lt:V2DF (match_dup 6) (match_dup 3)))
2919 (and:V2DF (match_dup 7) (match_dup 4)))
2920 (set (match_operand:V2DF 0 "register_operand" "")
2921 (plus:V2DF (match_dup 6) (match_dup 8)))]
2924 REAL_VALUE_TYPE TWO32r;
2928 real_ldexp (&TWO32r, &dconst1, 32);
2929 x = const_double_from_real_value (TWO32r, DFmode);
2931 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2932 operands[4] = force_reg (V2DFmode,
2933 ix86_build_const_vector (V2DFmode, 1, x));
2935 operands[5] = gen_reg_rtx (V4SImode);
2937 for (i = 6; i < 9; i++)
2938 operands[i] = gen_reg_rtx (V2DFmode);
2941 (define_expand "vec_unpacku_float_lo_v4si"
2945 (match_operand:V4SI 1 "nonimmediate_operand" "")
2946 (parallel [(const_int 0) (const_int 1)]))))
2948 (lt:V2DF (match_dup 5) (match_dup 3)))
2950 (and:V2DF (match_dup 6) (match_dup 4)))
2951 (set (match_operand:V2DF 0 "register_operand" "")
2952 (plus:V2DF (match_dup 5) (match_dup 7)))]
2955 REAL_VALUE_TYPE TWO32r;
2959 real_ldexp (&TWO32r, &dconst1, 32);
2960 x = const_double_from_real_value (TWO32r, DFmode);
2962 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2963 operands[4] = force_reg (V2DFmode,
2964 ix86_build_const_vector (V2DFmode, 1, x));
2966 for (i = 5; i < 8; i++)
2967 operands[i] = gen_reg_rtx (V2DFmode);
2970 (define_expand "vec_unpacku_float_hi_v8si"
2971 [(match_operand:V4DF 0 "register_operand" "")
2972 (match_operand:V8SI 1 "register_operand" "")]
2975 REAL_VALUE_TYPE TWO32r;
2979 real_ldexp (&TWO32r, &dconst1, 32);
2980 x = const_double_from_real_value (TWO32r, DFmode);
2982 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2983 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2984 tmp[5] = gen_reg_rtx (V4SImode);
2986 for (i = 2; i < 5; i++)
2987 tmp[i] = gen_reg_rtx (V4DFmode);
2988 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
2989 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
2990 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2991 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2992 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2993 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
2997 (define_expand "vec_unpacku_float_lo_v8si"
2998 [(match_operand:V4DF 0 "register_operand" "")
2999 (match_operand:V8SI 1 "nonimmediate_operand" "")]
3002 REAL_VALUE_TYPE TWO32r;
3006 real_ldexp (&TWO32r, &dconst1, 32);
3007 x = const_double_from_real_value (TWO32r, DFmode);
3009 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3010 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3012 for (i = 2; i < 5; i++)
3013 tmp[i] = gen_reg_rtx (V4DFmode);
3014 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3015 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3016 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3017 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3018 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
3022 (define_expand "vec_pack_trunc_v4df"
3024 (float_truncate:V4SF
3025 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3027 (float_truncate:V4SF
3028 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3029 (set (match_operand:V8SF 0 "register_operand" "")
3035 operands[3] = gen_reg_rtx (V4SFmode);
3036 operands[4] = gen_reg_rtx (V4SFmode);
3039 (define_expand "vec_pack_trunc_v2df"
3040 [(match_operand:V4SF 0 "register_operand" "")
3041 (match_operand:V2DF 1 "nonimmediate_operand" "")
3042 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3047 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3049 tmp0 = gen_reg_rtx (V4DFmode);
3050 tmp1 = force_reg (V2DFmode, operands[1]);
3052 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3053 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3057 tmp0 = gen_reg_rtx (V4SFmode);
3058 tmp1 = gen_reg_rtx (V4SFmode);
3060 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3061 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3062 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
3067 (define_expand "vec_pack_sfix_trunc_v4df"
3068 [(match_operand:V8SI 0 "register_operand" "")
3069 (match_operand:V4DF 1 "nonimmediate_operand" "")
3070 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3075 r1 = gen_reg_rtx (V4SImode);
3076 r2 = gen_reg_rtx (V4SImode);
3078 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3079 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3080 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
3084 (define_expand "vec_pack_sfix_trunc_v2df"
3085 [(match_operand:V4SI 0 "register_operand" "")
3086 (match_operand:V2DF 1 "nonimmediate_operand" "")
3087 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3092 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3094 tmp0 = gen_reg_rtx (V4DFmode);
3095 tmp1 = force_reg (V2DFmode, operands[1]);
3097 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3098 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3102 tmp0 = gen_reg_rtx (V4SImode);
3103 tmp1 = gen_reg_rtx (V4SImode);
3105 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3106 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3108 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3109 gen_lowpart (V2DImode, tmp0),
3110 gen_lowpart (V2DImode, tmp1)));
3115 (define_mode_attr ssepackfltmode
3116 [(V4DF "V8SI") (V2DF "V4SI")])
3118 (define_expand "vec_pack_ufix_trunc_<mode>"
3119 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
3120 (match_operand:VF2 1 "register_operand" "")
3121 (match_operand:VF2 2 "register_operand" "")]
3125 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3126 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3127 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3128 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3129 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3131 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3132 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3136 tmp[5] = gen_reg_rtx (V8SFmode);
3137 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3138 gen_lowpart (V8SFmode, tmp[3]), 0);
3139 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3141 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3142 operands[0], 0, OPTAB_DIRECT);
3143 if (tmp[6] != operands[0])
3144 emit_move_insn (operands[0], tmp[6]);
3148 (define_expand "vec_pack_sfix_v4df"
3149 [(match_operand:V8SI 0 "register_operand" "")
3150 (match_operand:V4DF 1 "nonimmediate_operand" "")
3151 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3156 r1 = gen_reg_rtx (V4SImode);
3157 r2 = gen_reg_rtx (V4SImode);
3159 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3160 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3161 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
3165 (define_expand "vec_pack_sfix_v2df"
3166 [(match_operand:V4SI 0 "register_operand" "")
3167 (match_operand:V2DF 1 "nonimmediate_operand" "")
3168 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3173 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3175 tmp0 = gen_reg_rtx (V4DFmode);
3176 tmp1 = force_reg (V2DFmode, operands[1]);
3178 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3179 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3183 tmp0 = gen_reg_rtx (V4SImode);
3184 tmp1 = gen_reg_rtx (V4SImode);
3186 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3187 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3189 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3190 gen_lowpart (V2DImode, tmp0),
3191 gen_lowpart (V2DImode, tmp1)));
3196 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3198 ;; Parallel single-precision floating point element swizzling
3200 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3202 (define_expand "sse_movhlps_exp"
3203 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3206 (match_operand:V4SF 1 "nonimmediate_operand" "")
3207 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3208 (parallel [(const_int 6)
3214 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3216 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3218 /* Fix up the destination if needed. */
3219 if (dst != operands[0])
3220 emit_move_insn (operands[0], dst);
3225 (define_insn "sse_movhlps"
3226 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3229 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3230 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3231 (parallel [(const_int 6)
3235 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3237 movhlps\t{%2, %0|%0, %2}
3238 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3239 movlps\t{%H2, %0|%0, %H2}
3240 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3241 %vmovhps\t{%2, %0|%0, %2}"
3242 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3243 (set_attr "type" "ssemov")
3244 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3245 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3247 (define_expand "sse_movlhps_exp"
3248 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3251 (match_operand:V4SF 1 "nonimmediate_operand" "")
3252 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3253 (parallel [(const_int 0)
3259 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3261 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3263 /* Fix up the destination if needed. */
3264 if (dst != operands[0])
3265 emit_move_insn (operands[0], dst);
3270 (define_insn "sse_movlhps"
3271 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3274 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3275 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3276 (parallel [(const_int 0)
3280 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3282 movlhps\t{%2, %0|%0, %2}
3283 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3284 movhps\t{%2, %0|%0, %2}
3285 vmovhps\t{%2, %1, %0|%0, %1, %2}
3286 %vmovlps\t{%2, %H0|%H0, %2}"
3287 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3288 (set_attr "type" "ssemov")
3289 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3290 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3292 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
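;; For example, with 256-bit operands A and B the insn below yields
;;   { A2,B2, A3,B3, A6,B6, A7,B7 }
;; (the high half of each 128-bit lane interleaved), not the cross-lane
;; { A4,B4, A5,B5, A6,B6, A7,B7 } a full-width unpack-high would give;
;; the vec_select indices encode exactly this.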
3293 (define_insn "avx_unpckhps256"
3294 [(set (match_operand:V8SF 0 "register_operand" "=x")
3297 (match_operand:V8SF 1 "register_operand" "x")
3298 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3299 (parallel [(const_int 2) (const_int 10)
3300 (const_int 3) (const_int 11)
3301 (const_int 6) (const_int 14)
3302 (const_int 7) (const_int 15)])))]
3304 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3305 [(set_attr "type" "sselog")
3306 (set_attr "prefix" "vex")
3307 (set_attr "mode" "V8SF")])
3309 (define_expand "vec_interleave_highv8sf"
3313 (match_operand:V8SF 1 "register_operand" "x")
3314 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3315 (parallel [(const_int 0) (const_int 8)
3316 (const_int 1) (const_int 9)
3317 (const_int 4) (const_int 12)
3318 (const_int 5) (const_int 13)])))
3324 (parallel [(const_int 2) (const_int 10)
3325 (const_int 3) (const_int 11)
3326 (const_int 6) (const_int 14)
3327 (const_int 7) (const_int 15)])))
3328 (set (match_operand:V8SF 0 "register_operand" "")
3333 (parallel [(const_int 4) (const_int 5)
3334 (const_int 6) (const_int 7)
3335 (const_int 12) (const_int 13)
3336 (const_int 14) (const_int 15)])))]
3339 operands[3] = gen_reg_rtx (V8SFmode);
3340 operands[4] = gen_reg_rtx (V8SFmode);
3343 (define_insn "vec_interleave_highv4sf"
3344 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3347 (match_operand:V4SF 1 "register_operand" "0,x")
3348 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3349 (parallel [(const_int 2) (const_int 6)
3350 (const_int 3) (const_int 7)])))]
3353 unpckhps\t{%2, %0|%0, %2}
3354 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3355 [(set_attr "isa" "noavx,avx")
3356 (set_attr "type" "sselog")
3357 (set_attr "prefix" "orig,vex")
3358 (set_attr "mode" "V4SF")])
3360 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3361 (define_insn "avx_unpcklps256"
3362 [(set (match_operand:V8SF 0 "register_operand" "=x")
3365 (match_operand:V8SF 1 "register_operand" "x")
3366 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3367 (parallel [(const_int 0) (const_int 8)
3368 (const_int 1) (const_int 9)
3369 (const_int 4) (const_int 12)
3370 (const_int 5) (const_int 13)])))]
3372 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3373 [(set_attr "type" "sselog")
3374 (set_attr "prefix" "vex")
3375 (set_attr "mode" "V8SF")])
3377 (define_expand "vec_interleave_lowv8sf"
3381 (match_operand:V8SF 1 "register_operand" "x")
3382 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3383 (parallel [(const_int 0) (const_int 8)
3384 (const_int 1) (const_int 9)
3385 (const_int 4) (const_int 12)
3386 (const_int 5) (const_int 13)])))
3392 (parallel [(const_int 2) (const_int 10)
3393 (const_int 3) (const_int 11)
3394 (const_int 6) (const_int 14)
3395 (const_int 7) (const_int 15)])))
3396 (set (match_operand:V8SF 0 "register_operand" "")
3401 (parallel [(const_int 0) (const_int 1)
3402 (const_int 2) (const_int 3)
3403 (const_int 8) (const_int 9)
3404 (const_int 10) (const_int 11)])))]
3407 operands[3] = gen_reg_rtx (V8SFmode);
3408 operands[4] = gen_reg_rtx (V8SFmode);
3411 (define_insn "vec_interleave_lowv4sf"
3412 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3415 (match_operand:V4SF 1 "register_operand" "0,x")
3416 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3417 (parallel [(const_int 0) (const_int 4)
3418 (const_int 1) (const_int 5)])))]
3421 unpcklps\t{%2, %0|%0, %2}
3422 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3423 [(set_attr "isa" "noavx,avx")
3424 (set_attr "type" "sselog")
3425 (set_attr "prefix" "orig,vex")
3426 (set_attr "mode" "V4SF")])
3428 ;; These are modeled with the same vec_concat as the others so that we
3429 ;; capture users of shufps that can use the new instructions.
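;; For example, the 256-bit movshdup below duplicates the odd-numbered
;; elements, { A1,A1, A3,A3, A5,A5, A7,A7 }, which the pattern writes
;; as a vec_select of (vec_concat A A) using indices 1,1,3,3,5,5,7,7.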
3430 (define_insn "avx_movshdup256"
3431 [(set (match_operand:V8SF 0 "register_operand" "=x")
3434 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3436 (parallel [(const_int 1) (const_int 1)
3437 (const_int 3) (const_int 3)
3438 (const_int 5) (const_int 5)
3439 (const_int 7) (const_int 7)])))]
3441 "vmovshdup\t{%1, %0|%0, %1}"
3442 [(set_attr "type" "sse")
3443 (set_attr "prefix" "vex")
3444 (set_attr "mode" "V8SF")])
3446 (define_insn "sse3_movshdup"
3447 [(set (match_operand:V4SF 0 "register_operand" "=x")
3450 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3452 (parallel [(const_int 1)
3457 "%vmovshdup\t{%1, %0|%0, %1}"
3458 [(set_attr "type" "sse")
3459 (set_attr "prefix_rep" "1")
3460 (set_attr "prefix" "maybe_vex")
3461 (set_attr "mode" "V4SF")])
3463 (define_insn "avx_movsldup256"
3464 [(set (match_operand:V8SF 0 "register_operand" "=x")
3467 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3469 (parallel [(const_int 0) (const_int 0)
3470 (const_int 2) (const_int 2)
3471 (const_int 4) (const_int 4)
3472 (const_int 6) (const_int 6)])))]
3474 "vmovsldup\t{%1, %0|%0, %1}"
3475 [(set_attr "type" "sse")
3476 (set_attr "prefix" "vex")
3477 (set_attr "mode" "V8SF")])