1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE
23 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
25 ;; All 16-byte vector modes handled by SSE
26 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Same mode list as SSEMODE with V1TI added.
27 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE
43 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Same mode list as AVXMODE with V1TI added.
44 (define_mode_iterator AVXMODE16
45 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; 16-byte integer subsets.  The digits in each name list the element
;; sizes in bytes that the iterator covers (1 = QI, 2 = HI, 4 = SI, 8 = DI).
48 (define_mode_iterator SSEMODE12 [V16QI V8HI])
49 (define_mode_iterator SSEMODE24 [V8HI V4SI])
50 (define_mode_iterator SSEMODE14 [V16QI V4SI])
51 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
52 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
53 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point groupings: scalar plus 16-byte vector, 32-byte vector
;; only, and 16-byte vector only.
54 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
55 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
56 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; 32-byte groupings: float only, then mixed integer/float sets sharing
;; a 4- or 8-byte element size.
58 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
59 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
60 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
61 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
62 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; Floating-point vector groupings spanning both 16- and 32-byte modes.
63 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
64 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
65 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
66 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; Float-side and integer-side mode lists, named after cvtdq2ps and
;; cvtps2dq respectively.
67 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
68 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
70 ;; Int-float size matches
;; Four 4-byte elements (float or int), and two 8-byte elements (float or int).
71 (define_mode_iterator SSEMODE4S [V4SF V4SI])
72 (define_mode_iterator SSEMODE2D [V2DF V2DI])
74 ;; Modes handled by integer vcond pattern
;; V2DI is only included when the TARGET_SSE4_2 condition holds.
75 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
76 (V2DI "TARGET_SSE4_2")])
78 ;; Modes handled by vec_extract_even/odd pattern.
;; Every entry pairs a mode with the target condition string that
;; enables it for this iterator.
79 (define_mode_iterator SSEMODE_EO
82 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
83 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
84 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
86 ;; Mapping from float mode to required SSE level
;; Single-precision modes map to "sse", double-precision modes to "sse2".
;; Patterns in this file use <sse> as a pattern-name prefix.
87 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
89 ;; Mapping from integer vector mode to mnemonic suffix
;; One letter per element size: b, w, d, q for QI, HI, SI, DI elements.
90 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
92 ;; Mapping of the fma4 suffix
93 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
;; Scalar suffix ("ss"/"sd") for both the scalar modes and the
;; corresponding 16-byte vector modes.
94 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
95 (V4SF "ss") (V2DF "sd")])
97 ;; Mapping of the avx suffix
;; Scalar modes get "ss"/"sd"; vector modes get the packed "ps"/"pd".
98 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
99 (V4SF "ps") (V2DF "pd")])
;; Single-character precision suffix; patterns in this file append it
;; after a "p" or "s" in the mnemonic (e.g. "movup<ssemodesuffixf2c>").
101 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
;; Scalar suffix for the four-element modes: "ss" for float, "d" for int.
103 (define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")])
105 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (8-1, 16-1, 32-1, 64-1).
106 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
108 ;; Mapping of vector modes back to the scalar modes
;; Covers the 16-byte modes only; the vm* patterns in this file use it
;; for their "mode" attribute.
109 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
110 (V16QI "QI") (V8HI "HI")
111 (V4SI "SI") (V2DI "DI")])
113 ;; Mapping of vector modes to a vector mode of double size
;; Same element type with twice the element count; entries exist for
;; both the 16-byte and the 32-byte source modes.
114 (define_mode_attr ssedoublesizemode
115 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
116 (V8HI "V16HI") (V16QI "V32QI")
117 (V4DF "V8DF") (V8SF "V16SF")
118 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
120 ;; Number of scalar elements in each vector type
121 (define_mode_attr ssescalarnum
122 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
123 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value used for the "mode" insn attribute: TI for 16-byte integer
;; vectors, OI for 32-byte integer vectors, and the mode itself for
;; floating-point vectors.
126 (define_mode_attr avxvecmode
127 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
128 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
129 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Single-precision float vector mode of the same total size as the
;; integer vector mode.
130 (define_mode_attr avxvecpsmode
131 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
132 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Vector mode with the same element type and half the element count.
133 (define_mode_attr avxhalfvecmode
134 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
135 (V8SF "V4SF") (V4DF "V2DF")
136 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Element (scalar) mode for every 16- and 32-byte vector mode.
137 (define_mode_attr avxscalarmode
138 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
139 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; Float <-> integer vector mode with the same 4-byte element layout.
140 (define_mode_attr avxcvtvecmode
141 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Integer vector mode with the same element count and element size as
;; the floating-point mode.
142 (define_mode_attr avxpermvecmode
143 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Single-character suffix: "s" for 4-byte elements, "d" for 8-byte
;; elements (float and integer modes alike).
144 (define_mode_attr avxmodesuffixf2c
145 [(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")])
;; Two-character suffix: "ps"/"pd" for float vectors, "si" for SI vectors.
146 (define_mode_attr avxmodesuffixp
147 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Empty for the 16-byte modes listed, "256" for the 32-byte modes;
;; used in this file as a pattern-name suffix.
149 (define_mode_attr avxmodesuffix
150 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
151 (V8SI "256") (V8SF "256") (V4DF "256")])
153 ;; Mapping of immediate bits for blend instructions
;; Each value equals (1 << number-of-elements) - 1, i.e. one bit per element.
154 (define_mode_attr blendbits
155 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
157 ;; Mapping of immediate bits for pinsr instructions
;; Each value equals 1 << (number-of-elements - 1).
158 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
160 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for the 32-byte AVX vector modes; the actual expansion
;; is delegated to ix86_expand_vector_move.
168 (define_expand "mov<mode>"
169 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
170 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
173 ix86_expand_vector_move (<MODE>mode, operands);
;; VEX-encoded vector move for all AVXMODE16 modes.  The condition
;; requires at least one of the two operands to be a register.
;; Alternatives: 0 = SSE constant ("C") into a register, opcode chosen
;; by standard_sse_constant_opcode; 1 = register/memory into register;
;; 2 = register into memory.  For the non-constant alternatives the
;; mnemonic (vmovaps / vmovapd / vmovdqa) is picked from the insn's
;; "mode" attribute, with vmovaps substituted when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
177 (define_insn "*avx_mov<mode>_internal"
178 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
179 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
181 && (register_operand (operands[0], <MODE>mode)
182 || register_operand (operands[1], <MODE>mode))"
184 switch (which_alternative)
187 return standard_sse_constant_opcode (insn, operands[1]);
190 switch (get_attr_mode (insn))
194 return "vmovaps\t{%1, %0|%0, %1}";
197 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
198 return "vmovaps\t{%1, %0|%0, %1}";
200 return "vmovapd\t{%1, %0|%0, %1}";
202 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
203 return "vmovaps\t{%1, %0|%0, %1}";
205 return "vmovdqa\t{%1, %0|%0, %1}";
211 [(set_attr "type" "sselog1,ssemov,ssemov")
212 (set_attr "prefix" "vex")
213 (set_attr "mode" "<avxvecmode>")])
215 ;; All of these patterns are enabled for SSE1 as well as SSE2.
216 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte SSE vector modes (including V1TI); the
;; actual expansion is delegated to ix86_expand_vector_move.
218 (define_expand "mov<mode>"
219 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
220 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
223 ix86_expand_vector_move (<MODE>mode, operands);
;; Non-VEX vector move for the 16-byte modes.  The condition requires at
;; least one operand to be a register.  Alternatives: 0 = SSE constant
;; via standard_sse_constant_opcode; 1 = register/memory into register;
;; 2 = register into memory.  The mnemonic (movaps / movapd / movdqa)
;; follows the insn's "mode" attribute, with movaps substituted when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;; The "mode" attribute is V4SF when optimizing the function for size,
;; when TARGET_SSE2 is off, or for the store alternative under
;; TARGET_SSE_TYPELESS_STORES; otherwise V4SF / V2DF for those two
;; float modes and TI for everything else.
227 (define_insn "*mov<mode>_internal"
228 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
229 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
231 && (register_operand (operands[0], <MODE>mode)
232 || register_operand (operands[1], <MODE>mode))"
234 switch (which_alternative)
237 return standard_sse_constant_opcode (insn, operands[1]);
240 switch (get_attr_mode (insn))
243 return "movaps\t{%1, %0|%0, %1}";
245 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
246 return "movaps\t{%1, %0|%0, %1}";
248 return "movapd\t{%1, %0|%0, %1}";
250 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
251 return "movaps\t{%1, %0|%0, %1}";
253 return "movdqa\t{%1, %0|%0, %1}";
259 [(set_attr "type" "sselog1,ssemov,ssemov")
261 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
262 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
263 (and (eq_attr "alternative" "2")
264 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
266 (const_string "V4SF")
267 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
268 (const_string "V4SF")
269 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
270 (const_string "V2DF")
272 (const_string "TI")))])
274 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
275 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
276 ;; from memory, we'd prefer to load the memory directly into the %xmm
277 ;; register. To facilitate this happy circumstance, this pattern won't
278 ;; split until after register allocation. If the 64-bit value didn't
279 ;; come from memory, this is the best we can do. This is much better
280 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;; Only enabled for 32-bit targets with SSE2 and inter-unit moves.
;; Alternative 0 takes the DI value from general registers and needs an
;; early-clobber V4SI scratch; alternative 1 takes it from memory.
;; The split runs after reload: the register case loads each 32-bit
;; half with sse2_loadld (low half into operand 0, high half into the
;; scratch) and combines them with vec_interleave_lowv4si; the memory
;; case uses vec_concatv2di with a zero upper element.
283 (define_insn_and_split "movdi_to_sse"
285 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
286 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
287 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
288 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
290 "&& reload_completed"
293 if (register_operand (operands[1], DImode))
295 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
296 Assemble the 64-bit DImode value in an xmm register. */
297 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
298 gen_rtx_SUBREG (SImode, operands[1], 0)));
299 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
300 gen_rtx_SUBREG (SImode, operands[1], 4)));
301 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
304 else if (memory_operand (operands[1], DImode))
305 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
306 operands[1], const0_rtx));
;; Post-reload rewrite (TARGET_SSE) of a V4SF register load whose source
;; satisfies zero_extended_scalar_load_operand.  Operand 1 is narrowed
;; to its SFmode subreg at offset 0 and operand 2 becomes the V4SF zero
;; vector, so the replacement is built from a vec_duplicate of the scalar.
312 [(set (match_operand:V4SF 0 "register_operand" "")
313 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
314 "TARGET_SSE && reload_completed"
317 (vec_duplicate:V4SF (match_dup 1))
321 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
322 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite (TARGET_SSE2) of a V2DF register load whose
;; source satisfies zero_extended_scalar_load_operand.  The result is a
;; vec_concat of the DFmode subreg of operand 1 (offset 0) with a DFmode
;; zero in operand 2.
326 [(set (match_operand:V2DF 0 "register_operand" "")
327 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
328 "TARGET_SSE2 && reload_completed"
329 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
331 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
332 operands[2] = CONST0_RTX (DFmode);
;; Push of a 32-byte AVX vector register; delegated to ix86_expand_push.
335 (define_expand "push<mode>1"
336 [(match_operand:AVX256MODE 0 "register_operand" "")]
339 ix86_expand_push (<MODE>mode, operands[0]);
;; Push of a 16-byte SSE vector register; delegated to ix86_expand_push.
343 (define_expand "push<mode>1"
344 [(match_operand:SSEMODE16 0 "register_operand" "")]
347 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned move expander for the 32-byte AVX modes; delegated to
;; ix86_expand_vector_move_misalign.
351 (define_expand "movmisalign<mode>"
352 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
353 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
356 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned move expander for the 16-byte SSE modes; delegated to
;; ix86_expand_vector_move_misalign.
360 (define_expand "movmisalign<mode>"
361 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
362 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
365 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; VEX vmovups / vmovupd for 16- and 32-byte float vectors, as either a
;; load/register move (alternative 0) or a store (alternative 1).
;; Memory-to-memory is rejected by the condition.  Flagged "movu".
369 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
370 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
372 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
374 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
375 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
376 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
377 [(set_attr "type" "ssemov")
378 (set_attr "movu" "1")
379 (set_attr "prefix" "vex")
380 (set_attr "mode" "<MODE>")])
;; movq (VEX form when available, via the %v prefix) into a V2DI
;; register; the visible RTL selects element 0 of operand 1.
382 (define_insn "sse2_movq128"
383 [(set (match_operand:V2DI 0 "register_operand" "=x")
386 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
387 (parallel [(const_int 0)]))
390 "%vmovq\t{%1, %0|%0, %1}"
391 [(set_attr "type" "ssemov")
392 (set_attr "prefix" "maybe_vex")
393 (set_attr "mode" "TI")])
;; Non-VEX movups / movupd for V4SF / V2DF, as either a load/register
;; move or a store.  Memory-to-memory is rejected by the condition.
;; Flagged "movu".
395 (define_insn "<sse>_movup<ssemodesuffixf2c>"
396 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
398 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
400 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
401 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
402 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
403 [(set_attr "type" "ssemov")
404 (set_attr "movu" "1")
405 (set_attr "mode" "<MODE>")])
;; VEX vmovdqu for V16QI / V32QI, as either a load/register move or a
;; store.  Memory-to-memory is rejected by the condition.  Flagged "movu".
407 (define_insn "avx_movdqu<avxmodesuffix>"
408 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
410 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
412 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
413 "vmovdqu\t{%1, %0|%0, %1}"
414 [(set_attr "type" "ssemov")
415 (set_attr "movu" "1")
416 (set_attr "prefix" "vex")
417 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX movdqu for V16QI, as either a load/register move or a store.
;; Memory-to-memory is rejected by the condition.  Flagged "movu".
419 (define_insn "sse2_movdqu"
420 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
421 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
423 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
424 "movdqu\t{%1, %0|%0, %1}"
425 [(set_attr "type" "ssemov")
426 (set_attr "movu" "1")
427 (set_attr "prefix_data16" "1")
428 (set_attr "mode" "TI")])
;; VEX vmovntps / vmovntpd: register-to-memory store for 16- and
;; 32-byte float vectors.
430 (define_insn "avx_movnt<mode>"
431 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
433 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
435 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
436 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
437 [(set_attr "type" "ssemov")
438 (set_attr "prefix" "vex")
439 (set_attr "mode" "<MODE>")])
;; Non-VEX movntps / movntpd: register-to-memory store for V4SF / V2DF.
441 (define_insn "<sse>_movnt<mode>"
442 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
444 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
446 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
447 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
448 [(set_attr "type" "ssemov")
449 (set_attr "mode" "<MODE>")])
;; VEX vmovntdq: register-to-memory store for V2DI / V4DI.
;; NOTE(review): this pattern is typed "ssecvt" while sse2_movntv2di in
;; this file is typed "ssemov" -- confirm the difference is intentional.
451 (define_insn "avx_movnt<mode>"
452 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
454 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
457 "vmovntdq\t{%1, %0|%0, %1}"
458 [(set_attr "type" "ssecvt")
459 (set_attr "prefix" "vex")
460 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX movntdq: register-to-memory store of a V2DI value.
462 (define_insn "sse2_movntv2di"
463 [(set (match_operand:V2DI 0 "memory_operand" "=m")
464 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
467 "movntdq\t{%1, %0|%0, %1}"
468 [(set_attr "type" "ssemov")
469 (set_attr "prefix_data16" "1")
470 (set_attr "mode" "TI")])
;; movnti: store of an SImode value from a general register ("r"
;; constraint) to memory.  The "mode" attribute is SI to match the
;; operand mode; it previously said V2DF, which describes neither
;; operand.  prefix_data16 is forced to 0 explicitly, so the encoding
;; length does not depend on the mode attribute.
472 (define_insn "sse2_movntsi"
473 [(set (match_operand:SI 0 "memory_operand" "=m")
474 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
477 "movnti\t{%1, %0|%0, %1}"
478 [(set_attr "type" "ssemov")
479 (set_attr "prefix_data16" "0")
480 (set_attr "mode" "SI")])
;; VEX vlddqu: memory-to-register load for V16QI / V32QI.  Flagged "movu".
;; NOTE(review): typed "ssecvt" here while sse3_lddqu in this file is
;; typed "ssemov" -- confirm the difference is intentional.
482 (define_insn "avx_lddqu<avxmodesuffix>"
483 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
485 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
488 "vlddqu\t{%1, %0|%0, %1}"
489 [(set_attr "type" "ssecvt")
490 (set_attr "movu" "1")
491 (set_attr "prefix" "vex")
492 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX lddqu: memory-to-register load of a V16QI value.  Flagged
;; "movu"; the attributes select a rep prefix and no data16 prefix.
494 (define_insn "sse3_lddqu"
495 [(set (match_operand:V16QI 0 "register_operand" "=x")
496 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
499 "lddqu\t{%1, %0|%0, %1}"
500 [(set_attr "type" "ssemov")
501 (set_attr "movu" "1")
502 (set_attr "prefix_data16" "0")
503 (set_attr "prefix_rep" "1")
504 (set_attr "mode" "TI")])
506 ; Expand patterns for non-temporal stores. At the moment, only those
507 ; that directly map to insns are defined; it would be possible to
508 ; define patterns for other modes that would expand to several insns.
;; Non-temporal store expander for V4SF / V2DF: register source, memory
;; destination.
510 (define_expand "storent<mode>"
511 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
513 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
515 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; Non-temporal store expander for the MODEF modes (iterator defined
;; outside this section): register source, memory destination.
518 (define_expand "storent<mode>"
519 [(set (match_operand:MODEF 0 "memory_operand" "")
521 [(match_operand:MODEF 1 "register_operand" "")]
;; Non-temporal store expander for V2DI: register source, memory
;; destination.
526 (define_expand "storentv2di"
527 [(set (match_operand:V2DI 0 "memory_operand" "")
528 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; Non-temporal store expander for SI: register source, memory
;; destination.
533 (define_expand "storentsi"
534 [(set (match_operand:SI 0 "memory_operand" "")
535 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
540 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
542 ;; Parallel floating point arithmetic
544 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary float-vector expander for V4SF / V2DF; all of the work is done
;; by ix86_expand_fp_absneg_operator, after which the expander is DONE.
546 (define_expand "<code><mode>2"
547 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
549 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
550 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
551 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander for 32-byte float vectors.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy before the pattern is emitted.
553 (define_expand "<plusminus_insn><mode>3"
554 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
555 (plusminus:AVX256MODEF2P
556 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
557 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
558 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
559 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Three-operand VEX packed add/subtract for 16- and 32-byte float
;; vectors.  The <comm> attribute is prepended to operand 1's
;; constraint (presumably the commutativity marker for plus -- defined
;; outside this section).
561 (define_insn "*avx_<plusminus_insn><mode>3"
562 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
563 (plusminus:AVXMODEF2P
564 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
565 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
566 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
567 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
568 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
569 [(set_attr "type" "sseadd")
570 (set_attr "prefix" "vex")
571 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for V4SF / V2DF.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
573 (define_expand "<plusminus_insn><mode>3"
574 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
575 (plusminus:SSEMODEF2P
576 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
577 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
578 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
579 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Two-operand non-VEX packed add/subtract for V4SF / V2DF: operand 1 is
;; tied to the destination (constraint "0"), so only %2 and %0 appear in
;; the output template.
581 (define_insn "*<plusminus_insn><mode>3"
582 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
583 (plusminus:SSEMODEF2P
584 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
585 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
586 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
587 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
588 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
589 [(set_attr "type" "sseadd")
590 (set_attr "mode" "<MODE>")])
;; Three-operand VEX add/subtract using the scalar mnemonic (ss / sd),
;; expressed as a vec_merge around the vector operation.  The "mode"
;; attribute is the element mode.
592 (define_insn "*avx_vm<plusminus_insn><mode>3"
593 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
594 (vec_merge:SSEMODEF2P
595 (plusminus:SSEMODEF2P
596 (match_operand:SSEMODEF2P 1 "register_operand" "x")
597 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
600 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
601 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
602 [(set_attr "type" "sseadd")
603 (set_attr "prefix" "vex")
604 (set_attr "mode" "<ssescalarmode>")])
;; Two-operand non-VEX add/subtract using the scalar mnemonic (ss / sd),
;; expressed as a vec_merge around the vector operation.  Operand 1 is
;; tied to the destination.  The "mode" attribute is the element mode.
606 (define_insn "<sse>_vm<plusminus_insn><mode>3"
607 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
608 (vec_merge:SSEMODEF2P
609 (plusminus:SSEMODEF2P
610 (match_operand:SSEMODEF2P 1 "register_operand" "0")
611 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
614 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
615 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
616 [(set_attr "type" "sseadd")
617 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for 32-byte float vectors.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy with code MULT.
619 (define_expand "mul<mode>3"
620 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
622 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
623 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
624 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
625 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Three-operand VEX vmulps / vmulpd for 16- and 32-byte float vectors.
;; Operand 1 carries the "%" commutative constraint modifier.
627 (define_insn "*avx_mul<mode>3"
628 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
630 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
631 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
632 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
633 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
634 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
635 [(set_attr "type" "ssemul")
636 (set_attr "prefix" "vex")
637 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for V4SF / V2DF.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
639 (define_expand "mul<mode>3"
640 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
642 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
643 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
644 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
645 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Two-operand non-VEX mulps / mulpd for V4SF / V2DF.  Operand 1 is tied
;; to the destination and marked commutative ("%0").
647 (define_insn "*mul<mode>3"
648 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
650 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
651 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
652 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
653 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
654 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
655 [(set_attr "type" "ssemul")
656 (set_attr "mode" "<MODE>")])
;; Three-operand VEX vmulss / vmulsd, expressed as a vec_merge around
;; the vector multiply.  The "mode" attribute is the element mode.
;; NOTE(review): the condition tests AVX_VEC_FLOAT_MODE_P, whereas the
;; sibling *avx_vm<plusminus_insn> and *avx_vmdiv patterns test
;; AVX128_VEC_FLOAT_MODE_P -- confirm whether this is intentional.
658 (define_insn "*avx_vmmul<mode>3"
659 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
660 (vec_merge:SSEMODEF2P
662 (match_operand:SSEMODEF2P 1 "register_operand" "x")
663 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
666 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
667 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
668 [(set_attr "type" "ssemul")
669 (set_attr "prefix" "vex")
670 (set_attr "mode" "<ssescalarmode>")])
;; Two-operand non-VEX mulss / mulsd, expressed as a vec_merge around
;; the vector multiply.  Operand 1 is tied to the destination.
672 (define_insn "<sse>_vmmul<mode>3"
673 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
674 (vec_merge:SSEMODEF2P
676 (match_operand:SSEMODEF2P 1 "register_operand" "0")
677 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
680 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
681 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
682 [(set_attr "type" "ssemul")
683 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  After fixing up the operands, it calls
;; ix86_emit_swdivsf instead of emitting the div pattern when all of
;; these hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn
;; for size, -ffinite-math-only, no trapping math, and
;; unsafe math optimizations enabled.
685 (define_expand "divv8sf3"
686 [(set (match_operand:V8SF 0 "register_operand" "")
687 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
688 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
691 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
693 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
694 && flag_finite_math_only && !flag_trapping_math
695 && flag_unsafe_math_optimizations)
697 ix86_emit_swdivsf (operands[0], operands[1],
698 operands[2], V8SFmode);
;; V4DF division expander; only fixes up the operands via
;; ix86_fixup_binary_operands_no_copy.
703 (define_expand "divv4df3"
704 [(set (match_operand:V4DF 0 "register_operand" "")
705 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
706 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
708 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; Three-operand VEX vdivps / vdivpd for 16- and 32-byte float vectors.
;; The dividend (operand 1) must be a register.
710 (define_insn "avx_div<mode>3"
711 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
713 (match_operand:AVXMODEF2P 1 "register_operand" "x")
714 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
715 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
716 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
717 [(set_attr "type" "ssediv")
718 (set_attr "prefix" "vex")
719 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  Calls ix86_emit_swdivsf instead of emitting
;; the div pattern when all of these hold: TARGET_SSE_MATH,
;; TARGET_RECIP, optimizing the insn for speed, -ffinite-math-only, no
;; trapping math, and unsafe math optimizations enabled.
721 (define_expand "divv4sf3"
722 [(set (match_operand:V4SF 0 "register_operand" "")
723 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
724 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
727 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
728 && flag_finite_math_only && !flag_trapping_math
729 && flag_unsafe_math_optimizations)
731 ix86_emit_swdivsf (operands[0], operands[1],
732 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory divisor.
737 (define_expand "divv2df3"
738 [(set (match_operand:V2DF 0 "register_operand" "")
739 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
740 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Three-operand VEX vdivps / vdivpd restricted to the 16-byte modes
;; (SSEMODEF2P under AVX128_VEC_FLOAT_MODE_P).
744 (define_insn "*avx_div<mode>3"
745 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
747 (match_operand:SSEMODEF2P 1 "register_operand" "x")
748 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
749 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
750 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
751 [(set_attr "type" "ssediv")
752 (set_attr "prefix" "vex")
753 (set_attr "mode" "<MODE>")])
;; Two-operand non-VEX divps / divpd for V4SF / V2DF; the dividend is
;; tied to the destination register.
755 (define_insn "<sse>_div<mode>3"
756 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
758 (match_operand:SSEMODEF2P 1 "register_operand" "0")
759 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
760 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
761 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
762 [(set_attr "type" "ssediv")
763 (set_attr "mode" "<MODE>")])
;; Three-operand VEX vdivss / vdivsd, expressed as a vec_merge around
;; the vector divide.  The "mode" attribute is the element mode.
765 (define_insn "*avx_vmdiv<mode>3"
766 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
767 (vec_merge:SSEMODEF2P
769 (match_operand:SSEMODEF2P 1 "register_operand" "x")
770 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
773 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
774 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
775 [(set_attr "type" "ssediv")
776 (set_attr "prefix" "vex")
777 (set_attr "mode" "<ssescalarmode>")])
;; Two-operand non-VEX divss / divsd, expressed as a vec_merge around
;; the vector divide.  Operand 1 is tied to the destination.
779 (define_insn "<sse>_vmdiv<mode>3"
780 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
781 (vec_merge:SSEMODEF2P
783 (match_operand:SSEMODEF2P 1 "register_operand" "0")
784 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
787 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
788 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
789 [(set_attr "type" "ssediv")
790 (set_attr "mode" "<ssescalarmode>")])
;; VEX vrcpps on V8SF, represented as UNSPEC_RCP of operand 1.
792 (define_insn "avx_rcpv8sf2"
793 [(set (match_operand:V8SF 0 "register_operand" "=x")
795 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
797 "vrcpps\t{%1, %0|%0, %1}"
798 [(set_attr "type" "sse")
799 (set_attr "prefix" "vex")
800 (set_attr "mode" "V8SF")])
;; rcpps on V4SF (VEX form when available, via the %v prefix),
;; represented as UNSPEC_RCP of operand 1.
802 (define_insn "sse_rcpv4sf2"
803 [(set (match_operand:V4SF 0 "register_operand" "=x")
805 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
807 "%vrcpps\t{%1, %0|%0, %1}"
808 [(set_attr "type" "sse")
809 (set_attr "atom_sse_attr" "rcp")
810 (set_attr "prefix" "maybe_vex")
811 (set_attr "mode" "V4SF")])
;; Three-operand VEX vrcpss.  Operand 1 is the value the unspec is
;; applied to; operand 2 is a separate register that appears as the
;; middle operand in the output template.
813 (define_insn "*avx_vmrcpv4sf2"
814 [(set (match_operand:V4SF 0 "register_operand" "=x")
816 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
818 (match_operand:V4SF 2 "register_operand" "x")
821 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
822 [(set_attr "type" "sse")
823 (set_attr "prefix" "vex")
824 (set_attr "mode" "SF")])
;; Two-operand non-VEX rcpss.  Operand 2 is tied to the destination
;; (constraint "0") and therefore does not appear in the output template.
826 (define_insn "sse_vmrcpv4sf2"
827 [(set (match_operand:V4SF 0 "register_operand" "=x")
829 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
831 (match_operand:V4SF 2 "register_operand" "0")
834 "rcpss\t{%1, %0|%0, %1}"
835 [(set_attr "type" "sse")
836 (set_attr "atom_sse_attr" "rcp")
837 (set_attr "mode" "SF")])
;; V8SF square-root expander.  Calls ix86_emit_swsqrtsf (last argument
;; 0) instead of emitting the sqrt pattern when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
839 (define_expand "sqrtv8sf2"
840 [(set (match_operand:V8SF 0 "register_operand" "")
841 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
844 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
845 && flag_finite_math_only && !flag_trapping_math
846 && flag_unsafe_math_optimizations)
848 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; VEX vsqrtps on V8SF.
853 (define_insn "avx_sqrtv8sf2"
854 [(set (match_operand:V8SF 0 "register_operand" "=x")
855 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
857 "vsqrtps\t{%1, %0|%0, %1}"
858 [(set_attr "type" "sse")
859 (set_attr "prefix" "vex")
860 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  Calls ix86_emit_swsqrtsf (last argument
;; 0) instead of emitting the sqrt pattern when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, optimizing the insn for speed,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
862 (define_expand "sqrtv4sf2"
863 [(set (match_operand:V4SF 0 "register_operand" "")
864 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
867 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
868 && flag_finite_math_only && !flag_trapping_math
869 && flag_unsafe_math_optimizations)
871 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps on V4SF (VEX form when available, via the %v prefix).
876 (define_insn "sse_sqrtv4sf2"
877 [(set (match_operand:V4SF 0 "register_operand" "=x")
878 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
880 "%vsqrtps\t{%1, %0|%0, %1}"
881 [(set_attr "type" "sse")
882 (set_attr "atom_sse_attr" "sqrt")
883 (set_attr "prefix" "maybe_vex")
884 (set_attr "mode" "V4SF")])
;; VEX vsqrtpd on V4DF.
886 (define_insn "sqrtv4df2"
887 [(set (match_operand:V4DF 0 "register_operand" "=x")
888 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
890 "vsqrtpd\t{%1, %0|%0, %1}"
891 [(set_attr "type" "sse")
892 (set_attr "prefix" "vex")
893 (set_attr "mode" "V4DF")])
;; sqrtpd on V2DF (VEX form when available, via the %v prefix).
895 (define_insn "sqrtv2df2"
896 [(set (match_operand:V2DF 0 "register_operand" "=x")
897 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
899 "%vsqrtpd\t{%1, %0|%0, %1}"
900 [(set_attr "type" "sse")
901 (set_attr "prefix" "maybe_vex")
902 (set_attr "mode" "V2DF")])
;; Three-operand VEX vsqrtss / vsqrtsd.  Operand 1 is the sqrt input;
;; operand 2 is a separate register that appears as the middle operand
;; in the output template.
;; NOTE(review): the condition tests AVX_VEC_FLOAT_MODE_P, whereas the
;; sibling *avx_vm<plusminus_insn> and *avx_vmdiv patterns test
;; AVX128_VEC_FLOAT_MODE_P -- confirm whether this is intentional.
904 (define_insn "*avx_vmsqrt<mode>2"
905 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
906 (vec_merge:SSEMODEF2P
908 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
909 (match_operand:SSEMODEF2P 2 "register_operand" "x")
911 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
912 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
913 [(set_attr "type" "sse")
914 (set_attr "prefix" "vex")
915 (set_attr "mode" "<ssescalarmode>")])
;; Two-operand non-VEX sqrtss / sqrtsd.  Operand 2 is tied to the
;; destination (constraint "0") and does not appear in the template.
917 (define_insn "<sse>_vmsqrt<mode>2"
918 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
919 (vec_merge:SSEMODEF2P
921 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
922 (match_operand:SSEMODEF2P 2 "register_operand" "0")
924 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
925 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
926 [(set_attr "type" "sse")
927 (set_attr "atom_sse_attr" "sqrt")
928 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander (UNSPEC_RSQRT), enabled under
;; TARGET_AVX && TARGET_SSE_MATH.  Expansion is done by
;; ix86_emit_swsqrtsf with last argument 1.
930 (define_expand "rsqrtv8sf2"
931 [(set (match_operand:V8SF 0 "register_operand" "")
933 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
934 "TARGET_AVX && TARGET_SSE_MATH"
936 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; VEX vrsqrtps on V8SF, represented as UNSPEC_RSQRT of operand 1.
940 (define_insn "avx_rsqrtv8sf2"
941 [(set (match_operand:V8SF 0 "register_operand" "=x")
943 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
945 "vrsqrtps\t{%1, %0|%0, %1}"
946 [(set_attr "type" "sse")
947 (set_attr "prefix" "vex")
948 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  Expansion is
;; done by ix86_emit_swsqrtsf with last argument 1.
950 (define_expand "rsqrtv4sf2"
951 [(set (match_operand:V4SF 0 "register_operand" "")
953 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
956 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps on V4SF (VEX form when available, via the %v prefix),
;; represented as UNSPEC_RSQRT of operand 1.
960 (define_insn "sse_rsqrtv4sf2"
961 [(set (match_operand:V4SF 0 "register_operand" "=x")
963 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
965 "%vrsqrtps\t{%1, %0|%0, %1}"
966 [(set_attr "type" "sse")
967 (set_attr "prefix" "maybe_vex")
968 (set_attr "mode" "V4SF")])
;; Three-operand VEX vrsqrtss.  Operand 1 is the value the unspec is
;; applied to; operand 2 is a separate register that appears as the
;; middle operand in the output template.
970 (define_insn "*avx_vmrsqrtv4sf2"
971 [(set (match_operand:V4SF 0 "register_operand" "=x")
973 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
975 (match_operand:V4SF 2 "register_operand" "x")
978 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
979 [(set_attr "type" "sse")
980 (set_attr "prefix" "vex")
981 (set_attr "mode" "SF")])
;; Two-operand non-VEX rsqrtss.  Operand 2 is tied to the destination
;; (constraint "0") and does not appear in the output template.
983 (define_insn "sse_vmrsqrtv4sf2"
984 [(set (match_operand:V4SF 0 "register_operand" "=x")
986 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
988 (match_operand:V4SF 2 "register_operand" "0")
991 "rsqrtss\t{%1, %0|%0, %1}"
992 [(set_attr "type" "sse")
993 (set_attr "mode" "SF")])
995 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
996 ;; isn't really correct, as those rtl operators aren't defined when
997 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for the smaxmin codes on the AVX256MODEF2P modes.  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a register;
;; ix86_fixup_binary_operands_no_copy is then called in either case.
999 (define_expand "<code><mode>3"
1000 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1001 (smaxmin:AVX256MODEF2P
1002 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1003 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1004 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1006 if (!flag_finite_math_only)
1007 operands[1] = force_reg (<MODE>mode, operands[1]);
1008 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander on the SSEMODEF2P modes, enabled by SSE_VEC_FLOAT_MODE_P.
;; Unless flag_finite_math_only is set, operand 1 is first forced into a
;; register; ix86_fixup_binary_operands_no_copy is then called in either case.
1011 (define_expand "<code><mode>3"
1012 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1014 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1015 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1016 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1018 if (!flag_finite_math_only)
1019 operands[1] = force_reg (<MODE>mode, operands[1]);
1020 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; VEX packed min/max insn that is only enabled with flag_finite_math_only.
;; Operand 1 carries the "%" (commutative) modifier, and the operands must
;; also satisfy ix86_binary_operator_ok.
1023 (define_insn "*avx_<code><mode>3_finite"
1024 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1026 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1027 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1028 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1029 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1030 "v<maxmin_float>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1031 [(set_attr "type" "sseadd")
1032 (set_attr "prefix" "vex")
1033 (set_attr "mode" "<MODE>")])
;; Non-VEX packed min/max insn that is only enabled with
;; flag_finite_math_only.  Operand 1 is commutative and tied to the
;; destination ("%0"); the template prints only operands 2 and 0.
1035 (define_insn "*<code><mode>3_finite"
1036 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1038 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1039 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1040 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1041 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1042 "<maxmin_float>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1043 [(set_attr "type" "sseadd")
1044 (set_attr "mode" "<MODE>")])
;; VEX packed min/max insn that is not restricted to flag_finite_math_only.
;; Operand 1 is a plain register and is deliberately NOT marked commutative
;; ("%"): without finite-math-only the result depends on operand order
;; (the expanders force operand 1 into a register for exactly this case),
;; so the order must be preserved just as the non-VEX *<code><mode>3
;; pattern preserves it.
1046 (define_insn "*avx_<code><mode>3"
1047 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1049 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1050 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1051 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1052 "v<maxmin_float>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1053 [(set_attr "type" "sseadd")
1054 (set_attr "prefix" "vex")
1055 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX packed min/max insn without the flag_finite_math_only
;; restriction.  Operand 1 must be a register tied to the destination ("0")
;; and has no "%" modifier, so the operand order is kept.
1057 (define_insn "*<code><mode>3"
1058 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1060 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1061 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1062 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1063 "<maxmin_float>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1064 [(set_attr "type" "sseadd")
1065 (set_attr "mode" "<MODE>")])
;; VEX min/max insn whose operation on operands 1 and 2 sits inside a
;; vec_merge.  The template uses the "s" mnemonic form with three operands
;; and the mode attribute is <ssescalarmode>.
1067 (define_insn "*avx_vm<code><mode>3"
1068 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1069 (vec_merge:SSEMODEF2P
1071 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1072 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1075 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1076 "v<maxmin_float>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1077 [(set_attr "type" "sse")
1078 (set_attr "prefix" "vex")
1079 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX counterpart of the vec_merge min/max insn: operand 1 is tied to
;; the destination ("0") and the template prints only operands 2 and 0.
1081 (define_insn "<sse>_vm<code><mode>3"
1082 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1083 (vec_merge:SSEMODEF2P
1085 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1086 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1089 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1090 "<maxmin_float>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1091 [(set_attr "type" "sseadd")
1092 (set_attr "mode" "<ssescalarmode>")])
1094 ;; These versions of the min/max patterns implement exactly the operations
1095 ;; min = (op1 < op2 ? op1 : op2)
1096 ;; max = (!(op1 < op2) ? op1 : op2)
1097 ;; Their operands are not commutative, and thus they may be used in the
1098 ;; presence of -0.0 and NaN.
;; VEX vminp* insn for the non-commutative min form.  Operand 1 is a
;; register_operand with no "%" modifier, so the operand order is fixed.
1100 (define_insn "*avx_ieee_smin<mode>3"
1101 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1103 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1104 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1106 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1107 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1108 [(set_attr "type" "sseadd")
1109 (set_attr "prefix" "vex")
1110 (set_attr "mode" "<avxvecmode>")])
;; VEX vmaxp* insn for the non-commutative max form.  Operand 1 is a
;; register_operand with no "%" modifier, so the operand order is fixed.
1112 (define_insn "*avx_ieee_smax<mode>3"
1113 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1115 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1116 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1118 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1119 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1120 [(set_attr "type" "sseadd")
1121 (set_attr "prefix" "vex")
1122 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX minp* insn for the non-commutative min form.  Operand 1 is tied
;; to the destination ("0") and the template prints only operands 2 and 0.
1124 (define_insn "*ieee_smin<mode>3"
1125 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1127 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1128 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1130 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1131 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1132 [(set_attr "type" "sseadd")
1133 (set_attr "mode" "<MODE>")])
;; Non-VEX maxp* insn for the non-commutative max form.  Operand 1 is tied
;; to the destination ("0") and the template prints only operands 2 and 0.
1135 (define_insn "*ieee_smax<mode>3"
1136 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1138 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1139 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1141 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1142 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1143 [(set_attr "type" "sseadd")
1144 (set_attr "mode" "<MODE>")])
;; vaddsubps on V8SF (VEX, three operands).  The visible RTL combines an
;; operation on operands 1 and 2 with (minus (match_dup 1) (match_dup 2)).
1146 (define_insn "avx_addsubv8sf3"
1147 [(set (match_operand:V8SF 0 "register_operand" "=x")
1150 (match_operand:V8SF 1 "register_operand" "x")
1151 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1152 (minus:V8SF (match_dup 1) (match_dup 2))
1155 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1156 [(set_attr "type" "sseadd")
1157 (set_attr "prefix" "vex")
1158 (set_attr "mode" "V8SF")])
;; vaddsubpd on V4DF (VEX, three operands).  The visible RTL combines an
;; operation on operands 1 and 2 with (minus (match_dup 1) (match_dup 2)).
1160 (define_insn "avx_addsubv4df3"
1161 [(set (match_operand:V4DF 0 "register_operand" "=x")
1164 (match_operand:V4DF 1 "register_operand" "x")
1165 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1166 (minus:V4DF (match_dup 1) (match_dup 2))
1169 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1170 [(set_attr "type" "sseadd")
1171 (set_attr "prefix" "vex")
1172 (set_attr "mode" "V4DF")])
;; vaddsubps on V4SF (VEX, three operands); same RTL shape as the 256-bit
;; pattern but with operand 1 in a separate "x" register.
1174 (define_insn "*avx_addsubv4sf3"
1175 [(set (match_operand:V4SF 0 "register_operand" "=x")
1178 (match_operand:V4SF 1 "register_operand" "x")
1179 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1180 (minus:V4SF (match_dup 1) (match_dup 2))
1183 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1184 [(set_attr "type" "sseadd")
1185 (set_attr "prefix" "vex")
1186 (set_attr "mode" "V4SF")])
;; Two-operand addsubps on V4SF: operand 1 is tied to the destination ("0").
;; The insn sets the prefix_rep attribute to "1".
1188 (define_insn "sse3_addsubv4sf3"
1189 [(set (match_operand:V4SF 0 "register_operand" "=x")
1192 (match_operand:V4SF 1 "register_operand" "0")
1193 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1194 (minus:V4SF (match_dup 1) (match_dup 2))
1197 "addsubps\t{%2, %0|%0, %2}"
1198 [(set_attr "type" "sseadd")
1199 (set_attr "prefix_rep" "1")
1200 (set_attr "mode" "V4SF")])
;; vaddsubpd on V2DF (VEX, three operands) with operand 1 in a separate
;; "x" register.
1202 (define_insn "*avx_addsubv2df3"
1203 [(set (match_operand:V2DF 0 "register_operand" "=x")
1206 (match_operand:V2DF 1 "register_operand" "x")
1207 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1208 (minus:V2DF (match_dup 1) (match_dup 2))
1211 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1212 [(set_attr "type" "sseadd")
1213 (set_attr "prefix" "vex")
1214 (set_attr "mode" "V2DF")])
;; Two-operand addsubpd on V2DF: operand 1 is tied to the destination ("0").
;; The insn sets the atom_unit attribute to "complex".
1216 (define_insn "sse3_addsubv2df3"
1217 [(set (match_operand:V2DF 0 "register_operand" "=x")
1220 (match_operand:V2DF 1 "register_operand" "0")
1221 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1222 (minus:V2DF (match_dup 1) (match_dup 2))
1225 "addsubpd\t{%2, %0|%0, %2}"
1226 [(set_attr "type" "sseadd")
1227 (set_attr "atom_unit" "complex")
1228 (set_attr "mode" "V2DF")])
;; vh<plusminus_mnemonic>pd on V4DF (VEX).  The RTL selects the individual
;; DF elements 0-3 of operand 1 and of operand 2 with vec_select.
1230 (define_insn "avx_h<plusminus_insn>v4df3"
1231 [(set (match_operand:V4DF 0 "register_operand" "=x")
1236 (match_operand:V4DF 1 "register_operand" "x")
1237 (parallel [(const_int 0)]))
1238 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1240 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1241 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1245 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1246 (parallel [(const_int 0)]))
1247 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1249 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1250 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1252 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1253 [(set_attr "type" "sseadd")
1254 (set_attr "prefix" "vex")
1255 (set_attr "mode" "V4DF")])
;; vh<plusminus_mnemonic>ps on V8SF (VEX).  The RTL selects SF elements
;; 0-3 of operands 1 and 2 first, then elements 4-7 of operands 1 and 2.
1257 (define_insn "avx_h<plusminus_insn>v8sf3"
1258 [(set (match_operand:V8SF 0 "register_operand" "=x")
1264 (match_operand:V8SF 1 "register_operand" "x")
1265 (parallel [(const_int 0)]))
1266 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1268 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1269 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1273 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1274 (parallel [(const_int 0)]))
1275 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1277 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1282 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1283 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1285 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1286 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1289 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1290 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1292 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1293 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1295 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1296 [(set_attr "type" "sseadd")
1297 (set_attr "prefix" "vex")
1298 (set_attr "mode" "V8SF")])
;; vh<plusminus_mnemonic>ps on V4SF (VEX, three operands).  The RTL selects
;; SF elements 0-3 of operand 1 and of operand 2 with vec_select.
1300 (define_insn "*avx_h<plusminus_insn>v4sf3"
1301 [(set (match_operand:V4SF 0 "register_operand" "=x")
1306 (match_operand:V4SF 1 "register_operand" "x")
1307 (parallel [(const_int 0)]))
1308 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1310 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1311 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1315 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1316 (parallel [(const_int 0)]))
1317 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1319 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1320 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1322 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1323 [(set_attr "type" "sseadd")
1324 (set_attr "prefix" "vex")
1325 (set_attr "mode" "V4SF")])
;; Two-operand h<plusminus_mnemonic>ps on V4SF: operand 1 is tied to the
;; destination ("0").  Sets atom_unit to "complex" and prefix_rep to "1".
1327 (define_insn "sse3_h<plusminus_insn>v4sf3"
1328 [(set (match_operand:V4SF 0 "register_operand" "=x")
1333 (match_operand:V4SF 1 "register_operand" "0")
1334 (parallel [(const_int 0)]))
1335 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1337 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1338 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1342 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1343 (parallel [(const_int 0)]))
1344 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1346 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1347 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1349 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1350 [(set_attr "type" "sseadd")
1351 (set_attr "atom_unit" "complex")
1352 (set_attr "prefix_rep" "1")
1353 (set_attr "mode" "V4SF")])
;; vh<plusminus_mnemonic>pd on V2DF (VEX, three operands).  The RTL selects
;; DF elements 0 and 1 of operand 1 and of operand 2.
1355 (define_insn "*avx_h<plusminus_insn>v2df3"
1356 [(set (match_operand:V2DF 0 "register_operand" "=x")
1360 (match_operand:V2DF 1 "register_operand" "x")
1361 (parallel [(const_int 0)]))
1362 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1365 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1366 (parallel [(const_int 0)]))
1367 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1369 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1370 [(set_attr "type" "sseadd")
1371 (set_attr "prefix" "vex")
1372 (set_attr "mode" "V2DF")])
;; Two-operand h<plusminus_mnemonic>pd on V2DF: operand 1 is tied to the
;; destination ("0") and the template prints only operands 2 and 0.
1374 (define_insn "sse3_h<plusminus_insn>v2df3"
1375 [(set (match_operand:V2DF 0 "register_operand" "=x")
1379 (match_operand:V2DF 1 "register_operand" "0")
1380 (parallel [(const_int 0)]))
1381 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1384 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1385 (parallel [(const_int 0)]))
1386 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1388 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1389 [(set_attr "type" "sseadd")
1390 (set_attr "mode" "V2DF")])
;; Expander for the V4SF sum reduction.  The visible code holds two
;; strategies: two chained gen_sse3_haddv4sf3 calls through a fresh
;; temporary, and a call to ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the test that selects between them is not visible here.
1392 (define_expand "reduc_splus_v4sf"
1393 [(match_operand:V4SF 0 "register_operand" "")
1394 (match_operand:V4SF 1 "register_operand" "")]
1399 rtx tmp = gen_reg_rtx (V4SFmode);
1400 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1401 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1404 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Expander for the V2DF sum reduction: emits a single gen_sse3_haddv2df3
;; with operand 1 supplied as both sources.
1408 (define_expand "reduc_splus_v2df"
1409 [(match_operand:V2DF 0 "register_operand" "")
1410 (match_operand:V2DF 1 "register_operand" "")]
1413 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander for the V4SF max reduction: delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining generator.
1417 (define_expand "reduc_smax_v4sf"
1418 [(match_operand:V4SF 0 "register_operand" "")
1419 (match_operand:V4SF 1 "register_operand" "")]
1422 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Expander for the V4SF min reduction: delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining generator.
1426 (define_expand "reduc_smin_v4sf"
1427 [(match_operand:V4SF 0 "register_operand" "")
1428 (match_operand:V4SF 1 "register_operand" "")]
1431 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1435 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1437 ;; Parallel floating point comparisons
1439 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX packed compare with an explicit predicate: operand 3 is an SI
;; constant accepted by const_0_to_31_operand and is printed as the
;; immediate of vcmpp*.
1441 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1442 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1444 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1445 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1446 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1449 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1450 [(set_attr "type" "ssecmp")
1451 (set_attr "length_immediate" "1")
1452 (set_attr "prefix" "vex")
1453 (set_attr "mode" "<MODE>")])
;; VEX compare with an explicit predicate, wrapped in a vec_merge: operand 3
;; is an SI constant accepted by const_0_to_31_operand and is printed as
;; the immediate of vcmps*.  Operand 0 is written by the instruction, so
;; it carries the "=x" output constraint like the other compare insns; an
;; empty constraint would leave the register allocator without a register
;; class or an output marker for the destination.
1455 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1456 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1457 (vec_merge:SSEMODEF2P
1459 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1460 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1461 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1466 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1467 [(set_attr "type" "ssecmp")
1468 (set_attr "length_immediate" "1")
1469 (set_attr "prefix" "vex")
1470 (set_attr "mode" "<ssescalarmode>")])
1472 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1473 ;; may generate 256bit vector compare instructions.
;; VEX packed compare whose predicate comes from match_operator 3
;; (avx_comparison_float_operator) and is printed through %D3 in the
;; mnemonic.
1474 (define_insn "*avx_maskcmp<mode>3"
1475 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1476 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1477 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1478 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1479 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1480 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1481 [(set_attr "type" "ssecmp")
1482 (set_attr "prefix" "vex")
1483 (set_attr "length_immediate" "1")
1484 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX compare over the SSEMODEF4 modes.  The predicate comes from
;; match_operator 3 (sse_comparison_operator) and is printed via %D3;
;; operand 1 is tied to the destination ("0").  The visible part of the
;; condition accepts either SSE_FLOAT_MODE_P or SSE_VEC_FLOAT_MODE_P.
1486 (define_insn "<sse>_maskcmp<mode>3"
1487 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1488 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1489 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1490 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1492 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1493 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1494 [(set_attr "type" "ssecmp")
1495 (set_attr "length_immediate" "1")
1496 (set_attr "mode" "<MODE>")])
;; VEX compare inside a vec_merge: the predicate comes from match_operator 3
;; (sse_comparison_operator), printed via %D3 in the "s" mnemonic form with
;; three operands.
1498 (define_insn "*avx_vmmaskcmp<mode>3"
1499 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1500 (vec_merge:SSEMODEF2P
1501 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1502 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1503 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1506 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1507 "vcmp%D3s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1508 [(set_attr "type" "ssecmp")
1509 (set_attr "prefix" "vex")
1510 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX compare inside a vec_merge: operand 1 is tied to the destination
;; ("0") and the predicate from match_operator 3 is printed via %D3.
1512 (define_insn "<sse>_vmmaskcmp<mode>3"
1513 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1514 (vec_merge:SSEMODEF2P
1515 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1516 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1517 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1520 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1521 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1522 [(set_attr "type" "ssecmp")
1523 (set_attr "length_immediate" "1")
1524 (set_attr "mode" "<ssescalarmode>")])
;; comis* insn: sets FLAGS_REG in CCFP mode from element 0 of operand 0 and
;; element 0 of operand 1.  The template starts with %v and the prefix
;; attribute is "maybe_vex"; prefix_data16 depends on whether the mode
;; attribute is DF.
1526 (define_insn "<sse>_comi"
1527 [(set (reg:CCFP FLAGS_REG)
1530 (match_operand:<ssevecmode> 0 "register_operand" "x")
1531 (parallel [(const_int 0)]))
1533 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1534 (parallel [(const_int 0)]))))]
1535 "SSE_FLOAT_MODE_P (<MODE>mode)"
1536 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1537 [(set_attr "type" "ssecomi")
1538 (set_attr "prefix" "maybe_vex")
1539 (set_attr "prefix_rep" "0")
1540 (set (attr "prefix_data16")
1541 (if_then_else (eq_attr "mode" "DF")
1543 (const_string "0")))
1544 (set_attr "mode" "<MODE>")])
;; ucomis* insn: same shape as the comi pattern but sets FLAGS_REG in
;; CCFPU mode.  prefix_data16 again depends on whether the mode attribute
;; is DF.
1546 (define_insn "<sse>_ucomi"
1547 [(set (reg:CCFPU FLAGS_REG)
1550 (match_operand:<ssevecmode> 0 "register_operand" "x")
1551 (parallel [(const_int 0)]))
1553 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1554 (parallel [(const_int 0)]))))]
1555 "SSE_FLOAT_MODE_P (<MODE>mode)"
1556 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1557 [(set_attr "type" "ssecomi")
1558 (set_attr "prefix" "maybe_vex")
1559 (set_attr "prefix_rep" "0")
1560 (set (attr "prefix_data16")
1561 (if_then_else (eq_attr "mode" "DF")
1563 (const_string "0")))
1564 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander over AVXMODEF2P: operands 4 and 5 are
;; compared by operator 3, and operands 1 and 2 are the two arms of the
;; if_then_else.  Expansion is delegated to ix86_expand_fp_vcond, whose
;; result is captured in `ok'.
;; NOTE(review): how `ok' is used afterwards is not visible in this excerpt.
1566 (define_expand "vcond<mode>"
1567 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1568 (if_then_else:AVXMODEF2P
1569 (match_operator 3 ""
1570 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1571 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1572 (match_operand:AVXMODEF2P 1 "general_operand" "")
1573 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1574 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1575 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1577 bool ok = ix86_expand_fp_vcond (operands);
1582 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1584 ;; Parallel floating point logical operations
1586 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX vandnp* insn on the AVXMODEF2P modes, three operands; operand 2 may
;; be a register or memory ("xm").
1588 (define_insn "avx_andnot<mode>3"
1589 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1592 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1593 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1594 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1595 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1596 [(set_attr "type" "sselog")
1597 (set_attr "prefix" "vex")
1598 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX andnp* insn on the SSEMODEF2P modes: operand 1 is tied to the
;; destination ("0") and the template prints only operands 2 and 0.
1600 (define_insn "<sse>_andnot<mode>3"
1601 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1604 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1605 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1606 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1607 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1608 [(set_attr "type" "sselog")
1609 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic codes on the AVX256MODEF2P modes; the only
;; preparation step is ix86_fixup_binary_operands_no_copy.
1611 (define_expand "<code><mode>3"
1612 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1613 (any_logic:AVX256MODEF2P
1614 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1615 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1616 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1617 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; VEX logical insn for the any_logic codes; operand 1 is commutative
;; ("%x").  The output code returns the v<logic>ps template when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the mode-suffixed
;; v<logic>p* template otherwise.
1619 (define_insn "*avx_<code><mode>3"
1620 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1621 (any_logic:AVXMODEF2P
1622 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1623 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1624 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1625 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1627 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1628 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1630 return "v<logic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
1632 [(set_attr "type" "sselog")
1633 (set_attr "prefix" "vex")
1634 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic codes on the SSEMODEF2P modes; the only
;; preparation step is ix86_fixup_binary_operands_no_copy.
1636 (define_expand "<code><mode>3"
1637 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1638 (any_logic:SSEMODEF2P
1639 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1640 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1641 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1642 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-VEX logical insn for the any_logic codes; operand 1 is commutative
;; and tied to the destination ("%0").  The output code returns <logic>ps
;; when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the mode-suffixed
;; <logic>p* template otherwise.
1644 (define_insn "*<code><mode>3"
1645 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1646 (any_logic:SSEMODEF2P
1647 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1648 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1649 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1650 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1652 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1653 return "<logic>ps\t{%2, %0|%0, %2}";
1655 return "<logic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}";
1657 [(set_attr "type" "sselog")
1658 (set_attr "mode" "<MODE>")])
;; copysign expander over SSEMODEF2P.  Operand 3 is a mask built by
;; ix86_build_signbit_mask; the visible RTL combines (not mask) with
;; operand 1, (and mask operand 2), and finally iors the two temporaries
;; (operands 4 and 5, fresh pseudos) into operand 0.
1660 (define_expand "copysign<mode>3"
1663 (not:SSEMODEF2P (match_dup 3))
1664 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1666 (and:SSEMODEF2P (match_dup 3)
1667 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1668 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1669 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1670 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1672 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1674 operands[4] = gen_reg_rtx (<MODE>mode);
1675 operands[5] = gen_reg_rtx (<MODE>mode);
1678 ;; Also define scalar versions. These are used for abs, neg, and
1679 ;; conditional move. Using subregs into vector modes causes register
1680 ;; allocation lossage. These patterns do not allow memory operands
1681 ;; because the native instructions read the full 128-bits.
;; VEX vandnp* insn on the scalar MODEF modes.  All operands are
;; register_operand (no memory alternative) and the mode attribute is the
;; vector mode <ssevecmode>.
1683 (define_insn "*avx_andnot<mode>3"
1684 [(set (match_operand:MODEF 0 "register_operand" "=x")
1687 (match_operand:MODEF 1 "register_operand" "x"))
1688 (match_operand:MODEF 2 "register_operand" "x")))]
1689 "AVX_FLOAT_MODE_P (<MODE>mode)"
1690 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1691 [(set_attr "type" "sselog")
1692 (set_attr "prefix" "vex")
1693 (set_attr "mode" "<ssevecmode>")])
;; Non-VEX andnp* insn on the scalar MODEF modes: operand 1 is tied to the
;; destination ("0") and operand 2 must be a register.
1695 (define_insn "*andnot<mode>3"
1696 [(set (match_operand:MODEF 0 "register_operand" "=x")
1699 (match_operand:MODEF 1 "register_operand" "0"))
1700 (match_operand:MODEF 2 "register_operand" "x")))]
1701 "SSE_FLOAT_MODE_P (<MODE>mode)"
1702 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1703 [(set_attr "type" "sselog")
1704 (set_attr "mode" "<ssevecmode>")])
;; VEX logical insn on the scalar MODEF modes, register operands only.
;; The output code returns v<logic>ps when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the mode-suffixed
;; v<logic>p* template otherwise.
1706 (define_insn "*avx_<code><mode>3"
1707 [(set (match_operand:MODEF 0 "register_operand" "=x")
1709 (match_operand:MODEF 1 "register_operand" "x")
1710 (match_operand:MODEF 2 "register_operand" "x")))]
1711 "AVX_FLOAT_MODE_P (<MODE>mode)"
1713 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1714 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1716 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1718 [(set_attr "type" "sselog")
1719 (set_attr "prefix" "vex")
1720 (set_attr "mode" "<ssevecmode>")])
;; Non-VEX logical insn on the scalar MODEF modes: operand 1 is tied to the
;; destination ("0").  The output code returns <logic>ps when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the mode-suffixed
;; <logic>p* template otherwise.
1722 (define_insn "*<code><mode>3"
1723 [(set (match_operand:MODEF 0 "register_operand" "=x")
1725 (match_operand:MODEF 1 "register_operand" "0")
1726 (match_operand:MODEF 2 "register_operand" "x")))]
1727 "SSE_FLOAT_MODE_P (<MODE>mode)"
1729 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1730 return "<logic>ps\t{%2, %0|%0, %2}";
1732 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1734 [(set_attr "type" "sselog")
1735 (set_attr "mode" "<ssevecmode>")])
1737 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1739 ;; FMA4 floating point multiply/accumulate instructions. This
1740 ;; includes the scalar versions of the instructions as well as the
;; vector versions.
1743 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1745 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1746 ;; combine to generate a multiply/add with two memory references. We then
1747 ;; split this insn, into loading up the destination register with one of the
1748 ;; memory operations. If we don't manage to split the insn, reload will
1749 ;; generate the appropriate moves. The reason this is needed, is that combine
1750 ;; has already folded one of the memory references into both the multiply and
1751 ;; add insns, and it can't generate a new pseudo. I.e.:
1752 ;; (set (reg1) (mem (addr1)))
1753 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1754 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; vfmadd* over the FMA4MODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Two alternatives: either operand 2 is
;; a register and operand 3 may be memory ("x"/"xm"), or operand 2 is
;; memory and operand 3 is a register ("m"/"x").
1756 (define_insn "fma4_fmadd<mode>4256"
1757 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1760 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1761 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1762 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1763 "TARGET_FMA4 && TARGET_FUSED_MADD"
1764 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1765 [(set_attr "type" "ssemuladd")
1766 (set_attr "mode" "<MODE>")])
1768 ;; Floating multiply and subtract.
;; vfmsub* over the FMA4MODEF4 modes; same two constraint alternatives as
;; fma4_fmadd<mode>4256 (operand 2 or operand 3 may be the memory operand).
1769 (define_insn "fma4_fmsub<mode>4256"
1770 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1773 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1774 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1775 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1776 "TARGET_FMA4 && TARGET_FUSED_MADD"
1777 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1778 [(set_attr "type" "ssemuladd")
1779 (set_attr "mode" "<MODE>")])
1781 ;; Floating point negative multiply and add.
1782 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; vfnmadd* over the FMA4MODEF4 modes.  Operand 3 appears first in the RTL,
;; ahead of operands 1 and 2, but the template still prints %3, %2, %1, %0.
1783 (define_insn "fma4_fnmadd<mode>4256"
1784 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1786 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1788 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1789 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1790 "TARGET_FMA4 && TARGET_FUSED_MADD"
1791 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1792 [(set_attr "type" "ssemuladd")
1793 (set_attr "mode" "<MODE>")])
1795 ;; Floating point negative multiply and subtract.
;; vfnmsub* over the FMA4MODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD; operand 1 is closed by an extra
;; wrapping rtx whose opening line is not visible in this excerpt.
1796 (define_insn "fma4_fnmsub<mode>4256"
1797 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1801 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1802 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1803 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1804 "TARGET_FMA4 && TARGET_FUSED_MADD"
1805 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1806 [(set_attr "type" "ssemuladd")
1807 (set_attr "mode" "<MODE>")])
;; vfmadd* over the SSEMODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Two alternatives: operand 2 register
;; with operand 3 register-or-memory, or operand 2 memory with operand 3
;; register.
1809 (define_insn "fma4_fmadd<mode>4"
1810 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1813 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1814 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1815 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1816 "TARGET_FMA4 && TARGET_FUSED_MADD"
1817 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1818 [(set_attr "type" "ssemuladd")
1819 (set_attr "mode" "<MODE>")])
1821 ;; For the scalar operations, use operand1 for the upper words that aren't
1822 ;; modified, so restrict the forms that are generated.
1823 ;; Scalar version of fmadd.
;; vec_merge form of fmadd over SSEMODEF2P; the template uses the
;; <ssemodesuffixf2s> mnemonic suffix and the same two constraint
;; alternatives as the full-vector pattern.
1824 (define_insn "fma4_vmfmadd<mode>4"
1825 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1826 (vec_merge:SSEMODEF2P
1829 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1830 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1831 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1834 "TARGET_FMA4 && TARGET_FUSED_MADD"
1835 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1836 [(set_attr "type" "ssemuladd")
1837 (set_attr "mode" "<MODE>")])
1839 ;; Floating multiply and subtract.
1840 ;; Allow two memory operands the same as fmadd.
;; vfmsub* over the SSEMODEF4 modes; either operand 2 or operand 3 may be
;; the memory operand, depending on the alternative.
1841 (define_insn "fma4_fmsub<mode>4"
1842 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1845 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1846 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1847 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1848 "TARGET_FMA4 && TARGET_FUSED_MADD"
1849 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1850 [(set_attr "type" "ssemuladd")
1851 (set_attr "mode" "<MODE>")])
1853 ;; For the scalar operations, use operand1 for the upper words that aren't
1854 ;; modified, so restrict the forms that are generated.
1855 ;; Scalar version of fmsub.
;; vec_merge form of fmsub over SSEMODEF2P; the template uses the
;; <ssemodesuffixf2s> mnemonic suffix.
1856 (define_insn "fma4_vmfmsub<mode>4"
1857 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1858 (vec_merge:SSEMODEF2P
1861 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1862 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1863 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1866 "TARGET_FMA4 && TARGET_FUSED_MADD"
1867 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1868 [(set_attr "type" "ssemuladd")
1869 (set_attr "mode" "<MODE>")])
1871 ;; Floating point negative multiply and add.
1872 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; vfnmadd* over the SSEMODEF4 modes.  Operand 3 appears first in the RTL,
;; ahead of operands 1 and 2, while the template still prints
;; %3, %2, %1, %0.
1873 (define_insn "fma4_fnmadd<mode>4"
1874 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1876 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1878 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1879 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1880 "TARGET_FMA4 && TARGET_FUSED_MADD"
1881 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1882 [(set_attr "type" "ssemuladd")
1883 (set_attr "mode" "<MODE>")])
1885 ;; For the scalar operations, use operand1 for the upper words that aren't
1886 ;; modified, so restrict the forms that are generated.
1887 ;; Scalar version of fnmadd.
;; vec_merge form of fnmadd over SSEMODEF2P; operand 3 appears first in the
;; RTL and the template uses the <ssemodesuffixf2s> mnemonic suffix.
1888 (define_insn "fma4_vmfnmadd<mode>4"
1889 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1890 (vec_merge:SSEMODEF2P
1892 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1894 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1895 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1898 "TARGET_FMA4 && TARGET_FUSED_MADD"
1899 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1900 [(set_attr "type" "ssemuladd")
1901 (set_attr "mode" "<MODE>")])
1903 ;; Floating point negative multiply and subtract.
1904 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; vfnmsub* over the SSEMODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD; operand 1 is closed by an extra
;; wrapping rtx whose opening line is not visible in this excerpt.
1905 (define_insn "fma4_fnmsub<mode>4"
1906 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1910 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1911 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1912 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1913 "TARGET_FMA4 && TARGET_FUSED_MADD"
1914 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1915 [(set_attr "type" "ssemuladd")
1916 (set_attr "mode" "<MODE>")])
1918 ;; For the scalar operations, use operand1 for the upper words that aren't
1919 ;; modified, so restrict the forms that are generated.
1920 ;; Scalar version of fnmsub.
;; vec_merge form of negative multiply-subtract over the SSEMODEF2P
;; modes; emits vfnmsub<ssemodesuffixf2s>.  Operands 1 and 2 are the
;; commutative pair, operand 3 follows; one memory input per alternative.
1921 (define_insn "fma4_vmfnmsub<mode>4"
1922 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1923 (vec_merge:SSEMODEF2P
1927 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1928 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1929 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1932 "TARGET_FMA4 && TARGET_FUSED_MADD"
1933 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1934 [(set_attr "type" "ssemuladd")
1935 (set_attr "mode" "<MODE>")])
;; Intrinsic variant (pattern tagged UNSPEC_FMA4_INTRINSIC) of multiply-add
;; over the FMA4MODEF4 modes; emits vfmadd<fma4modesuffixf4>.
;; Operands 1 and 2 are commutative (%); alternative 0 allows operand 3
;; in memory, alternative 1 allows operand 2 in memory.
1937 (define_insn "fma4i_fmadd<mode>4256"
1938 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1942 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1943 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1944 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1945 UNSPEC_FMA4_INTRINSIC))]
1947 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1948 [(set_attr "type" "ssemuladd")
1949 (set_attr "mode" "<MODE>")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of multiply-subtract over the
;; FMA4MODEF4 modes; emits vfmsub<fma4modesuffixf4>.  Constraints mirror
;; the other FMA4 patterns: commutative operands 1 and 2, and a single
;; memory input per alternative (operand 3, then operand 2).
1951 (define_insn "fma4i_fmsub<mode>4256"
1952 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1956 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1957 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1958 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1959 UNSPEC_FMA4_INTRINSIC))]
1961 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1962 [(set_attr "type" "ssemuladd")
1963 (set_attr "mode" "<MODE>")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of negative multiply-add over
;; the FMA4MODEF4 modes; emits vfnmadd<fma4modesuffixf4>.  Operand 3 is
;; listed ahead of the commutative pair 1 and 2; one memory input per
;; alternative.
1965 (define_insn "fma4i_fnmadd<mode>4256"
1966 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1969 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1971 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1972 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1973 UNSPEC_FMA4_INTRINSIC))]
1975 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1976 [(set_attr "type" "ssemuladd")
1977 (set_attr "mode" "<MODE>")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of negative multiply-subtract
;; over the FMA4MODEF4 modes; emits vfnmsub<fma4modesuffixf4>.
;; Operands 1 and 2 are commutative (%), operand 3 follows; one memory
;; input per alternative.
1979 (define_insn "fma4i_fnmsub<mode>4256"
1980 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1985 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1986 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1987 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1988 UNSPEC_FMA4_INTRINSIC))]
1990 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1991 [(set_attr "type" "ssemuladd")
1992 (set_attr "mode" "<MODE>")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of multiply-add over the
;; SSEMODEF2P modes; emits vfmadd<ssemodesuffixf4>.  Operands 1 and 2
;; are commutative (%); alternative 0 allows operand 3 in memory,
;; alternative 1 allows operand 2 in memory.
1994 (define_insn "fma4i_fmadd<mode>4"
1995 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1999 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2000 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2001 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2002 UNSPEC_FMA4_INTRINSIC))]
2004 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2005 [(set_attr "type" "ssemuladd")
2006 (set_attr "mode" "<MODE>")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of multiply-subtract over the
;; SSEMODEF2P modes; emits vfmsub<ssemodesuffixf4>.  Commutative
;; operands 1 and 2; a single memory input per alternative.
2008 (define_insn "fma4i_fmsub<mode>4"
2009 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2013 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2014 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2015 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2016 UNSPEC_FMA4_INTRINSIC))]
2018 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2019 [(set_attr "type" "ssemuladd")
2020 (set_attr "mode" "<MODE>")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of negative multiply-add over
;; the SSEMODEF2P modes; emits vfnmadd<ssemodesuffixf4>.  Operand 3 is
;; listed ahead of the commutative pair 1 and 2; one memory input per
;; alternative.
2022 (define_insn "fma4i_fnmadd<mode>4"
2023 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2026 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2028 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2029 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
2030 UNSPEC_FMA4_INTRINSIC))]
2032 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2033 [(set_attr "type" "ssemuladd")
2034 (set_attr "mode" "<MODE>")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of negative multiply-subtract
;; over the SSEMODEF2P modes; emits vfnmsub<ssemodesuffixf4>.
;; Operands 1 and 2 are commutative (%), operand 3 follows; one memory
;; input per alternative.
2036 (define_insn "fma4i_fnmsub<mode>4"
2037 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2042 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2043 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2044 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2045 UNSPEC_FMA4_INTRINSIC))]
2047 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2048 [(set_attr "type" "ssemuladd")
2049 (set_attr "mode" "<MODE>")])
2051 ;; For the scalar operations, use operand1 for the upper words that aren't
2052 ;; modified, so restrict the forms that are accepted.
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) vec_merge form of multiply-add over
;; the SSEMODEF2P modes; emits vfmadd<ssemodesuffixf2s>.  The mode
;; attribute is the scalar mode (<ssescalarmode>) rather than <MODE>.
2053 (define_insn "fma4i_vmfmadd<mode>4"
2054 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2056 [(vec_merge:SSEMODEF2P
2059 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2060 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2061 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2064 UNSPEC_FMA4_INTRINSIC))]
2066 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2067 [(set_attr "type" "ssemuladd")
2068 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) vec_merge form of multiply-subtract
;; over the SSEMODEF2P modes; emits vfmsub<ssemodesuffixf2s> and is
;; tagged with the scalar mode attribute <ssescalarmode>.
2070 (define_insn "fma4i_vmfmsub<mode>4"
2071 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2073 [(vec_merge:SSEMODEF2P
2076 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2077 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2078 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2081 UNSPEC_FMA4_INTRINSIC))]
2083 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2084 [(set_attr "type" "ssemuladd")
2085 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) vec_merge form of negative
;; multiply-add over the SSEMODEF2P modes; emits
;; vfnmadd<ssemodesuffixf2s>.  Operand 3 precedes the commutative pair
;; 1 and 2; the mode attribute is <ssescalarmode>.
2087 (define_insn "fma4i_vmfnmadd<mode>4"
2088 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2090 [(vec_merge:SSEMODEF2P
2092 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2094 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2095 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2098 UNSPEC_FMA4_INTRINSIC))]
2100 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2101 [(set_attr "type" "ssemuladd")
2102 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) vec_merge form of negative
;; multiply-subtract over the SSEMODEF2P modes; emits
;; vfnmsub<ssemodesuffixf2s>.  The mode attribute is <ssescalarmode>.
2104 (define_insn "fma4i_vmfnmsub<mode>4"
2105 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2107 [(vec_merge:SSEMODEF2P
2111 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2112 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2113 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2116 UNSPEC_FMA4_INTRINSIC))]
2118 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2119 [(set_attr "type" "ssemuladd")
2120 (set_attr "mode" "<ssescalarmode>")])
2122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2124 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2126 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V8SF multiply with alternating add/subtract; emits vfmaddsubps.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.  Operands 1 and 2 are
;; commutative (%); alternative 0 allows operand 3 in memory,
;; alternative 1 allows operand 2 in memory.
2128 (define_insn "fma4_fmaddsubv8sf4"
2129 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2133 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2134 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2135 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2142 "TARGET_FMA4 && TARGET_FUSED_MADD"
2143 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2144 [(set_attr "type" "ssemuladd")
2145 (set_attr "mode" "V8SF")])
;; V4DF counterpart of the fmaddsub pattern; emits vfmaddsubpd.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD; one memory input per
;; alternative (operand 3, then operand 2).
2147 (define_insn "fma4_fmaddsubv4df4"
2148 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2152 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2153 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2154 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2161 "TARGET_FMA4 && TARGET_FUSED_MADD"
2162 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2163 [(set_attr "type" "ssemuladd")
2164 (set_attr "mode" "V4DF")])
;; V4SF counterpart of the fmaddsub pattern; emits vfmaddsubps.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD; one memory input per
;; alternative (operand 3, then operand 2).
2166 (define_insn "fma4_fmaddsubv4sf4"
2167 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2171 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2172 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2173 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2180 "TARGET_FMA4 && TARGET_FUSED_MADD"
2181 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2182 [(set_attr "type" "ssemuladd")
2183 (set_attr "mode" "V4SF")])
;; V2DF counterpart of the fmaddsub pattern; emits vfmaddsubpd.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD; one memory input per
;; alternative (operand 3, then operand 2).
2185 (define_insn "fma4_fmaddsubv2df4"
2186 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2190 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2191 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2192 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2199 "TARGET_FMA4 && TARGET_FUSED_MADD"
2200 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2201 [(set_attr "type" "ssemuladd")
2202 (set_attr "mode" "V2DF")])
;; V8SF multiply with alternating subtract/add; emits vfmsubaddps.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.  Operands 1 and 2 are
;; commutative (%); alternative 0 allows operand 3 in memory,
;; alternative 1 allows operand 2 in memory.
2204 (define_insn "fma4_fmsubaddv8sf4"
2205 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2209 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2210 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2211 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2218 "TARGET_FMA4 && TARGET_FUSED_MADD"
2219 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2220 [(set_attr "type" "ssemuladd")
2221 (set_attr "mode" "V8SF")])
;; V4DF counterpart of the fmsubadd pattern; emits vfmsubaddpd.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD; one memory input per
;; alternative (operand 3, then operand 2).
2223 (define_insn "fma4_fmsubaddv4df4"
2224 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2228 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2229 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2230 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2237 "TARGET_FMA4 && TARGET_FUSED_MADD"
2238 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2239 [(set_attr "type" "ssemuladd")
2240 (set_attr "mode" "V4DF")])
;; V4SF counterpart of the fmsubadd pattern; emits vfmsubaddps.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD; one memory input per
;; alternative (operand 3, then operand 2).
2242 (define_insn "fma4_fmsubaddv4sf4"
2243 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2247 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2248 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2249 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2256 "TARGET_FMA4 && TARGET_FUSED_MADD"
2257 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2258 [(set_attr "type" "ssemuladd")
2259 (set_attr "mode" "V4SF")])
;; V2DF counterpart of the fmsubadd pattern; emits vfmsubaddpd.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD; one memory input per
;; alternative (operand 3, then operand 2).
2261 (define_insn "fma4_fmsubaddv2df4"
2262 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2266 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2267 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2268 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2275 "TARGET_FMA4 && TARGET_FUSED_MADD"
2276 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2277 [(set_attr "type" "ssemuladd")
2278 (set_attr "mode" "V2DF")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of the V8SF fmaddsub
;; pattern; emits vfmaddsubps.  Operands 1 and 2 are commutative (%);
;; one memory input per alternative (operand 3, then operand 2).
2280 (define_insn "fma4i_fmaddsubv8sf4"
2281 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2286 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2287 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2288 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2295 UNSPEC_FMA4_INTRINSIC))]
2297 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2298 [(set_attr "type" "ssemuladd")
2299 (set_attr "mode" "V8SF")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of the V4DF fmaddsub
;; pattern; emits vfmaddsubpd.  One memory input per alternative
;; (operand 3, then operand 2).
2301 (define_insn "fma4i_fmaddsubv4df4"
2302 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2307 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2308 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2309 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2316 UNSPEC_FMA4_INTRINSIC))]
2318 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2319 [(set_attr "type" "ssemuladd")
2320 (set_attr "mode" "V4DF")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of the V4SF fmaddsub
;; pattern; emits vfmaddsubps.  One memory input per alternative
;; (operand 3, then operand 2).
2322 (define_insn "fma4i_fmaddsubv4sf4"
2323 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2328 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2329 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2330 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2337 UNSPEC_FMA4_INTRINSIC))]
2339 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2340 [(set_attr "type" "ssemuladd")
2341 (set_attr "mode" "V4SF")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of the V2DF fmaddsub
;; pattern; emits vfmaddsubpd.  One memory input per alternative
;; (operand 3, then operand 2).
2343 (define_insn "fma4i_fmaddsubv2df4"
2344 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2349 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2350 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2351 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2358 UNSPEC_FMA4_INTRINSIC))]
2360 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2361 [(set_attr "type" "ssemuladd")
2362 (set_attr "mode" "V2DF")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of the V8SF fmsubadd
;; pattern; emits vfmsubaddps.  Operands 1 and 2 are commutative (%);
;; one memory input per alternative (operand 3, then operand 2).
2364 (define_insn "fma4i_fmsubaddv8sf4"
2365 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2370 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2371 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2372 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2379 UNSPEC_FMA4_INTRINSIC))]
2381 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2382 [(set_attr "type" "ssemuladd")
2383 (set_attr "mode" "V8SF")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of the V4DF fmsubadd
;; pattern; emits vfmsubaddpd.  One memory input per alternative
;; (operand 3, then operand 2).
2385 (define_insn "fma4i_fmsubaddv4df4"
2386 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2391 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2392 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2393 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2400 UNSPEC_FMA4_INTRINSIC))]
2402 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2403 [(set_attr "type" "ssemuladd")
2404 (set_attr "mode" "V4DF")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of the V4SF fmsubadd
;; pattern; emits vfmsubaddps.  One memory input per alternative
;; (operand 3, then operand 2).
2406 (define_insn "fma4i_fmsubaddv4sf4"
2407 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2412 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2413 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2414 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2421 UNSPEC_FMA4_INTRINSIC))]
2423 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2424 [(set_attr "type" "ssemuladd")
2425 (set_attr "mode" "V4SF")])
;; Intrinsic variant (UNSPEC_FMA4_INTRINSIC) of the V2DF fmsubadd
;; pattern; emits vfmsubaddpd.  One memory input per alternative
;; (operand 3, then operand 2).
2427 (define_insn "fma4i_fmsubaddv2df4"
2428 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2433 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2434 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2435 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2442 UNSPEC_FMA4_INTRINSIC))]
2444 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2445 [(set_attr "type" "ssemuladd")
2446 (set_attr "mode" "V2DF")])
2448 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2450 ;; Parallel single-precision floating point conversion operations
2452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register or memory) to
;; V2SF and places it in the V4SF destination.  Operand 1 is tied to the
;; output register (constraint 0) and is not named in the template.
2454 (define_insn "sse_cvtpi2ps"
2455 [(set (match_operand:V4SF 0 "register_operand" "=x")
2458 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2459 (match_operand:V4SF 1 "register_operand" "0")
2462 "cvtpi2ps\t{%2, %0|%0, %2}"
2463 [(set_attr "type" "ssecvt")
2464 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts the V4SF operand 1 (SSE register or memory) via an
;; unspec in V4SI mode and keeps elements 0 and 1 as the V2SI result,
;; which lives in an MMX register (constraint y).
2466 (define_insn "sse_cvtps2pi"
2467 [(set (match_operand:V2SI 0 "register_operand" "=y")
2469 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2471 (parallel [(const_int 0) (const_int 1)])))]
2473 "cvtps2pi\t{%1, %0|%0, %1}"
2474 [(set_attr "type" "ssecvt")
2475 (set_attr "unit" "mmx")
2476 (set_attr "mode" "DI")])
;; cvttps2pi: like cvtps2pi but expressed with the RTL fix operator
;; instead of an unspec.  Elements 0 and 1 of the V4SI conversion form
;; the V2SI result in an MMX register.  prefix_rep is explicitly 0.
2478 (define_insn "sse_cvttps2pi"
2479 [(set (match_operand:V2SI 0 "register_operand" "=y")
2481 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2482 (parallel [(const_int 0) (const_int 1)])))]
2484 "cvttps2pi\t{%1, %0|%0, %1}"
2485 [(set_attr "type" "ssecvt")
2486 (set_attr "unit" "mmx")
2487 (set_attr "prefix_rep" "0")
2488 (set_attr "mode" "SF")])
;; VEX three-operand vcvtsi2ss: converts the SI operand 2 (general
;; register or memory) to SF.  Operand 1 is an independent SSE register
;; rather than being tied to the destination.
2490 (define_insn "*avx_cvtsi2ss"
2491 [(set (match_operand:V4SF 0 "register_operand" "=x")
2494 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2495 (match_operand:V4SF 1 "register_operand" "x")
2498 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2499 [(set_attr "type" "sseicvt")
2500 (set_attr "prefix" "vex")
2501 (set_attr "mode" "SF")])
;; Two-operand cvtsi2ss: converts the SI operand 2 to SF; operand 1 is
;; tied to the destination (constraint 0).  The register and memory
;; sources are separate alternatives so the athlon_decode and
;; amdfam10_decode attributes can differ between them (vector vs double).
2503 (define_insn "sse_cvtsi2ss"
2504 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2507 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2508 (match_operand:V4SF 1 "register_operand" "0,0")
2511 "cvtsi2ss\t{%2, %0|%0, %2}"
2512 [(set_attr "type" "sseicvt")
2513 (set_attr "athlon_decode" "vector,double")
2514 (set_attr "amdfam10_decode" "vector,double")
2515 (set_attr "mode" "SF")])
;; 64-bit source form of the VEX conversion: vcvtsi2ssq converts the DI
;; operand 2 to SF.  Requires TARGET_AVX && TARGET_64BIT; length_vex is
;; set to 4.
2517 (define_insn "*avx_cvtsi2ssq"
2518 [(set (match_operand:V4SF 0 "register_operand" "=x")
2521 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2522 (match_operand:V4SF 1 "register_operand" "x")
2524 "TARGET_AVX && TARGET_64BIT"
2525 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2526 [(set_attr "type" "sseicvt")
2527 (set_attr "length_vex" "4")
2528 (set_attr "prefix" "vex")
2529 (set_attr "mode" "SF")])
;; 64-bit source form of cvtsi2ss: cvtsi2ssq converts the DI operand 2 to
;; SF with operand 1 tied to the destination.  Requires
;; TARGET_SSE && TARGET_64BIT and marks the encoding as needing a REX
;; prefix (prefix_rex 1).
2531 (define_insn "sse_cvtsi2ssq"
2532 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2535 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2536 (match_operand:V4SF 1 "register_operand" "0,0")
2538 "TARGET_SSE && TARGET_64BIT"
2539 "cvtsi2ssq\t{%2, %0|%0, %2}"
2540 [(set_attr "type" "sseicvt")
2541 (set_attr "prefix_rex" "1")
2542 (set_attr "athlon_decode" "vector,double")
2543 (set_attr "amdfam10_decode" "vector,double")
2544 (set_attr "mode" "SF")])
;; cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of the V4SF
;; operand 1 into an SI general register.  The %v template prefix pairs
;; with the maybe_vex prefix attribute so the same pattern covers the
;; legacy and VEX spellings.
2546 (define_insn "sse_cvtss2si"
2547 [(set (match_operand:SI 0 "register_operand" "=r,r")
2550 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2551 (parallel [(const_int 0)]))]
2552 UNSPEC_FIX_NOTRUNC))]
2554 "%vcvtss2si\t{%1, %0|%0, %1}"
2555 [(set_attr "type" "sseicvt")
2556 (set_attr "athlon_decode" "double,vector")
2557 (set_attr "prefix_rep" "1")
2558 (set_attr "prefix" "maybe_vex")
2559 (set_attr "mode" "SI")])
;; Same cvtss2si instruction, but the source is a plain SF scalar operand
;; wrapped directly in UNSPEC_FIX_NOTRUNC instead of an element picked
;; out of a V4SF vector.
2561 (define_insn "sse_cvtss2si_2"
2562 [(set (match_operand:SI 0 "register_operand" "=r,r")
2563 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2564 UNSPEC_FIX_NOTRUNC))]
2566 "%vcvtss2si\t{%1, %0|%0, %1}"
2567 [(set_attr "type" "sseicvt")
2568 (set_attr "athlon_decode" "double,vector")
2569 (set_attr "amdfam10_decode" "double,double")
2570 (set_attr "prefix_rep" "1")
2571 (set_attr "prefix" "maybe_vex")
2572 (set_attr "mode" "SI")])
;; 64-bit result form of cvtss2si: UNSPEC_FIX_NOTRUNC conversion of
;; element 0 of the V4SF operand 1 into a DI general register.  Requires
;; TARGET_SSE && TARGET_64BIT.  The {q} suffix in the template is
;; dialect-dependent: it appears only in AT&T output.
2574 (define_insn "sse_cvtss2siq"
2575 [(set (match_operand:DI 0 "register_operand" "=r,r")
2578 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2579 (parallel [(const_int 0)]))]
2580 UNSPEC_FIX_NOTRUNC))]
2581 "TARGET_SSE && TARGET_64BIT"
2582 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2583 [(set_attr "type" "sseicvt")
2584 (set_attr "athlon_decode" "double,vector")
2585 (set_attr "prefix_rep" "1")
2586 (set_attr "prefix" "maybe_vex")
2587 (set_attr "mode" "DI")])
;; 64-bit result form taking a plain SF scalar operand wrapped in
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE && TARGET_64BIT.
2589 (define_insn "sse_cvtss2siq_2"
2590 [(set (match_operand:DI 0 "register_operand" "=r,r")
2591 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2592 UNSPEC_FIX_NOTRUNC))]
2593 "TARGET_SSE && TARGET_64BIT"
2594 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2595 [(set_attr "type" "sseicvt")
2596 (set_attr "athlon_decode" "double,vector")
2597 (set_attr "amdfam10_decode" "double,double")
2598 (set_attr "prefix_rep" "1")
2599 (set_attr "prefix" "maybe_vex")
2600 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of the V4SF operand 1 into an SI general
;; register.  Unlike sse_cvtss2si there is no UNSPEC_FIX_NOTRUNC wrapper
;; in this pattern.
2602 (define_insn "sse_cvttss2si"
2603 [(set (match_operand:SI 0 "register_operand" "=r,r")
2606 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2607 (parallel [(const_int 0)]))))]
2609 "%vcvttss2si\t{%1, %0|%0, %1}"
2610 [(set_attr "type" "sseicvt")
2611 (set_attr "athlon_decode" "double,vector")
2612 (set_attr "amdfam10_decode" "double,double")
2613 (set_attr "prefix_rep" "1")
2614 (set_attr "prefix" "maybe_vex")
2615 (set_attr "mode" "SI")])
;; 64-bit result form of cvttss2si: converts element 0 of the V4SF
;; operand 1 into a DI general register.  Requires
;; TARGET_SSE && TARGET_64BIT.
2617 (define_insn "sse_cvttss2siq"
2618 [(set (match_operand:DI 0 "register_operand" "=r,r")
2621 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2622 (parallel [(const_int 0)]))))]
2623 "TARGET_SSE && TARGET_64BIT"
2624 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2625 [(set_attr "type" "sseicvt")
2626 (set_attr "athlon_decode" "double,vector")
2627 (set_attr "amdfam10_decode" "double,double")
2628 (set_attr "prefix_rep" "1")
2629 (set_attr "prefix" "maybe_vex")
2630 (set_attr "mode" "DI")])
;; vcvtdq2ps over the AVXMODEDCVTDQ2PS modes: RTL float conversion of
;; operand 1, whose mode is the matching <avxcvtvecmode>.  VEX-encoded.
2632 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2633 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2634 (float:AVXMODEDCVTDQ2PS
2635 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2637 "vcvtdq2ps\t{%1, %0|%0, %1}"
2638 [(set_attr "type" "ssecvt")
2639 (set_attr "prefix" "vex")
2640 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: RTL float conversion of the V4SI operand 1 (SSE register or
;; memory) into a V4SF register.
2642 (define_insn "sse2_cvtdq2ps"
2643 [(set (match_operand:V4SF 0 "register_operand" "=x")
2644 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2646 "cvtdq2ps\t{%1, %0|%0, %1}"
2647 [(set_attr "type" "ssecvt")
2648 (set_attr "mode" "V4SF")])
;; Expander for unsigned V4SI to V4SF conversion, assembled from four
;; steps: a signed float conversion of operand 1, an lt comparison of
;; that value against operand 3, an and of the comparison result with
;; operand 4, and a final plus that produces operand 0.
;; The preparation code sets operand 3 to a zero V4SF register, operand 4
;; to a V4SF register built from the constant 2**32 (real_ldexp of
;; dconst1 by 32), and operands 5..7 to fresh V4SF pseudos.
;; NOTE(review): presumably operands 5, 6 and 7 are the destinations of
;; the first three steps, so negative signed results get 2**32 added
;; back - confirm against the full pattern.
2650 (define_expand "sse2_cvtudq2ps"
2652 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2654 (lt:V4SF (match_dup 5) (match_dup 3)))
2656 (and:V4SF (match_dup 6) (match_dup 4)))
2657 (set (match_operand:V4SF 0 "register_operand" "")
2658 (plus:V4SF (match_dup 5) (match_dup 7)))]
2661 REAL_VALUE_TYPE TWO32r;
2665 real_ldexp (&TWO32r, &dconst1, 32);
2666 x = const_double_from_real_value (TWO32r, SFmode);
2668 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2669 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2671 for (i = 5; i < 8; i++)
2672 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq over the AVXMODEDCVTPS2DQ modes: UNSPEC_FIX_NOTRUNC
;; conversion of operand 1, whose mode is the matching <avxcvtvecmode>.
;; VEX-encoded.
2675 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2676 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2677 (unspec:AVXMODEDCVTPS2DQ
2678 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2679 UNSPEC_FIX_NOTRUNC))]
2681 "vcvtps2dq\t{%1, %0|%0, %1}"
2682 [(set_attr "type" "ssecvt")
2683 (set_attr "prefix" "vex")
2684 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: UNSPEC_FIX_NOTRUNC conversion of the V4SF operand 1 into a
;; V4SI register.  Marked as carrying the data16 prefix.
2686 (define_insn "sse2_cvtps2dq"
2687 [(set (match_operand:V4SI 0 "register_operand" "=x")
2688 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2689 UNSPEC_FIX_NOTRUNC))]
2691 "cvtps2dq\t{%1, %0|%0, %1}"
2692 [(set_attr "type" "ssecvt")
2693 (set_attr "prefix_data16" "1")
2694 (set_attr "mode" "TI")])
;; vcvttps2dq over the AVXMODEDCVTPS2DQ modes: expressed with the RTL fix
;; operator on operand 1 (mode <avxcvtvecmode>) rather than an unspec.
;; VEX-encoded.
2696 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2697 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2698 (fix:AVXMODEDCVTPS2DQ
2699 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2701 "vcvttps2dq\t{%1, %0|%0, %1}"
2702 [(set_attr "type" "ssecvt")
2703 (set_attr "prefix" "vex")
2704 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: RTL fix conversion of the V4SF operand 1 into a V4SI
;; register.  Encoded with a rep prefix and explicitly without data16.
2706 (define_insn "sse2_cvttps2dq"
2707 [(set (match_operand:V4SI 0 "register_operand" "=x")
2708 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2710 "cvttps2dq\t{%1, %0|%0, %1}"
2711 [(set_attr "type" "ssecvt")
2712 (set_attr "prefix_rep" "1")
2713 (set_attr "prefix_data16" "0")
2714 (set_attr "mode" "TI")])
2716 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2718 ;; Parallel double-precision floating point conversion operations
2720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: RTL float conversion of the V2SI operand 1 into a V2DF
;; register.  The MMX-register source (y) and the memory source (m) are
;; separate alternatives; only the first sets unit mmx and
;; prefix_data16, the second leaves both at their defaults (*).
2722 (define_insn "sse2_cvtpi2pd"
2723 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2724 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2726 "cvtpi2pd\t{%1, %0|%0, %1}"
2727 [(set_attr "type" "ssecvt")
2728 (set_attr "unit" "mmx,*")
2729 (set_attr "prefix_data16" "1,*")
2730 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of the V2DF operand 1 into a
;; V2SI result held in an MMX register (constraint y).
2732 (define_insn "sse2_cvtpd2pi"
2733 [(set (match_operand:V2SI 0 "register_operand" "=y")
2734 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2735 UNSPEC_FIX_NOTRUNC))]
2737 "cvtpd2pi\t{%1, %0|%0, %1}"
2738 [(set_attr "type" "ssecvt")
2739 (set_attr "unit" "mmx")
2740 (set_attr "prefix_data16" "1")
2741 (set_attr "mode" "DI")])
;; cvttpd2pi: RTL fix conversion of the V2DF operand 1 into a V2SI result
;; held in an MMX register (constraint y).
2743 (define_insn "sse2_cvttpd2pi"
2744 [(set (match_operand:V2SI 0 "register_operand" "=y")
2745 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2747 "cvttpd2pi\t{%1, %0|%0, %1}"
2748 [(set_attr "type" "ssecvt")
2749 (set_attr "unit" "mmx")
2750 (set_attr "prefix_data16" "1")
2751 (set_attr "mode" "TI")])
;; VEX three-operand vcvtsi2sd: converts the SI operand 2 (general
;; register or memory) to DF.  Operand 1 is an independent SSE register
;; rather than being tied to the destination.
2753 (define_insn "*avx_cvtsi2sd"
2754 [(set (match_operand:V2DF 0 "register_operand" "=x")
2757 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2758 (match_operand:V2DF 1 "register_operand" "x")
2761 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2762 [(set_attr "type" "sseicvt")
2763 (set_attr "prefix" "vex")
2764 (set_attr "mode" "DF")])
;; Two-operand cvtsi2sd: converts the SI operand 2 to DF; operand 1 is
;; tied to the destination (constraint 0).  Register and memory sources
;; are separate alternatives with different decode attributes.
2766 (define_insn "sse2_cvtsi2sd"
2767 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2770 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2771 (match_operand:V2DF 1 "register_operand" "0,0")
2774 "cvtsi2sd\t{%2, %0|%0, %2}"
2775 [(set_attr "type" "sseicvt")
2776 (set_attr "mode" "DF")
2777 (set_attr "athlon_decode" "double,direct")
2778 (set_attr "amdfam10_decode" "vector,double")])
;; 64-bit source form of the VEX conversion: vcvtsi2sdq converts the DI
;; operand 2 to DF.  Requires TARGET_AVX && TARGET_64BIT; length_vex is
;; set to 4.
2780 (define_insn "*avx_cvtsi2sdq"
2781 [(set (match_operand:V2DF 0 "register_operand" "=x")
2784 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2785 (match_operand:V2DF 1 "register_operand" "x")
2787 "TARGET_AVX && TARGET_64BIT"
2788 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2789 [(set_attr "type" "sseicvt")
2790 (set_attr "length_vex" "4")
2791 (set_attr "prefix" "vex")
2792 (set_attr "mode" "DF")])
;; 64-bit source form of cvtsi2sd: cvtsi2sdq converts the DI operand 2 to
;; DF with operand 1 tied to the destination.  Requires
;; TARGET_SSE2 && TARGET_64BIT and marks the encoding as needing a REX
;; prefix (prefix_rex 1).
2794 (define_insn "sse2_cvtsi2sdq"
2795 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2798 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2799 (match_operand:V2DF 1 "register_operand" "0,0")
2801 "TARGET_SSE2 && TARGET_64BIT"
2802 "cvtsi2sdq\t{%2, %0|%0, %2}"
2803 [(set_attr "type" "sseicvt")
2804 (set_attr "prefix_rex" "1")
2805 (set_attr "mode" "DF")
2806 (set_attr "athlon_decode" "double,direct")
2807 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of the V2DF
;; operand 1 into an SI general register.  The %v template prefix pairs
;; with the maybe_vex prefix attribute so the same pattern covers the
;; legacy and VEX spellings.
2809 (define_insn "sse2_cvtsd2si"
2810 [(set (match_operand:SI 0 "register_operand" "=r,r")
2813 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2814 (parallel [(const_int 0)]))]
2815 UNSPEC_FIX_NOTRUNC))]
2817 "%vcvtsd2si\t{%1, %0|%0, %1}"
2818 [(set_attr "type" "sseicvt")
2819 (set_attr "athlon_decode" "double,vector")
2820 (set_attr "prefix_rep" "1")
2821 (set_attr "prefix" "maybe_vex")
2822 (set_attr "mode" "SI")])
;; Same cvtsd2si instruction, but the source is a plain DF scalar operand
;; wrapped directly in UNSPEC_FIX_NOTRUNC instead of an element picked
;; out of a V2DF vector.
2824 (define_insn "sse2_cvtsd2si_2"
2825 [(set (match_operand:SI 0 "register_operand" "=r,r")
2826 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2827 UNSPEC_FIX_NOTRUNC))]
2829 "%vcvtsd2si\t{%1, %0|%0, %1}"
2830 [(set_attr "type" "sseicvt")
2831 (set_attr "athlon_decode" "double,vector")
2832 (set_attr "amdfam10_decode" "double,double")
2833 (set_attr "prefix_rep" "1")
2834 (set_attr "prefix" "maybe_vex")
2835 (set_attr "mode" "SI")])
;; 64-bit result form of cvtsd2si: UNSPEC_FIX_NOTRUNC conversion of
;; element 0 of the V2DF operand 1 into a DI general register.  Requires
;; TARGET_SSE2 && TARGET_64BIT.  The q suffix is written unconditionally
;; in the template here.
2837 (define_insn "sse2_cvtsd2siq"
2838 [(set (match_operand:DI 0 "register_operand" "=r,r")
2841 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2842 (parallel [(const_int 0)]))]
2843 UNSPEC_FIX_NOTRUNC))]
2844 "TARGET_SSE2 && TARGET_64BIT"
2845 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2846 [(set_attr "type" "sseicvt")
2847 (set_attr "athlon_decode" "double,vector")
2848 (set_attr "prefix_rep" "1")
2849 (set_attr "prefix" "maybe_vex")
2850 (set_attr "mode" "DI")])
;; 64-bit result form taking a plain DF scalar operand wrapped in
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 && TARGET_64BIT.
2852 (define_insn "sse2_cvtsd2siq_2"
2853 [(set (match_operand:DI 0 "register_operand" "=r,r")
2854 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2855 UNSPEC_FIX_NOTRUNC))]
2856 "TARGET_SSE2 && TARGET_64BIT"
2857 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2858 [(set_attr "type" "sseicvt")
2859 (set_attr "athlon_decode" "double,vector")
2860 (set_attr "amdfam10_decode" "double,double")
2861 (set_attr "prefix_rep" "1")
2862 (set_attr "prefix" "maybe_vex")
2863 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF operand 1 into an SI general
;; register.  Unlike sse2_cvtsd2si there is no UNSPEC_FIX_NOTRUNC
;; wrapper in this pattern.
2865 (define_insn "sse2_cvttsd2si"
2866 [(set (match_operand:SI 0 "register_operand" "=r,r")
2869 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2870 (parallel [(const_int 0)]))))]
2872 "%vcvttsd2si\t{%1, %0|%0, %1}"
2873 [(set_attr "type" "sseicvt")
2874 (set_attr "prefix_rep" "1")
2875 (set_attr "prefix" "maybe_vex")
2876 (set_attr "mode" "SI")
2877 (set_attr "athlon_decode" "double,vector")
2878 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit result form of cvttsd2si: converts element 0 of the V2DF
;; operand 1 into a DI general register.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
2880 (define_insn "sse2_cvttsd2siq"
2881 [(set (match_operand:DI 0 "register_operand" "=r,r")
2884 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2885 (parallel [(const_int 0)]))))]
2886 "TARGET_SSE2 && TARGET_64BIT"
2887 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2888 [(set_attr "type" "sseicvt")
2889 (set_attr "prefix_rep" "1")
2890 (set_attr "prefix" "maybe_vex")
2891 (set_attr "mode" "DI")
2892 (set_attr "athlon_decode" "double,vector")
2893 (set_attr "amdfam10_decode" "double,double")])
;; vcvtdq2pd, 256-bit result: RTL float conversion of the whole V4SI
;; operand 1 into a V4DF register.  VEX-encoded.
2895 (define_insn "avx_cvtdq2pd256"
2896 [(set (match_operand:V4DF 0 "register_operand" "=x")
2897 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2899 "vcvtdq2pd\t{%1, %0|%0, %1}"
2900 [(set_attr "type" "ssecvt")
2901 (set_attr "prefix" "vex")
2902 (set_attr "mode" "V4DF")])
;; cvtdq2pd: produces a V2DF result from elements 0 and 1 of the V4SI
;; operand 1.  The %v prefix with maybe_vex covers both the legacy and
;; VEX spellings.
2904 (define_insn "sse2_cvtdq2pd"
2905 [(set (match_operand:V2DF 0 "register_operand" "=x")
2908 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2909 (parallel [(const_int 0) (const_int 1)]))))]
2911 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2912 [(set_attr "type" "ssecvt")
2913 (set_attr "prefix" "maybe_vex")
2914 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit source: UNSPEC_FIX_NOTRUNC conversion of the
;; V4DF operand 1 into a V4SI register.  The {y} suffix is emitted only
;; in AT&T syntax.
2916 (define_insn "avx_cvtpd2dq256"
2917 [(set (match_operand:V4SI 0 "register_operand" "=x")
2918 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2919 UNSPEC_FIX_NOTRUNC))]
2921 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2922 [(set_attr "type" "ssecvt")
2923 (set_attr "prefix" "vex")
2924 (set_attr "mode" "OI")])
;; Expander for the V2DF to V4SI conversion.  Its preparation statement
;; sets operand 2 to the V2SI zero constant, which is the operand the
;; *sse2_cvtpd2dq insn matches with const0_operand.
2926 (define_expand "sse2_cvtpd2dq"
2927 [(set (match_operand:V4SI 0 "register_operand" "")
2929 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2933 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvtpd2dq expander: a V2SI unspec of the V2DF
;; operand 1 combined with the zero constant operand 2 to form the V4SI
;; result.  The output template is C code: under TARGET_AVX it emits
;; vcvtpd2dq (with an {x} suffix in AT&T syntax), otherwise cvtpd2dq.
2935 (define_insn "*sse2_cvtpd2dq"
2936 [(set (match_operand:V4SI 0 "register_operand" "=x")
2938 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2940 (match_operand:V2SI 2 "const0_operand" "")))]
2942 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2943 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2944 [(set_attr "type" "ssecvt")
2945 (set_attr "prefix_rep" "1")
2946 (set_attr "prefix_data16" "0")
2947 (set_attr "prefix" "maybe_vex")
2948 (set_attr "mode" "TI")
2949 (set_attr "amdfam10_decode" "double")])
;; vcvttpd2dq with a 256-bit source: RTL fix conversion of the V4DF
;; operand 1 into a V4SI register.  The {y} suffix is emitted only in
;; AT&T syntax.
2951 (define_insn "avx_cvttpd2dq256"
2952 [(set (match_operand:V4SI 0 "register_operand" "=x")
2953 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2955 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2956 [(set_attr "type" "ssecvt")
2957 (set_attr "prefix" "vex")
2958 (set_attr "mode" "OI")])
;; Expander for the V2DF to V4SI fix conversion.  Its preparation
;; statement sets operand 2 to the V2SI zero constant, which is the
;; operand the *sse2_cvttpd2dq insn matches with const0_operand.
2960 (define_expand "sse2_cvttpd2dq"
2961 [(set (match_operand:V4SI 0 "register_operand" "")
2963 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2966 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvttpd2dq expander: a V2SI fix of the V2DF
;; operand 1 combined with the zero constant operand 2 to form the V4SI
;; result.  The output template is C code: under TARGET_AVX it emits
;; vcvttpd2dq (with an {x} suffix in AT&T syntax), otherwise cvttpd2dq.
2968 (define_insn "*sse2_cvttpd2dq"
2969 [(set (match_operand:V4SI 0 "register_operand" "=x")
2971 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2972 (match_operand:V2SI 2 "const0_operand" "")))]
2974 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2975 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2976 [(set_attr "type" "ssecvt")
2977 (set_attr "prefix" "maybe_vex")
2978 (set_attr "mode" "TI")
2979 (set_attr "amdfam10_decode" "double")])
;; VEX three-operand vcvtsd2ss: float_truncate of the V2DF operand 2
;; (SSE register or memory) in V2SF mode, with operand 1 as an
;; independent V4SF register.
2981 (define_insn "*avx_cvtsd2ss"
2982 [(set (match_operand:V4SF 0 "register_operand" "=x")
2985 (float_truncate:V2SF
2986 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2987 (match_operand:V4SF 1 "register_operand" "x")
2990 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2991 [(set_attr "type" "ssecvt")
2992 (set_attr "prefix" "vex")
2993 (set_attr "mode" "SF")])
;; Two-operand cvtsd2ss: float_truncate of the V2DF operand 2 in V2SF
;; mode, with operand 1 tied to the destination (constraint 0).  Register
;; and memory sources are separate alternatives with different decode
;; attributes (vector vs double).
2995 (define_insn "sse2_cvtsd2ss"
2996 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2999 (float_truncate:V2SF
3000 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
3001 (match_operand:V4SF 1 "register_operand" "0,0")
3004 "cvtsd2ss\t{%2, %0|%0, %2}"
3005 [(set_attr "type" "ssecvt")
3006 (set_attr "athlon_decode" "vector,double")
3007 (set_attr "amdfam10_decode" "vector,double")
3008 (set_attr "mode" "SF")])
;; VEX three-operand vcvtss2sd: works from elements 0 and 1 of the V4SF
;; operand 2 (SSE register or memory), with operand 1 as an independent
;; V2DF register.
3010 (define_insn "*avx_cvtss2sd"
3011 [(set (match_operand:V2DF 0 "register_operand" "=x")
3015 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3016 (parallel [(const_int 0) (const_int 1)])))
3017 (match_operand:V2DF 1 "register_operand" "x")
3020 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3021 [(set_attr "type" "ssecvt")
3022 (set_attr "prefix" "vex")
3023 (set_attr "mode" "DF")])
;; Two-operand cvtss2sd: works from elements 0 and 1 of the V4SF
;; operand 2, with operand 1 tied to the destination (constraint 0).
;; Register and memory sources are separate alternatives so
;; amdfam10_decode can differ (vector vs double).
3025 (define_insn "sse2_cvtss2sd"
3026 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3030 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3031 (parallel [(const_int 0) (const_int 1)])))
3032 (match_operand:V2DF 1 "register_operand" "0,0")
3035 "cvtss2sd\t{%2, %0|%0, %2}"
3036 [(set_attr "type" "ssecvt")
3037 (set_attr "amdfam10_decode" "vector,double")
3038 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit source: float_truncate of the V4DF operand 1
;; into a V4SF register.  The {y} suffix is emitted only in AT&T syntax.
3040 (define_insn "avx_cvtpd2ps256"
3041 [(set (match_operand:V4SF 0 "register_operand" "=x")
3042 (float_truncate:V4SF
3043 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3045 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3046 [(set_attr "type" "ssecvt")
3047 (set_attr "prefix" "vex")
3048 (set_attr "mode" "V4SF")])
;; Expander for the V2DF to V4SF float_truncate conversion.  Its
;; preparation statement sets operand 2 to the V2SF zero constant, which
;; is the operand the *sse2_cvtpd2ps insn matches with const0_operand.
3050 (define_expand "sse2_cvtpd2ps"
3051 [(set (match_operand:V4SF 0 "register_operand" "")
3053 (float_truncate:V2SF
3054 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3057 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matching the cvtpd2ps pattern: V2DF operand 1 is truncated to V2SF
;; and operand 2 must be a zero constant (const0_operand).  The C output
;; statement picks the VEX mnemonic (with an AT&T-only "x" suffix) when
;; TARGET_AVX is set and the legacy mnemonic otherwise.
3059 (define_insn "*sse2_cvtpd2ps"
3060 [(set (match_operand:V4SF 0 "register_operand" "=x")
3062 (float_truncate:V2SF
3063 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3064 (match_operand:V2SF 2 "const0_operand" "")))]
3066 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3067 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3068 [(set_attr "type" "ssecvt")
3069 (set_attr "prefix_data16" "1")
3070 (set_attr "prefix" "maybe_vex")
3071 (set_attr "mode" "V4SF")
3072 (set_attr "amdfam10_decode" "double")])
;; Widen four floats (V4SF operand 1, register or memory) to four doubles
;; in the V4DF destination using vcvtps2pd.
3074 (define_insn "avx_cvtps2pd256"
3075 [(set (match_operand:V4DF 0 "register_operand" "=x")
3077 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3079 "vcvtps2pd\t{%1, %0|%0, %1}"
3080 [(set_attr "type" "ssecvt")
3081 (set_attr "prefix" "vex")
3082 (set_attr "mode" "V4DF")])
;; Convert elements 0 and 1 of V4SF operand 1 to a V2DF result.  The "%v"
;; in the template lets one pattern serve both encodings (prefix is
;; "maybe_vex"); prefix_data16 is explicitly forced to 0.
3084 (define_insn "sse2_cvtps2pd"
3085 [(set (match_operand:V2DF 0 "register_operand" "=x")
3088 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3089 (parallel [(const_int 0) (const_int 1)]))))]
3091 "%vcvtps2pd\t{%1, %0|%0, %1}"
3092 [(set_attr "type" "ssecvt")
3093 (set_attr "prefix" "maybe_vex")
3094 (set_attr "mode" "V2DF")
3095 (set_attr "prefix_data16" "0")
3096 (set_attr "amdfam10_decode" "direct")])
;; Widen the high V4SF elements of operand 1 to V2DF.  operands[2] is a
;; fresh V4SF pseudo created in the preparation code; the final set converts
;; elements 0 and 1 of its source to the V2DF destination.
3098 (define_expand "vec_unpacks_hi_v4sf"
3103 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3104 (parallel [(const_int 6)
3108 (set (match_operand:V2DF 0 "register_operand" "")
3112 (parallel [(const_int 0) (const_int 1)]))))]
3115 operands[2] = gen_reg_rtx (V4SFmode);
;; Widen the low V4SF elements (indices 0 and 1) of operand 1 to the V2DF
;; destination.
3118 (define_expand "vec_unpacks_lo_v4sf"
3119 [(set (match_operand:V2DF 0 "register_operand" "")
3122 (match_operand:V4SF 1 "nonimmediate_operand" "")
3123 (parallel [(const_int 0) (const_int 1)]))))]
;; Signed V8HI -> V4SF for the high half, done in two emitted insns:
;; vec_unpacks_hi_v8hi widens into a V4SI temporary, then cvtdq2ps converts
;; that temporary to float.
3126 (define_expand "vec_unpacks_float_hi_v8hi"
3127 [(match_operand:V4SF 0 "register_operand" "")
3128 (match_operand:V8HI 1 "register_operand" "")]
3131 rtx tmp = gen_reg_rtx (V4SImode);
3133 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3134 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Signed V8HI -> V4SF for the low half: vec_unpacks_lo_v8hi widens into a
;; V4SI temporary, then cvtdq2ps converts that temporary to float.
3138 (define_expand "vec_unpacks_float_lo_v8hi"
3139 [(match_operand:V4SF 0 "register_operand" "")
3140 (match_operand:V8HI 1 "register_operand" "")]
3143 rtx tmp = gen_reg_rtx (V4SImode);
3145 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3146 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Unsigned V8HI -> V4SF for the high half: vec_unpacku_hi_v8hi widens into
;; a V4SI temporary, then cvtdq2ps converts that temporary to float.
3150 (define_expand "vec_unpacku_float_hi_v8hi"
3151 [(match_operand:V4SF 0 "register_operand" "")
3152 (match_operand:V8HI 1 "register_operand" "")]
3155 rtx tmp = gen_reg_rtx (V4SImode);
3157 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3158 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Unsigned V8HI -> V4SF for the low half: vec_unpacku_lo_v8hi widens into
;; a V4SI temporary, then cvtdq2ps converts that temporary to float.
3162 (define_expand "vec_unpacku_float_lo_v8hi"
3163 [(match_operand:V4SF 0 "register_operand" "")
3164 (match_operand:V8HI 1 "register_operand" "")]
3167 rtx tmp = gen_reg_rtx (V4SImode);
3169 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3170 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Signed V4SI -> V2DF for the high elements of operand 1.  operands[2] is a
;; fresh V4SI pseudo created by the preparation statement; the final set
;; converts elements 0 and 1 of its source to the V2DF destination.
3174 (define_expand "vec_unpacks_float_hi_v4si"
3177 (match_operand:V4SI 1 "nonimmediate_operand" "")
3178 (parallel [(const_int 2)
3182 (set (match_operand:V2DF 0 "register_operand" "")
3186 (parallel [(const_int 0) (const_int 1)]))))]
3188 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed V4SI -> V2DF for the low elements (indices 0 and 1) of operand 1.
3190 (define_expand "vec_unpacks_float_lo_v4si"
3191 [(set (match_operand:V2DF 0 "register_operand" "")
3194 (match_operand:V4SI 1 "nonimmediate_operand" "")
3195 (parallel [(const_int 0) (const_int 1)]))))]
;; Unsigned V4SI -> V2DF for the high elements of operand 1.
;; Operand roles set up by the preparation code:
;;   3 = V2DF zero vector, 4 = V2DF vector built from the constant 2^32
;;   (real_ldexp of 1.0 by 32), 5 = V4SI scratch, 6..8 = V2DF scratches.
;; The visible steps compare operand 6 against zero (lt), AND operand 7
;; with the 2^32 vector, and add operand 8 to operand 6 to form the result.
3198 (define_expand "vec_unpacku_float_hi_v4si"
3201 (match_operand:V4SI 1 "nonimmediate_operand" "")
3202 (parallel [(const_int 2)
3210 (parallel [(const_int 0) (const_int 1)]))))
3212 (lt:V2DF (match_dup 6) (match_dup 3)))
3214 (and:V2DF (match_dup 7) (match_dup 4)))
3215 (set (match_operand:V2DF 0 "register_operand" "")
3216 (plus:V2DF (match_dup 6) (match_dup 8)))]
3219 REAL_VALUE_TYPE TWO32r;
3223 real_ldexp (&TWO32r, &dconst1, 32);
3224 x = const_double_from_real_value (TWO32r, DFmode);
3226 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3227 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3229 operands[5] = gen_reg_rtx (V4SImode);
3231 for (i = 6; i < 9; i++)
3232 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V4SI -> V2DF for the low elements (indices 0 and 1) of operand 1.
;; Operand roles set up by the preparation code:
;;   3 = V2DF zero vector, 4 = V2DF vector built from the constant 2^32
;;   (real_ldexp of 1.0 by 32), 5..7 = V2DF scratches.
;; The visible steps compare operand 5 against zero (lt), AND operand 6
;; with the 2^32 vector, and add operand 7 to operand 5 to form the result.
3235 (define_expand "vec_unpacku_float_lo_v4si"
3239 (match_operand:V4SI 1 "nonimmediate_operand" "")
3240 (parallel [(const_int 0) (const_int 1)]))))
3242 (lt:V2DF (match_dup 5) (match_dup 3)))
3244 (and:V2DF (match_dup 6) (match_dup 4)))
3245 (set (match_operand:V2DF 0 "register_operand" "")
3246 (plus:V2DF (match_dup 5) (match_dup 7)))]
3249 REAL_VALUE_TYPE TWO32r;
3253 real_ldexp (&TWO32r, &dconst1, 32);
3254 x = const_double_from_real_value (TWO32r, DFmode);
3256 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3257 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3259 for (i = 5; i < 8; i++)
3260 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V2DF inputs into one V4SF result.  Each input is converted with
;; cvtpd2ps into its own V4SF temporary (r1, r2) and the two temporaries are
;; then combined into operand 0 with movlhps.
3263 (define_expand "vec_pack_trunc_v2df"
3264 [(match_operand:V4SF 0 "register_operand" "")
3265 (match_operand:V2DF 1 "nonimmediate_operand" "")
3266 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3271 r1 = gen_reg_rtx (V4SFmode);
3272 r2 = gen_reg_rtx (V4SFmode);
3274 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3275 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3276 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF inputs into one V4SI result using the truncating
;; conversion cvttpd2dq.  Each input is converted into a V4SI temporary;
;; the temporaries and the destination are then viewed as V2DI (gen_lowpart)
;; and combined with vec_interleave_lowv2di.
3280 (define_expand "vec_pack_sfix_trunc_v2df"
3281 [(match_operand:V4SI 0 "register_operand" "")
3282 (match_operand:V2DF 1 "nonimmediate_operand" "")
3283 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3288 r1 = gen_reg_rtx (V4SImode);
3289 r2 = gen_reg_rtx (V4SImode);
3291 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3292 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3293 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3294 gen_lowpart (V2DImode, r1),
3295 gen_lowpart (V2DImode, r2)));
;; Pack two V2DF inputs into one V4SI result using cvtpd2dq (the
;; non-truncating counterpart of vec_pack_sfix_trunc_v2df).  Each input is
;; converted into a V4SI temporary; the temporaries and the destination are
;; then viewed as V2DI and combined with vec_interleave_lowv2di.
3299 (define_expand "vec_pack_sfix_v2df"
3300 [(match_operand:V4SI 0 "register_operand" "")
3301 (match_operand:V2DF 1 "nonimmediate_operand" "")
3302 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3307 r1 = gen_reg_rtx (V4SImode);
3308 r2 = gen_reg_rtx (V4SImode);
3310 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3311 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3312 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3313 gen_lowpart (V2DImode, r1),
3314 gen_lowpart (V2DImode, r2)));
3318 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3320 ;; Parallel single-precision floating point element swizzling
3322 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for the movhlps patterns.  All three operands may be
;; registers or memory here; the preparation statement passes them through
;; ix86_fixup_binary_operands before the insn is generated.
;; NOTE(review): presumably that call rewrites operand combinations the
;; insn patterns reject -- confirm against its definition.
3324 (define_expand "sse_movhlps_exp"
3325 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3328 (match_operand:V4SF 1 "nonimmediate_operand" "")
3329 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3330 (parallel [(const_int 6)
3335 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps pattern.  Rejected when operands 1 and 2 are both in memory.
;; Alternatives:
;;   0: all registers                    -> vmovhlps
;;   1: operand 2 in offsettable memory  -> vmovlps from the upper 8 bytes
;;      of that memory (%H2 prints the address offset by 8)
;;   2: destination in memory, tied to operand 1 -> vmovhps store
3337 (define_insn "*avx_movhlps"
3338 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3341 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3342 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3343 (parallel [(const_int 6)
3347 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3349 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3350 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3351 vmovhps\t{%2, %0|%0, %2}"
3352 [(set_attr "type" "ssemov")
3353 (set_attr "prefix" "vex")
3354 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE movhlps pattern; operand 1 is tied to the destination in every
;; alternative.  Rejected when operands 1 and 2 are both in memory.
;; Alternatives:
;;   0: register source                  -> movhlps
;;   1: operand 2 in offsettable memory  -> movlps from its upper 8 bytes
;;      (%H2 prints the address offset by 8)
;;   2: destination in memory            -> movhps store
3356 (define_insn "sse_movhlps"
3357 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3360 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3361 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3362 (parallel [(const_int 6)
3366 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3368 movhlps\t{%2, %0|%0, %2}
3369 movlps\t{%H2, %0|%0, %H2}
3370 movhps\t{%2, %0|%0, %2}"
3371 [(set_attr "type" "ssemov")
3372 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for the movlhps patterns.  All three operands may be
;; registers or memory here; the preparation statement passes them through
;; ix86_fixup_binary_operands before the insn is generated.
;; NOTE(review): presumably that call rewrites operand combinations the
;; insn patterns reject -- confirm against its definition.
3374 (define_expand "sse_movlhps_exp"
3375 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3378 (match_operand:V4SF 1 "nonimmediate_operand" "")
3379 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3380 (parallel [(const_int 0)
3385 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps pattern, valid when ix86_binary_operator_ok accepts the
;; operands.  Alternatives:
;;   0: all registers             -> vmovlhps
;;   1: operand 2 in memory       -> vmovhps load
;;   2: destination in offsettable memory, tied to operand 1 -> vmovlps
;;      store to the upper 8 bytes (%H0 prints the address offset by 8)
3387 (define_insn "*avx_movlhps"
3388 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3391 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3392 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3393 (parallel [(const_int 0)
3397 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3399 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3400 vmovhps\t{%2, %1, %0|%0, %1, %2}
3401 vmovlps\t{%2, %H0|%H0, %2}"
3402 [(set_attr "type" "ssemov")
3403 (set_attr "prefix" "vex")
3404 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE movlhps pattern; operand 1 is tied to the destination in every
;; alternative.  Alternatives:
;;   0: register source           -> movlhps
;;   1: operand 2 in memory       -> movhps load
;;   2: destination in offsettable memory -> movlps store to the upper
;;      8 bytes (%H0 prints the address offset by 8)
3406 (define_insn "sse_movlhps"
3407 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3410 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3411 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3412 (parallel [(const_int 0)
3416 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3418 movlhps\t{%2, %0|%0, %2}
3419 movhps\t{%2, %0|%0, %2}
3420 movlps\t{%2, %H0|%H0, %2}"
3421 [(set_attr "type" "ssemov")
3422 (set_attr "mode" "V4SF,V2SF,V2SF")])
3424 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; The selector interleaves elements 2,3 of operand 1 with 2,3 of operand 2
;; (indices 10,11 in the concatenation) for the first lane, and elements 6,7
;; with 14,15 for the second lane.
3425 (define_insn "avx_unpckhps256"
3426 [(set (match_operand:V8SF 0 "register_operand" "=x")
3429 (match_operand:V8SF 1 "register_operand" "x")
3430 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3431 (parallel [(const_int 2) (const_int 10)
3432 (const_int 3) (const_int 11)
3433 (const_int 6) (const_int 14)
3434 (const_int 7) (const_int 15)])))]
3436 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3437 [(set_attr "type" "sselog")
3438 (set_attr "prefix" "vex")
3439 (set_attr "mode" "V8SF")])
;; AVX interleave of the high V4SF elements: result order is
;; op1[2], op2[2], op1[3], op2[3] (indices 2,6,3,7 of the concatenation).
;; Three-operand vunpckhps, so operand 1 is not tied to the destination.
3441 (define_insn "*avx_interleave_highv4sf"
3442 [(set (match_operand:V4SF 0 "register_operand" "=x")
3445 (match_operand:V4SF 1 "register_operand" "x")
3446 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3447 (parallel [(const_int 2) (const_int 6)
3448 (const_int 3) (const_int 7)])))]
3450 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3451 [(set_attr "type" "sselog")
3452 (set_attr "prefix" "vex")
3453 (set_attr "mode" "V4SF")])
;; Legacy-SSE interleave of the high V4SF elements (indices 2,6,3,7 of the
;; concatenation).  Two-operand unpckhps: operand 1 is tied to operand 0.
3455 (define_insn "vec_interleave_highv4sf"
3456 [(set (match_operand:V4SF 0 "register_operand" "=x")
3459 (match_operand:V4SF 1 "register_operand" "0")
3460 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3461 (parallel [(const_int 2) (const_int 6)
3462 (const_int 3) (const_int 7)])))]
3464 "unpckhps\t{%2, %0|%0, %2}"
3465 [(set_attr "type" "sselog")
3466 (set_attr "mode" "V4SF")])
3468 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; The selector interleaves elements 0,1 of operand 1 with 0,1 of operand 2
;; (indices 8,9 in the concatenation) for the first lane, and elements 4,5
;; with 12,13 for the second lane.
3469 (define_insn "avx_unpcklps256"
3470 [(set (match_operand:V8SF 0 "register_operand" "=x")
3473 (match_operand:V8SF 1 "register_operand" "x")
3474 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3475 (parallel [(const_int 0) (const_int 8)
3476 (const_int 1) (const_int 9)
3477 (const_int 4) (const_int 12)
3478 (const_int 5) (const_int 13)])))]
3480 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3481 [(set_attr "type" "sselog")
3482 (set_attr "prefix" "vex")
3483 (set_attr "mode" "V8SF")])
;; AVX interleave of the low V4SF elements: result order is
;; op1[0], op2[0], op1[1], op2[1] (indices 0,4,1,5 of the concatenation).
;; Three-operand vunpcklps, so operand 1 is not tied to the destination.
3485 (define_insn "*avx_interleave_lowv4sf"
3486 [(set (match_operand:V4SF 0 "register_operand" "=x")
3489 (match_operand:V4SF 1 "register_operand" "x")
3490 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3491 (parallel [(const_int 0) (const_int 4)
3492 (const_int 1) (const_int 5)])))]
3494 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3495 [(set_attr "type" "sselog")
3496 (set_attr "prefix" "vex")
3497 (set_attr "mode" "V4SF")])
;; Legacy-SSE interleave of the low V4SF elements (indices 0,4,1,5 of the
;; concatenation).  Two-operand unpcklps: operand 1 is tied to operand 0.
3499 (define_insn "vec_interleave_lowv4sf"
3500 [(set (match_operand:V4SF 0 "register_operand" "=x")
3503 (match_operand:V4SF 1 "register_operand" "0")
3504 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3505 (parallel [(const_int 0) (const_int 4)
3506 (const_int 1) (const_int 5)])))]
3508 "unpcklps\t{%2, %0|%0, %2}"
3509 [(set_attr "type" "sselog")
3510 (set_attr "mode" "V4SF")])
3512 ;; These are modeled with the same vec_concat as the others so that we
3513 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: every odd-indexed element (1, 3, 5, 7) of operand 1 is
;; duplicated into the even slot below it.
3514 (define_insn "avx_movshdup256"
3515 [(set (match_operand:V8SF 0 "register_operand" "=x")
3518 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3520 (parallel [(const_int 1) (const_int 1)
3521 (const_int 3) (const_int 3)
3522 (const_int 5) (const_int 5)
3523 (const_int 7) (const_int 7)])))]
3525 "vmovshdup\t{%1, %0|%0, %1}"
3526 [(set_attr "type" "sse")
3527 (set_attr "prefix" "vex")
3528 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF operand 1 (register or memory).  One template
;; serves both encodings via "%v" (prefix "maybe_vex"); the insn carries a
;; REP prefix (prefix_rep = 1).
3530 (define_insn "sse3_movshdup"
3531 [(set (match_operand:V4SF 0 "register_operand" "=x")
3534 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3536 (parallel [(const_int 1)
3541 "%vmovshdup\t{%1, %0|%0, %1}"
3542 [(set_attr "type" "sse")
3543 (set_attr "prefix_rep" "1")
3544 (set_attr "prefix" "maybe_vex")
3545 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: every even-indexed element (0, 2, 4, 6) of operand 1
;; is duplicated into the odd slot above it.
3547 (define_insn "avx_movsldup256"
3548 [(set (match_operand:V8SF 0 "register_operand" "=x")
3551 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3553 (parallel [(const_int 0) (const_int 0)
3554 (const_int 2) (const_int 2)
3555 (const_int 4) (const_int 4)
3556 (const_int 6) (const_int 6)])))]
3558 "vmovsldup\t{%1, %0|%0, %1}"
3559 [(set_attr "type" "sse")
3560 (set_attr "prefix" "vex")
3561 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF operand 1 (register or memory).  One template
;; serves both encodings via "%v" (prefix "maybe_vex"); the insn carries a
;; REP prefix (prefix_rep = 1).
3563 (define_insn "sse3_movsldup"
3564 [(set (match_operand:V4SF 0 "register_operand" "=x")
3567 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3569 (parallel [(const_int 0)
3574 "%vmovsldup\t{%1, %0|%0, %1}"
3575 [(set_attr "type" "sse")
3576 (set_attr "prefix_rep" "1")
3577 (set_attr "prefix" "maybe_vex")
3578 (set_attr "mode" "V4SF")])
;; Expander taking the 8-bit shufps immediate in operand 3 and unpacking it
;; into the eight explicit element selectors that avx_shufps256_1 expects.
;; Each 2-bit field is used twice: once for the low 128-bit lane (offsets
;; +0 for operand 1, +8 for operand 2) and once for the high lane (+4, +12).
3580 (define_expand "avx_shufps256"
3581 [(match_operand:V8SF 0 "register_operand" "")
3582 (match_operand:V8SF 1 "register_operand" "")
3583 (match_operand:V8SF 2 "nonimmediate_operand" "")
3584 (match_operand:SI 3 "const_int_operand" "")]
3587 int mask = INTVAL (operands[3]);
3588 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3589 GEN_INT ((mask >> 0) & 3),
3590 GEN_INT ((mask >> 2) & 3),
3591 GEN_INT (((mask >> 4) & 3) + 8),
3592 GEN_INT (((mask >> 6) & 3) + 8),
3593 GEN_INT (((mask >> 0) & 3) + 4),
3594 GEN_INT (((mask >> 2) & 3) + 4),
3595 GEN_INT (((mask >> 4) & 3) + 12),
3596 GEN_INT (((mask >> 6) & 3) + 12)));
3600 ;; One bit in mask selects 2 elements.
;; 256-bit vshufps with the selection spelled out as eight element indices
;; (operands 3..10).  The insn condition requires the high-lane selectors
;; (operands 7..10) to equal the low-lane ones (operands 3..6) plus 4.
;; The output code rebuilds the 8-bit immediate from operands 3..6 and
;; stores it back into operands[3] for printing.
3601 (define_insn "avx_shufps256_1"
3602 [(set (match_operand:V8SF 0 "register_operand" "=x")
3605 (match_operand:V8SF 1 "register_operand" "x")
3606 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3607 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3608 (match_operand 4 "const_0_to_3_operand" "")
3609 (match_operand 5 "const_8_to_11_operand" "")
3610 (match_operand 6 "const_8_to_11_operand" "")
3611 (match_operand 7 "const_4_to_7_operand" "")
3612 (match_operand 8 "const_4_to_7_operand" "")
3613 (match_operand 9 "const_12_to_15_operand" "")
3614 (match_operand 10 "const_12_to_15_operand" "")])))]
3616 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3617 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3618 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3619 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3622 mask = INTVAL (operands[3]);
3623 mask |= INTVAL (operands[4]) << 2;
3624 mask |= (INTVAL (operands[5]) - 8) << 4;
3625 mask |= (INTVAL (operands[6]) - 8) << 6;
3626 operands[3] = GEN_INT (mask);
3628 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3630 [(set_attr "type" "sselog")
3631 (set_attr "length_immediate" "1")
3632 (set_attr "prefix" "vex")
3633 (set_attr "mode" "V8SF")])
;; Expander taking the 8-bit shufps immediate in operand 3 and unpacking it
;; into four explicit element selectors for sse_shufps_v4sf: the two low
;; fields index operand 1 (0..3), the two high fields index operand 2 (+4).
3635 (define_expand "sse_shufps"
3636 [(match_operand:V4SF 0 "register_operand" "")
3637 (match_operand:V4SF 1 "register_operand" "")
3638 (match_operand:V4SF 2 "nonimmediate_operand" "")
3639 (match_operand:SI 3 "const_int_operand" "")]
3642 int mask = INTVAL (operands[3]);
3643 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3644 GEN_INT ((mask >> 0) & 3),
3645 GEN_INT ((mask >> 2) & 3),
3646 GEN_INT (((mask >> 4) & 3) + 4),
3647 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX shufps over the SSEMODE4S modes, with the selection given as four
;; element indices into the concatenation of operands 1 and 2.  The output
;; code folds operands 3..6 back into the 8-bit immediate (subtracting 4
;; from the two indices that address operand 2) and prints it as %3.
3651 (define_insn "*avx_shufps_<mode>"
3652 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3653 (vec_select:SSEMODE4S
3654 (vec_concat:<ssedoublesizemode>
3655 (match_operand:SSEMODE4S 1 "register_operand" "x")
3656 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3657 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3658 (match_operand 4 "const_0_to_3_operand" "")
3659 (match_operand 5 "const_4_to_7_operand" "")
3660 (match_operand 6 "const_4_to_7_operand" "")])))]
3664 mask |= INTVAL (operands[3]) << 0;
3665 mask |= INTVAL (operands[4]) << 2;
3666 mask |= (INTVAL (operands[5]) - 4) << 4;
3667 mask |= (INTVAL (operands[6]) - 4) << 6;
3668 operands[3] = GEN_INT (mask);
3670 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3672 [(set_attr "type" "sselog")
3673 (set_attr "length_immediate" "1")
3674 (set_attr "prefix" "vex")
3675 (set_attr "mode" "V4SF")])
;; Legacy-SSE shufps over the SSEMODE4S modes; operand 1 is tied to the
;; destination.  The selection is given as four element indices into the
;; concatenation of operands 1 and 2; the output code folds operands 3..6
;; back into the 8-bit immediate and prints it as %3.
3677 (define_insn "sse_shufps_<mode>"
3678 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3679 (vec_select:SSEMODE4S
3680 (vec_concat:<ssedoublesizemode>
3681 (match_operand:SSEMODE4S 1 "register_operand" "0")
3682 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3683 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3684 (match_operand 4 "const_0_to_3_operand" "")
3685 (match_operand 5 "const_4_to_7_operand" "")
3686 (match_operand 6 "const_4_to_7_operand" "")])))]
3690 mask |= INTVAL (operands[3]) << 0;
3691 mask |= INTVAL (operands[4]) << 2;
3692 mask |= (INTVAL (operands[5]) - 4) << 4;
3693 mask |= (INTVAL (operands[6]) - 4) << 6;
3694 operands[3] = GEN_INT (mask);
3696 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3698 [(set_attr "type" "sselog")
3699 (set_attr "length_immediate" "1")
3700 (set_attr "mode" "V4SF")])
;; Extract the high two floats (elements 2 and 3) of V4SF operand 1 as a
;; V2SF value.  Alternatives:
;;   0: memory destination        -> movhps store
;;   1: register to register      -> movhlps
;;   2: offsettable memory source -> movlps from its upper 8 bytes (%H1)
;; "%v" selects the VEX mnemonic when available, and %d0 prints the
;; destination a second time for the three-operand AVX form.
3702 (define_insn "sse_storehps"
3703 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3705 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3706 (parallel [(const_int 2) (const_int 3)])))]
3709 %vmovhps\t{%1, %0|%0, %1}
3710 %vmovhlps\t{%1, %d0|%d0, %1}
3711 %vmovlps\t{%H1, %d0|%d0, %H1}"
3712 [(set_attr "type" "ssemov")
3713 (set_attr "prefix" "maybe_vex")
3714 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadhps patterns: keeps elements 0 and 1 of
;; V4SF operand 1 and pairs them with V2SF operand 2.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands.
;; NOTE(review): presumably that call rewrites operand combinations the
;; insn patterns reject -- confirm against its definition.
3716 (define_expand "sse_loadhps_exp"
3717 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3720 (match_operand:V4SF 1 "nonimmediate_operand" "")
3721 (parallel [(const_int 0) (const_int 1)]))
3722 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3724 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps: elements 0 and 1 of V4SF operand 1 paired with V2SF
;; operand 2.  Alternatives:
;;   0: operand 2 in memory   -> vmovhps load
;;   1: all registers         -> vmovlhps
;;   2: destination in offsettable memory, tied to operand 1 -> vmovlps
;;      store to the upper 8 bytes (%H0 prints the address offset by 8)
3726 (define_insn "*avx_loadhps"
3727 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3730 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3731 (parallel [(const_int 0) (const_int 1)]))
3732 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3735 vmovhps\t{%2, %1, %0|%0, %1, %2}
3736 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3737 vmovlps\t{%2, %H0|%H0, %2}"
3738 [(set_attr "type" "ssemov")
3739 (set_attr "prefix" "vex")
3740 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy-SSE loadhps; operand 1 is tied to the destination in every
;; alternative.  Alternatives:
;;   0: operand 2 in memory   -> movhps load
;;   1: register source       -> movlhps
;;   2: destination in offsettable memory -> movlps store to the upper
;;      8 bytes (%H0 prints the address offset by 8)
3742 (define_insn "sse_loadhps"
3743 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3746 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3747 (parallel [(const_int 0) (const_int 1)]))
3748 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3751 movhps\t{%2, %0|%0, %2}
3752 movlhps\t{%2, %0|%0, %2}
3753 movlps\t{%2, %H0|%H0, %2}"
3754 [(set_attr "type" "ssemov")
3755 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX form of storelps: extract the low two floats (elements 0 and 1) of
;; V4SF operand 1 as a V2SF value.  Alternatives:
;;   0: memory destination    -> vmovlps store
;;   1: register to register  -> vmovaps
;;   2: memory source         -> three-operand vmovlps load that uses
;;      operand 0 as its second source
;; The vmovaps alternative is tagged with mode V4SF, matching the packed
;; single-precision move it emits and the same alternative of sse_storelps.
3757 (define_insn "*avx_storelps"
3758 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3760 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3761 (parallel [(const_int 0) (const_int 1)])))]
3764 vmovlps\t{%1, %0|%0, %1}
3765 vmovaps\t{%1, %0|%0, %1}
3766 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3767 [(set_attr "type" "ssemov")
3768 (set_attr "prefix" "vex")
3769 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy-SSE storelps: extract the low two floats (elements 0 and 1) of
;; V4SF operand 1 as a V2SF value.  Alternatives:
;;   0: memory destination    -> movlps store
;;   1: register to register  -> movaps
;;   2: memory source         -> movlps load
3771 (define_insn "sse_storelps"
3772 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3774 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3775 (parallel [(const_int 0) (const_int 1)])))]
3778 movlps\t{%1, %0|%0, %1}
3779 movaps\t{%1, %0|%0, %1}
3780 movlps\t{%1, %0|%0, %1}"
3781 [(set_attr "type" "ssemov")
3782 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadlps patterns: V2SF operand 2 paired with
;; elements 2 and 3 of V4SF operand 1.  The preparation statement passes
;; the operands through ix86_fixup_binary_operands.
;; NOTE(review): presumably that call rewrites operand combinations the
;; insn patterns reject -- confirm against its definition.
3784 (define_expand "sse_loadlps_exp"
3785 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3787 (match_operand:V2SF 2 "nonimmediate_operand" "")
3789 (match_operand:V4SF 1 "nonimmediate_operand" "")
3790 (parallel [(const_int 2) (const_int 3)]))))]
3792 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form of loadlps: V2SF operand 2 paired with elements 2 and 3 of
;; V4SF operand 1.  Alternatives:
;;   0: all registers          -> vshufps with immediate 0xe4
;;   1: operand 2 in memory    -> three-operand vmovlps load
;;   2: destination in memory, tied to operand 1 -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: the template names a separate
;; destination and two sources, which the legacy two-operand shufps cannot
;; encode, and the whole pattern is marked prefix "vex".
3794 (define_insn "*avx_loadlps"
3795 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3797 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3799 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3800 (parallel [(const_int 2) (const_int 3)]))))]
3803 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3804 vmovlps\t{%2, %1, %0|%0, %1, %2}
3805 vmovlps\t{%2, %0|%0, %2}"
3806 [(set_attr "type" "sselog,ssemov,ssemov")
3807 (set_attr "length_immediate" "1,*,*")
3808 (set_attr "prefix" "vex")
3809 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE loadlps: V2SF operand 2 paired with elements 2 and 3 of
;; V4SF operand 1.  Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 1 tied, operand 2 in memory -> movlps load
;;   2: destination in memory, operand 1 tied -> movlps store
3811 (define_insn "sse_loadlps"
3812 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3814 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3816 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3817 (parallel [(const_int 2) (const_int 3)]))))]
3820 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3821 movlps\t{%2, %0|%0, %2}
3822 movlps\t{%2, %0|%0, %2}"
3823 [(set_attr "type" "sselog,ssemov,ssemov")
3824 (set_attr "length_immediate" "1,*,*")
3825 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX register-to-register movss on V4SF values.  Three-operand form:
;; operands 1 and 2 are both independent register sources.
3827 (define_insn "*avx_movss"
3828 [(set (match_operand:V4SF 0 "register_operand" "=x")
3830 (match_operand:V4SF 2 "register_operand" "x")
3831 (match_operand:V4SF 1 "register_operand" "x")
3834 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3835 [(set_attr "type" "ssemov")
3836 (set_attr "prefix" "vex")
3837 (set_attr "mode" "SF")])
;; Legacy-SSE register-to-register movss on V4SF values.  Two-operand form:
;; operand 1 is tied to the destination, operand 2 is the register source.
3839 (define_insn "sse_movss"
3840 [(set (match_operand:V4SF 0 "register_operand" "=x")
3842 (match_operand:V4SF 2 "register_operand" "x")
3843 (match_operand:V4SF 1 "register_operand" "0")
3846 "movss\t{%2, %0|%0, %2}"
3847 [(set_attr "type" "ssemov")
3848 (set_attr "mode" "SF")])
;; Broadcast the SFmode scalar in operand 1 to all four elements of the
;; V4SF destination.
;; The scalar is forced into a register in its own mode (SFmode).  Operand 1
;; is declared :SF, so requesting a V4SFmode register from force_reg would
;; set up a move between mismatched modes whenever the operand is not
;; already a register.
3850 (define_expand "vec_dupv4sf"
3851 [(set (match_operand:V4SF 0 "register_operand" "")
3853 (match_operand:SF 1 "nonimmediate_operand" "")))]
3857 operands[1] = force_reg (SFmode, operands[1]);
;; AVX broadcast of an SFmode scalar to V4SF.  Alternatives:
;;   0: scalar in a register -> vshufps with immediate 0, using the same
;;      register as both sources
;;   1: scalar in memory     -> vbroadcastss
3860 (define_insn "*vec_dupv4sf_avx"
3861 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3863 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3866 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3867 vbroadcastss\t{%1, %0|%0, %1}"
3868 [(set_attr "type" "sselog1,ssemov")
3869 (set_attr "length_immediate" "1,0")
3870 (set_attr "prefix_extra" "0,1")
3871 (set_attr "prefix" "vex")
3872 (set_attr "mode" "V4SF")])
;; Legacy-SSE broadcast of an SFmode scalar to V4SF.  The scalar must
;; already live in the destination register (constraint "0"); shufps with
;; immediate 0 then operates on that register in place.
3874 (define_insn "*vec_dupv4sf"
3875 [(set (match_operand:V4SF 0 "register_operand" "=x")
3877 (match_operand:SF 1 "register_operand" "0")))]
3879 "shufps\t{$0, %0, %0|%0, %0, 0}"
3880 [(set_attr "type" "sselog1")
3881 (set_attr "length_immediate" "1")
3882 (set_attr "mode" "V4SF")])
;; Build a V2SF from two SFmode scalars, AVX variant.  Alternatives:
;;   0: both in SSE registers            -> vunpcklps
;;   1: operand 2 in memory              -> vinsertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 a "C" constant -> vmovss load
;;   3: MMX destination                  -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 "C" -> movd
;; The MMX alternatives (3 and 4) keep the original encoding (prefix
;; "orig"); the others are VEX-encoded.
3884 (define_insn "*vec_concatv2sf_avx"
3885 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3887 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3888 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3891 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3892 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3893 vmovss\t{%1, %0|%0, %1}
3894 punpckldq\t{%2, %0|%0, %2}
3895 movd\t{%1, %0|%0, %1}"
3896 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3897 (set_attr "length_immediate" "*,1,*,*,*")
3898 (set_attr "prefix_extra" "*,1,*,*,*")
3899 (set (attr "prefix")
3900 (if_then_else (eq_attr "alternative" "3,4")
3901 (const_string "orig")
3902 (const_string "vex")))
3903 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3905 ;; Although insertps takes register source, we prefer
3906 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SFmode scalars, SSE4.1 variant.  In the
;; alternatives where operand 1 is a register it is tied to the destination.
;;   0: register operand 2               -> unpcklps
;;   1: operand 2 in memory              -> insertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 a "C" constant -> movss load
;;   3: MMX destination                  -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 "C" -> movd
3907 (define_insn "*vec_concatv2sf_sse4_1"
3908 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3910 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3911 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3914 unpcklps\t{%2, %0|%0, %2}
3915 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3916 movss\t{%1, %0|%0, %1}
3917 punpckldq\t{%2, %0|%0, %2}
3918 movd\t{%1, %0|%0, %1}"
3919 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3920 (set_attr "prefix_data16" "*,1,*,*,*")
3921 (set_attr "prefix_extra" "*,1,*,*,*")
3922 (set_attr "length_immediate" "*,1,*,*,*")
3923 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3925 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3926 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3927 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SFmode scalars, plain SSE variant.  Operand 2 is
;; restricted to a register or a "C" constant (reg_or_0_operand).
;;   0: SSE registers, operand 1 tied    -> unpcklps
;;   1: operand 1 in memory, operand 2 "C" -> movss load
;;   2: MMX registers, operand 1 tied    -> punpckldq
;;   3: MMX destination, operand 1 in memory, operand 2 "C" -> movd
3928 (define_insn "*vec_concatv2sf_sse"
3929 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3931 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3932 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3935 unpcklps\t{%2, %0|%0, %2}
3936 movss\t{%1, %0|%0, %1}
3937 punpckldq\t{%2, %0|%0, %2}
3938 movd\t{%1, %0|%0, %1}"
3939 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3940 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF halves, AVX variant.  Operand 1 must be a
;; register; operand 2 is a register (vmovlhps) or memory (vmovhps load).
3942 (define_insn "*vec_concatv4sf_avx"
3943 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3945 (match_operand:V2SF 1 "register_operand" " x,x")
3946 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3949 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3950 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3951 [(set_attr "type" "ssemov")
3952 (set_attr "prefix" "vex")
3953 (set_attr "mode" "V4SF,V2SF")])
;; Build a V4SF from two V2SF halves, legacy-SSE variant.  Operand 1 is
;; tied to the destination; operand 2 is a register (movlhps) or memory
;; (movhps load).
3955 (define_insn "*vec_concatv4sf_sse"
3956 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3958 (match_operand:V2SF 1 "register_operand" " 0,0")
3959 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3962 movlhps\t{%2, %0|%0, %2}
3963 movhps\t{%2, %0|%0, %2}"
3964 [(set_attr "type" "ssemov")
3965 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialisation for every SSEMODE mode.  All the work is delegated
;; to ix86_expand_vector_init, called with a false first argument, the
;; destination register (operand 0) and the initialiser (operand 1).
3967 (define_expand "vec_init<mode>"
3968 [(match_operand:SSEMODE 0 "register_operand" "")
3969 (match_operand 1 "" "")]
3972 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX variant of "set element 0" for the SSEMODE4S modes: scalar operand 2
;; is duplicated and merged with vector operand 1.  Six alternatives; the
;; first three take a "C" constant as operand 1, the next two an SSE
;; register, and the last has a memory destination tied to operand 1.
;; The visible templates cover alternatives 0..4 (vinsertps, scalar move,
;; vmovd, vmovss, vpinsrd); the last alternative has type and mode "*".
3976 (define_insn "*vec_set<mode>_0_avx"
3977 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3978 (vec_merge:SSEMODE4S
3979 (vec_duplicate:SSEMODE4S
3980 (match_operand:<ssescalarmode> 2
3981 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3982 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3986 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3987 vmov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3988 vmovd\t{%2, %0|%0, %2}
3989 vmovss\t{%2, %1, %0|%0, %1, %2}
3990 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3992 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3993 (set_attr "prefix_extra" "*,*,*,*,1,*")
3994 (set_attr "length_immediate" "*,*,*,*,1,*")
3995 (set_attr "prefix" "vex")
3996 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE4.1 variant of "set element 0" for the SSEMODE4S modes: scalar
;; operand 2 is duplicated and merged with vector operand 1.  Six
;; alternatives; the first three take a "C" constant as operand 1, the
;; remaining ones tie operand 1 to the destination.
;; The visible templates cover alternatives 0..4 (insertps, scalar move,
;; movd, movss, pinsrd); the last alternative has type and mode "*".
3998 (define_insn "*vec_set<mode>_0_sse4_1"
3999 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
4000 (vec_merge:SSEMODE4S
4001 (vec_duplicate:SSEMODE4S
4002 (match_operand:<ssescalarmode> 2
4003 "general_operand" " x,m,*r,x,*rm,*rfF"))
4004 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
4008 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
4009 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
4010 movd\t{%2, %0|%0, %2}
4011 movss\t{%2, %0|%0, %2}
4012 pinsrd\t{$0, %2, %0|%0, %2, 0}
4014 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4015 (set_attr "prefix_extra" "*,*,*,*,1,*")
4016 (set_attr "length_immediate" "*,*,*,*,1,*")
4017 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE2 variant of "set element 0" for the SSEMODE4S modes.  Four
;; alternatives: scalar from memory or from a general register with a "C"
;; constant as operand 1, scalar from an SSE register with operand 1 tied,
;; and a memory destination tied to operand 1.
;; The visible templates cover alternatives 0..2; the last has mode "*".
4019 (define_insn "*vec_set<mode>_0_sse2"
4020 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
4021 (vec_merge:SSEMODE4S
4022 (vec_duplicate:SSEMODE4S
4023 (match_operand:<ssescalarmode> 2
4024 "general_operand" " m,*r,x,x*rfF"))
4025 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4029 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
4030 movd\t{%2, %0|%0, %2}
4031 movss\t{%2, %0|%0, %2}
4033 [(set_attr "type" "ssemov")
4034 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Baseline variant of "set element 0" for the SSEMODE4S modes.  Three
;; alternatives: scalar from memory with a "C" constant as operand 1,
;; scalar from an SSE register with operand 1 tied, and a memory
;; destination tied to operand 1.  Both visible templates use movss.
4036 (define_insn "vec_set<mode>_0"
4037 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4038 (vec_merge:SSEMODE4S
4039 (vec_duplicate:SSEMODE4S
4040 (match_operand:<ssescalarmode> 2
4041 "general_operand" " m,x,x*rfF"))
4042 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4046 movss\t{%2, %0|%0, %2}
4047 movss\t{%2, %0|%0, %2}
4049 [(set_attr "type" "ssemov")
4050 (set_attr "mode" "SF")])
4052 ;; A subset is vec_setv4sf.
;; AVX insertion of SFmode operand 2 into V4SF operand 1.  Operand 3 is a
;; one-hot mask (1, 2, 4 or 8); the output code turns it into the insertps
;; immediate by taking its log2 and shifting that left by 4.
4053 (define_insn "*vec_setv4sf_avx"
4054 [(set (match_operand:V4SF 0 "register_operand" "=x")
4057 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4058 (match_operand:V4SF 1 "register_operand" "x")
4059 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4062 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4063 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4065 [(set_attr "type" "sselog")
4066 (set_attr "prefix_extra" "1")
4067 (set_attr "length_immediate" "1")
4068 (set_attr "prefix" "vex")
4069 (set_attr "mode" "V4SF")])
;; SSE4.1 insertion of SFmode operand 2 into V4SF operand 1, which is tied
;; to the destination.  Operand 3 is a one-hot mask (1, 2, 4 or 8); the
;; output code turns it into the insertps immediate by taking its log2 and
;; shifting that left by 4.
4071 (define_insn "*vec_setv4sf_sse4_1"
4072 [(set (match_operand:V4SF 0 "register_operand" "=x")
4075 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4076 (match_operand:V4SF 1 "register_operand" "0")
4077 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4080 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4081 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4083 [(set_attr "type" "sselog")
4084 (set_attr "prefix_data16" "1")
4085 (set_attr "prefix_extra" "1")
4086 (set_attr "length_immediate" "1")
4087 (set_attr "mode" "V4SF")])
;; AVX form of insertps with an arbitrary 8-bit immediate (operand 3),
;; modelled as an unspec over V4SF operands 2 and 1.
;; Operand 2 is restricted to a register, as in sse4_1_insertps.  With a
;; memory source the instruction loads a single 32-bit element and ignores
;; the source-select bits of the immediate, so a V4SF memory operand would
;; not be read the way the immediate asks for.
4089 (define_insn "*avx_insertps"
4090 [(set (match_operand:V4SF 0 "register_operand" "=x")
4091 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4092 (match_operand:V4SF 1 "register_operand" "x")
4093 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4096 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4097 [(set_attr "type" "sselog")
4098 (set_attr "prefix" "vex")
4099 (set_attr "prefix_extra" "1")
4100 (set_attr "length_immediate" "1")
4101 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps with an arbitrary 8-bit immediate (operand 3), modelled
;; as an unspec.  Both vector operands must be registers and operand 1 is
;; tied to the destination.
4103 (define_insn "sse4_1_insertps"
4104 [(set (match_operand:V4SF 0 "register_operand" "=x")
4105 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4106 (match_operand:V4SF 1 "register_operand" "0")
4107 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4110 "insertps\t{%3, %2, %0|%0, %2, %3}";
4111 [(set_attr "type" "sselog")
4112 (set_attr "prefix_data16" "1")
4113 (set_attr "prefix_extra" "1")
4114 (set_attr "length_immediate" "1")
4115 (set_attr "mode" "V4SF")])
;; NOTE(review): presumably the body of an unnamed post-reload split --
;; confirm against the surrounding definition.  It applies to a V4SF memory
;; destination once reload has completed and replaces the insn with a plain
;; SFmode move of operand 1 to offset 0 of that memory.
4118 [(set (match_operand:V4SF 0 "memory_operand" "")
4121 (match_operand:SF 1 "nonmemory_operand" ""))
4124 "TARGET_SSE && reload_completed"
4127 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Element insertion for every SSEMODE mode.  All the work is delegated to
;; ix86_expand_vector_set, called with a false first argument, the vector
;; (operand 0), the scalar (operand 1) and the integer value of the constant
;; index (operand 2).
4131 (define_expand "vec_set<mode>"
4132 [(match_operand:SSEMODE 0 "register_operand" "")
4133 (match_operand:<ssescalarmode> 1 "register_operand" "")
4134 (match_operand 2 "const_int_operand" "")]
4137 ix86_expand_vector_set (false, operands[0], operands[1],
4138 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 as an SFmode value.  Not valid when
;; source and destination are both in memory.  After reload the insn is
;; split into an ordinary SFmode move: the source is re-expressed in SFmode,
;; either as the register with the same number (gen_rtx_REG) or through
;; gen_lowpart, and emit_move_insn does the rest.
4142 (define_insn_and_split "*vec_extractv4sf_0"
4143 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4145 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4146 (parallel [(const_int 0)])))]
4147 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4149 "&& reload_completed"
4152 rtx op1 = operands[1];
4154 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4156 op1 = gen_lowpart (SFmode, op1);
4157 emit_move_insn (operands[0], op1);
;; Expander for vextractf128 on every AVX256MODE mode.  Operand 2 is a
;; constant 0 or 1; the switch on its value emits either the
;; vec_extract_lo_<mode> or the vec_extract_hi_<mode> pattern.
4161 (define_expand "avx_vextractf128<mode>"
4162 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4163 (match_operand:AVX256MODE 1 "register_operand" "")
4164 (match_operand:SI 2 "const_0_to_1_operand" "")]
4167 switch (INTVAL (operands[2]))
4170 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4173 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Select elements 0 and 1 of an AVX256MODE4P register into a half-width
;; register or memory destination.  Emits vextractf128 with immediate 0;
;; the "memory" attribute marks the second alternative as a store.
4181 (define_insn "vec_extract_lo_<mode>"
4182 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4183 (vec_select:<avxhalfvecmode>
4184 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4185 (parallel [(const_int 0) (const_int 1)])))]
4187 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4188 [(set_attr "type" "sselog")
4189 (set_attr "prefix_extra" "1")
4190 (set_attr "length_immediate" "1")
4191 (set_attr "memory" "none,store")
4192 (set_attr "prefix" "vex")
4193 (set_attr "mode" "V8SF")])
;; Select elements 2 and 3 of an AVX256MODE4P register into a half-width
;; register or memory destination.  Emits vextractf128 with immediate 1;
;; the "memory" attribute marks the second alternative as a store.
4195 (define_insn "vec_extract_hi_<mode>"
4196 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4197 (vec_select:<avxhalfvecmode>
4198 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4199 (parallel [(const_int 2) (const_int 3)])))]
4201 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4202 [(set_attr "type" "sselog")
4203 (set_attr "prefix_extra" "1")
4204 (set_attr "length_immediate" "1")
4205 (set_attr "memory" "none,store")
4206 (set_attr "prefix" "vex")
4207 (set_attr "mode" "V8SF")])
;; Select elements 0..3 of an AVX256MODE8P register into a half-width
;; register or memory destination.  Emits vextractf128 with immediate 0.
4209 (define_insn "vec_extract_lo_<mode>"
4210 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4211 (vec_select:<avxhalfvecmode>
4212 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4213 (parallel [(const_int 0) (const_int 1)
4214 (const_int 2) (const_int 3)])))]
4216 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4217 [(set_attr "type" "sselog")
4218 (set_attr "prefix_extra" "1")
4219 (set_attr "length_immediate" "1")
4220 (set_attr "memory" "none,store")
4221 (set_attr "prefix" "vex")
4222 (set_attr "mode" "V8SF")])
;; Select elements 4..7 of an AVX256MODE8P register into a half-width
;; register or memory destination.  Emits vextractf128 with immediate 1.
4224 (define_insn "vec_extract_hi_<mode>"
4225 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4226 (vec_select:<avxhalfvecmode>
4227 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4228 (parallel [(const_int 4) (const_int 5)
4229 (const_int 6) (const_int 7)])))]
4231 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4232 [(set_attr "type" "sselog")
4233 (set_attr "prefix_extra" "1")
4234 (set_attr "length_immediate" "1")
4235 (set_attr "memory" "none,store")
4236 (set_attr "prefix" "vex")
4237 (set_attr "mode" "V8SF")])
;; Select elements 0..7 of a V16HI register into a V8HI register or memory
;; destination.  Emits vextractf128 with immediate 0.
4239 (define_insn "vec_extract_lo_v16hi"
4240 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4242 (match_operand:V16HI 1 "register_operand" "x,x")
4243 (parallel [(const_int 0) (const_int 1)
4244 (const_int 2) (const_int 3)
4245 (const_int 4) (const_int 5)
4246 (const_int 6) (const_int 7)])))]
4248 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4249 [(set_attr "type" "sselog")
4250 (set_attr "prefix_extra" "1")
4251 (set_attr "length_immediate" "1")
4252 (set_attr "memory" "none,store")
4253 (set_attr "prefix" "vex")
4254 (set_attr "mode" "V8SF")])
;; Select elements 8..15 of a V16HI register into a V8HI register or
;; memory destination.  Emits vextractf128 with immediate 1.
4256 (define_insn "vec_extract_hi_v16hi"
4257 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4259 (match_operand:V16HI 1 "register_operand" "x,x")
4260 (parallel [(const_int 8) (const_int 9)
4261 (const_int 10) (const_int 11)
4262 (const_int 12) (const_int 13)
4263 (const_int 14) (const_int 15)])))]
4265 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4266 [(set_attr "type" "sselog")
4267 (set_attr "prefix_extra" "1")
4268 (set_attr "length_immediate" "1")
4269 (set_attr "memory" "none,store")
4270 (set_attr "prefix" "vex")
4271 (set_attr "mode" "V8SF")])
;; Select elements 0..15 of a V32QI register into a V16QI register or
;; memory destination.  Emits vextractf128 with immediate 0.
4273 (define_insn "vec_extract_lo_v32qi"
4274 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4276 (match_operand:V32QI 1 "register_operand" "x,x")
4277 (parallel [(const_int 0) (const_int 1)
4278 (const_int 2) (const_int 3)
4279 (const_int 4) (const_int 5)
4280 (const_int 6) (const_int 7)
4281 (const_int 8) (const_int 9)
4282 (const_int 10) (const_int 11)
4283 (const_int 12) (const_int 13)
4284 (const_int 14) (const_int 15)])))]
4286 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4287 [(set_attr "type" "sselog")
4288 (set_attr "prefix_extra" "1")
4289 (set_attr "length_immediate" "1")
4290 (set_attr "memory" "none,store")
4291 (set_attr "prefix" "vex")
4292 (set_attr "mode" "V8SF")])
;; Select elements 16..31 of a V32QI register into a V16QI register or
;; memory destination.  Emits vextractf128 with immediate 1.
4294 (define_insn "vec_extract_hi_v32qi"
4295 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4297 (match_operand:V32QI 1 "register_operand" "x,x")
4298 (parallel [(const_int 16) (const_int 17)
4299 (const_int 18) (const_int 19)
4300 (const_int 20) (const_int 21)
4301 (const_int 22) (const_int 23)
4302 (const_int 24) (const_int 25)
4303 (const_int 26) (const_int 27)
4304 (const_int 28) (const_int 29)
4305 (const_int 30) (const_int 31)])))]
4307 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4308 [(set_attr "type" "sselog")
4309 (set_attr "prefix_extra" "1")
4310 (set_attr "length_immediate" "1")
4311 (set_attr "memory" "none,store")
4312 (set_attr "prefix" "vex")
4313 (set_attr "mode" "V8SF")])
;; Extract the SF element chosen by the constant in operand 2 (0..3) from
;; a V4SF register into a general register or memory ("rm").  The
;; template starts with %v and the prefix attribute is maybe_vex, so one
;; pattern presumably serves both the legacy and the VEX encoding.
4315 (define_insn "*sse4_1_extractps"
4316 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4318 (match_operand:V4SF 1 "register_operand" "x")
4319 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4321 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4322 [(set_attr "type" "sselog")
4323 (set_attr "prefix_data16" "1")
4324 (set_attr "prefix_extra" "1")
4325 (set_attr "length_immediate" "1")
4326 (set_attr "prefix" "maybe_vex")
4327 (set_attr "mode" "V4SF")])
;; Extract element operand 2 (0..3) of a V4SF held in offsettable memory.
;; The split replaces the vector access with a plain SFmode load from
;; byte offset 4 * index within the memory operand.
4329 (define_insn_and_split "*vec_extract_v4sf_mem"
4330 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4332 (match_operand:V4SF 1 "memory_operand" "o")
4333 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4339 int i = INTVAL (operands[2]);
4341 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; vec_extract expander for every mode in SSEMODE.  Operand 0 receives
;; the scalar, operand 1 is the vector register, and operand 2 the
;; constant element index; all the work is delegated to
;; ix86_expand_vector_extract (first argument false).
4345 (define_expand "vec_extract<mode>"
4346 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4347 (match_operand:SSEMODE 1 "register_operand" "")
4348 (match_operand 2 "const_int_operand" "")]
4351 ix86_expand_vector_extract (false, operands[0], operands[1],
4352 INTVAL (operands[2]));
4356 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4358 ;; Parallel double-precision floating point element swizzling
4360 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4362 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd.  The selector picks indices 1, 5, 3, 7 from the
;; pair (operand 1, operand 2), i.e. the odd element of each 128-bit lane
;; of both inputs.  Operand 2 may be memory.
4363 (define_insn "avx_unpckhpd256"
4364 [(set (match_operand:V4DF 0 "register_operand" "=x")
4367 (match_operand:V4DF 1 "register_operand" "x")
4368 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4369 (parallel [(const_int 1) (const_int 5)
4370 (const_int 3) (const_int 7)])))]
4372 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4373 [(set_attr "type" "sselog")
4374 (set_attr "prefix" "vex")
4375 (set_attr "mode" "V4DF")])
;; Expander for the high-element interleave of two V2DF operands.  If the
;; operand combination is rejected by
;; ix86_vec_interleave_v2df_operator_ok (operands, 1), operand 2 is forced
;; into a register so that one of the insn patterns can match.
4377 (define_expand "vec_interleave_highv2df"
4378 [(set (match_operand:V2DF 0 "register_operand" "")
4381 (match_operand:V2DF 1 "nonimmediate_operand" "")
4382 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4383 (parallel [(const_int 1)
4387 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4388 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX high-element interleave of two V2DF values.  Alternatives:
;;   reg,reg           -> vunpckhpd
;;   op2 tied to op1   -> vmovddup from the high half (%H1) of memory op1
;;   mem op1, reg op2  -> vmovlpd loading %H1 into the low half
;;   mem destination   -> vmovhpd storing the high half of op1
4391 (define_insn "*avx_interleave_highv2df"
4392 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4395 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4396 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4397 (parallel [(const_int 1)
4399 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4401 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4402 vmovddup\t{%H1, %0|%0, %H1}
4403 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4404 vmovhpd\t{%1, %0|%0, %1}"
4405 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4406 (set_attr "prefix" "vex")
4407 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 high-element interleave of two V2DF values (two-operand forms).
;; Alternatives: unpckhpd with op1 tied to the destination; movddup from
;; the high half of memory op1 when op2 is the same operand; movlpd
;; loading %H1 with op2 tied to the destination; movhpd store to memory.
4409 (define_insn "*sse3_interleave_highv2df"
4410 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4413 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4414 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4415 (parallel [(const_int 1)
4417 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4419 unpckhpd\t{%2, %0|%0, %2}
4420 movddup\t{%H1, %0|%0, %H1}
4421 movlpd\t{%H1, %0|%0, %H1}
4422 movhpd\t{%1, %0|%0, %1}"
4423 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4424 (set_attr "prefix_data16" "*,*,1,1")
4425 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 high-element interleave of two V2DF values.  Same as the SSE3
;; pattern minus the movddup alternative: unpckhpd (op1 tied), movlpd
;; loading %H1 (op2 tied), or a movhpd store to memory.
4427 (define_insn "*sse2_interleave_highv2df"
4428 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4431 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4432 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4433 (parallel [(const_int 1)
4435 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4437 unpckhpd\t{%2, %0|%0, %2}
4438 movlpd\t{%H1, %0|%0, %H1}
4439 movhpd\t{%1, %0|%0, %1}"
4440 [(set_attr "type" "sselog,ssemov,ssemov")
4441 (set_attr "prefix_data16" "*,1,1")
4442 (set_attr "mode" "V2DF,V1DF,V1DF")])
4444 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Pattern-only expander: V4DF result selected with indices 0, 4, 2, 6.
;; Only operand 1 is visible as an input here; presumably the second half
;; of the selected pair is a match_dup of operand 1 -- TODO confirm.
4445 (define_expand "avx_movddup256"
4446 [(set (match_operand:V4DF 0 "register_operand" "")
4449 (match_operand:V4DF 1 "nonimmediate_operand" "")
4451 (parallel [(const_int 0) (const_int 4)
4452 (const_int 2) (const_int 6)])))]
;; Pattern-only expander for the 256-bit low unpack: indices 0, 4, 2, 6
;; of the pair (operand 1, operand 2), i.e. the even element of each
;; 128-bit lane of both inputs.
4456 (define_expand "avx_unpcklpd256"
4457 [(set (match_operand:V4DF 0 "register_operand" "")
4460 (match_operand:V4DF 1 "register_operand" "")
4461 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4462 (parallel [(const_int 0) (const_int 4)
4463 (const_int 2) (const_int 6)])))]
;; Insn behind both expanders above.  When operand 2 is the same as
;; operand 1 (constraint "1") a single vmovddup is emitted and the source
;; may be memory; otherwise vunpcklpd is used.  The visible part of the
;; condition rejects a memory operand 1 unless it equals operand 2.
4467 (define_insn "*avx_unpcklpd256"
4468 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4471 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4472 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4473 (parallel [(const_int 0) (const_int 4)
4474 (const_int 2) (const_int 6)])))]
4476 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4478 vmovddup\t{%1, %0|%0, %1}
4479 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4480 [(set_attr "type" "sselog")
4481 (set_attr "prefix" "vex")
4482 (set_attr "mode" "V4DF")])
;; Expander for the low-element interleave of two V2DF operands.  If the
;; operand combination is rejected by
;; ix86_vec_interleave_v2df_operator_ok (operands, 0), operand 1 is forced
;; into a register so that one of the insn patterns can match.
4484 (define_expand "vec_interleave_lowv2df"
4485 [(set (match_operand:V2DF 0 "register_operand" "")
4488 (match_operand:V2DF 1 "nonimmediate_operand" "")
4489 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4490 (parallel [(const_int 0)
4494 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4495 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX low-element interleave of two V2DF values.  Alternatives:
;;   reg,reg           -> vunpcklpd
;;   op2 tied to op1   -> vmovddup from memory op1
;;   reg op1, mem op2  -> vmovhpd loading op2 into the high half
;;   mem destination   -> vmovlpd storing op2 to the high half (%H0)
4498 (define_insn "*avx_interleave_lowv2df"
4499 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4502 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4503 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4504 (parallel [(const_int 0)
4506 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4508 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4509 vmovddup\t{%1, %0|%0, %1}
4510 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4511 vmovlpd\t{%2, %H0|%H0, %2}"
4512 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4513 (set_attr "prefix" "vex")
4514 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 low-element interleave of two V2DF values (two-operand forms).
;; Alternatives: unpcklpd (op1 tied); movddup from memory op1 when op2 is
;; the same operand; movhpd loading memory op2 into the high half; movlpd
;; storing op2 to the high half (%H0) of a memory destination.
4516 (define_insn "*sse3_interleave_lowv2df"
4517 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4520 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4521 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4522 (parallel [(const_int 0)
4524 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4526 unpcklpd\t{%2, %0|%0, %2}
4527 movddup\t{%1, %0|%0, %1}
4528 movhpd\t{%2, %0|%0, %2}
4529 movlpd\t{%2, %H0|%H0, %2}"
4530 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4531 (set_attr "prefix_data16" "*,*,1,1")
4532 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 low-element interleave of two V2DF values.  Same as the SSE3
;; pattern minus the movddup alternative: unpcklpd, movhpd from memory
;; op2, or movlpd storing op2 to the high half (%H0) of memory.
4534 (define_insn "*sse2_interleave_lowv2df"
4535 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4538 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4539 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4540 (parallel [(const_int 0)
4542 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4544 unpcklpd\t{%2, %0|%0, %2}
4545 movhpd\t{%2, %0|%0, %2}
4546 movlpd\t{%2, %H0|%H0, %2}"
4547 [(set_attr "type" "sselog,ssemov,ssemov")
4548 (set_attr "prefix_data16" "*,1,1")
4549 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload rewrite for SSE3 (the enclosing header is not visible
;; here; presumably a define_split).  With a memory destination and a
;; register source, the register is re-viewed in DFmode and that DF value
;; is stored twice: at byte offset 0 and at byte offset 8 of operand 0.
4552 [(set (match_operand:V2DF 0 "memory_operand" "")
4555 (match_operand:V2DF 1 "register_operand" "")
4557 (parallel [(const_int 0)
4559 "TARGET_SSE3 && reload_completed"
4562 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4563 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4564 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rewrite for SSE3 (the enclosing header is not visible here; presumably
;; a define_split).  A selection from a memory operand whose two indices
;; satisfy op3 == op2 + 2 becomes a vec_duplicate of the single DF found
;; at byte offset op2 * 8 of that memory operand.
4569 [(set (match_operand:V2DF 0 "register_operand" "")
4572 (match_operand:V2DF 1 "memory_operand" "")
4574 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4575 (match_operand:SI 3 "const_int_operand" "")])))]
4576 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4577 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4579 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking the shuffle immediate in operand 3.  It decodes the
;; mask into explicit element selectors for avx_shufpd256_1: bit 1 picks
;; 5 or 4, bit 2 picks 3 or 2, bit 3 picks 7 or 6.  The selector derived
;; from bit 0 is on a line not visible here.
4582 (define_expand "avx_shufpd256"
4583 [(match_operand:V4DF 0 "register_operand" "")
4584 (match_operand:V4DF 1 "register_operand" "")
4585 (match_operand:V4DF 2 "nonimmediate_operand" "")
4586 (match_operand:SI 3 "const_int_operand" "")]
4589 int mask = INTVAL (operands[3]);
4590 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4592 GEN_INT (mask & 2 ? 5 : 4),
4593 GEN_INT (mask & 4 ? 3 : 2),
4594 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the selection spelled out as four element
;; operands (ranges 0-1, 4-5, 2-3, 6-7).  The output code folds them back
;; into the 4-bit immediate -- op3 | (op4-4)<<1 | (op5-2)<<2 | (op6-6)<<3
;; -- and overwrites operands[3] with it before printing the template.
4598 (define_insn "avx_shufpd256_1"
4599 [(set (match_operand:V4DF 0 "register_operand" "=x")
4602 (match_operand:V4DF 1 "register_operand" "x")
4603 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4604 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4605 (match_operand 4 "const_4_to_5_operand" "")
4606 (match_operand 5 "const_2_to_3_operand" "")
4607 (match_operand 6 "const_6_to_7_operand" "")])))]
4611 mask = INTVAL (operands[3]);
4612 mask |= (INTVAL (operands[4]) - 4) << 1;
4613 mask |= (INTVAL (operands[5]) - 2) << 2;
4614 mask |= (INTVAL (operands[6]) - 6) << 3;
4615 operands[3] = GEN_INT (mask);
4617 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4619 [(set_attr "type" "sselog")
4620 (set_attr "length_immediate" "1")
4621 (set_attr "prefix" "vex")
4622 (set_attr "mode" "V4DF")])
;; Expander taking the shuffle immediate in operand 3.  It decodes the
;; mask into element selectors for sse2_shufpd_v2df: bit 1 picks 3 or 2.
;; The selector derived from bit 0 is on a line not visible here.
4624 (define_expand "sse2_shufpd"
4625 [(match_operand:V2DF 0 "register_operand" "")
4626 (match_operand:V2DF 1 "register_operand" "")
4627 (match_operand:V2DF 2 "nonimmediate_operand" "")
4628 (match_operand:SI 3 "const_int_operand" "")]
4631 int mask = INTVAL (operands[3]);
4632 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4634 GEN_INT (mask & 2 ? 3 : 2)));
;; Even-element extraction for the SSEMODE_EO modes.  All three operands
;; are registers; the work is done by ix86_expand_vec_extract_even_odd
;; called with a final argument of 0.
4638 (define_expand "vec_extract_even<mode>"
4639 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4640 (match_operand:SSEMODE_EO 1 "register_operand" "")
4641 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4644 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Odd-element extraction for the SSEMODE_EO modes.  All three operands
;; are registers; the work is done by ix86_expand_vec_extract_even_odd
;; called with a final argument of 1.
4648 (define_expand "vec_extract_odd<mode>"
4649 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4650 (match_operand:SSEMODE_EO 1 "register_operand" "")
4651 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4654 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4658 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX high-element interleave of two V2DI values: three-operand
;; vpunpckhqdq, operand 2 may be memory.
4659 (define_insn "*avx_interleave_highv2di"
4660 [(set (match_operand:V2DI 0 "register_operand" "=x")
4663 (match_operand:V2DI 1 "register_operand" "x")
4664 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4665 (parallel [(const_int 1)
4668 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4669 [(set_attr "type" "sselog")
4670 (set_attr "prefix" "vex")
4671 (set_attr "mode" "TI")])
;; High-element interleave of two V2DI values: two-operand punpckhqdq
;; with operand 1 tied to the destination; operand 2 may be memory.
4673 (define_insn "vec_interleave_highv2di"
4674 [(set (match_operand:V2DI 0 "register_operand" "=x")
4677 (match_operand:V2DI 1 "register_operand" "0")
4678 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4679 (parallel [(const_int 1)
4682 "punpckhqdq\t{%2, %0|%0, %2}"
4683 [(set_attr "type" "sselog")
4684 (set_attr "prefix_data16" "1")
4685 (set_attr "mode" "TI")])
;; AVX low-element interleave of two V2DI values: three-operand
;; vpunpcklqdq, operand 2 may be memory.
4687 (define_insn "*avx_interleave_lowv2di"
4688 [(set (match_operand:V2DI 0 "register_operand" "=x")
4691 (match_operand:V2DI 1 "register_operand" "x")
4692 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4693 (parallel [(const_int 0)
4696 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4697 [(set_attr "type" "sselog")
4698 (set_attr "prefix" "vex")
4699 (set_attr "mode" "TI")])
;; Low-element interleave of two V2DI values: two-operand punpcklqdq
;; with operand 1 tied to the destination; operand 2 may be memory.
4701 (define_insn "vec_interleave_lowv2di"
4702 [(set (match_operand:V2DI 0 "register_operand" "=x")
4705 (match_operand:V2DI 1 "register_operand" "0")
4706 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4707 (parallel [(const_int 0)
4710 "punpcklqdq\t{%2, %0|%0, %2}"
4711 [(set_attr "type" "sselog")
4712 (set_attr "prefix_data16" "1")
4713 (set_attr "mode" "TI")])
;; AVX two-element shuffle for the SSEMODE2D modes.  Result element 0 is
;; index op3 (0..1) and element 1 is index op4 (2..3) of the concatenation
;; (operand 1, operand 2).  The output code rebuilds the immediate as
;; op3 | (op4-2)<<1, stores it in operands[3] and prints vshufpd.
4715 (define_insn "*avx_shufpd_<mode>"
4716 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4717 (vec_select:SSEMODE2D
4718 (vec_concat:<ssedoublesizemode>
4719 (match_operand:SSEMODE2D 1 "register_operand" "x")
4720 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4721 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4722 (match_operand 4 "const_2_to_3_operand" "")])))]
4726 mask = INTVAL (operands[3]);
4727 mask |= (INTVAL (operands[4]) - 2) << 1;
4728 operands[3] = GEN_INT (mask);
4730 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4732 [(set_attr "type" "sselog")
4733 (set_attr "length_immediate" "1")
4734 (set_attr "prefix" "vex")
4735 (set_attr "mode" "V2DF")])
;; Two-operand shufpd for the SSEMODE2D modes; operand 1 is tied to the
;; destination.  Result element 0 is index op3 (0..1) and element 1 is
;; index op4 (2..3) of the concatenation (operand 1, operand 2).  The
;; output code rebuilds the immediate as op3 | (op4-2)<<1 in operands[3].
4737 (define_insn "sse2_shufpd_<mode>"
4738 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4739 (vec_select:SSEMODE2D
4740 (vec_concat:<ssedoublesizemode>
4741 (match_operand:SSEMODE2D 1 "register_operand" "0")
4742 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4743 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4744 (match_operand 4 "const_2_to_3_operand" "")])))]
4748 mask = INTVAL (operands[3]);
4749 mask |= (INTVAL (operands[4]) - 2) << 1;
4750 operands[3] = GEN_INT (mask);
4752 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4754 [(set_attr "type" "sselog")
4755 (set_attr "length_immediate" "1")
4756 (set_attr "mode" "V2DF")])
4758 ;; Avoid combining registers from different units in a single alternative,
4759 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF as a DF value.  Visible
;; templates: vmovhpd stores to memory, vunpckhpd handles reg-to-reg.
;; The templates for the three memory-source alternatives are on lines
;; not visible here.  Source and destination may not both be memory.
4760 (define_insn "*avx_storehpd"
4761 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4763 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4764 (parallel [(const_int 1)])))]
4765 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4767 vmovhpd\t{%1, %0|%0, %1}
4768 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4772 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4773 (set_attr "prefix" "vex")
4774 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF as a DF value.  The first
;; alternative stores with movhpd; the register alternative ties operand
;; 1 to the destination.  The remaining templates are on lines not
;; visible here.  Source and destination may not both be memory.
4776 (define_insn "sse2_storehpd"
4777 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4779 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4780 (parallel [(const_int 1)])))]
4781 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4783 movhpd\t{%1, %0|%0, %1}
4788 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4789 (set_attr "prefix_data16" "1,*,*,*,*")
4790 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite for SSE2 (the enclosing header is not visible
;; here; presumably a define_split).  Extracting element 1 from a V2DF in
;; memory becomes a plain DFmode load from byte offset 8 of that memory.
4793 [(set (match_operand:DF 0 "register_operand" "")
4795 (match_operand:V2DF 1 "memory_operand" "")
4796 (parallel [(const_int 1)])))]
4797 "TARGET_SSE2 && reload_completed"
4798 [(set (match_dup 0) (match_dup 1))]
4800 operands[1] = adjust_address (operands[1], DFmode, 8);
4803 ;; Avoid combining registers from different units in a single alternative,
4804 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF as a DF value.  The first
;; alternative stores with movlpd (%v prefix, prefix=maybe_vex); the
;; remaining templates are on lines not visible here.  Source and
;; destination may not both be memory.
4805 (define_insn "sse2_storelpd"
4806 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4808 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4809 (parallel [(const_int 0)])))]
4810 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4812 %vmovlpd\t{%1, %0|%0, %1}
4817 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4818 (set_attr "prefix_data16" "1,*,*,*,*")
4819 (set_attr "prefix" "maybe_vex")
4820 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite for SSE2 (the enclosing header is not visible
;; here; presumably a define_split).  Extracting element 0 of a V2DF
;; becomes a plain DFmode move: operand 1 is re-viewed in DFmode, either
;; through gen_rtx_REG on its register number or through gen_lowpart (the
;; test choosing between the two is not visible; presumably REG_P).
4823 [(set (match_operand:DF 0 "register_operand" "")
4825 (match_operand:V2DF 1 "nonimmediate_operand" "")
4826 (parallel [(const_int 0)])))]
4827 "TARGET_SSE2 && reload_completed"
4830 rtx op1 = operands[1];
4832 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4834 op1 = gen_lowpart (DFmode, op1);
4835 emit_move_insn (operands[0], op1);
;; Expander for replacing the high element of a V2DF: element 0 of
;; operand 1 is kept and combined with the DF in operand 2.  The operands
;; are legitimized with ix86_fixup_binary_operands (UNKNOWN, V2DFmode).
4839 (define_expand "sse2_loadhpd_exp"
4840 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4843 (match_operand:V2DF 1 "nonimmediate_operand" "")
4844 (parallel [(const_int 0)]))
4845 (match_operand:DF 2 "nonimmediate_operand" "")))]
4847 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4849 ;; Avoid combining registers from different units in a single alternative,
4850 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form of "keep element 0 of operand 1, take operand 2 as the new
;; high element".  Visible templates: vmovhpd for a memory operand 2,
;; vunpcklpd for a register operand 2.  The three memory-destination
;; alternatives (operand 1 tied to operand 0) have templates on lines not
;; visible here.
4851 (define_insn "*avx_loadhpd"
4852 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4855 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4856 (parallel [(const_int 0)]))
4857 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4858 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4860 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4861 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4865 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4866 (set_attr "prefix" "vex")
4867 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 form of "keep element 0 of operand 1, take operand 2 as the new
;; high element".  Operand 1 is always tied to the destination: movhpd
;; loads a memory operand 2 into the high half, unpcklpd merges a register
;; operand 2, and the memory-destination alternatives store operand 2.
;;
;; There is deliberately no alternative with operand 2 tied to the
;; destination.  The former "shufpd $1, %1, %0" alternative produced
;; { dest[1], op1[0] } -- shufpd always takes its low result element from
;; the destination register -- rather than the required { op1[0], op2 },
;; so it generated wrong code whenever the register allocator chose it.
4869 (define_insn "sse2_loadhpd"
4870 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4873 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4874 (parallel [(const_int 0)]))
4875 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4876 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4878 movhpd\t{%2, %0|%0, %2}
4879 unpcklpd\t{%2, %0|%0, %2}
4884 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4885 (set_attr "prefix_data16" "1,*,*,*,*")
4887 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite for SSE2 (the enclosing header is not visible
;; here; presumably a define_split).  When a memory V2DF keeps its own
;; element 0 and receives a register DF as the other element, only that
;; DF needs storing: a plain DFmode store at byte offset 8 of operand 0.
4890 [(set (match_operand:V2DF 0 "memory_operand" "")
4892 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4893 (match_operand:DF 1 "register_operand" "")))]
4894 "TARGET_SSE2 && reload_completed"
4895 [(set (match_dup 0) (match_dup 1))]
4897 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander for replacing the low element of a V2DF: the DF in operand 2
;; is combined with element 1 of operand 1.  The operands are legitimized
;; with ix86_fixup_binary_operands (UNKNOWN, V2DFmode).
4900 (define_expand "sse2_loadlpd_exp"
4901 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4903 (match_operand:DF 2 "nonimmediate_operand" "")
4905 (match_operand:V2DF 1 "nonimmediate_operand" "")
4906 (parallel [(const_int 1)]))))]
4908 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4910 ;; Avoid combining registers from different units in a single alternative,
4911 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form of "take operand 2 as the new low element, keep element 1 of
;; operand 1".  Visible templates, in alternative order: vmovsd load when
;; operand 1 is the constant "C"; vmovlpd from memory; vmovsd reg-to-reg;
;; vmovhpd loading the high half (%H1) of a memory operand 1.  The three
;; memory-destination alternatives have templates on lines not visible
;; here.
4912 (define_insn "*avx_loadlpd"
4913 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4915 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4917 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4918 (parallel [(const_int 1)]))))]
4919 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4921 vmovsd\t{%2, %0|%0, %2}
4922 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4923 vmovsd\t{%2, %1, %0|%0, %1, %2}
4924 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4928 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4929 (set_attr "prefix" "vex")
4930 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 form of "take operand 2 as the new low element, keep element 1 of
;; operand 1".  Visible templates, in alternative order:
;;   movsd   load, operand 1 is the constant "C"
;;   movlpd  load from memory, operand 1 tied to the destination
;;   movsd   reg-to-reg, operand 1 tied to the destination
;;   shufpd  operand 2 tied to the destination, operand 1 in a register
;;   movhpd  operand 2 tied, high half (%H1) of memory operand 1 loaded
;; The memory-destination alternatives have templates on lines not
;; visible here.
;;
;; In the shufpd alternative the destination already holds operand 2 in
;; its low element, so the shuffle source must be operand 1: immediate 2
;; keeps dest[0] and takes src[1].  Naming %2 as the source (it is the
;; same register as %0) made the instruction a no-op and lost operand 1.
4932 (define_insn "sse2_loadlpd"
4933 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4935 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4937 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4938 (parallel [(const_int 1)]))))]
4939 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4941 movsd\t{%2, %0|%0, %2}
4942 movlpd\t{%2, %0|%0, %2}
4943 movsd\t{%2, %0|%0, %2}
4944 shufpd\t{$2, %1, %0|%0, %1, 2}
4945 movhpd\t{%H1, %0|%0, %H1}
4949 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4950 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4951 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4952 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite for SSE2 (the enclosing header is not visible
;; here; presumably a define_split).  A memory V2DF receives the register
;; DF in operand 1 as one element and keeps its own element 1, so only
;; the low element changes: a plain DFmode store at byte offset 0.
;; (Offset 8 would overwrite the high element that must be preserved and
;; leave the low element stale.)
4955 [(set (match_operand:V2DF 0 "memory_operand" "")
4957 (match_operand:DF 1 "register_operand" "")
4958 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4959 "TARGET_SSE2 && reload_completed"
4960 [(set (match_dup 0) (match_dup 1))]
4962 operands[0] = adjust_address (operands[0], DFmode, 0);
4965 ;; Not sure these two are ever used, but it doesn't hurt to have
;; SSE-without-SSE2 extraction of element 1 of a V2DF.  movhps stores to
;; memory, movhlps handles reg-to-reg, and movlps loads the high half
;; (%H1) of an offsettable memory source.
4967 (define_insn "*vec_extractv2df_1_sse"
4968 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4970 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4971 (parallel [(const_int 1)])))]
4972 "!TARGET_SSE2 && TARGET_SSE
4973 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4975 movhps\t{%1, %0|%0, %1}
4976 movhlps\t{%1, %0|%0, %1}
4977 movlps\t{%H1, %0|%0, %H1}"
4978 [(set_attr "type" "ssemov")
4979 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF.  movlps stores to
;; or loads from memory, movaps copies the whole register for the
;; reg-to-reg alternative.
4981 (define_insn "*vec_extractv2df_0_sse"
4982 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4984 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4985 (parallel [(const_int 0)])))]
4986 "!TARGET_SSE2 && TARGET_SSE
4987 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4989 movlps\t{%1, %0|%0, %1}
4990 movaps\t{%1, %0|%0, %1}
4991 movlps\t{%1, %0|%0, %1}"
4992 [(set_attr "type" "ssemov")
4993 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX movsd-style merge of operand 2 and operand 1 (the combining RTL
;; code is on a line not visible here).  Alternatives, in order: vmovsd
;; reg,reg; vmovlpd with memory operand 2; vmovlpd store of operand 2 to
;; a memory destination; vmovhps loading the high half (%H1) of memory
;; operand 1; vmovhps storing operand 1 to the high half (%H0) of memory.
4995 (define_insn "*avx_movsd"
4996 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4998 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4999 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5003 vmovsd\t{%2, %1, %0|%0, %1, %2}
5004 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5005 vmovlpd\t{%2, %0|%0, %2}
5006 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5007 vmovhps\t{%1, %H0|%H0, %1}"
5008 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5009 (set_attr "prefix" "vex")
5010 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 movsd-style merge of operand 2 and operand 1 (the combining RTL
;; code is on a line not visible here).  Alternatives, in order: movsd
;; reg,reg; movlpd from memory operand 2; movlpd store of operand 2;
;; shufpd with operand 2 tied to the destination and operand 1 in a
;; register; movhps loading the high half (%H1) of memory operand 1;
;; movhps storing operand 1 to the high half (%H0) of memory.
;;
;; In the shufpd alternative %2 is the same register as %0, so the
;; shuffle source has to be operand 1: immediate 2 keeps dest[0] (operand
;; 2's low element) and takes src[1].  With %2 as the source the insn was
;; a no-op and operand 1 was never read.
5012 (define_insn "sse2_movsd"
5013 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5015 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5016 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5020 movsd\t{%2, %0|%0, %2}
5021 movlpd\t{%2, %0|%0, %2}
5022 movlpd\t{%2, %0|%0, %2}
5023 shufpd\t{$2, %1, %0|%0, %1, 2}
5024 movhps\t{%H1, %0|%0, %H1}
5025 movhps\t{%1, %H0|%H0, %1}"
5026 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5027 (set_attr "prefix_data16" "*,1,1,*,*,*")
5028 (set_attr "length_immediate" "*,*,*,1,*,*")
5029 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Build a V2DF from a single DF in a register or memory with one movddup
;; (%v prefix, prefix=maybe_vex).  Going by the pattern name this is the
;; SSE3 vec_duplicate form; the RTL code and condition lines are not
;; visible here.
5031 (define_insn "*vec_dupv2df_sse3"
5032 [(set (match_operand:V2DF 0 "register_operand" "=x")
5034 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5036 "%vmovddup\t{%1, %0|%0, %1}"
5037 [(set_attr "type" "sselog1")
5038 (set_attr "prefix" "maybe_vex")
5039 (set_attr "mode" "DF")])
;; Named V2DF-from-DF pattern whose DF input is tied to the destination
;; register.  The RTL code, condition and output template are on lines
;; not visible here.
5041 (define_insn "vec_dupv2df"
5042 [(set (match_operand:V2DF 0 "register_operand" "=x")
5044 (match_operand:DF 1 "register_operand" "0")))]
5047 [(set_attr "type" "sselog1")
5048 (set_attr "mode" "V2DF")])
;; V2DF built from DF operand 1 with a single movddup (%v prefix,
;; prefix=maybe_vex).  The second half of the concatenation is on a line
;; not visible here; presumably a match_dup of operand 1 -- TODO confirm.
5050 (define_insn "*vec_concatv2df_sse3"
5051 [(set (match_operand:V2DF 0 "register_operand" "=x")
5053 (match_operand:DF 1 "nonimmediate_operand" "xm")
5056 "%vmovddup\t{%1, %0|%0, %1}"
5057 [(set_attr "type" "sselog1")
5058 (set_attr "prefix" "maybe_vex")
5059 (set_attr "mode" "DF")])
;; AVX construction of a V2DF from two DF operands.  Alternatives:
;; vunpcklpd for two registers; vmovhpd when operand 2 is in memory;
;; vmovsd load of operand 1 when operand 2 is the constant "C".
5061 (define_insn "*vec_concatv2df_avx"
5062 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5064 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5065 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5068 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5069 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5070 vmovsd\t{%1, %0|%0, %1}"
5071 [(set_attr "type" "ssemov")
5072 (set_attr "prefix" "vex")
5073 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX construction of a V2DF from two DF operands.  The first three
;; alternatives use the Y2 register class (unpcklpd, movhpd from memory,
;; movsd load when operand 2 is the constant "C"); the last two use plain
;; "x" registers with the single-precision moves movlhps / movhps.
5075 (define_insn "*vec_concatv2df"
5076 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5078 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5079 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5082 unpcklpd\t{%2, %0|%0, %2}
5083 movhpd\t{%2, %0|%0, %2}
5084 movsd\t{%1, %0|%0, %1}
5085 movlhps\t{%2, %0|%0, %2}
5086 movhps\t{%2, %0|%0, %2}"
5087 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5088 (set_attr "prefix_data16" "*,1,*,*,*")
5089 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5091 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5093 ;; Parallel integral arithmetic
5095 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the SSEMODEI integer vector modes.  The
;; preparation statement creates operand 2 as a register holding the zero
;; vector of the mode; presumably the pattern (partly not visible here)
;; subtracts operand 1 from it.
5097 (define_expand "neg<mode>2"
5098 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5101 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5103 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for the SSEMODEI integer vector modes.  Operands
;; are legitimized with ix86_fixup_binary_operands_no_copy for the
;; iterated <CODE>.
5105 (define_expand "<plusminus_insn><mode>3"
5106 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5108 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5109 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5111 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX integer vector add/subtract: three-operand
;; vp<plusminus_mnemonic><ssevecsize>.  The <comm> prefix on operand 1's
;; constraint is an iterator attribute; operand 2 may be memory.
5113 (define_insn "*avx_<plusminus_insn><mode>3"
5114 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5116 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5117 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5118 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5119 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5120 [(set_attr "type" "sseiadd")
5121 (set_attr "prefix" "vex")
5122 (set_attr "mode" "TI")])
;; SSE2 integer vector add/subtract: two-operand
;; p<plusminus_mnemonic><ssevecsize> with operand 1 tied to the
;; destination; operand 2 may be memory.
5124 (define_insn "*<plusminus_insn><mode>3"
5125 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5127 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5128 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5129 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5130 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5131 [(set_attr "type" "sseiadd")
5132 (set_attr "prefix_data16" "1")
5133 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus codes on the SSEMODE12 modes (V16QI and
;; V8HI).  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy for the iterated <CODE>.
5135 (define_expand "sse2_<plusminus_insn><mode>3"
5136 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5137 (sat_plusminus:SSEMODE12
5138 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5139 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5141 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX insn for the sat_plusminus codes on V16QI/V8HI: three-operand
;; vp<plusminus_mnemonic><ssevecsize>; operand 2 may be memory.
5143 (define_insn "*avx_<plusminus_insn><mode>3"
5144 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5145 (sat_plusminus:SSEMODE12
5146 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5147 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5148 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5149 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5150 [(set_attr "type" "sseiadd")
5151 (set_attr "prefix" "vex")
5152 (set_attr "mode" "TI")])
;; SSE2 insn for the sat_plusminus codes on V16QI/V8HI: two-operand
;; p<plusminus_mnemonic><ssevecsize> with operand 1 tied to the
;; destination; operand 2 may be memory.
5154 (define_insn "*sse2_<plusminus_insn><mode>3"
5155 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5156 (sat_plusminus:SSEMODE12
5157 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5158 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5159 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5160 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5161 [(set_attr "type" "sseiadd")
5162 (set_attr "prefix_data16" "1")
5163 (set_attr "mode" "TI")])
;; V16QI multiply, split while new pseudos can still be created.  The
;; split allocates six V16QI temporaries, interleaves each input with
;; itself (high and low halves) so every 16-bit word carries one source
;; byte in its low byte, multiplies the two pairs as V8HI vectors with
;; gen_mulv8hi3, and finally collects the even bytes of t[5] and t[4]
;; into operand 0 via ix86_expand_vec_extract_even_odd.
5165 (define_insn_and_split "mulv16qi3"
5166 [(set (match_operand:V16QI 0 "register_operand" "")
5167 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5168 (match_operand:V16QI 2 "register_operand" "")))]
5170 && can_create_pseudo_p ()"
5178 for (i = 0; i < 6; ++i)
5179 t[i] = gen_reg_rtx (V16QImode);
5181 /* Unpack data such that we've got a source byte in each low byte of
5182 each word. We don't care what goes into the high byte of each word.
5183 Rather than trying to get zero in there, most convenient is to let
5184 it be a copy of the low byte. */
5185 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5186 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5187 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5188 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5190 /* Multiply words. The end-of-line annotations here give a picture of what
5191 the output of that instruction looks like. Dot means don't care; the
5192 letters are the bytes of the result with A being the most significant. */
5193 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5194 gen_lowpart (V8HImode, t[0]),
5195 gen_lowpart (V8HImode, t[1])));
5196 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5197 gen_lowpart (V8HImode, t[2]),
5198 gen_lowpart (V8HImode, t[3])));
5200 /* Extract the even bytes and merge them back together. */
5201 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Expander for V8HI multiply.  Sources may be register or memory; the
;; preparation statement passes them to ix86_fixup_binary_operands_no_copy
;; with code MULT and mode V8HImode.
5205 (define_expand "mulv8hi3"
5206 [(set (match_operand:V8HI 0 "register_operand" "")
5207 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5208 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5210 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX V8HI multiply: emits three-operand vpmullw.  The "%" on operand 1
;; marks operands 1 and 2 as commutative for register allocation.
5212 (define_insn "*avx_mulv8hi3"
5213 [(set (match_operand:V8HI 0 "register_operand" "=x")
5214 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5215 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5216 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5217 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5218 [(set_attr "type" "sseimul")
5219 (set_attr "prefix" "vex")
5220 (set_attr "mode" "TI")])
;; SSE2 V8HI multiply: emits two-operand pmullw with operand 1 tied to the
;; destination ("%0": commutative, must match operand 0).
5222 (define_insn "*mulv8hi3"
5223 [(set (match_operand:V8HI 0 "register_operand" "=x")
5224 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5225 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5226 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5227 "pmullw\t{%2, %0|%0, %2}"
5228 [(set_attr "type" "sseimul")
5229 (set_attr "prefix_data16" "1")
5230 (set_attr "mode" "TI")])
;; Expander for the signed high-part multiply of two V8HI vectors (result is
;; V8HI).  Operands are fixed up as for a V8HI MULT via
;; ix86_fixup_binary_operands_no_copy.
5232 (define_expand "smulv8hi3_highpart"
5233 [(set (match_operand:V8HI 0 "register_operand" "")
5238 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5240 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5243 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX signed high-part V8HI multiply: emits three-operand vpmulhw.
;; This is the AVX counterpart of *smulv8hi3_highpart; the name follows the
;; same "*avx_<base name>" scheme used by *avx_umulv8hi3_highpart.  The
;; pattern is anonymous (leading "*"), so the name is only used in dumps.
5245 (define_insn "*avx_smulv8hi3_highpart"
5246 [(set (match_operand:V8HI 0 "register_operand" "=x")
5251 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5253 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5255 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5256 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5257 [(set_attr "type" "sseimul")
5258 (set_attr "prefix" "vex")
5259 (set_attr "mode" "TI")])
;; SSE2 signed high-part V8HI multiply: emits two-operand pmulhw with
;; operand 1 tied to the destination.
5261 (define_insn "*smulv8hi3_highpart"
5262 [(set (match_operand:V8HI 0 "register_operand" "=x")
5267 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5269 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5271 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5272 "pmulhw\t{%2, %0|%0, %2}"
5273 [(set_attr "type" "sseimul")
5274 (set_attr "prefix_data16" "1")
5275 (set_attr "mode" "TI")])
;; Expander for the unsigned high-part multiply of two V8HI vectors (result
;; is V8HI).  Operands are fixed up as for a V8HI MULT via
;; ix86_fixup_binary_operands_no_copy.
5277 (define_expand "umulv8hi3_highpart"
5278 [(set (match_operand:V8HI 0 "register_operand" "")
5283 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5285 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5288 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX unsigned high-part V8HI multiply: emits three-operand vpmulhuw.
5290 (define_insn "*avx_umulv8hi3_highpart"
5291 [(set (match_operand:V8HI 0 "register_operand" "=x")
5296 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5298 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5300 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5301 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5302 [(set_attr "type" "sseimul")
5303 (set_attr "prefix" "vex")
5304 (set_attr "mode" "TI")])
;; SSE2 unsigned high-part V8HI multiply: emits two-operand pmulhuw with
;; operand 1 tied to the destination.
5306 (define_insn "*umulv8hi3_highpart"
5307 [(set (match_operand:V8HI 0 "register_operand" "=x")
5312 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5314 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5316 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5317 "pmulhuw\t{%2, %0|%0, %2}"
5318 [(set_attr "type" "sseimul")
5319 (set_attr "prefix_data16" "1")
5320 (set_attr "mode" "TI")])
;; Expander for the widening multiply that takes elements 0 and 2 of each
;; V4SI input and produces a V2DI result (the unsigned variant, per its
;; name).  Operands are fixed up as a V4SI MULT.
5322 (define_expand "sse2_umulv2siv2di3"
5323 [(set (match_operand:V2DI 0 "register_operand" "")
5327 (match_operand:V4SI 1 "nonimmediate_operand" "")
5328 (parallel [(const_int 0) (const_int 2)])))
5331 (match_operand:V4SI 2 "nonimmediate_operand" "")
5332 (parallel [(const_int 0) (const_int 2)])))))]
5334 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX widening multiply of elements 0 and 2 of two V4SI inputs into V2DI:
;; emits three-operand vpmuludq.
5336 (define_insn "*avx_umulv2siv2di3"
5337 [(set (match_operand:V2DI 0 "register_operand" "=x")
5341 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5342 (parallel [(const_int 0) (const_int 2)])))
5345 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5346 (parallel [(const_int 0) (const_int 2)])))))]
5347 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5348 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5349 [(set_attr "type" "sseimul")
5350 (set_attr "prefix" "vex")
5351 (set_attr "mode" "TI")])
;; SSE2 widening multiply of elements 0 and 2 of two V4SI inputs into V2DI:
;; emits two-operand pmuludq with operand 1 tied to the destination.
5353 (define_insn "*sse2_umulv2siv2di3"
5354 [(set (match_operand:V2DI 0 "register_operand" "=x")
5358 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5359 (parallel [(const_int 0) (const_int 2)])))
5362 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5363 (parallel [(const_int 0) (const_int 2)])))))]
5364 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5365 "pmuludq\t{%2, %0|%0, %2}"
5366 [(set_attr "type" "sseimul")
5367 (set_attr "prefix_data16" "1")
5368 (set_attr "mode" "TI")])
;; Expander for the SSE4.1-named widening multiply that takes elements 0 and
;; 2 of each V4SI input and produces a V2DI result.  Operands are fixed up
;; as a V4SI MULT.
5370 (define_expand "sse4_1_mulv2siv2di3"
5371 [(set (match_operand:V2DI 0 "register_operand" "")
5375 (match_operand:V4SI 1 "nonimmediate_operand" "")
5376 (parallel [(const_int 0) (const_int 2)])))
5379 (match_operand:V4SI 2 "nonimmediate_operand" "")
5380 (parallel [(const_int 0) (const_int 2)])))))]
5382 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX widening multiply of elements 0 and 2 of two V4SI inputs into V2DI:
;; emits three-operand vpmuldq.  prefix_extra is set to 1 for this encoding.
5384 (define_insn "*avx_mulv2siv2di3"
5385 [(set (match_operand:V2DI 0 "register_operand" "=x")
5389 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5390 (parallel [(const_int 0) (const_int 2)])))
5393 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5394 (parallel [(const_int 0) (const_int 2)])))))]
5395 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5396 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5397 [(set_attr "type" "sseimul")
5398 (set_attr "prefix_extra" "1")
5399 (set_attr "prefix" "vex")
5400 (set_attr "mode" "TI")])
;; SSE4.1 widening multiply of elements 0 and 2 of two V4SI inputs into
;; V2DI: emits two-operand pmuldq with operand 1 tied to the destination.
5402 (define_insn "*sse4_1_mulv2siv2di3"
5403 [(set (match_operand:V2DI 0 "register_operand" "=x")
5407 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5408 (parallel [(const_int 0) (const_int 2)])))
5411 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5412 (parallel [(const_int 0) (const_int 2)])))))]
5413 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5414 "pmuldq\t{%2, %0|%0, %2}"
5415 [(set_attr "type" "sseimul")
5416 (set_attr "prefix_extra" "1")
5417 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V8HI x V8HI -> V4SI.  The template combines two
;; groups of V4HI selections from each input, one group starting at element
;; 0 and the other starting at element 1 (ending at element 7).  Operands
;; are fixed up as a V8HI MULT.
5419 (define_expand "sse2_pmaddwd"
5420 [(set (match_operand:V4SI 0 "register_operand" "")
5425 (match_operand:V8HI 1 "nonimmediate_operand" "")
5426 (parallel [(const_int 0)
5432 (match_operand:V8HI 2 "nonimmediate_operand" "")
5433 (parallel [(const_int 0)
5439 (vec_select:V4HI (match_dup 1)
5440 (parallel [(const_int 1)
5445 (vec_select:V4HI (match_dup 2)
5446 (parallel [(const_int 1)
5449 (const_int 7)]))))))]
5451 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX pmaddwd (V8HI x V8HI -> V4SI): emits three-operand vpmaddwd.
;; Operands 1 and 2 are each referenced twice in the template (match_dup).
5453 (define_insn "*avx_pmaddwd"
5454 [(set (match_operand:V4SI 0 "register_operand" "=x")
5459 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5460 (parallel [(const_int 0)
5466 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5467 (parallel [(const_int 0)
5473 (vec_select:V4HI (match_dup 1)
5474 (parallel [(const_int 1)
5479 (vec_select:V4HI (match_dup 2)
5480 (parallel [(const_int 1)
5483 (const_int 7)]))))))]
5484 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5485 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5486 [(set_attr "type" "sseiadd")
5487 (set_attr "prefix" "vex")
5488 (set_attr "mode" "TI")])
;; SSE2 pmaddwd (V8HI x V8HI -> V4SI): emits two-operand pmaddwd with
;; operand 1 tied to the destination.  Unlike the AVX variant this pattern
;; also sets the atom_unit attribute to "simul".
5490 (define_insn "*sse2_pmaddwd"
5491 [(set (match_operand:V4SI 0 "register_operand" "=x")
5496 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5497 (parallel [(const_int 0)
5503 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5504 (parallel [(const_int 0)
5510 (vec_select:V4HI (match_dup 1)
5511 (parallel [(const_int 1)
5516 (vec_select:V4HI (match_dup 2)
5517 (parallel [(const_int 1)
5520 (const_int 7)]))))))]
5521 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5522 "pmaddwd\t{%2, %0|%0, %2}"
5523 [(set_attr "type" "sseiadd")
5524 (set_attr "atom_unit" "simul")
5525 (set_attr "prefix_data16" "1")
5526 (set_attr "mode" "TI")])
;; Expander for V4SI multiply.  All operands must be registers.  Only when
;; TARGET_SSE4_1 or TARGET_AVX is set are the operands passed through
;; ix86_fixup_binary_operands_no_copy; otherwise they are left as given.
5528 (define_expand "mulv4si3"
5529 [(set (match_operand:V4SI 0 "register_operand" "")
5530 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5531 (match_operand:V4SI 2 "register_operand" "")))]
5534 if (TARGET_SSE4_1 || TARGET_AVX)
5535 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX V4SI multiply: emits three-operand vpmulld.
5538 (define_insn "*avx_mulv4si3"
5539 [(set (match_operand:V4SI 0 "register_operand" "=x")
5540 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5541 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5542 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5543 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5544 [(set_attr "type" "sseimul")
5545 (set_attr "prefix_extra" "1")
5546 (set_attr "prefix" "vex")
5547 (set_attr "mode" "TI")])
;; SSE4.1 V4SI multiply: emits two-operand pmulld with operand 1 tied to the
;; destination.
5549 (define_insn "*sse4_1_mulv4si3"
5550 [(set (match_operand:V4SI 0 "register_operand" "=x")
5551 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5552 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5553 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5554 "pmulld\t{%2, %0|%0, %2}"
5555 [(set_attr "type" "sseimul")
5556 (set_attr "prefix_extra" "1")
5557 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or AVX, implemented as a
;; split (pseudos must still be creatable).  Sequence:
;;   1. multiply elements 0 and 2 with sse2_umulv2siv2di3;
;;   2. shift both inputs right as V1TI so elements 1 and 3 land in the
;;      slots for elements 0 and 2;
;;   3. multiply again to get the products for elements 1 and 3;
;;   4. pshufd each product so its element 2 moves down to element 1;
;;   5. interleave the low halves of the two shuffled vectors into op0.
5559 (define_insn_and_split "*sse2_mulv4si3"
5560 [(set (match_operand:V4SI 0 "register_operand" "")
5561 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5562 (match_operand:V4SI 2 "register_operand" "")))]
5563 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5564 && can_create_pseudo_p ()"
5569 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5575 t1 = gen_reg_rtx (V4SImode);
5576 t2 = gen_reg_rtx (V4SImode);
5577 t3 = gen_reg_rtx (V4SImode);
5578 t4 = gen_reg_rtx (V4SImode);
5579 t5 = gen_reg_rtx (V4SImode);
5580 t6 = gen_reg_rtx (V4SImode);
5581 thirtytwo = GEN_INT (32);
5583 /* Multiply elements 2 and 0. */
5584 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5587 /* Shift both input vectors down one element, so that elements 3
5588 and 1 are now in the slots for elements 2 and 0. For K8, at
5589 least, this is faster than using a shuffle. */
5590 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5591 gen_lowpart (V1TImode, op1),
5593 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5594 gen_lowpart (V1TImode, op2),
5596 /* Multiply elements 3 and 1. */
5597 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5600 /* Move the results in element 2 down to element 1; we don't care
5601 what goes in elements 2 and 3. */
5602 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5603 const0_rtx, const0_rtx));
5604 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5605 const0_rtx, const0_rtx));
5607 /* Merge the parts back together. */
5608 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, implemented as a split (pseudos must still be creatable).
;; Two sequences are visible below:
;;  - an XOP sequence: pshufd op1, V4SI multiply against op2, horizontal add
;;    (xop_phadddq), shift the sum left by 32, then xop_pmacsdql to
;;    multiply-accumulate into op0;
;;  - a generic SSE2 sequence: low*low via sse2_umulv2siv2di3, the two
;;    cross products (each input's low half times the other's high half,
;;    obtained by a 32-bit logical right shift), both shifted left by 32,
;;    then the three partial products are added.
5612 (define_insn_and_split "mulv2di3"
5613 [(set (match_operand:V2DI 0 "register_operand" "")
5614 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5615 (match_operand:V2DI 2 "register_operand" "")))]
5617 && can_create_pseudo_p ()"
5622 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5631 /* op1: A,B,C,D, op2: E,F,G,H */
5632 op1 = gen_lowpart (V4SImode, op1);
5633 op2 = gen_lowpart (V4SImode, op2);
5635 t1 = gen_reg_rtx (V4SImode);
5636 t2 = gen_reg_rtx (V4SImode);
5637 t3 = gen_reg_rtx (V2DImode);
5638 t4 = gen_reg_rtx (V2DImode);
5641 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5647 /* t2: (B*E),(A*F),(D*G),(C*H) */
5648 emit_insn (gen_mulv4si3 (t2, t1, op2));
5650 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5651 emit_insn (gen_xop_phadddq (t3, t2));
5653 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5654 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5656 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5657 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5661 t1 = gen_reg_rtx (V2DImode);
5662 t2 = gen_reg_rtx (V2DImode);
5663 t3 = gen_reg_rtx (V2DImode);
5664 t4 = gen_reg_rtx (V2DImode);
5665 t5 = gen_reg_rtx (V2DImode);
5666 t6 = gen_reg_rtx (V2DImode);
5667 thirtytwo = GEN_INT (32);
5669 /* Multiply low parts. */
5670 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5671 gen_lowpart (V4SImode, op2)));
5673 /* Shift input vectors right 32 bits so we can multiply high parts. */
5674 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5675 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5677 /* Multiply high parts by low parts. */
5678 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5679 gen_lowpart (V4SImode, t3)));
5680 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5681 gen_lowpart (V4SImode, t2)));
5683 /* Shift them back. */
5684 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5685 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5687 /* Add the three parts together. */
5688 emit_insn (gen_addv2di3 (t6, t1, t4));
5689 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply, high half: V8HI x V8HI -> V4SI.
;; t1 receives the mulv8hi3 result and t2 the smulv8hi3_highpart result;
;; the high elements of t1 and t2 are then interleaved into operand 0
;; (accessed as V8HI through gen_lowpart).
5694 (define_expand "vec_widen_smult_hi_v8hi"
5695 [(match_operand:V4SI 0 "register_operand" "")
5696 (match_operand:V8HI 1 "register_operand" "")
5697 (match_operand:V8HI 2 "register_operand" "")]
5700 rtx op1, op2, t1, t2, dest;
5704 t1 = gen_reg_rtx (V8HImode);
5705 t2 = gen_reg_rtx (V8HImode);
5706 dest = gen_lowpart (V8HImode, operands[0]);
5708 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5709 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5710 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, low half: V8HI x V8HI -> V4SI.
;; t1 receives the mulv8hi3 result and t2 the smulv8hi3_highpart result;
;; the low elements of t1 and t2 are then interleaved into operand 0
;; (accessed as V8HI through gen_lowpart).
5714 (define_expand "vec_widen_smult_lo_v8hi"
5715 [(match_operand:V4SI 0 "register_operand" "")
5716 (match_operand:V8HI 1 "register_operand" "")
5717 (match_operand:V8HI 2 "register_operand" "")]
5720 rtx op1, op2, t1, t2, dest;
5724 t1 = gen_reg_rtx (V8HImode);
5725 t2 = gen_reg_rtx (V8HImode);
5726 dest = gen_lowpart (V8HImode, operands[0]);
5728 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5729 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5730 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, high half: V8HI x V8HI -> V4SI.
;; t1 receives the mulv8hi3 result and t2 the umulv8hi3_highpart result;
;; the high elements of t1 and t2 are then interleaved into operand 0
;; (accessed as V8HI through gen_lowpart).
5734 (define_expand "vec_widen_umult_hi_v8hi"
5735 [(match_operand:V4SI 0 "register_operand" "")
5736 (match_operand:V8HI 1 "register_operand" "")
5737 (match_operand:V8HI 2 "register_operand" "")]
5740 rtx op1, op2, t1, t2, dest;
5744 t1 = gen_reg_rtx (V8HImode);
5745 t2 = gen_reg_rtx (V8HImode);
5746 dest = gen_lowpart (V8HImode, operands[0]);
5748 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5749 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5750 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, low half: V8HI x V8HI -> V4SI.
;; t1 receives the mulv8hi3 result and t2 the umulv8hi3_highpart result;
;; the low elements of t1 and t2 are then interleaved into operand 0
;; (accessed as V8HI through gen_lowpart).
5754 (define_expand "vec_widen_umult_lo_v8hi"
5755 [(match_operand:V4SI 0 "register_operand" "")
5756 (match_operand:V8HI 1 "register_operand" "")
5757 (match_operand:V8HI 2 "register_operand" "")]
5760 rtx op1, op2, t1, t2, dest;
5764 t1 = gen_reg_rtx (V8HImode);
5765 t2 = gen_reg_rtx (V8HImode);
5766 dest = gen_lowpart (V8HImode, operands[0]);
5768 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5769 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5770 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed multiply, high half: V4SI x V4SI -> V2DI.
;; Both inputs are first shuffled with sse2_pshufd_1 into fresh V4SI
;; temporaries, which are then fed to xop_mulv2div2di3_high.
5774 (define_expand "vec_widen_smult_hi_v4si"
5775 [(match_operand:V2DI 0 "register_operand" "")
5776 (match_operand:V4SI 1 "register_operand" "")
5777 (match_operand:V4SI 2 "register_operand" "")]
5782 t1 = gen_reg_rtx (V4SImode);
5783 t2 = gen_reg_rtx (V4SImode);
5785 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5790 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5795 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed multiply, low half: V4SI x V4SI -> V2DI.
;; Both inputs are first shuffled with sse2_pshufd_1 into fresh V4SI
;; temporaries, which are then fed to xop_mulv2div2di3_low.
5799 (define_expand "vec_widen_smult_lo_v4si"
5800 [(match_operand:V2DI 0 "register_operand" "")
5801 (match_operand:V4SI 1 "register_operand" "")
5802 (match_operand:V4SI 2 "register_operand" "")]
5807 t1 = gen_reg_rtx (V4SImode);
5808 t2 = gen_reg_rtx (V4SImode);
5810 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5815 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5820 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself (high elements) so the wanted
;; source elements occupy the positions that sse2_umulv2siv2di3 reads
;; (elements 0 and 2); the multiply then writes operand 0 directly.
5824 (define_expand "vec_widen_umult_hi_v4si"
5825 [(match_operand:V2DI 0 "register_operand" "")
5826 (match_operand:V4SI 1 "register_operand" "")
5827 (match_operand:V4SI 2 "register_operand" "")]
5830 rtx op1, op2, t1, t2;
5834 t1 = gen_reg_rtx (V4SImode);
5835 t2 = gen_reg_rtx (V4SImode);
5837 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5838 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5839 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, low half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself (low elements) so the wanted
;; source elements occupy the positions that sse2_umulv2siv2di3 reads
;; (elements 0 and 2); the multiply then writes operand 0 directly.
5843 (define_expand "vec_widen_umult_lo_v4si"
5844 [(match_operand:V2DI 0 "register_operand" "")
5845 (match_operand:V4SI 1 "register_operand" "")
5846 (match_operand:V4SI 2 "register_operand" "")]
5849 rtx op1, op2, t1, t2;
5853 t1 = gen_reg_rtx (V4SImode);
5854 t2 = gen_reg_rtx (V4SImode);
5856 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5857 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5858 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product with accumulate: operand 0 (V4SI) =
;; operand 3 (V4SI) + pmaddwd (operand 1, operand 2), where operands 1 and
;; 2 are V8HI.  The pmaddwd result goes through a fresh V4SI temporary.
5862 (define_expand "sdot_prodv8hi"
5863 [(match_operand:V4SI 0 "register_operand" "")
5864 (match_operand:V8HI 1 "register_operand" "")
5865 (match_operand:V8HI 2 "register_operand" "")
5866 (match_operand:V4SI 3 "register_operand" "")]
5869 rtx t = gen_reg_rtx (V4SImode);
5870 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5871 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product with accumulate: V4SI x V4SI accumulated into V2DI.
;;   t1 = sse2_umulv2siv2di3 (op1, op2) + op3
;;   t2, t3 = op1, op2 shifted right as V1TI values
;;   t4 = sse2_umulv2siv2di3 (t2, t3)
;;   op0 = t1 + t4
5875 (define_expand "udot_prodv4si"
5876 [(match_operand:V2DI 0 "register_operand" "")
5877 (match_operand:V4SI 1 "register_operand" "")
5878 (match_operand:V4SI 2 "register_operand" "")
5879 (match_operand:V2DI 3 "register_operand" "")]
5884 t1 = gen_reg_rtx (V2DImode);
5885 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5886 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5888 t2 = gen_reg_rtx (V4SImode);
5889 t3 = gen_reg_rtx (V4SImode);
5890 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5891 gen_lowpart (V1TImode, operands[1]),
5893 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5894 gen_lowpart (V1TImode, operands[2]),
5897 t4 = gen_reg_rtx (V2DImode);
5898 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5900 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI): emits
;; three-operand vpsra<ssevecsize>.  The SImode count (operand 2) is either
;; an "x" register or an "N" constant; length_immediate is chosen by
;; whether operand 2 is a const_int.
5904 (define_insn "*avx_ashr<mode>3"
5905 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5907 (match_operand:SSEMODE24 1 "register_operand" "x")
5908 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5910 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5911 [(set_attr "type" "sseishft")
5912 (set_attr "prefix" "vex")
5913 (set (attr "length_immediate")
5914 (if_then_else (match_operand 2 "const_int_operand" "")
5916 (const_string "0")))
5917 (set_attr "mode" "TI")])
;; Arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI): emits
;; two-operand psra<ssevecsize> with operand 1 tied to the destination.
;; The SImode count (operand 2) is either an "x" register or an "N"
;; constant; length_immediate is chosen by whether it is a const_int.
5919 (define_insn "ashr<mode>3"
5920 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5922 (match_operand:SSEMODE24 1 "register_operand" "0")
5923 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5925 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5926 [(set_attr "type" "sseishft")
5927 (set_attr "prefix_data16" "1")
5928 (set (attr "length_immediate")
5929 (if_then_else (match_operand 2 "const_int_operand" "")
5931 (const_string "0")))
5932 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) right shift: emits vpsrldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, i.e. the RTL count is in bits and the instruction
;; immediate is in bytes.
5934 (define_insn "*avx_lshrv1ti3"
5935 [(set (match_operand:V1TI 0 "register_operand" "=x")
5937 (match_operand:V1TI 1 "register_operand" "x")
5938 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5941 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5942 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5944 [(set_attr "type" "sseishft")
5945 (set_attr "prefix" "vex")
5946 (set_attr "length_immediate" "1")
5947 (set_attr "mode" "TI")])
;; AVX logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI):
;; emits three-operand vpsrl<ssevecsize>.  The SImode count is an "x"
;; register or an "N" constant; length_immediate is chosen by whether
;; operand 2 is a const_int.
5949 (define_insn "*avx_lshr<mode>3"
5950 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5951 (lshiftrt:SSEMODE248
5952 (match_operand:SSEMODE248 1 "register_operand" "x")
5953 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5955 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5956 [(set_attr "type" "sseishft")
5957 (set_attr "prefix" "vex")
5958 (set (attr "length_immediate")
5959 (if_then_else (match_operand 2 "const_int_operand" "")
5961 (const_string "0")))
5962 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) right shift: emits two-operand psrldq with
;; operand 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, i.e. the RTL count is in bits and the immediate is in bytes.
5964 (define_insn "sse2_lshrv1ti3"
5965 [(set (match_operand:V1TI 0 "register_operand" "=x")
5967 (match_operand:V1TI 1 "register_operand" "0")
5968 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5971 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5972 return "psrldq\t{%2, %0|%0, %2}";
5974 [(set_attr "type" "sseishft")
5975 (set_attr "prefix_data16" "1")
5976 (set_attr "length_immediate" "1")
5977 (set_attr "mode" "TI")])
;; Logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI): emits
;; two-operand psrl<ssevecsize> with operand 1 tied to the destination.
;; The SImode count is an "x" register or an "N" constant; length_immediate
;; is chosen by whether operand 2 is a const_int.
5979 (define_insn "lshr<mode>3"
5980 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5981 (lshiftrt:SSEMODE248
5982 (match_operand:SSEMODE248 1 "register_operand" "0")
5983 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5985 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5986 [(set_attr "type" "sseishft")
5987 (set_attr "prefix_data16" "1")
5988 (set (attr "length_immediate")
5989 (if_then_else (match_operand 2 "const_int_operand" "")
5991 (const_string "0")))
5992 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) left shift: emits vpslldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, i.e. the RTL count is in bits and the instruction
;; immediate is in bytes.
5994 (define_insn "*avx_ashlv1ti3"
5995 [(set (match_operand:V1TI 0 "register_operand" "=x")
5996 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
5997 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6000 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6001 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
6003 [(set_attr "type" "sseishft")
6004 (set_attr "prefix" "vex")
6005 (set_attr "length_immediate" "1")
6006 (set_attr "mode" "TI")])
;; AVX left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI): emits
;; three-operand vpsll<ssevecsize>.  The SImode count is an "x" register or
;; an "N" constant; length_immediate is chosen by whether operand 2 is a
;; const_int.
6008 (define_insn "*avx_ashl<mode>3"
6009 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6011 (match_operand:SSEMODE248 1 "register_operand" "x")
6012 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6014 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6015 [(set_attr "type" "sseishft")
6016 (set_attr "prefix" "vex")
6017 (set (attr "length_immediate")
6018 (if_then_else (match_operand 2 "const_int_operand" "")
6020 (const_string "0")))
6021 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) left shift: emits two-operand pslldq with
;; operand 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, i.e. the RTL count is in bits and the immediate is in bytes.
6023 (define_insn "sse2_ashlv1ti3"
6024 [(set (match_operand:V1TI 0 "register_operand" "=x")
6025 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
6026 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6029 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6030 return "pslldq\t{%2, %0|%0, %2}";
6032 [(set_attr "type" "sseishft")
6033 (set_attr "prefix_data16" "1")
6034 (set_attr "length_immediate" "1")
6035 (set_attr "mode" "TI")])
;; Left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI): emits
;; two-operand psll<ssevecsize> with operand 1 tied to the destination.
;; The SImode count is an "x" register or an "N" constant; length_immediate
;; is chosen by whether operand 2 is a const_int.
6037 (define_insn "ashl<mode>3"
6038 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6040 (match_operand:SSEMODE248 1 "register_operand" "0")
6041 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6043 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6044 [(set_attr "type" "sseishft")
6045 (set_attr "prefix_data16" "1")
6046 (set (attr "length_immediate")
6047 (if_then_else (match_operand 2 "const_int_operand" "")
6049 (const_string "0")))
6050 (set_attr "mode" "TI")])
;; Whole-vector shift-left expander for the SSEMODEI modes.  The shift
;; count must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-views operands 0 and 1 as V1TI with gen_lowpart, so the shift is
;; performed on the full 128-bit value.
6052 (define_expand "vec_shl_<mode>"
6053 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6055 (match_operand:SSEMODEI 1 "register_operand" "")
6056 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6059 operands[0] = gen_lowpart (V1TImode, operands[0]);
6060 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-vector shift-right expander for the SSEMODEI modes.  The shift
;; count must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-views operands 0 and 1 as V1TI with gen_lowpart, so the shift is
;; performed on the full 128-bit value.
6063 (define_expand "vec_shr_<mode>"
6064 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6066 (match_operand:SSEMODEI 1 "register_operand" "")
6067 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6070 operands[0] = gen_lowpart (V1TImode, operands[0]);
6071 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX integer max/min for the SSEMODE124 modes (V16QI, V8HI, V4SI): emits
;; three-operand vp<maxmin_int><ssevecsize>.  prefix_extra is computed per
;; code/mode: the test compares the mode against V8HImode for SMAX/SMIN
;; and against V16QImode for the other codes.
6074 (define_insn "*avx_<code><mode>3"
6075 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6077 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6078 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6079 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6080 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6081 [(set_attr "type" "sseiadd")
6082 (set (attr "prefix_extra")
6084 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6087 (const_string "0")))
6088 (set_attr "prefix" "vex")
6089 (set_attr "mode" "TI")])
;; Expander for the V16QI max/min codes covered by the <code> iterator
;; (its member list is defined outside this chunk).  Operands are fixed up
;; with ix86_fixup_binary_operands_no_copy.
6091 (define_expand "<code>v16qi3"
6092 [(set (match_operand:V16QI 0 "register_operand" "")
6094 (match_operand:V16QI 1 "nonimmediate_operand" "")
6095 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6097 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI max/min: emits two-operand p<maxmin_int>b with operand 1 tied
;; to the destination.
6099 (define_insn "*<code>v16qi3"
6100 [(set (match_operand:V16QI 0 "register_operand" "=x")
6102 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6103 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6104 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6105 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6106 [(set_attr "type" "sseiadd")
6107 (set_attr "prefix_data16" "1")
6108 (set_attr "mode" "TI")])
;; Expander for the V8HI max/min codes covered by the <code> iterator
;; (its member list is defined outside this chunk).  Operands are fixed up
;; with ix86_fixup_binary_operands_no_copy.
6110 (define_expand "<code>v8hi3"
6111 [(set (match_operand:V8HI 0 "register_operand" "")
6113 (match_operand:V8HI 1 "nonimmediate_operand" "")
6114 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6116 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI max/min: emits two-operand p<maxmin_int>w with operand 1 tied
;; to the destination.
6118 (define_insn "*<code>v8hi3"
6119 [(set (match_operand:V8HI 0 "register_operand" "=x")
6121 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6122 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6123 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6124 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6125 [(set_attr "type" "sseiadd")
6126 (set_attr "prefix_data16" "1")
6127 (set_attr "mode" "TI")])
;; Expander for unsigned V8HI max.  Two paths are visible:
;;  - one simply fixes up the operands for a UMAX pattern;
;;  - the other synthesizes the result as
;;      op3 = sse2_ussubv8hi3 (op1, op2);  op0 = op3 + op2
;;    using a fresh temporary for op3 when op0 is the same rtx as op2, so
;;    that op2 is not overwritten before the final add.
;; NOTE(review): the second path presumably relies on the subtract being
;; unsigned-saturating (max (a, b) == sat_sub (a, b) + b) -- confirm.
6129 (define_expand "umaxv8hi3"
6130 [(set (match_operand:V8HI 0 "register_operand" "")
6131 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6132 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6136 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6139 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6140 if (rtx_equal_p (op3, op2))
6141 op3 = gen_reg_rtx (V8HImode);
6142 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6143 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for signed max on the SSEMODE14 modes (V16QI, V4SI).
;; One path fixes up the operands for an SMAX pattern.  The other builds a
;; vector conditional for ix86_expand_int_vcond with
;;   xops = { op0, op1, op2, (gt op1 op2), op1, op2 }.
6148 (define_expand "smax<mode>3"
6149 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6150 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6151 (match_operand:SSEMODE14 2 "register_operand" "")))]
6155 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6161 xops[0] = operands[0];
6162 xops[1] = operands[1];
6163 xops[2] = operands[2];
6164 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6165 xops[4] = operands[1];
6166 xops[5] = operands[2];
6167 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min for the SSEMODE14 modes (V16QI, V4SI): emits
;; two-operand p<maxmin_int><ssevecsize> with operand 1 tied to the
;; destination.
6173 (define_insn "*sse4_1_<code><mode>3"
6174 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6176 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6177 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6178 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6179 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6180 [(set_attr "type" "sseiadd")
6181 (set_attr "prefix_extra" "1")
6182 (set_attr "mode" "TI")])
;; Expander for signed V2DI max.  Always goes through
;; ix86_expand_int_vcond with
;;   xops = { op0, op1, op2, (gt op1 op2), op1, op2 }.
6184 (define_expand "smaxv2di3"
6185 [(set (match_operand:V2DI 0 "register_operand" "")
6186 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6187 (match_operand:V2DI 2 "register_operand" "")))]
6193 xops[0] = operands[0];
6194 xops[1] = operands[1];
6195 xops[2] = operands[2];
6196 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6197 xops[4] = operands[1];
6198 xops[5] = operands[2];
6199 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned V4SI max.  One path fixes up the operands for a
;; UMAX pattern.  The other builds a vector conditional for
;; ix86_expand_int_vcond with
;;   xops = { op0, op1, op2, (gtu op1 op2), op1, op2 }.
6204 (define_expand "umaxv4si3"
6205 [(set (match_operand:V4SI 0 "register_operand" "")
6206 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6207 (match_operand:V4SI 2 "register_operand" "")))]
6211 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6217 xops[0] = operands[0];
6218 xops[1] = operands[1];
6219 xops[2] = operands[2];
6220 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6221 xops[4] = operands[1];
6222 xops[5] = operands[2];
6223 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min for the SSEMODE24 modes (V8HI, V4SI): emits
;; two-operand p<maxmin_int><ssevecsize> with operand 1 tied to the
;; destination.
6229 (define_insn "*sse4_1_<code><mode>3"
6230 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6232 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6233 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6234 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6235 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6236 [(set_attr "type" "sseiadd")
6237 (set_attr "prefix_extra" "1")
6238 (set_attr "mode" "TI")])
;; Expander for unsigned V2DI max.  Always goes through
;; ix86_expand_int_vcond with
;;   xops = { op0, op1, op2, (gtu op1 op2), op1, op2 }.
6240 (define_expand "umaxv2di3"
6241 [(set (match_operand:V2DI 0 "register_operand" "")
6242 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6243 (match_operand:V2DI 2 "register_operand" "")))]
6249 xops[0] = operands[0];
6250 xops[1] = operands[1];
6251 xops[2] = operands[2];
6252 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6253 xops[4] = operands[1];
6254 xops[5] = operands[2];
6255 ok = ix86_expand_int_vcond (xops);
;; Expander for signed min on the SSEMODE14 modes (V16QI, V4SI).
;; One path fixes up the operands for an SMIN pattern.  The other builds a
;; vector conditional for ix86_expand_int_vcond; relative to smax the two
;; value arms are swapped:
;;   xops = { op0, op2, op1, (gt op1 op2), op1, op2 }.
6260 (define_expand "smin<mode>3"
6261 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6262 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6263 (match_operand:SSEMODE14 2 "register_operand" "")))]
6267 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6273 xops[0] = operands[0];
6274 xops[1] = operands[2];
6275 xops[2] = operands[1];
6276 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6277 xops[4] = operands[1];
6278 xops[5] = operands[2];
6279 ok = ix86_expand_int_vcond (xops);
;; Expander for signed V2DI min.  Always goes through
;; ix86_expand_int_vcond, with the value arms swapped relative to max:
;;   xops = { op0, op2, op1, (gt op1 op2), op1, op2 }.
6285 (define_expand "sminv2di3"
6286 [(set (match_operand:V2DI 0 "register_operand" "")
6287 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6288 (match_operand:V2DI 2 "register_operand" "")))]
6294 xops[0] = operands[0];
6295 xops[1] = operands[2];
6296 xops[2] = operands[1];
6297 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6298 xops[4] = operands[1];
6299 xops[5] = operands[2];
6300 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned min on the SSEMODE24 modes (V8HI, V4SI).
;; One path fixes up the operands for a UMIN pattern.  The other builds a
;; vector conditional for ix86_expand_int_vcond, with the value arms
;; swapped relative to max:
;;   xops = { op0, op2, op1, (gtu op1 op2), op1, op2 }.
6305 (define_expand "umin<mode>3"
6306 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6307 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6308 (match_operand:SSEMODE24 2 "register_operand" "")))]
6312 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6318 xops[0] = operands[0];
6319 xops[1] = operands[2];
6320 xops[2] = operands[1];
6321 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6322 xops[4] = operands[1];
6323 xops[5] = operands[2];
6324 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned V2DI min.  Always goes through
;; ix86_expand_int_vcond, with the value arms swapped relative to max:
;;   xops = { op0, op2, op1, (gtu op1 op2), op1, op2 }.
6330 (define_expand "uminv2di3"
6331 [(set (match_operand:V2DI 0 "register_operand" "")
6332 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6333 (match_operand:V2DI 2 "register_operand" "")))]
6339 xops[0] = operands[0];
6340 xops[1] = operands[2];
6341 xops[2] = operands[1];
6342 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6343 xops[4] = operands[1];
6344 xops[5] = operands[2];
6345 ok = ix86_expand_int_vcond (xops);
6350 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6352 ;; Parallel integral comparisons
6354 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality compare on the SSEMODE124 modes
;; (V16QI, V8HI, V4SI).  Available with SSE2 but disabled when TARGET_XOP
;; is set.  Operands are fixed up as an EQ binary operation.
6356 (define_expand "sse2_eq<mode>3"
6357 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6359 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6360 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6361 "TARGET_SSE2 && !TARGET_XOP "
6362 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX equality compare over the SSEMODE1248 modes.  Emits the
;; three-operand vpcmpeq<ssevecsize>; the % modifier on operand 1 marks
;; operands 1 and 2 as commutative, and operand 2 may be register or memory.
;; Requires TARGET_AVX and ix86_binary_operator_ok for code EQ.
;; The prefix_extra attribute is selected by testing whether operand 0 is
;; V2DI (only the else-value * is visible in this excerpt).
6364 (define_insn "*avx_eq<mode>3"
6365 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6367 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6368 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6369 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6370 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6371 [(set_attr "type" "ssecmp")
6372 (set (attr "prefix_extra")
6373 (if_then_else (match_operand:V2DI 0 "" "")
6375 (const_string "*")))
6376 (set_attr "prefix" "vex")
6377 (set_attr "mode" "TI")])
;; SSE2 equality compare over the SSEMODE124 modes.  Emits the two-operand
;; pcmpeq<ssevecsize>: operand 1 is tied to the destination (constraint %0,
;; commutative with operand 2) and operand 2 may be register or memory.
;; Requires TARGET_SSE2 without XOP and ix86_binary_operator_ok for EQ.
6379 (define_insn "*sse2_eq<mode>3"
6380 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6382 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6383 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6384 "TARGET_SSE2 && !TARGET_XOP
6385 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6386 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6387 [(set_attr "type" "ssecmp")
6388 (set_attr "prefix_data16" "1")
6389 (set_attr "mode" "TI")])
;; Named expander for the V2DI equality compare.  The preparation statement
;; runs ix86_fixup_binary_operands_no_copy with code EQ in V2DImode.
;; NOTE(review): the enabling condition line is not visible in this
;; excerpt; the name suggests SSE4.1 -- confirm against the full file.
6391 (define_expand "sse4_1_eqv2di3"
6392 [(set (match_operand:V2DI 0 "register_operand" "")
6394 (match_operand:V2DI 1 "nonimmediate_operand" "")
6395 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6397 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 V2DI equality compare.  Emits the two-operand pcmpeqq with
;; operand 1 tied to the destination (constraint %0) and operand 2 in a
;; register or memory.  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok for EQ in V2DImode.
6399 (define_insn "*sse4_1_eqv2di3"
6400 [(set (match_operand:V2DI 0 "register_operand" "=x")
6402 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6403 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6404 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6405 "pcmpeqq\t{%2, %0|%0, %2}"
6406 [(set_attr "type" "ssecmp")
6407 (set_attr "prefix_extra" "1")
6408 (set_attr "mode" "TI")])
;; AVX greater-than compare over the SSEMODE1248 modes.  Emits the
;; three-operand vpcmpgt<ssevecsize>.  Unlike the equality patterns,
;; operand 1 has no % modifier and must be a register; operand 2 may be
;; register or memory.  prefix_extra is selected by testing whether
;; operand 0 is V2DI (only the else-value * is visible here).
;; NOTE(review): the enabling condition line is not visible in this excerpt.
6410 (define_insn "*avx_gt<mode>3"
6411 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6413 (match_operand:SSEMODE1248 1 "register_operand" "x")
6414 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6416 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6417 [(set_attr "type" "ssecmp")
6418 (set (attr "prefix_extra")
6419 (if_then_else (match_operand:V2DI 0 "" "")
6421 (const_string "*")))
6422 (set_attr "prefix" "vex")
6423 (set_attr "mode" "TI")])
;; SSE2 greater-than compare over the SSEMODE124 modes.  Emits the
;; two-operand pcmpgt<ssevecsize> with operand 1 tied to the destination
;; register and operand 2 in a register or memory.  Enabled for
;; TARGET_SSE2 when XOP is not in use.
6425 (define_insn "sse2_gt<mode>3"
6426 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6428 (match_operand:SSEMODE124 1 "register_operand" "0")
6429 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6430 "TARGET_SSE2 && !TARGET_XOP"
6431 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6432 [(set_attr "type" "ssecmp")
6433 (set_attr "prefix_data16" "1")
6434 (set_attr "mode" "TI")])
;; V2DI greater-than compare.  Emits the two-operand pcmpgtq with operand 1
;; tied to the destination register and operand 2 in a register or memory.
;; NOTE(review): the enabling condition line is not visible in this
;; excerpt; the name suggests SSE4.2 -- confirm against the full file.
6436 (define_insn "sse4_2_gtv2di3"
6437 [(set (match_operand:V2DI 0 "register_operand" "=x")
6439 (match_operand:V2DI 1 "register_operand" "0")
6440 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6442 "pcmpgtq\t{%2, %0|%0, %2}"
6443 [(set_attr "type" "ssecmp")
6444 (set_attr "prefix_extra" "1")
6445 (set_attr "mode" "TI")])
;; Vector conditional select over the SSEMODE124C8 modes.
;; Operand layout: 0 = destination, 3 = comparison operator applied to
;; operands 4 and 5, 1 = value chosen when the comparison holds,
;; 2 = value chosen otherwise.  Expansion is delegated entirely to
;; ix86_expand_int_vcond, which receives the operands array unchanged.
;; NOTE(review): the enabling condition and the use of `ok' are not
;; visible in this excerpt.
6447 (define_expand "vcond<mode>"
6448 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6449 (if_then_else:SSEMODE124C8
6450 (match_operator 3 ""
6451 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6452 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6453 (match_operand:SSEMODE124C8 1 "general_operand" "")
6454 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6457 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select over the SSEMODE124C8 modes, registered under
;; the vcondu name.  The visible pattern and body are identical in shape to
;; vcond<mode>: operator 3 compares operands 4 and 5, operand 1 / operand 2
;; are the two candidate values, and ix86_expand_int_vcond does the work.
;; NOTE(review): presumably operator 3 carries an unsigned comparison code
;; here; nothing in the visible lines distinguishes the two expanders.
6462 (define_expand "vcondu<mode>"
6463 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6464 (if_then_else:SSEMODE124C8
6465 (match_operator 3 ""
6466 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6467 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6468 (match_operand:SSEMODE124C8 1 "general_operand" "")
6469 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6472 bool ok = ix86_expand_int_vcond (operands);
6477 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6479 ;; Parallel bitwise logical operations
6481 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement of an SSEMODEI vector, expressed as an xor.
;; The body builds a CONST_VECTOR with GET_MODE_NUNITS (<MODE>mode)
;; elements, each constm1_rtx (all bits set), forces it into a register and
;; stores it in operands[2].
;; NOTE(review): the line naming the second xor input is not visible here;
;; presumably it refers to operands[2].
6483 (define_expand "one_cmpl<mode>2"
6484 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6485 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6489 int i, n = GET_MODE_NUNITS (<MODE>mode);
6490 rtvec v = rtvec_alloc (n);
6492 for (i = 0; i < n; ++i)
6493 RTVEC_ELT (v, i) = constm1_rtx;
6495 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not for the 256-bit integer modes (AVX256MODEI).  Operand 1 appears
;; complemented (not:...) and must be a register; operand 2 may be register
;; or memory.  Emits the three-operand vandnps and records the insn mode as
;; <avxvecpsmode>.
;; NOTE(review): the enabling condition line is not visible in this excerpt.
6498 (define_insn "*avx_andnot<mode>3"
6499 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6501 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6502 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6504 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6505 [(set_attr "type" "sselog")
6506 (set_attr "prefix" "vex")
6507 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the 128-bit integer modes when only SSE1 is available
;; (TARGET_SSE && !TARGET_SSE2).  Operand 1 appears complemented and is
;; tied to the destination; operand 2 may be register or memory.  Emits the
;; two-operand andnps and records the insn mode as V4SF.
6509 (define_insn "*sse_andnot<mode>3"
6510 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6512 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6513 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6514 "(TARGET_SSE && !TARGET_SSE2)"
6515 "andnps\t{%2, %0|%0, %2}"
6516 [(set_attr "type" "sselog")
6517 (set_attr "mode" "V4SF")])
;; And-not for the 128-bit integer modes (SSEMODEI) in VEX form.
;; Operand 1 appears complemented and must be a register; operand 2 may be
;; register or memory.  Emits the three-operand vpandn.
;; NOTE(review): the enabling condition line is not visible in this excerpt.
6519 (define_insn "*avx_andnot<mode>3"
6520 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6522 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6523 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6525 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6526 [(set_attr "type" "sselog")
6527 (set_attr "prefix" "vex")
6528 (set_attr "mode" "TI")])
;; And-not for the 128-bit integer modes (SSEMODEI).  Operand 1 appears
;; complemented and is tied to the destination; operand 2 may be register
;; or memory.  Emits the two-operand pandn.
;; NOTE(review): the enabling condition line is not visible in this excerpt.
6530 (define_insn "sse2_andnot<mode>3"
6531 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6533 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6534 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6536 "pandn\t{%2, %0|%0, %2}"
6537 [(set_attr "type" "sselog")
6538 (set_attr "prefix_data16" "1")
6539 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers.  Operand 1 appears
;; complemented and is tied to the destination; operand 2 may be register
;; or memory.  Emits pandn, treating the value as a 128-bit integer
;; (insn mode TI).
;; NOTE(review): the enabling condition line is not visible in this excerpt.
6541 (define_insn "*andnottf3"
6542 [(set (match_operand:TF 0 "register_operand" "=x")
6544 (not:TF (match_operand:TF 1 "register_operand" "0"))
6545 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6547 "pandn\t{%2, %0|%0, %2}"
6548 [(set_attr "type" "sselog")
6549 (set_attr "prefix_data16" "1")
6550 (set_attr "mode" "TI")])
;; Named expander for a binary operation, parameterised by a code iterator
;; (<code>) and the SSEMODEI mode iterator.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy with <CODE> on the operands.
;; NOTE(review): the rtx code line and the enabling condition are not
;; visible in this excerpt; the insns that follow suggest the iterator is
;; any_logic -- confirm against the full file.
6552 (define_expand "<code><mode>3"
6553 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6555 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6556 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6558 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Bitwise logic (any_logic iterator) on the 256-bit integer modes
;; (AVX256MODEI).  Operands 1 and 2 are commutative (% modifier); operand 2
;; may be memory.  Emits the three-operand v<logic>ps and records the insn
;; mode as <avxvecpsmode>.  The visible part of the condition requires
;; ix86_binary_operator_ok for <CODE>.
;; NOTE(review): the first line of the condition is not visible here.
6560 (define_insn "*avx_<code><mode>3"
6561 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6562 (any_logic:AVX256MODEI
6563 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6564 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6566 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6567 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6568 [(set_attr "type" "sselog")
6569 (set_attr "prefix" "vex")
6570 (set_attr "mode" "<avxvecpsmode>")])
;; Bitwise logic on the 128-bit integer modes when only SSE1 is available
;; (TARGET_SSE && !TARGET_SSE2).  Operand 1 is tied to the destination and
;; commutative with operand 2 (constraint %0).  Emits the two-operand
;; <logic>ps and records the insn mode as V4SF.
6572 (define_insn "*sse_<code><mode>3"
6573 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6575 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6576 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6577 "(TARGET_SSE && !TARGET_SSE2)
6578 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6579 "<logic>ps\t{%2, %0|%0, %2}"
6580 [(set_attr "type" "sselog")
6581 (set_attr "mode" "V4SF")])
;; Bitwise logic on the 128-bit integer modes (SSEMODEI) in VEX form.
;; Operands 1 and 2 are commutative (% modifier); operand 2 may be memory.
;; Emits the three-operand vp<logic>.  The visible part of the condition
;; requires ix86_binary_operator_ok for <CODE>.
;; NOTE(review): the first line of the condition is not visible here.
6583 (define_insn "*avx_<code><mode>3"
6584 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6586 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6587 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6589 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6590 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6591 [(set_attr "type" "sselog")
6592 (set_attr "prefix" "vex")
6593 (set_attr "mode" "TI")])
;; Bitwise logic on the 128-bit integer modes (SSEMODEI) for SSE2.
;; Operand 1 is tied to the destination and commutative with operand 2
;; (constraint %0); operand 2 may be memory.  Emits the two-operand
;; p<logic>.  Requires TARGET_SSE2 and ix86_binary_operator_ok for <CODE>.
6595 (define_insn "*sse2_<code><mode>3"
6596 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6598 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6599 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6600 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6601 "p<logic>\t{%2, %0|%0, %2}"
6602 [(set_attr "type" "sselog")
6603 (set_attr "prefix_data16" "1")
6604 (set_attr "mode" "TI")])
;; Named expander for a binary operation on TFmode, parameterised by a code
;; iterator (<code>).  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy with <CODE> in TFmode.
;; NOTE(review): the rtx code line and the enabling condition are not
;; visible in this excerpt.
6606 (define_expand "<code>tf3"
6607 [(set (match_operand:TF 0 "register_operand" "")
6609 (match_operand:TF 1 "nonimmediate_operand" "")
6610 (match_operand:TF 2 "nonimmediate_operand" "")))]
6612 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Bitwise logic on TFmode values held in SSE registers.  Operand 1 is tied
;; to the destination and commutative with operand 2 (constraint %0);
;; operand 2 may be memory.  Emits the two-operand p<logic> (insn mode TI).
;; Requires TARGET_SSE2 and ix86_binary_operator_ok for <CODE> in TFmode.
6614 (define_insn "*<code>tf3"
6615 [(set (match_operand:TF 0 "register_operand" "=x")
6617 (match_operand:TF 1 "nonimmediate_operand" "%0")
6618 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6619 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6620 "p<logic>\t{%2, %0|%0, %2}"
6621 [(set_attr "type" "sselog")
6622 (set_attr "prefix_data16" "1")
6623 (set_attr "mode" "TI")])
6625 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6627 ;; Parallel integral element swizzling
6629 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two V8HI registers into one V16QI register.
;; Both inputs are re-viewed as V16QI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd together with the destination.
;; NOTE(review): the final argument 0 presumably selects the even-indexed
;; (low) bytes of each halfword -- confirm against the helper.
6631 (define_expand "vec_pack_trunc_v8hi"
6632 [(match_operand:V16QI 0 "register_operand" "")
6633 (match_operand:V8HI 1 "register_operand" "")
6634 (match_operand:V8HI 2 "register_operand" "")]
6637 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6638 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6639 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Truncating pack of two V4SI registers into one V8HI register.
;; Both inputs are re-viewed as V8HI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd together with the destination.
;; NOTE(review): the final argument 0 presumably selects the even-indexed
;; (low) halfwords of each element -- confirm against the helper.
6643 (define_expand "vec_pack_trunc_v4si"
6644 [(match_operand:V8HI 0 "register_operand" "")
6645 (match_operand:V4SI 1 "register_operand" "")
6646 (match_operand:V4SI 2 "register_operand" "")]
6649 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6650 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6651 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Truncating pack of two V2DI registers into one V4SI register.
;; Both inputs are re-viewed as V4SI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd together with the destination.
;; NOTE(review): the final argument 0 presumably selects the even-indexed
;; (low) words of each element -- confirm against the helper.
6655 (define_expand "vec_pack_trunc_v2di"
6656 [(match_operand:V4SI 0 "register_operand" "")
6657 (match_operand:V2DI 1 "register_operand" "")
6658 (match_operand:V2DI 2 "register_operand" "")]
6661 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6662 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6663 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; VEX form of the halfword-to-byte pack: two V8HI inputs (operand 1 in a
;; register, operand 2 in register or memory) produce one V16QI result via
;; the three-operand vpacksswb.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this excerpt.
6667 (define_insn "*avx_packsswb"
6668 [(set (match_operand:V16QI 0 "register_operand" "=x")
6671 (match_operand:V8HI 1 "register_operand" "x"))
6673 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6675 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6676 [(set_attr "type" "sselog")
6677 (set_attr "prefix" "vex")
6678 (set_attr "mode" "TI")])
;; Halfword-to-byte pack: two V8HI inputs produce one V16QI result via the
;; two-operand packsswb, with operand 1 tied to the destination register
;; and operand 2 in register or memory.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this excerpt.
6680 (define_insn "sse2_packsswb"
6681 [(set (match_operand:V16QI 0 "register_operand" "=x")
6684 (match_operand:V8HI 1 "register_operand" "0"))
6686 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6688 "packsswb\t{%2, %0|%0, %2}"
6689 [(set_attr "type" "sselog")
6690 (set_attr "prefix_data16" "1")
6691 (set_attr "mode" "TI")])
;; VEX form of the word-to-halfword pack: two V4SI inputs (operand 1 in a
;; register, operand 2 in register or memory) produce one V8HI result via
;; the three-operand vpackssdw.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this excerpt.
6693 (define_insn "*avx_packssdw"
6694 [(set (match_operand:V8HI 0 "register_operand" "=x")
6697 (match_operand:V4SI 1 "register_operand" "x"))
6699 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6701 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6702 [(set_attr "type" "sselog")
6703 (set_attr "prefix" "vex")
6704 (set_attr "mode" "TI")])
;; Word-to-halfword pack: two V4SI inputs produce one V8HI result via the
;; two-operand packssdw, with operand 1 tied to the destination register
;; and operand 2 in register or memory.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this excerpt.
6706 (define_insn "sse2_packssdw"
6707 [(set (match_operand:V8HI 0 "register_operand" "=x")
6710 (match_operand:V4SI 1 "register_operand" "0"))
6712 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6714 "packssdw\t{%2, %0|%0, %2}"
6715 [(set_attr "type" "sselog")
6716 (set_attr "prefix_data16" "1")
6717 (set_attr "mode" "TI")])
;; VEX form of the halfword-to-byte pack emitted as vpackuswb: two V8HI
;; inputs (operand 1 in a register, operand 2 in register or memory)
;; produce one V16QI result.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this excerpt.
6719 (define_insn "*avx_packuswb"
6720 [(set (match_operand:V16QI 0 "register_operand" "=x")
6723 (match_operand:V8HI 1 "register_operand" "x"))
6725 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6727 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6728 [(set_attr "type" "sselog")
6729 (set_attr "prefix" "vex")
6730 (set_attr "mode" "TI")])
;; Halfword-to-byte pack emitted as the two-operand packuswb: two V8HI
;; inputs produce one V16QI result, with operand 1 tied to the destination
;; register and operand 2 in register or memory.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this excerpt.
6732 (define_insn "sse2_packuswb"
6733 [(set (match_operand:V16QI 0 "register_operand" "=x")
6736 (match_operand:V8HI 1 "register_operand" "0"))
6738 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6740 "packuswb\t{%2, %0|%0, %2}"
6741 [(set_attr "type" "sselog")
6742 (set_attr "prefix_data16" "1")
6743 (set_attr "mode" "TI")])
;; VEX form of the V16QI high interleave, emitted as the three-operand
;; vpunpckhbw.  The selection list pairs index k with index k+16 for
;; k = 8..15, i.e. the upper eight bytes of each input alternate in the
;; result (assuming indices 16..31 address operand 2; the enclosing
;; selection/concatenation lines are not visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6745 (define_insn "*avx_interleave_highv16qi"
6746 [(set (match_operand:V16QI 0 "register_operand" "=x")
6749 (match_operand:V16QI 1 "register_operand" "x")
6750 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6751 (parallel [(const_int 8) (const_int 24)
6752 (const_int 9) (const_int 25)
6753 (const_int 10) (const_int 26)
6754 (const_int 11) (const_int 27)
6755 (const_int 12) (const_int 28)
6756 (const_int 13) (const_int 29)
6757 (const_int 14) (const_int 30)
6758 (const_int 15) (const_int 31)])))]
6760 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6761 [(set_attr "type" "sselog")
6762 (set_attr "prefix" "vex")
6763 (set_attr "mode" "TI")])
;; V16QI high interleave, emitted as the two-operand punpckhbw with
;; operand 1 tied to the destination.  The selection list pairs index k
;; with index k+16 for k = 8..15, i.e. the upper eight bytes of each input
;; alternate in the result (assuming indices 16..31 address operand 2; the
;; enclosing selection/concatenation lines are not visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6765 (define_insn "vec_interleave_highv16qi"
6766 [(set (match_operand:V16QI 0 "register_operand" "=x")
6769 (match_operand:V16QI 1 "register_operand" "0")
6770 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6771 (parallel [(const_int 8) (const_int 24)
6772 (const_int 9) (const_int 25)
6773 (const_int 10) (const_int 26)
6774 (const_int 11) (const_int 27)
6775 (const_int 12) (const_int 28)
6776 (const_int 13) (const_int 29)
6777 (const_int 14) (const_int 30)
6778 (const_int 15) (const_int 31)])))]
6780 "punpckhbw\t{%2, %0|%0, %2}"
6781 [(set_attr "type" "sselog")
6782 (set_attr "prefix_data16" "1")
6783 (set_attr "mode" "TI")])
;; VEX form of the V16QI low interleave, emitted as the three-operand
;; vpunpcklbw.  The selection list pairs index k with index k+16 for
;; k = 0..7, i.e. the lower eight bytes of each input alternate in the
;; result (assuming indices 16..31 address operand 2; the enclosing
;; selection/concatenation lines are not visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6785 (define_insn "*avx_interleave_lowv16qi"
6786 [(set (match_operand:V16QI 0 "register_operand" "=x")
6789 (match_operand:V16QI 1 "register_operand" "x")
6790 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6791 (parallel [(const_int 0) (const_int 16)
6792 (const_int 1) (const_int 17)
6793 (const_int 2) (const_int 18)
6794 (const_int 3) (const_int 19)
6795 (const_int 4) (const_int 20)
6796 (const_int 5) (const_int 21)
6797 (const_int 6) (const_int 22)
6798 (const_int 7) (const_int 23)])))]
6800 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6801 [(set_attr "type" "sselog")
6802 (set_attr "prefix" "vex")
6803 (set_attr "mode" "TI")])
;; V16QI low interleave, emitted as the two-operand punpcklbw with
;; operand 1 tied to the destination.  The selection list pairs index k
;; with index k+16 for k = 0..7, i.e. the lower eight bytes of each input
;; alternate in the result (assuming indices 16..31 address operand 2; the
;; enclosing selection/concatenation lines are not visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6805 (define_insn "vec_interleave_lowv16qi"
6806 [(set (match_operand:V16QI 0 "register_operand" "=x")
6809 (match_operand:V16QI 1 "register_operand" "0")
6810 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6811 (parallel [(const_int 0) (const_int 16)
6812 (const_int 1) (const_int 17)
6813 (const_int 2) (const_int 18)
6814 (const_int 3) (const_int 19)
6815 (const_int 4) (const_int 20)
6816 (const_int 5) (const_int 21)
6817 (const_int 6) (const_int 22)
6818 (const_int 7) (const_int 23)])))]
6820 "punpcklbw\t{%2, %0|%0, %2}"
6821 [(set_attr "type" "sselog")
6822 (set_attr "prefix_data16" "1")
6823 (set_attr "mode" "TI")])
;; VEX form of the V8HI high interleave, emitted as the three-operand
;; vpunpckhwd.  The selection list pairs index k with index k+8 for
;; k = 4..7, i.e. the upper four halfwords of each input alternate in the
;; result (assuming indices 8..15 address operand 2; the enclosing
;; selection/concatenation lines are not visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6825 (define_insn "*avx_interleave_highv8hi"
6826 [(set (match_operand:V8HI 0 "register_operand" "=x")
6829 (match_operand:V8HI 1 "register_operand" "x")
6830 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6831 (parallel [(const_int 4) (const_int 12)
6832 (const_int 5) (const_int 13)
6833 (const_int 6) (const_int 14)
6834 (const_int 7) (const_int 15)])))]
6836 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6837 [(set_attr "type" "sselog")
6838 (set_attr "prefix" "vex")
6839 (set_attr "mode" "TI")])
;; V8HI high interleave, emitted as the two-operand punpckhwd with
;; operand 1 tied to the destination.  The selection list pairs index k
;; with index k+8 for k = 4..7, i.e. the upper four halfwords of each input
;; alternate in the result (assuming indices 8..15 address operand 2; the
;; enclosing selection/concatenation lines are not visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6841 (define_insn "vec_interleave_highv8hi"
6842 [(set (match_operand:V8HI 0 "register_operand" "=x")
6845 (match_operand:V8HI 1 "register_operand" "0")
6846 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6847 (parallel [(const_int 4) (const_int 12)
6848 (const_int 5) (const_int 13)
6849 (const_int 6) (const_int 14)
6850 (const_int 7) (const_int 15)])))]
6852 "punpckhwd\t{%2, %0|%0, %2}"
6853 [(set_attr "type" "sselog")
6854 (set_attr "prefix_data16" "1")
6855 (set_attr "mode" "TI")])
;; VEX form of the V8HI low interleave, emitted as the three-operand
;; vpunpcklwd.  The selection list pairs index k with index k+8 for
;; k = 0..3, i.e. the lower four halfwords of each input alternate in the
;; result (assuming indices 8..15 address operand 2; the enclosing
;; selection/concatenation lines are not visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6857 (define_insn "*avx_interleave_lowv8hi"
6858 [(set (match_operand:V8HI 0 "register_operand" "=x")
6861 (match_operand:V8HI 1 "register_operand" "x")
6862 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6863 (parallel [(const_int 0) (const_int 8)
6864 (const_int 1) (const_int 9)
6865 (const_int 2) (const_int 10)
6866 (const_int 3) (const_int 11)])))]
6868 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6869 [(set_attr "type" "sselog")
6870 (set_attr "prefix" "vex")
6871 (set_attr "mode" "TI")])
;; V8HI low interleave, emitted as the two-operand punpcklwd with
;; operand 1 tied to the destination.  The selection list pairs index k
;; with index k+8 for k = 0..3, i.e. the lower four halfwords of each input
;; alternate in the result (assuming indices 8..15 address operand 2; the
;; enclosing selection/concatenation lines are not visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6873 (define_insn "vec_interleave_lowv8hi"
6874 [(set (match_operand:V8HI 0 "register_operand" "=x")
6877 (match_operand:V8HI 1 "register_operand" "0")
6878 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6879 (parallel [(const_int 0) (const_int 8)
6880 (const_int 1) (const_int 9)
6881 (const_int 2) (const_int 10)
6882 (const_int 3) (const_int 11)])))]
6884 "punpcklwd\t{%2, %0|%0, %2}"
6885 [(set_attr "type" "sselog")
6886 (set_attr "prefix_data16" "1")
6887 (set_attr "mode" "TI")])
;; VEX form of the V4SI high interleave, emitted as the three-operand
;; vpunpckhdq.  The selection list is 2, 6, 3, 7, i.e. the upper two words
;; of each input alternate in the result (assuming indices 4..7 address
;; operand 2; the enclosing selection/concatenation lines are not visible
;; in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6889 (define_insn "*avx_interleave_highv4si"
6890 [(set (match_operand:V4SI 0 "register_operand" "=x")
6893 (match_operand:V4SI 1 "register_operand" "x")
6894 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6895 (parallel [(const_int 2) (const_int 6)
6896 (const_int 3) (const_int 7)])))]
6898 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6899 [(set_attr "type" "sselog")
6900 (set_attr "prefix" "vex")
6901 (set_attr "mode" "TI")])
;; V4SI high interleave, emitted as the two-operand punpckhdq with
;; operand 1 tied to the destination.  The selection list is 2, 6, 3, 7,
;; i.e. the upper two words of each input alternate in the result
;; (assuming indices 4..7 address operand 2; the enclosing
;; selection/concatenation lines are not visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6903 (define_insn "vec_interleave_highv4si"
6904 [(set (match_operand:V4SI 0 "register_operand" "=x")
6907 (match_operand:V4SI 1 "register_operand" "0")
6908 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6909 (parallel [(const_int 2) (const_int 6)
6910 (const_int 3) (const_int 7)])))]
6912 "punpckhdq\t{%2, %0|%0, %2}"
6913 [(set_attr "type" "sselog")
6914 (set_attr "prefix_data16" "1")
6915 (set_attr "mode" "TI")])
;; VEX form of the V4SI low interleave, emitted as the three-operand
;; vpunpckldq.  The selection list is 0, 4, 1, 5, i.e. the lower two words
;; of each input alternate in the result (assuming indices 4..7 address
;; operand 2; the enclosing selection/concatenation lines are not visible
;; in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6917 (define_insn "*avx_interleave_lowv4si"
6918 [(set (match_operand:V4SI 0 "register_operand" "=x")
6921 (match_operand:V4SI 1 "register_operand" "x")
6922 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6923 (parallel [(const_int 0) (const_int 4)
6924 (const_int 1) (const_int 5)])))]
6926 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6927 [(set_attr "type" "sselog")
6928 (set_attr "prefix" "vex")
6929 (set_attr "mode" "TI")])
;; V4SI low interleave, emitted as the two-operand punpckldq with
;; operand 1 tied to the destination.  The selection list is 0, 4, 1, 5,
;; i.e. the lower two words of each input alternate in the result
;; (assuming indices 4..7 address operand 2; the enclosing
;; selection/concatenation lines are not visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here either.
6931 (define_insn "vec_interleave_lowv4si"
6932 [(set (match_operand:V4SI 0 "register_operand" "=x")
6935 (match_operand:V4SI 1 "register_operand" "0")
6936 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6937 (parallel [(const_int 0) (const_int 4)
6938 (const_int 1) (const_int 5)])))]
6940 "punpckldq\t{%2, %0|%0, %2}"
6941 [(set_attr "type" "sselog")
6942 (set_attr "prefix_data16" "1")
6943 (set_attr "mode" "TI")])
;; VEX element insert for the SSEMODE124 modes, modelled as a vec_merge of
;; a broadcast scalar (operand 2, register or memory) into vector operand 1.
;; Operand 3 arrives as a single-bit merge mask (a power of two up to
;; <pinsrbits>); the output code converts it with exact_log2 into the
;; element index used as the instruction immediate.
;; A memory source is printed as plain %2, a register source as %k2.
;; prefix_extra is selected by testing whether operand 0 is V8HI (only the
;; else-value 1 is visible in this excerpt).
;; NOTE(review): the enabling condition line is not visible here.
6945 (define_insn "*avx_pinsr<ssevecsize>"
6946 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6947 (vec_merge:SSEMODE124
6948 (vec_duplicate:SSEMODE124
6949 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6950 (match_operand:SSEMODE124 1 "register_operand" "x")
6951 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6954 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6955 if (MEM_P (operands[2]))
6956 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6958 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6960 [(set_attr "type" "sselog")
6961 (set (attr "prefix_extra")
6962 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6964 (const_string "1")))
6965 (set_attr "length_immediate" "1")
6966 (set_attr "prefix" "vex")
6967 (set_attr "mode" "TI")])
;; Byte insert into a V16QI register: a broadcast QI scalar (operand 2,
;; register or memory) is merged into operand 1, which is tied to the
;; destination.  Operand 3 arrives as a single-bit mask in 1..32768 and is
;; converted with exact_log2 into the byte index used as the immediate.
;; A memory source is printed as plain %2, a register source as %k2.
;; NOTE(review): the enabling condition line is not visible in this excerpt.
6969 (define_insn "*sse4_1_pinsrb"
6970 [(set (match_operand:V16QI 0 "register_operand" "=x")
6972 (vec_duplicate:V16QI
6973 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6974 (match_operand:V16QI 1 "register_operand" "0")
6975 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6978 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6979 if (MEM_P (operands[2]))
6980 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6982 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6984 [(set_attr "type" "sselog")
6985 (set_attr "prefix_extra" "1")
6986 (set_attr "length_immediate" "1")
6987 (set_attr "mode" "TI")])
;; Halfword insert into a V8HI register: HI scalar operand 2 (register or
;; memory) is merged into operand 1, which is tied to the destination.
;; Operand 3 arrives as a single-bit mask in 1..128 and is converted with
;; exact_log2 into the element index used as the immediate.
;; A memory source is printed as plain %2, a register source as %k2.
;; NOTE(review): the enabling condition line is not visible in this excerpt.
6989 (define_insn "*sse2_pinsrw"
6990 [(set (match_operand:V8HI 0 "register_operand" "=x")
6993 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6994 (match_operand:V8HI 1 "register_operand" "0")
6995 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6998 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6999 if (MEM_P (operands[2]))
7000 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
7002 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
7004 [(set_attr "type" "sselog")
7005 (set_attr "prefix_data16" "1")
7006 (set_attr "length_immediate" "1")
7007 (set_attr "mode" "TI")])
7009 ;; It must come before sse2_loadld since it is preferred.
;; Word insert into a V4SI register: SI scalar operand 2 (register or
;; memory) is merged into operand 1, which is tied to the destination.
;; Operand 3 arrives as a single-bit mask in 1..8 and is converted with
;; exact_log2 into the element index used as the pinsrd immediate.
;; NOTE(review): the enabling condition line is not visible in this excerpt.
7010 (define_insn "*sse4_1_pinsrd"
7011 [(set (match_operand:V4SI 0 "register_operand" "=x")
7014 (match_operand:SI 2 "nonimmediate_operand" "rm"))
7015 (match_operand:V4SI 1 "register_operand" "0")
7016 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
7019 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7020 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
7022 [(set_attr "type" "sselog")
7023 (set_attr "prefix_extra" "1")
7024 (set_attr "length_immediate" "1")
7025 (set_attr "mode" "TI")])
;; VEX doubleword insert into a V2DI register, available only for
;; TARGET_AVX on 64-bit targets.  DI scalar operand 2 (register or memory)
;; is merged into register operand 1.  Operand 3 arrives as a single-bit
;; mask in 1..2 and is converted with exact_log2 into the element index
;; used as the vpinsrq immediate.
7027 (define_insn "*avx_pinsrq"
7028 [(set (match_operand:V2DI 0 "register_operand" "=x")
7031 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7032 (match_operand:V2DI 1 "register_operand" "x")
7033 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7034 "TARGET_AVX && TARGET_64BIT"
7036 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7037 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7039 [(set_attr "type" "sselog")
7040 (set_attr "prefix_extra" "1")
7041 (set_attr "length_immediate" "1")
7042 (set_attr "prefix" "vex")
7043 (set_attr "mode" "TI")])
;; Doubleword insert into a V2DI register, available only for
;; TARGET_SSE4_1 on 64-bit targets.  DI scalar operand 2 (register or
;; memory) is merged into operand 1, which is tied to the destination.
;; Operand 3 arrives as a single-bit mask in 1..2 and is converted with
;; exact_log2 into the element index used as the pinsrq immediate.
;; The prefix_rex attribute is set to 1.
7045 (define_insn "*sse4_1_pinsrq"
7046 [(set (match_operand:V2DI 0 "register_operand" "=x")
7049 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7050 (match_operand:V2DI 1 "register_operand" "0")
7051 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7052 "TARGET_SSE4_1 && TARGET_64BIT"
7054 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7055 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7057 [(set_attr "type" "sselog")
7058 (set_attr "prefix_rex" "1")
7059 (set_attr "prefix_extra" "1")
7060 (set_attr "length_immediate" "1")
7061 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register into an SI general register.
;; Operand 2 is the constant byte index (0..15) and becomes the immediate
;; of pextrb.  The template starts with %v and the prefix attribute is
;; maybe_vex, so presumably one pattern serves both the legacy and the VEX
;; encoding.
;; NOTE(review): the rtx codes around the selection and the enabling
;; condition are not visible in this excerpt.
7063 (define_insn "*sse4_1_pextrb"
7064 [(set (match_operand:SI 0 "register_operand" "=r")
7067 (match_operand:V16QI 1 "register_operand" "x")
7068 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7070 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7071 [(set_attr "type" "sselog")
7072 (set_attr "prefix_extra" "1")
7073 (set_attr "length_immediate" "1")
7074 (set_attr "prefix" "maybe_vex")
7075 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register stored directly to a QI memory
;; location.  Operand 2 is the constant byte index (0..15) and becomes the
;; immediate of pextrb (template prefixed with %v, prefix maybe_vex).
;; NOTE(review): the enabling condition line is not visible in this excerpt.
7077 (define_insn "*sse4_1_pextrb_memory"
7078 [(set (match_operand:QI 0 "memory_operand" "=m")
7080 (match_operand:V16QI 1 "register_operand" "x")
7081 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7083 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7084 [(set_attr "type" "sselog")
7085 (set_attr "prefix_extra" "1")
7086 (set_attr "length_immediate" "1")
7087 (set_attr "prefix" "maybe_vex")
7088 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register into an SI general register.
;; Operand 2 is the constant element index (0..7) and becomes the immediate
;; of pextrw (template prefixed with %v, prefix maybe_vex).
;; NOTE(review): the rtx codes around the selection and the enabling
;; condition are not visible in this excerpt.
7090 (define_insn "*sse2_pextrw"
7091 [(set (match_operand:SI 0 "register_operand" "=r")
7094 (match_operand:V8HI 1 "register_operand" "x")
7095 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7097 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7098 [(set_attr "type" "sselog")
7099 (set_attr "prefix_data16" "1")
7100 (set_attr "length_immediate" "1")
7101 (set_attr "prefix" "maybe_vex")
7102 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register stored directly to an HI memory
;; location.  Operand 2 is the constant element index (0..7) and becomes
;; the immediate of pextrw (template prefixed with %v, prefix maybe_vex).
;; This variant sets prefix_extra rather than prefix_data16.
;; NOTE(review): the enabling condition line is not visible in this excerpt.
7104 (define_insn "*sse4_1_pextrw_memory"
7105 [(set (match_operand:HI 0 "memory_operand" "=m")
7107 (match_operand:V8HI 1 "register_operand" "x")
7108 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7110 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7111 [(set_attr "type" "sselog")
7112 (set_attr "prefix_extra" "1")
7113 (set_attr "length_immediate" "1")
7114 (set_attr "prefix" "maybe_vex")
7115 (set_attr "mode" "TI")])
;; Word extract from a V4SI register into an SI destination that may be a
;; general register or memory (constraint rm).  Operand 2 is the constant
;; element index (0..3) and becomes the immediate of pextrd (template
;; prefixed with %v, prefix maybe_vex).
;; NOTE(review): the enabling condition line is not visible in this excerpt.
7117 (define_insn "*sse4_1_pextrd"
7118 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7120 (match_operand:V4SI 1 "register_operand" "x")
7121 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7123 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7124 [(set_attr "type" "sselog")
7125 (set_attr "prefix_extra" "1")
7126 (set_attr "length_immediate" "1")
7127 (set_attr "prefix" "maybe_vex")
7128 (set_attr "mode" "TI")])
7130 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Doubleword extract from a V2DI register into a DI destination that may
;; be a general register or memory (constraint rm).  Available only for
;; TARGET_SSE4_1 on 64-bit targets.  Operand 2 is the constant element
;; index (0..1) and becomes the immediate of pextrq (template prefixed with
;; %v, prefix maybe_vex).  The prefix_rex attribute is set to 1.
7131 (define_insn "*sse4_1_pextrq"
7132 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7134 (match_operand:V2DI 1 "register_operand" "x")
7135 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7136 "TARGET_SSE4_1 && TARGET_64BIT"
7137 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7138 [(set_attr "type" "sselog")
7139 (set_attr "prefix_rex" "1")
7140 (set_attr "prefix_extra" "1")
7141 (set_attr "length_immediate" "1")
7142 (set_attr "prefix" "maybe_vex")
7143 (set_attr "mode" "TI")])
;; Expander taking the packed shuffle immediate form: operand 2 is a
;; constant whose four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) are split out
;; and passed as four separate element selectors to gen_sse2_pshufd_1,
;; which is emitted on operands 0 and 1.
;; NOTE(review): the enabling condition line is not visible in this excerpt.
7145 (define_expand "sse2_pshufd"
7146 [(match_operand:V4SI 0 "register_operand" "")
7147 (match_operand:V4SI 1 "nonimmediate_operand" "")
7148 (match_operand:SI 2 "const_int_operand" "")]
7151 int mask = INTVAL (operands[2]);
7152 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7153 GEN_INT ((mask >> 0) & 3),
7154 GEN_INT ((mask >> 2) & 3),
7155 GEN_INT ((mask >> 4) & 3),
7156 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with the four element selectors held as separate operands
;; 2..5, each a constant in 0..3.  The output code packs them back into one
;; immediate (operand 2 in bits 0-1, operand 3 in bits 2-3, operand 4 in
;; bits 4-5, operand 5 in bits 6-7), overwrites operands[2] with it, and
;; emits pshufd (template prefixed with %v, prefix maybe_vex).  The source
;; vector may be a register or memory.
;; NOTE(review): the enabling condition and the declaration of `mask' are
;; not visible in this excerpt.
7160 (define_insn "sse2_pshufd_1"
7161 [(set (match_operand:V4SI 0 "register_operand" "=x")
7163 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7164 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7165 (match_operand 3 "const_0_to_3_operand" "")
7166 (match_operand 4 "const_0_to_3_operand" "")
7167 (match_operand 5 "const_0_to_3_operand" "")])))]
7171 mask |= INTVAL (operands[2]) << 0;
7172 mask |= INTVAL (operands[3]) << 2;
7173 mask |= INTVAL (operands[4]) << 4;
7174 mask |= INTVAL (operands[5]) << 6;
7175 operands[2] = GEN_INT (mask);
7177 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7179 [(set_attr "type" "sselog1")
7180 (set_attr "prefix_data16" "1")
7181 (set_attr "prefix" "maybe_vex")
7182 (set_attr "length_immediate" "1")
7183 (set_attr "mode" "TI")])
;; Expander taking the packed shuffle immediate form for V8HI: operand 2 is
;; a constant whose four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) are split
;; out and passed as four separate element selectors to
;; gen_sse2_pshuflw_1, which is emitted on operands 0 and 1.
;; NOTE(review): the enabling condition line is not visible in this excerpt.
7185 (define_expand "sse2_pshuflw"
7186 [(match_operand:V8HI 0 "register_operand" "")
7187 (match_operand:V8HI 1 "nonimmediate_operand" "")
7188 (match_operand:SI 2 "const_int_operand" "")]
7191 int mask = INTVAL (operands[2]);
7192 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7193 GEN_INT ((mask >> 0) & 3),
7194 GEN_INT ((mask >> 2) & 3),
7195 GEN_INT ((mask >> 4) & 3),
7196 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four selectors are operands 2..5, each a
;; constant in 0..3.  The output code packs them into one immediate
;; (2 bits each, operand 2 lowest), overwrites operands[2] with it, and
;; emits pshuflw (template prefixed with %v, prefix maybe_vex).
;; Attributes mark the encoding as using a rep prefix and no data16 prefix.
;; NOTE(review): the remaining entries of the selection list, the enabling
;; condition and the declaration of `mask' are not visible in this excerpt.
7200 (define_insn "sse2_pshuflw_1"
7201 [(set (match_operand:V8HI 0 "register_operand" "=x")
7203 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7204 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7205 (match_operand 3 "const_0_to_3_operand" "")
7206 (match_operand 4 "const_0_to_3_operand" "")
7207 (match_operand 5 "const_0_to_3_operand" "")
7215 mask |= INTVAL (operands[2]) << 0;
7216 mask |= INTVAL (operands[3]) << 2;
7217 mask |= INTVAL (operands[4]) << 4;
7218 mask |= INTVAL (operands[5]) << 6;
7219 operands[2] = GEN_INT (mask);
7221 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7223 [(set_attr "type" "sselog")
7224 (set_attr "prefix_data16" "0")
7225 (set_attr "prefix_rep" "1")
7226 (set_attr "prefix" "maybe_vex")
7227 (set_attr "length_immediate" "1")
7228 (set_attr "mode" "TI")])
;; Expander taking the packed shuffle immediate form for the high half of a
;; V8HI vector: operand 2 is a constant whose four 2-bit fields are split
;; out, each biased by +4 so the selectors fall in 4..7, and passed to
;; gen_sse2_pshufhw_1, which is emitted on operands 0 and 1.
;; NOTE(review): the enabling condition line is not visible in this excerpt.
7230 (define_expand "sse2_pshufhw"
7231 [(match_operand:V8HI 0 "register_operand" "")
7232 (match_operand:V8HI 1 "nonimmediate_operand" "")
7233 (match_operand:SI 2 "const_int_operand" "")]
7236 int mask = INTVAL (operands[2]);
7237 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7238 GEN_INT (((mask >> 0) & 3) + 4),
7239 GEN_INT (((mask >> 2) & 3) + 4),
7240 GEN_INT (((mask >> 4) & 3) + 4),
7241 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose selection list starts with fixed indices (const_int 0
;; is visible) and ends with operands 2..5, each a constant in 4..7.
;; The output code removes the +4 bias from each selector, packs the four
;; 2-bit values into one immediate (operand 2 lowest), overwrites
;; operands[2] with it, and emits pshufhw (template prefixed with %v,
;; prefix maybe_vex).  Attributes mark the encoding as using a rep prefix
;; and no data16 prefix.
;; NOTE(review): the other fixed indices, the enabling condition and the
;; declaration of `mask' are not visible in this excerpt.
7245 (define_insn "sse2_pshufhw_1"
7246 [(set (match_operand:V8HI 0 "register_operand" "=x")
7248 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7249 (parallel [(const_int 0)
7253 (match_operand 2 "const_4_to_7_operand" "")
7254 (match_operand 3 "const_4_to_7_operand" "")
7255 (match_operand 4 "const_4_to_7_operand" "")
7256 (match_operand 5 "const_4_to_7_operand" "")])))]
7260 mask |= (INTVAL (operands[2]) - 4) << 0;
7261 mask |= (INTVAL (operands[3]) - 4) << 2;
7262 mask |= (INTVAL (operands[4]) - 4) << 4;
7263 mask |= (INTVAL (operands[5]) - 4) << 6;
7264 operands[2] = GEN_INT (mask);
7266 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7268 [(set_attr "type" "sselog")
7269 (set_attr "prefix_rep" "1")
7270 (set_attr "prefix_data16" "0")
7271 (set_attr "prefix" "maybe_vex")
7272 (set_attr "length_immediate" "1")
7273 (set_attr "mode" "TI")])
;; Expander that loads an SI value (register or memory) into a V4SI
;; register.  The preparation statement sets operands[2] to the V4SI zero
;; constant.
;; NOTE(review): most of the rtx template and the enabling condition are
;; not visible in this excerpt; presumably operands[2] supplies the
;; remaining (zeroed) elements -- confirm against the full file.
7275 (define_expand "sse2_loadd"
7276 [(set (match_operand:V4SI 0 "register_operand" "")
7279 (match_operand:SI 1 "nonimmediate_operand" ""))
7283 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX insn placing SI scalar operand 2 into a V4SI register, with vector
;; operand 1 being either zero (constraint C) or a register.
;; Three alternatives are visible:
;;   1. memory source, operand 1 zero      -> vmovd
;;   2. general-register source (Yi dest), operand 1 zero -> vmovd
;;   3. SSE-register source, operand 1 a register -> three-operand vmovss
;; NOTE(review): parts of the rtx template and the enabling condition are
;; not visible in this excerpt.
7285 (define_insn "*avx_loadld"
7286 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7289 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7290 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7294 vmovd\t{%2, %0|%0, %2}
7295 vmovd\t{%2, %0|%0, %2}
7296 vmovss\t{%2, %1, %0|%0, %1, %2}"
7297 [(set_attr "type" "ssemov")
7298 (set_attr "prefix" "vex")
7299 (set_attr "mode" "TI,TI,V4SF")])
;; Insn placing SI scalar operand 2 into a V4SI register, with vector
;; operand 1 being either zero (constraint C) or tied to the destination.
;; Four alternatives are visible:
;;   1. memory source (Y2 dest), operand 1 zero            -> movd
;;   2. general-register source (Yi dest), operand 1 zero  -> movd
;;   3. memory source, operand 1 zero                      -> movss
;;   4. SSE-register source, operand 1 tied to destination -> movss
;; NOTE(review): parts of the rtx template and the enabling condition are
;; not visible in this excerpt.
7301 (define_insn "sse2_loadld"
7302 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7305 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7306 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7310 movd\t{%2, %0|%0, %2}
7311 movd\t{%2, %0|%0, %2}
7312 movss\t{%2, %0|%0, %2}
7313 movss\t{%2, %0|%0, %2}"
7314 [(set_attr "type" "ssemov")
7315 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store of element 0 of a V4SI register into an SI destination (memory,
;; SSE register or general register).  After reload the insn is split into
;; a plain SImode move when inter-unit moves are allowed, or the
;; destination is memory, or the destination is not a general register.
;; For the split, operand 1 is re-expressed as an SImode register with the
;; same hard register number.
;; NOTE(review): the insn condition and output template lines are not
;; visible in this excerpt.
7317 (define_insn_and_split "sse2_stored"
7318 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7320 (match_operand:V4SI 1 "register_operand" "x,Yi")
7321 (parallel [(const_int 0)])))]
7324 "&& reload_completed
7325 && (TARGET_INTER_UNIT_MOVES
7326 || MEM_P (operands [0])
7327 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7328 [(set (match_dup 0) (match_dup 1))]
7330 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extraction of element operand 2 (constant 0..3) from a V4SI value in
;; offsettable memory (constraint o) into an SI general register.
;; The split code turns it into an ordinary SImode load from the same
;; address plus 4 * index bytes, using adjust_address.
;; NOTE(review): the insn condition, template and split condition lines
;; are not visible in this excerpt.
7333 (define_insn_and_split "*vec_ext_v4si_mem"
7334 [(set (match_operand:SI 0 "register_operand" "=r")
7336 (match_operand:V4SI 1 "memory_operand" "o")
7337 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7343 int i = INTVAL (operands[2]);
7345 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander storing element 0 of a V2DI register into a DI
;; destination (register or memory).
;; NOTE(review): the enabling condition and the rest of the expander are
;; not visible in this excerpt.
7349 (define_expand "sse_storeq"
7350 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7352 (match_operand:V2DI 1 "register_operand" "")
7353 (parallel [(const_int 0)])))]
;; 64-bit-target store of element 0 of a V2DI value into a DI destination.
;; Enabled for TARGET_64BIT when the two operands are not both memory.
;; Three constraint alternatives exist (x -> mx, Yi -> *r, o -> r); only
;; the last output template is visible here, a mov{q} (prefixed with %v)
;; used when the source is offsettable memory.  The attributes of the first
;; two alternatives are left at * (type, prefix and mode).
;; NOTE(review): the first two template lines are not visible in this
;; excerpt.
7357 (define_insn "*sse2_storeq_rex64"
7358 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7360 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7361 (parallel [(const_int 0)])))]
7362 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7366 %vmov{q}\t{%1, %0|%0, %1}"
7367 [(set_attr "type" "*,*,imov")
7368 (set_attr "prefix" "*,*,maybe_vex")
7369 (set_attr "mode" "*,*,DI")])
;; Store of element 0 of a V2DI register into a DI destination that is
;; memory or an SSE register (constraint mx).
;; NOTE(review): the enabling condition and output template are not
;; visible in this excerpt.
7371 (define_insn "*sse2_storeq"
7372 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7374 (match_operand:V2DI 1 "register_operand" "x")
7375 (parallel [(const_int 0)])))]
;; NOTE(review): the header line of this pattern is not visible in this
;; excerpt; presumably it is a define_split for the V2DI element-0 store.
;; The visible lines match a store of element 0 of a V2DI register into a
;; DI destination and, when inter-unit moves are allowed or the destination
;; is memory or not a general register, replace it with a plain move in
;; which operand 1 is re-expressed as a DImode register with the same hard
;; register number.
7380 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7382 (match_operand:V2DI 1 "register_operand" "")
7383 (parallel [(const_int 0)])))]
7386 && (TARGET_INTER_UNIT_MOVES
7387 || MEM_P (operands [0])
7388 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7389 [(set (match_dup 0) (match_dup 1))]
7391 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; VEX extraction of element 1 (the high quadword) of a V2DI value into a
;; DI destination.  The two operands must not both be memory.
;; Four alternatives:
;;   1. register -> memory            : vmovhps
;;   2. register -> SSE register      : vpsrldq by 8 bytes
;;   3. offsettable memory -> SSE reg : vmovq from %H1
;;   4. offsettable memory -> general register : vmov{q} from %H1
;; NOTE(review): the first line of the condition is not visible in this
;; excerpt.
7394 (define_insn "*vec_extractv2di_1_rex64_avx"
7395 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7397 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7398 (parallel [(const_int 1)])))]
7401 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7403 vmovhps\t{%1, %0|%0, %1}
7404 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7405 vmovq\t{%H1, %0|%0, %H1}
7406 vmov{q}\t{%H1, %0|%0, %H1}"
7407 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7408 (set_attr "length_immediate" "*,1,*,*")
7409 (set_attr "memory" "*,none,*,*")
7410 (set_attr "prefix" "vex")
7411 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Non-VEX, 64-bit extraction of lane 1 of a V2DI into a DImode destination.
;; Alternatives, in order: movhps store to memory; in-place psrldq by 8 bytes
;; (operand 1 is tied to operand 0, so only %0 is printed); movq from %H1 of
;; an offsettable memory source; integer mov from %H1 into a general register.
7413 (define_insn "*vec_extractv2di_1_rex64"
7414 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7416 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7417 (parallel [(const_int 1)])))]
7418 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7420 movhps\t{%1, %0|%0, %1}
7421 psrldq\t{$8, %0|%0, 8}
7422 movq\t{%H1, %0|%0, %H1}
7423 mov{q}\t{%H1, %0|%0, %H1}"
7424 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7425 (set_attr "length_immediate" "*,1,*,*")
7426 (set_attr "atom_unit" "*,sishuf,*,*")
7427 (set_attr "memory" "*,none,*,*")
7428 (set_attr "mode" "V2SF,TI,TI,DI")])
;; AVX extraction of lane 1 of a V2DI without a general-register alternative:
;; vmovhps store to memory, vpsrldq by 8 bytes between SSE registers, or vmovq
;; from %H1 of an offsettable memory source.  Memory-to-memory pairs are
;; rejected.
7430 (define_insn "*vec_extractv2di_1_avx"
7431 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7433 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7434 (parallel [(const_int 1)])))]
7437 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7439 vmovhps\t{%1, %0|%0, %1}
7440 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7441 vmovq\t{%H1, %0|%0, %H1}"
7442 [(set_attr "type" "ssemov,sseishft,ssemov")
7443 (set_attr "length_immediate" "*,1,*")
7444 (set_attr "memory" "*,none,*")
7445 (set_attr "prefix" "vex")
7446 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 extraction of lane 1 of a V2DI: movhps store to memory, in-place
;; psrldq by 8 bytes (operand 1 tied to operand 0), or movq from %H1 of an
;; offsettable memory source.  Requires TARGET_SSE2 and rejects
;; memory-to-memory pairs.
7448 (define_insn "*vec_extractv2di_1_sse2"
7449 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7451 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7452 (parallel [(const_int 1)])))]
7454 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7456 movhps\t{%1, %0|%0, %1}
7457 psrldq\t{$8, %0|%0, 8}
7458 movq\t{%H1, %0|%0, %H1}"
7459 [(set_attr "type" "ssemov,sseishft,ssemov")
7460 (set_attr "length_immediate" "*,1,*")
7461 (set_attr "atom_unit" "*,sishuf,*")
7462 (set_attr "memory" "*,none,*")
7463 (set_attr "mode" "V2SF,TI,TI")])
;; SSE1-only fallback (!TARGET_SSE2 && TARGET_SSE) for extracting lane 1 of a
;; V2DI, built from float moves: movhps store to memory, movhlps between SSE
;; registers, or movlps from %H1 of an offsettable memory source.
7465 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7466 (define_insn "*vec_extractv2di_1_sse"
7467 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7469 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7470 (parallel [(const_int 1)])))]
7471 "!TARGET_SSE2 && TARGET_SSE
7472 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7474 movhps\t{%1, %0|%0, %1}
7475 movhlps\t{%1, %0|%0, %1}
7476 movlps\t{%H1, %0|%0, %H1}"
7477 [(set_attr "type" "ssemov")
7478 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX duplication of an SImode value into a V4SI register: vpshufd with
;; selector 0 when the source is an SSE register, vbroadcastss when it is in
;; memory.
;; NOTE(review): operand 1 uses the predicate register_operand while
;; alternative 1 has constraint "m"; that alternative is presumably reached
;; only when reload spills the register -- confirm this is intended.
7480 (define_insn "*vec_dupv4si_avx"
7481 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7483 (match_operand:SI 1 "register_operand" "x,m")))]
7486 vpshufd\t{$0, %1, %0|%0, %1, 0}
7487 vbroadcastss\t{%1, %0|%0, %1}"
7488 [(set_attr "type" "sselog1,ssemov")
7489 (set_attr "length_immediate" "1,0")
7490 (set_attr "prefix_extra" "0,1")
7491 (set_attr "prefix" "vex")
7492 (set_attr "mode" "TI,V4SF")])
;; Duplication of an SImode register value into a V4SI register.
;; Alternative 0 ("Y2" registers) prints %vpshufd with selector 0;
;; alternative 1 ties operand 1 to the destination and prints shufps with
;; selector 0 on %0 alone.
7494 (define_insn "*vec_dupv4si"
7495 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7497 (match_operand:SI 1 "register_operand" " Y2,0")))]
7500 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7501 shufps\t{$0, %0, %0|%0, %0, 0}"
7502 [(set_attr "type" "sselog1")
7503 (set_attr "length_immediate" "1")
7504 (set_attr "mode" "TI,V4SF")])
;; AVX duplication of a DImode value into a V2DI register: vpunpcklqdq of the
;; source register with itself, or vmovddup when the source is in memory.
7506 (define_insn "*vec_dupv2di_avx"
7507 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7509 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7512 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7513 vmovddup\t{%1, %0|%0, %1}"
7514 [(set_attr "type" "sselog1")
7515 (set_attr "prefix" "vex")
7516 (set_attr "mode" "TI,DF")])
;; Duplication of a DImode value into a V2DI register (name suggests the SSE3
;; variant).  Operand 1 is either tied to the destination or a memory operand;
;; the template line shown prints movddup from %1.
7518 (define_insn "*vec_dupv2di_sse3"
7519 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7521 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7525 movddup\t{%1, %0|%0, %1}"
7526 [(set_attr "type" "sselog1")
7527 (set_attr "mode" "TI,DF")])
;; Generic duplication of a DImode register value into a V2DI register.  Both
;; alternatives tie operand 1 to the destination; they differ in register
;; class ("Y2" vs "x") and in attributes (sselog1/TI vs ssemov/V4SF).
7529 (define_insn "*vec_dupv2di"
7530 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7532 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7537 [(set_attr "type" "sselog1,ssemov")
7538 (set_attr "mode" "TI,V4SF")])
;; AVX form of building a V2SI from two SImode operands.  Alternatives:
;;   0: vpinsrd at index 1 from a general register or memory
;;   1: vpunpckldq of two SSE registers
;;   2: vmovd of operand 1 alone (operand 2 is the "C" constant)
;;   3: MMX punpckldq (operand 1 tied to the destination)
;;   4: MMX movd of operand 1 alone (operand 2 is the "C" constant)
;; The MMX alternatives (3,4) keep the original encoding prefix; the others
;; are VEX-encoded.
;; NOTE(review): "C" is presumably the all-zero constant -- confirm.
7540 (define_insn "*vec_concatv2si_avx"
7541 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7543 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7544 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7547 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7548 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7549 vmovd\t{%1, %0|%0, %1}
7550 punpckldq\t{%2, %0|%0, %2}
7551 movd\t{%1, %0|%0, %1}"
7552 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7553 (set_attr "prefix_extra" "1,*,*,*,*")
7554 (set_attr "length_immediate" "1,*,*,*,*")
7555 (set (attr "prefix")
7556 (if_then_else (eq_attr "alternative" "3,4")
7557 (const_string "orig")
7558 (const_string "vex")))
7559 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Two-operand (non-VEX) counterpart of the V2SI concat: pinsrd at index 1 or
;; punpckldq with operand 1 tied to the destination, movd when operand 2 is
;; the "C" constant, plus the MMX punpckldq / movd alternatives.
7561 (define_insn "*vec_concatv2si_sse4_1"
7562 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7564 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7565 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7568 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7569 punpckldq\t{%2, %0|%0, %2}
7570 movd\t{%1, %0|%0, %1}
7571 punpckldq\t{%2, %0|%0, %2}
7572 movd\t{%1, %0|%0, %1}"
7573 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7574 (set_attr "prefix_extra" "1,*,*,*,*")
7575 (set_attr "length_immediate" "1,*,*,*,*")
7576 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; V2SI concat without an insert instruction: SSE punpckldq / movd
;; (alternatives 0,1) and MMX punpckldq / movd (alternatives 2,3).  Operand 2
;; is restricted to a register or the "C" constant (reg_or_0_operand).
7578 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7579 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7580 ;; alternatives pretty much forces the MMX alternative to be chosen.
7581 (define_insn "*vec_concatv2si_sse2"
7582 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7584 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7585 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7588 punpckldq\t{%2, %0|%0, %2}
7589 movd\t{%1, %0|%0, %1}
7590 punpckldq\t{%2, %0|%0, %2}
7591 movd\t{%1, %0|%0, %1}"
7592 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7593 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI concat using float-domain SSE instructions for the SSE alternatives
;; (unpcklps, or movss from memory when operand 2 is the "C" constant) and MMX
;; punpckldq / movd for the MMX alternatives.
7595 (define_insn "*vec_concatv2si_sse"
7596 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7598 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7599 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7602 unpcklps\t{%2, %0|%0, %2}
7603 movss\t{%1, %0|%0, %1}
7604 punpckldq\t{%2, %0|%0, %2}
7605 movd\t{%1, %0|%0, %1}"
7606 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7607 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX form of building a V4SI from two V2SI halves: vpunpcklqdq when operand
;; 2 is a register, vmovhps when operand 2 is in memory.
7609 (define_insn "*vec_concatv4si_1_avx"
7610 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7612 (match_operand:V2SI 1 "register_operand" " x,x")
7613 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7616 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7617 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7618 [(set_attr "type" "sselog,ssemov")
7619 (set_attr "prefix" "vex")
7620 (set_attr "mode" "TI,V2SF")])
;; Two-operand form of building a V4SI from two V2SI halves; operand 1 is
;; always tied to the destination.  punpcklqdq for "Y2" registers, movlhps for
;; any SSE register, movhps when operand 2 is in memory.
7622 (define_insn "*vec_concatv4si_1"
7623 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7625 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7626 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7629 punpcklqdq\t{%2, %0|%0, %2}
7630 movlhps\t{%2, %0|%0, %2}
7631 movhps\t{%2, %0|%0, %2}"
7632 [(set_attr "type" "sselog,ssemov,ssemov")
7633 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX form (!TARGET_64BIT && TARGET_AVX) of building a V2DI from two
;; DImode operands.  Alternatives: vmovq from memory or movq2dq from an MMX
;; register when operand 2 is the "C" constant; vpunpcklqdq of two SSE
;; registers; vmovhps when operand 2 is in memory.  Only the movq2dq
;; alternative (1) keeps the original, non-VEX encoding prefix.
7635 (define_insn "*vec_concatv2di_avx"
7636 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7638 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7639 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7640 "!TARGET_64BIT && TARGET_AVX"
7642 vmovq\t{%1, %0|%0, %1}
7643 movq2dq\t{%1, %0|%0, %1}
7644 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7645 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7646 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7647 (set (attr "prefix")
7648 (if_then_else (eq_attr "alternative" "1")
7649 (const_string "orig")
7650 (const_string "vex")))
7651 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit SSE form (!TARGET_64BIT && TARGET_SSE) of building a V2DI from two
;; DImode operands.  movq / movq2dq when operand 2 is the "C" constant;
;; otherwise operand 1 is tied to the destination and operand 2 is merged
;; with punpcklqdq ("Y2" registers), movlhps (SSE register) or movhps
;; (memory).
7653 (define_insn "vec_concatv2di"
7654 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7656 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7657 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7658 "!TARGET_64BIT && TARGET_SSE"
7660 movq\t{%1, %0|%0, %1}
7661 movq2dq\t{%1, %0|%0, %1}
7662 punpcklqdq\t{%2, %0|%0, %2}
7663 movlhps\t{%2, %0|%0, %2}
7664 movhps\t{%2, %0|%0, %2}"
7665 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7666 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX form (TARGET_64BIT && TARGET_AVX) of building a V2DI from two
;; DImode operands.  Alternatives:
;;   0: vpinsrq at index 1 from a general register or memory
;;   1: vmovq from memory (operand 2 is the "C" constant)
;;   2: vmovq from a general register into a "Yi" register (operand 2 is "C")
;;   3: movq2dq from an MMX register (operand 2 is "C"; non-VEX prefix)
;;   4: vpunpcklqdq of two SSE registers
;;   5: vmovhps with operand 2 in memory
7668 (define_insn "*vec_concatv2di_rex64_avx"
7669 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7671 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7672 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7673 "TARGET_64BIT && TARGET_AVX"
7675 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7676 vmovq\t{%1, %0|%0, %1}
7677 vmovq\t{%1, %0|%0, %1}
7678 movq2dq\t{%1, %0|%0, %1}
7679 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7680 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7681 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7682 (set_attr "prefix_extra" "1,*,*,*,*,*")
7683 (set_attr "length_immediate" "1,*,*,*,*,*")
7684 (set (attr "prefix")
7685 (if_then_else (eq_attr "alternative" "3")
7686 (const_string "orig")
7687 (const_string "vex")))
7688 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 form (TARGET_64BIT && TARGET_SSE4_1) of building a V2DI from
;; two DImode operands.  pinsrq at index 1 (operand 1 tied to the
;; destination); movq / movq2dq when operand 2 is the "C" constant; otherwise
;; punpcklqdq, movlhps or movhps with operand 1 tied to the destination.
;; prefix_rex is forced to 1 for the pinsrq alternative and for the movq from
;; a general register.
7690 (define_insn "*vec_concatv2di_rex64_sse4_1"
7691 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7693 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7694 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7695 "TARGET_64BIT && TARGET_SSE4_1"
7697 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7698 movq\t{%1, %0|%0, %1}
7699 movq\t{%1, %0|%0, %1}
7700 movq2dq\t{%1, %0|%0, %1}
7701 punpcklqdq\t{%2, %0|%0, %2}
7702 movlhps\t{%2, %0|%0, %2}
7703 movhps\t{%2, %0|%0, %2}"
7704 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7705 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7706 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7707 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7708 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit baseline SSE form (TARGET_64BIT && TARGET_SSE) of building a V2DI
;; from two DImode operands, without an insert alternative.  movq / movq2dq
;; when operand 2 is the "C" constant; otherwise punpcklqdq, movlhps or
;; movhps with operand 1 tied to the destination.  prefix_rex is forced to 1
;; for the movq from a general register (alternative 1).
7710 (define_insn "*vec_concatv2di_rex64_sse"
7711 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7713 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7714 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7715 "TARGET_64BIT && TARGET_SSE"
7717 movq\t{%1, %0|%0, %1}
7718 movq\t{%1, %0|%0, %1}
7719 movq2dq\t{%1, %0|%0, %1}
7720 punpcklqdq\t{%2, %0|%0, %2}
7721 movlhps\t{%2, %0|%0, %2}
7722 movhps\t{%2, %0|%0, %2}"
7723 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7724 (set_attr "prefix_rex" "*,1,*,*,*,*")
7725 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander from a V16QI register (operand 1) to a V8HI register (operand 0);
;; per its name, the unsigned unpack of the high half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (true, true).
;; Across the vec_unpack* expanders the first flag follows the "u" variants
;; and the second the "hi" variants.
7727 (define_expand "vec_unpacku_hi_v16qi"
7728 [(match_operand:V8HI 0 "register_operand" "")
7729 (match_operand:V16QI 1 "register_operand" "")]
7733 ix86_expand_sse4_unpack (operands, true, true);
7735 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V16QI register (operand 1) to a V8HI register (operand 0);
;; per its name, the signed unpack of the high half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (false, true).
7739 (define_expand "vec_unpacks_hi_v16qi"
7740 [(match_operand:V8HI 0 "register_operand" "")
7741 (match_operand:V16QI 1 "register_operand" "")]
7745 ix86_expand_sse4_unpack (operands, false, true);
7747 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V16QI register (operand 1) to a V8HI register (operand 0);
;; per its name, the unsigned unpack of the low half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (true, false).
7751 (define_expand "vec_unpacku_lo_v16qi"
7752 [(match_operand:V8HI 0 "register_operand" "")
7753 (match_operand:V16QI 1 "register_operand" "")]
7757 ix86_expand_sse4_unpack (operands, true, false);
7759 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V16QI register (operand 1) to a V8HI register (operand 0);
;; per its name, the signed unpack of the low half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (false, false).
7763 (define_expand "vec_unpacks_lo_v16qi"
7764 [(match_operand:V8HI 0 "register_operand" "")
7765 (match_operand:V16QI 1 "register_operand" "")]
7769 ix86_expand_sse4_unpack (operands, false, false);
7771 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V8HI register (operand 1) to a V4SI register (operand 0);
;; per its name, the unsigned unpack of the high half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (true, true).
7775 (define_expand "vec_unpacku_hi_v8hi"
7776 [(match_operand:V4SI 0 "register_operand" "")
7777 (match_operand:V8HI 1 "register_operand" "")]
7781 ix86_expand_sse4_unpack (operands, true, true);
7783 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V8HI register (operand 1) to a V4SI register (operand 0);
;; per its name, the signed unpack of the high half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (false, true).
7787 (define_expand "vec_unpacks_hi_v8hi"
7788 [(match_operand:V4SI 0 "register_operand" "")
7789 (match_operand:V8HI 1 "register_operand" "")]
7793 ix86_expand_sse4_unpack (operands, false, true);
7795 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V8HI register (operand 1) to a V4SI register (operand 0);
;; per its name, the unsigned unpack of the low half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (true, false).
7799 (define_expand "vec_unpacku_lo_v8hi"
7800 [(match_operand:V4SI 0 "register_operand" "")
7801 (match_operand:V8HI 1 "register_operand" "")]
7805 ix86_expand_sse4_unpack (operands, true, false);
7807 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V8HI register (operand 1) to a V4SI register (operand 0);
;; per its name, the signed unpack of the low half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (false, false).
7811 (define_expand "vec_unpacks_lo_v8hi"
7812 [(match_operand:V4SI 0 "register_operand" "")
7813 (match_operand:V8HI 1 "register_operand" "")]
7817 ix86_expand_sse4_unpack (operands, false, false);
7819 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V4SI register (operand 1) to a V2DI register (operand 0);
;; per its name, the unsigned unpack of the high half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (true, true).
7823 (define_expand "vec_unpacku_hi_v4si"
7824 [(match_operand:V2DI 0 "register_operand" "")
7825 (match_operand:V4SI 1 "register_operand" "")]
7829 ix86_expand_sse4_unpack (operands, true, true);
7831 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V4SI register (operand 1) to a V2DI register (operand 0);
;; per its name, the signed unpack of the high half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (false, true).
7835 (define_expand "vec_unpacks_hi_v4si"
7836 [(match_operand:V2DI 0 "register_operand" "")
7837 (match_operand:V4SI 1 "register_operand" "")]
7841 ix86_expand_sse4_unpack (operands, false, true);
7843 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V4SI register (operand 1) to a V2DI register (operand 0);
;; per its name, the unsigned unpack of the low half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (true, false).
7847 (define_expand "vec_unpacku_lo_v4si"
7848 [(match_operand:V2DI 0 "register_operand" "")
7849 (match_operand:V4SI 1 "register_operand" "")]
7853 ix86_expand_sse4_unpack (operands, true, false);
7855 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V4SI register (operand 1) to a V2DI register (operand 0);
;; per its name, the signed unpack of the low half.  Expands through
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (false, false).
7859 (define_expand "vec_unpacks_lo_v4si"
7860 [(match_operand:V2DI 0 "register_operand" "")
7861 (match_operand:V4SI 1 "register_operand" "")]
7865 ix86_expand_sse4_unpack (operands, false, false);
7867 ix86_expand_sse_unpack (operands, false, false);
7871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7875 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI "uavg" pattern: operands 1 and 2 are combined
;; together with a const_vector of sixteen ones.  The preparation statement
;; canonicalizes the operands as for a PLUS via
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the vector of ones is presumably the +1 rounding term of an
;; unsigned rounding average -- confirm against the full pattern.
7877 (define_expand "sse2_uavgv16qi3"
7878 [(set (match_operand:V16QI 0 "register_operand" "")
7884 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7886 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7887 (const_vector:V16QI [(const_int 1) (const_int 1)
7888 (const_int 1) (const_int 1)
7889 (const_int 1) (const_int 1)
7890 (const_int 1) (const_int 1)
7891 (const_int 1) (const_int 1)
7892 (const_int 1) (const_int 1)
7893 (const_int 1) (const_int 1)
7894 (const_int 1) (const_int 1)]))
7897 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX insn for the V16QI uavg pattern, printed as three-operand vpavgb.
;; Operand 1 is marked commutative ("%") with operand 2, which may be in
;; memory.  Enabled under TARGET_AVX when ix86_binary_operator_ok accepts the
;; operands.
7899 (define_insn "*avx_uavgv16qi3"
7900 [(set (match_operand:V16QI 0 "register_operand" "=x")
7906 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7908 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7909 (const_vector:V16QI [(const_int 1) (const_int 1)
7910 (const_int 1) (const_int 1)
7911 (const_int 1) (const_int 1)
7912 (const_int 1) (const_int 1)
7913 (const_int 1) (const_int 1)
7914 (const_int 1) (const_int 1)
7915 (const_int 1) (const_int 1)
7916 (const_int 1) (const_int 1)]))
7918 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7919 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7920 [(set_attr "type" "sseiadd")
7921 (set_attr "prefix" "vex")
7922 (set_attr "mode" "TI")])
;; SSE2 insn for the V16QI uavg pattern, printed as two-operand pavgb.
;; Operand 1 is tied to the destination and marked commutative ("%0");
;; operand 2 may be in memory.  Enabled under TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.
7924 (define_insn "*sse2_uavgv16qi3"
7925 [(set (match_operand:V16QI 0 "register_operand" "=x")
7931 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7933 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7934 (const_vector:V16QI [(const_int 1) (const_int 1)
7935 (const_int 1) (const_int 1)
7936 (const_int 1) (const_int 1)
7937 (const_int 1) (const_int 1)
7938 (const_int 1) (const_int 1)
7939 (const_int 1) (const_int 1)
7940 (const_int 1) (const_int 1)
7941 (const_int 1) (const_int 1)]))
7943 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7944 "pavgb\t{%2, %0|%0, %2}"
7945 [(set_attr "type" "sseiadd")
7946 (set_attr "prefix_data16" "1")
7947 (set_attr "mode" "TI")])
;; Expander for the V8HI "uavg" pattern: operands 1 and 2 are combined
;; together with a const_vector of eight ones.  The preparation statement
;; canonicalizes the operands as for a PLUS via
;; ix86_fixup_binary_operands_no_copy.
7949 (define_expand "sse2_uavgv8hi3"
7950 [(set (match_operand:V8HI 0 "register_operand" "")
7956 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7958 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7959 (const_vector:V8HI [(const_int 1) (const_int 1)
7960 (const_int 1) (const_int 1)
7961 (const_int 1) (const_int 1)
7962 (const_int 1) (const_int 1)]))
7965 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX insn for the V8HI uavg pattern, printed as three-operand vpavgw.
;; Operand 1 is marked commutative ("%") with operand 2, which may be in
;; memory.  Enabled under TARGET_AVX when ix86_binary_operator_ok accepts the
;; operands.
7967 (define_insn "*avx_uavgv8hi3"
7968 [(set (match_operand:V8HI 0 "register_operand" "=x")
7974 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7976 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7977 (const_vector:V8HI [(const_int 1) (const_int 1)
7978 (const_int 1) (const_int 1)
7979 (const_int 1) (const_int 1)
7980 (const_int 1) (const_int 1)]))
7982 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7983 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7984 [(set_attr "type" "sseiadd")
7985 (set_attr "prefix" "vex")
7986 (set_attr "mode" "TI")])
;; SSE2 insn for the V8HI uavg pattern, printed as two-operand pavgw.
;; Operand 1 is tied to the destination and marked commutative ("%0");
;; operand 2 may be in memory.  Enabled under TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.
7988 (define_insn "*sse2_uavgv8hi3"
7989 [(set (match_operand:V8HI 0 "register_operand" "=x")
7995 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7997 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7998 (const_vector:V8HI [(const_int 1) (const_int 1)
7999 (const_int 1) (const_int 1)
8000 (const_int 1) (const_int 1)
8001 (const_int 1) (const_int 1)]))
8003 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8004 "pavgw\t{%2, %0|%0, %2}"
8005 [(set_attr "type" "sseiadd")
8006 (set_attr "prefix_data16" "1")
8007 (set_attr "mode" "TI")])
;; AVX psadbw, modelled as an opaque unspec over two V16QI inputs producing a
;; V2DI result (see the remark below on why it is not spelled out in RTL).
;; Printed as three-operand vpsadbw; operand 2 may be in memory.
8009 ;; The correct representation for this is absolutely enormous, and
8010 ;; surely not generally useful.
8011 (define_insn "*avx_psadbw"
8012 [(set (match_operand:V2DI 0 "register_operand" "=x")
8013 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
8014 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8017 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8018 [(set_attr "type" "sseiadd")
8019 (set_attr "prefix" "vex")
8020 (set_attr "mode" "TI")])
;; SSE2 psadbw, modelled as an opaque unspec over two V16QI inputs producing a
;; V2DI result.  Two-operand form: operand 1 is tied to the destination and
;; operand 2 may be in memory.
8022 (define_insn "sse2_psadbw"
8023 [(set (match_operand:V2DI 0 "register_operand" "=x")
8024 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8025 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8028 "psadbw\t{%2, %0|%0, %2}"
8029 [(set_attr "type" "sseiadd")
8030 (set_attr "atom_unit" "simul")
8031 (set_attr "prefix_data16" "1")
8032 (set_attr "mode" "TI")])
;; 256-bit AVX vmovmskp<suffix>: takes a float vector register (iterated over
;; AVX256MODEF2P) and writes an SImode general register.  Enabled when
;; AVX256_VEC_FLOAT_MODE_P holds for the iterated mode.
8034 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
8035 [(set (match_operand:SI 0 "register_operand" "=r")
8037 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8039 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8040 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
8041 [(set_attr "type" "ssecvt")
8042 (set_attr "prefix" "vex")
8043 (set_attr "mode" "<MODE>")])
;; 128-bit movmskp<suffix> (optionally VEX-encoded through the %v prefix):
;; takes a float vector register (iterated over SSEMODEF2P) and writes an
;; SImode general register.  Enabled when SSE_VEC_FLOAT_MODE_P holds for the
;; iterated mode.
8045 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
8046 [(set (match_operand:SI 0 "register_operand" "=r")
8048 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8050 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8051 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8052 [(set_attr "type" "ssemov")
8053 (set_attr "prefix" "maybe_vex")
8054 (set_attr "mode" "<MODE>")])
;; pmovmskb (optionally VEX-encoded): unspec taking a V16QI SSE register and
;; writing an SImode general register.
8056 (define_insn "sse2_pmovmskb"
8057 [(set (match_operand:SI 0 "register_operand" "=r")
8058 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8061 "%vpmovmskb\t{%1, %0|%0, %1}"
8062 [(set_attr "type" "ssemov")
8063 (set_attr "prefix_data16" "1")
8064 (set_attr "prefix" "maybe_vex")
8065 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination is set from an unspec
;; whose visible inputs are the two V16QI register operands 1 and 2.
8067 (define_expand "sse2_maskmovdqu"
8068 [(set (match_operand:V16QI 0 "memory_operand" "")
8069 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8070 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The destination address
;; is SImode operand 0 in the register class "D"; it is not printed in the
;; template, which lists only operands 2 and 1.  The old contents of the
;; destination memory are an input of the unspec as well (mem of match_dup 0).
8076 (define_insn "*sse2_maskmovdqu"
8077 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8078 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8079 (match_operand:V16QI 2 "register_operand" "x")
8080 (mem:V16QI (match_dup 0))]
8082 "TARGET_SSE2 && !TARGET_64BIT"
8083 ;; @@@ check ordering of operands in intel/nonintel syntax
8084 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8085 [(set_attr "type" "ssemov")
8086 (set_attr "prefix_data16" "1")
8087 ;; The implicit %rdi operand confuses default length_vex computation.
8088 (set_attr "length_vex" "3")
8089 (set_attr "prefix" "maybe_vex")
8090 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Same shape as the 32-bit
;; pattern but with a DImode address in the register class "D".  length_vex
;; is computed by hand: 3 + 1 when operand 2 is a REX SSE register
;; (REGNO >= FIRST_REX_SSE_REG), otherwise 2 + 1.
8092 (define_insn "*sse2_maskmovdqu_rex64"
8093 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8094 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8095 (match_operand:V16QI 2 "register_operand" "x")
8096 (mem:V16QI (match_dup 0))]
8098 "TARGET_SSE2 && TARGET_64BIT"
8099 ;; @@@ check ordering of operands in intel/nonintel syntax
8100 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8101 [(set_attr "type" "ssemov")
8102 (set_attr "prefix_data16" "1")
8103 ;; The implicit %rdi operand confuses default length_vex computation.
8104 (set (attr "length_vex")
8105 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8106 (set_attr "prefix" "maybe_vex")
8107 (set_attr "mode" "TI")])
;; ldmxcsr: volatile unspec consuming an SImode memory operand.  Attributes
;; mark it as an MXCSR-related SSE insn that performs a memory load.
8109 (define_insn "sse_ldmxcsr"
8110 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8114 [(set_attr "type" "sse")
8115 (set_attr "atom_sse_attr" "mxcsr")
8116 (set_attr "prefix" "maybe_vex")
8117 (set_attr "memory" "load")])
;; stmxcsr: sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR.  Attributes mark it as an MXCSR-related SSE insn that
;; performs a memory store.
8119 (define_insn "sse_stmxcsr"
8120 [(set (match_operand:SI 0 "memory_operand" "=m")
8121 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8124 [(set_attr "type" "sse")
8125 (set_attr "atom_sse_attr" "mxcsr")
8126 (set_attr "prefix" "maybe_vex")
8127 (set_attr "memory" "store")])
;; Expander for sfence (TARGET_SSE || TARGET_3DNOW_A).  Operand 0 is
;; synthesized here as a volatile BLKmode MEM at a scratch address and used
;; on both sides of the UNSPEC_SFENCE set.
8129 (define_expand "sse_sfence"
8131 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8132 "TARGET_SSE || TARGET_3DNOW_A"
8134 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8135 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_SFENCE set on a BLKmode operand (no predicate or
;; constraint).  length_address is 0 and the memory attribute is "unknown".
8138 (define_insn "*sse_sfence"
8139 [(set (match_operand:BLK 0 "" "")
8140 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8141 "TARGET_SSE || TARGET_3DNOW_A"
8143 [(set_attr "type" "sse")
8144 (set_attr "length_address" "0")
8145 (set_attr "atom_sse_attr" "fence")
8146 (set_attr "memory" "unknown")])
;; clflush: volatile unspec over an address operand (constraint "p").
;; Scheduled as a fence on Atom (atom_sse_attr "fence"); memory attribute is
;; "unknown".
8148 (define_insn "sse2_clflush"
8149 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8153 [(set_attr "type" "sse")
8154 (set_attr "atom_sse_attr" "fence")
8155 (set_attr "memory" "unknown")])
;; Expander for mfence.  Operand 0 is synthesized here as a volatile BLKmode
;; MEM at a scratch address and fed to the UNSPEC_MFENCE unspec.
8157 (define_expand "sse2_mfence"
8159 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8162 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8163 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_MFENCE set on a BLKmode operand.  Enabled for
;; TARGET_64BIT || TARGET_SSE2; length_address is 0 and the memory attribute
;; is "unknown".
8166 (define_insn "*sse2_mfence"
8167 [(set (match_operand:BLK 0 "" "")
8168 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8169 "TARGET_64BIT || TARGET_SSE2"
8171 [(set_attr "type" "sse")
8172 (set_attr "length_address" "0")
8173 (set_attr "atom_sse_attr" "fence")
8174 (set_attr "memory" "unknown")])
;; Expander for lfence.  Operand 0 is synthesized here as a volatile BLKmode
;; MEM at a scratch address and fed to the UNSPEC_LFENCE unspec.
8176 (define_expand "sse2_lfence"
8178 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8181 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8182 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_LFENCE set on a BLKmode operand.  Unlike the
;; sfence/mfence insns its atom_sse_attr is "lfence" rather than "fence".
8185 (define_insn "*sse2_lfence"
8186 [(set (match_operand:BLK 0 "" "")
8187 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8190 [(set_attr "type" "sse")
8191 (set_attr "length_address" "0")
8192 (set_attr "atom_sse_attr" "lfence")
8193 (set_attr "memory" "unknown")])
;; mwait: volatile unspec over two SImode register operands pinned to the
;; register classes "a" and "c".  The length is fixed at 3 bytes.
8195 (define_insn "sse3_mwait"
8196 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8197 (match_operand:SI 1 "register_operand" "c")]
8200 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8201 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8202 ;; we only need to set up 32bit registers.
8204 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): volatile unspec over three
;; SImode register operands pinned to the register classes "a", "c" and "d",
;; printed explicitly as "monitor %0, %1, %2".  The length is fixed at 3 bytes.
8206 (define_insn "sse3_monitor"
8207 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8208 (match_operand:SI 1 "register_operand" "c")
8209 (match_operand:SI 2 "register_operand" "d")]
8211 "TARGET_SSE3 && !TARGET_64BIT"
8212 "monitor\t%0, %1, %2"
8213 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT): operand 0 is DImode in
;; register class "a"; operands 1 and 2 stay SImode in classes "c" and "d"
;; (see the remark below).  The length is fixed at 3 bytes.
8215 (define_insn "sse3_monitor64"
8216 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8217 (match_operand:SI 1 "register_operand" "c")
8218 (match_operand:SI 2 "register_operand" "d")]
8220 "TARGET_SSE3 && TARGET_64BIT"
8221 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8222 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8223 ;; zero extended to 64bit, we only need to set up 32bit registers.
8225 [(set_attr "length" "3")])
8227 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8229 ;; SSSE3 instructions
8231 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vphaddw on V8HI.  The RTL pairs adjacent HImode elements (0,1), (2,3),
;; (4,5), (6,7) of operand 1, then the same pairs of operand 2, to form the
;; eight result elements.  Three-operand VEX form; operand 2 may be in memory.
8233 (define_insn "*avx_phaddwv8hi3"
8234 [(set (match_operand:V8HI 0 "register_operand" "=x")
8240 (match_operand:V8HI 1 "register_operand" "x")
8241 (parallel [(const_int 0)]))
8242 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8244 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8245 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8248 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8249 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8251 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8252 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8257 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8258 (parallel [(const_int 0)]))
8259 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8261 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8262 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8265 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8266 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8268 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8269 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8271 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8272 [(set_attr "type" "sseiadd")
8273 (set_attr "prefix_extra" "1")
8274 (set_attr "prefix" "vex")
8275 (set_attr "mode" "TI")])
;; SSSE3 phaddw on V8HI.  Same adjacent-pair structure as the AVX pattern
;; (pairs (0,1) .. (6,7) of operand 1, then of operand 2), but in two-operand
;; form: operand 1 is tied to the destination and operand 2 may be in memory.
8277 (define_insn "ssse3_phaddwv8hi3"
8278 [(set (match_operand:V8HI 0 "register_operand" "=x")
8284 (match_operand:V8HI 1 "register_operand" "0")
8285 (parallel [(const_int 0)]))
8286 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8288 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8289 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8292 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8293 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8295 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8296 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8301 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8302 (parallel [(const_int 0)]))
8303 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8305 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8306 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8309 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8310 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8312 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8313 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8315 "phaddw\t{%2, %0|%0, %2}"
8316 [(set_attr "type" "sseiadd")
8317 (set_attr "atom_unit" "complex")
8318 (set_attr "prefix_data16" "1")
8319 (set_attr "prefix_extra" "1")
8320 (set_attr "mode" "TI")])
;; MMX-register form of phaddw on V4HI ("y" registers): adjacent pairs (0,1),
;; (2,3) of operand 1, then of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be in memory.  prefix_rex is computed from
;; x86_extended_reg_mentioned_p.
8322 (define_insn "ssse3_phaddwv4hi3"
8323 [(set (match_operand:V4HI 0 "register_operand" "=y")
8328 (match_operand:V4HI 1 "register_operand" "0")
8329 (parallel [(const_int 0)]))
8330 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8332 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8333 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8337 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8338 (parallel [(const_int 0)]))
8339 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8341 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8342 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8344 "phaddw\t{%2, %0|%0, %2}"
8345 [(set_attr "type" "sseiadd")
8346 (set_attr "atom_unit" "complex")
8347 (set_attr "prefix_extra" "1")
8348 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8349 (set_attr "mode" "DI")])
;; AVX vphaddd on V4SI.  The RTL pairs adjacent SImode elements (0,1), (2,3)
;; of operand 1, then of operand 2.  Three-operand VEX form; operand 2 may be
;; in memory.
8351 (define_insn "*avx_phadddv4si3"
8352 [(set (match_operand:V4SI 0 "register_operand" "=x")
8357 (match_operand:V4SI 1 "register_operand" "x")
8358 (parallel [(const_int 0)]))
8359 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8361 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8362 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8366 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8367 (parallel [(const_int 0)]))
8368 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8370 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8371 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8373 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8374 [(set_attr "type" "sseiadd")
8375 (set_attr "prefix_extra" "1")
8376 (set_attr "prefix" "vex")
8377 (set_attr "mode" "TI")])
;; SSSE3 phaddd on V4SI: adjacent pairs (0,1), (2,3) of operand 1, then of
;; operand 2.  Two-operand form: operand 1 is tied to the destination and
;; operand 2 may be in memory.
8379 (define_insn "ssse3_phadddv4si3"
8380 [(set (match_operand:V4SI 0 "register_operand" "=x")
8385 (match_operand:V4SI 1 "register_operand" "0")
8386 (parallel [(const_int 0)]))
8387 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8389 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8390 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8394 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8395 (parallel [(const_int 0)]))
8396 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8398 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8399 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8401 "phaddd\t{%2, %0|%0, %2}"
8402 [(set_attr "type" "sseiadd")
8403 (set_attr "atom_unit" "complex")
8404 (set_attr "prefix_data16" "1")
8405 (set_attr "prefix_extra" "1")
8406 (set_attr "mode" "TI")])
;; MMX-register form of phaddd on V2SI ("y" registers): the pair (0,1) of
;; operand 1, then the pair (0,1) of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be in memory.  prefix_rex is computed from
;; x86_extended_reg_mentioned_p.
8408 (define_insn "ssse3_phadddv2si3"
8409 [(set (match_operand:V2SI 0 "register_operand" "=y")
8413 (match_operand:V2SI 1 "register_operand" "0")
8414 (parallel [(const_int 0)]))
8415 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8418 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8419 (parallel [(const_int 0)]))
8420 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8422 "phaddd\t{%2, %0|%0, %2}"
8423 [(set_attr "type" "sseiadd")
8424 (set_attr "atom_unit" "complex")
8425 (set_attr "prefix_extra" "1")
8426 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8427 (set_attr "mode" "DI")])
;; AVX vphaddsw on V8HI.  Same adjacent-pair structure as vphaddw: pairs
;; (0,1) .. (6,7) of operand 1, then of operand 2.  Three-operand VEX form;
;; operand 2 may be in memory.
;; NOTE(review): per the mnemonic this is presumably the signed-saturating
;; variant -- confirm against the combining rtx code of the full pattern.
8429 (define_insn "*avx_phaddswv8hi3"
8430 [(set (match_operand:V8HI 0 "register_operand" "=x")
8436 (match_operand:V8HI 1 "register_operand" "x")
8437 (parallel [(const_int 0)]))
8438 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8440 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8441 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8444 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8445 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8447 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8448 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8453 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8454 (parallel [(const_int 0)]))
8455 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8457 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8458 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8461 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8462 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8464 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8465 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8467 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8468 [(set_attr "type" "sseiadd")
8469 (set_attr "prefix_extra" "1")
8470 (set_attr "prefix" "vex")
8471 (set_attr "mode" "TI")])
;; SSSE3 phaddsw on V8HI: adjacent pairs (0,1) .. (6,7) of operand 1, then of
;; operand 2.  Two-operand form: operand 1 is tied to the destination and
;; operand 2 may be in memory.
;; NOTE(review): per the mnemonic this is presumably the signed-saturating
;; variant -- confirm against the combining rtx code of the full pattern.
8473 (define_insn "ssse3_phaddswv8hi3"
8474 [(set (match_operand:V8HI 0 "register_operand" "=x")
8480 (match_operand:V8HI 1 "register_operand" "0")
8481 (parallel [(const_int 0)]))
8482 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8484 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8485 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8488 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8489 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8491 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8492 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8497 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8498 (parallel [(const_int 0)]))
8499 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8501 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8502 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8505 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8506 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8508 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8509 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8511 "phaddsw\t{%2, %0|%0, %2}"
8512 [(set_attr "type" "sseiadd")
8513 (set_attr "atom_unit" "complex")
8514 (set_attr "prefix_data16" "1")
8515 (set_attr "prefix_extra" "1")
8516 (set_attr "mode" "TI")])
;; 64-bit (V4HI) variant of the saturating horizontal word add; emits
;; "phaddsw" on registers of the "y" constraint class (MMX registers in
;; the i386 port).
;; Operand 1 is tied to the destination ("0"); operand 2 is register or
;; memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p rather than being a fixed value.
8518 (define_insn "ssse3_phaddswv4hi3"
8519 [(set (match_operand:V4HI 0 "register_operand" "=y")
8524 (match_operand:V4HI 1 "register_operand" "0")
8525 (parallel [(const_int 0)]))
8526 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8528 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8529 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8533 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8534 (parallel [(const_int 0)]))
8535 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8537 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8538 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8540 "phaddsw\t{%2, %0|%0, %2}"
8541 [(set_attr "type" "sseiadd")
8542 (set_attr "atom_unit" "complex")
8543 (set_attr "prefix_extra" "1")
8544 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8545 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) form of the 128-bit horizontal subtract of 16-bit
;; words; emits "vphsubw".
;; Operand 0 is the V8HI destination register, operand 1 a register source
;; and operand 2 a register-or-memory source; the destination is not tied
;; to either input.  Lanes 0..7 of operand 1 are selected first, then
;; lanes 0..7 of operand 2.
8547 (define_insn "*avx_phsubwv8hi3"
8548 [(set (match_operand:V8HI 0 "register_operand" "=x")
8554 (match_operand:V8HI 1 "register_operand" "x")
8555 (parallel [(const_int 0)]))
8556 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8558 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8559 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8562 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8563 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8565 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8566 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8571 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8572 (parallel [(const_int 0)]))
8573 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8575 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8576 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8579 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8580 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8582 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8583 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8585 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8586 [(set_attr "type" "sseiadd")
8587 (set_attr "prefix_extra" "1")
8588 (set_attr "prefix" "vex")
8589 (set_attr "mode" "TI")])
;; SSSE3 128-bit horizontal subtract of 16-bit words; emits the
;; two-operand "phsubw".
;; Operand 1 is tied to the V8HI destination via the "0" constraint;
;; operand 2 may be a register or memory.  Lanes 0..7 of operand 1 are
;; selected first, then lanes 0..7 of operand 2.
8591 (define_insn "ssse3_phsubwv8hi3"
8592 [(set (match_operand:V8HI 0 "register_operand" "=x")
8598 (match_operand:V8HI 1 "register_operand" "0")
8599 (parallel [(const_int 0)]))
8600 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8602 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8603 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8606 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8607 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8609 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8610 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8615 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8616 (parallel [(const_int 0)]))
8617 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8619 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8620 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8623 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8624 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8626 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8627 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8629 "phsubw\t{%2, %0|%0, %2}"
8630 [(set_attr "type" "sseiadd")
8631 (set_attr "atom_unit" "complex")
8632 (set_attr "prefix_data16" "1")
8633 (set_attr "prefix_extra" "1")
8634 (set_attr "mode" "TI")])
;; 64-bit (V4HI) variant of the horizontal word subtract; emits "phsubw"
;; on registers of the "y" constraint class (MMX registers in the i386
;; port).
;; Operand 1 is tied to the destination ("0"); operand 2 is register or
;; memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8636 (define_insn "ssse3_phsubwv4hi3"
8637 [(set (match_operand:V4HI 0 "register_operand" "=y")
8642 (match_operand:V4HI 1 "register_operand" "0")
8643 (parallel [(const_int 0)]))
8644 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8646 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8647 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8651 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8652 (parallel [(const_int 0)]))
8653 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8655 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8656 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8658 "phsubw\t{%2, %0|%0, %2}"
8659 [(set_attr "type" "sseiadd")
8660 (set_attr "atom_unit" "complex")
8661 (set_attr "prefix_extra" "1")
8662 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8663 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) form of the 128-bit horizontal subtract of 32-bit
;; doublewords; emits "vphsubd".
;; Operand 0 is the V4SI destination register, operand 1 a register source
;; and operand 2 a register-or-memory source (three-operand form).
;; Lanes 0..3 of operand 1 are selected first, then lanes 0..3 of
;; operand 2.
8665 (define_insn "*avx_phsubdv4si3"
8666 [(set (match_operand:V4SI 0 "register_operand" "=x")
8671 (match_operand:V4SI 1 "register_operand" "x")
8672 (parallel [(const_int 0)]))
8673 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8675 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8676 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8680 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8681 (parallel [(const_int 0)]))
8682 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8684 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8685 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8687 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8688 [(set_attr "type" "sseiadd")
8689 (set_attr "prefix_extra" "1")
8690 (set_attr "prefix" "vex")
8691 (set_attr "mode" "TI")])
;; SSSE3 128-bit horizontal subtract of 32-bit doublewords; emits the
;; two-operand "phsubd".
;; Operand 1 is tied to the V4SI destination via the "0" constraint;
;; operand 2 may be a register or memory.
8693 (define_insn "ssse3_phsubdv4si3"
8694 [(set (match_operand:V4SI 0 "register_operand" "=x")
8699 (match_operand:V4SI 1 "register_operand" "0")
8700 (parallel [(const_int 0)]))
8701 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8703 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8704 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8708 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8709 (parallel [(const_int 0)]))
8710 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8712 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8713 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8715 "phsubd\t{%2, %0|%0, %2}"
8716 [(set_attr "type" "sseiadd")
8717 (set_attr "atom_unit" "complex")
8718 (set_attr "prefix_data16" "1")
8719 (set_attr "prefix_extra" "1")
8720 (set_attr "mode" "TI")])
;; 64-bit (V2SI) variant of the horizontal doubleword subtract; emits
;; "phsubd" on registers of the "y" constraint class (MMX registers in
;; the i386 port).
;; Operand 1 is tied to the destination ("0"); operand 2 is register or
;; memory.  Lanes 0 and 1 of each input are selected.
8722 (define_insn "ssse3_phsubdv2si3"
8723 [(set (match_operand:V2SI 0 "register_operand" "=y")
8727 (match_operand:V2SI 1 "register_operand" "0")
8728 (parallel [(const_int 0)]))
8729 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8732 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8733 (parallel [(const_int 0)]))
8734 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8736 "phsubd\t{%2, %0|%0, %2}"
8737 [(set_attr "type" "sseiadd")
8738 (set_attr "atom_unit" "complex")
8739 (set_attr "prefix_extra" "1")
8740 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8741 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) form of the 128-bit horizontal subtract of 16-bit
;; words with signed saturation; emits "vphsubsw".
;; Operand 0 is the V8HI destination register, operand 1 a register source
;; and operand 2 a register-or-memory source (three-operand form).
;; Lanes 0..7 of operand 1 are selected first, then lanes 0..7 of
;; operand 2.
8743 (define_insn "*avx_phsubswv8hi3"
8744 [(set (match_operand:V8HI 0 "register_operand" "=x")
8750 (match_operand:V8HI 1 "register_operand" "x")
8751 (parallel [(const_int 0)]))
8752 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8754 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8755 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8758 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8759 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8761 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8762 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8767 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8768 (parallel [(const_int 0)]))
8769 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8771 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8772 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8775 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8776 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8778 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8779 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8781 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8782 [(set_attr "type" "sseiadd")
8783 (set_attr "prefix_extra" "1")
8784 (set_attr "prefix" "vex")
8785 (set_attr "mode" "TI")])
;; SSSE3 128-bit horizontal subtract of 16-bit words with signed
;; saturation; emits the two-operand "phsubsw".
;; Operand 1 is tied to the V8HI destination via the "0" constraint;
;; operand 2 may be a register or memory.  Lanes 0..7 of operand 1 are
;; selected first, then lanes 0..7 of operand 2.
8787 (define_insn "ssse3_phsubswv8hi3"
8788 [(set (match_operand:V8HI 0 "register_operand" "=x")
8794 (match_operand:V8HI 1 "register_operand" "0")
8795 (parallel [(const_int 0)]))
8796 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8798 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8799 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8802 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8803 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8805 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8806 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8811 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8812 (parallel [(const_int 0)]))
8813 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8815 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8816 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8819 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8820 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8822 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8823 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8825 "phsubsw\t{%2, %0|%0, %2}"
8826 [(set_attr "type" "sseiadd")
8827 (set_attr "atom_unit" "complex")
8828 (set_attr "prefix_data16" "1")
8829 (set_attr "prefix_extra" "1")
8830 (set_attr "mode" "TI")])
;; 64-bit (V4HI) variant of the saturating horizontal word subtract;
;; emits "phsubsw" on registers of the "y" constraint class (MMX
;; registers in the i386 port).
;; Operand 1 is tied to the destination ("0"); operand 2 is register or
;; memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8832 (define_insn "ssse3_phsubswv4hi3"
8833 [(set (match_operand:V4HI 0 "register_operand" "=y")
8838 (match_operand:V4HI 1 "register_operand" "0")
8839 (parallel [(const_int 0)]))
8840 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8842 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8843 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8847 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8848 (parallel [(const_int 0)]))
8849 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8851 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8852 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8854 "phsubsw\t{%2, %0|%0, %2}"
8855 [(set_attr "type" "sseiadd")
8856 (set_attr "atom_unit" "complex")
8857 (set_attr "prefix_extra" "1")
8858 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8859 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) 128-bit byte multiply-add; emits "vpmaddubsw".
;; Takes two V16QI inputs (operand 1 in a register, operand 2 in a
;; register or memory) and produces a V8HI result in operand 0.
;; The pattern selects from each input twice: one lane list starting at
;; lane 0 and one starting at lane 1 and ending at lane 15.
8861 (define_insn "*avx_pmaddubsw128"
8862 [(set (match_operand:V8HI 0 "register_operand" "=x")
8867 (match_operand:V16QI 1 "register_operand" "x")
8868 (parallel [(const_int 0)
8878 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8879 (parallel [(const_int 0)
8889 (vec_select:V16QI (match_dup 1)
8890 (parallel [(const_int 1)
8899 (vec_select:V16QI (match_dup 2)
8900 (parallel [(const_int 1)
8907 (const_int 15)]))))))]
8909 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8910 [(set_attr "type" "sseiadd")
8911 (set_attr "prefix_extra" "1")
8912 (set_attr "prefix" "vex")
8913 (set_attr "mode" "TI")])
;; SSSE3 128-bit byte multiply-add; emits the two-operand "pmaddubsw".
;; Takes two V16QI inputs and produces a V8HI result.  Operand 1 is tied
;; to the destination register ("0"); operand 2 may be a register or
;; memory.  Each input is selected twice: one lane list starting at
;; lane 0 and one starting at lane 1 and ending at lane 15.
8915 (define_insn "ssse3_pmaddubsw128"
8916 [(set (match_operand:V8HI 0 "register_operand" "=x")
8921 (match_operand:V16QI 1 "register_operand" "0")
8922 (parallel [(const_int 0)
8932 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8933 (parallel [(const_int 0)
8943 (vec_select:V16QI (match_dup 1)
8944 (parallel [(const_int 1)
8953 (vec_select:V16QI (match_dup 2)
8954 (parallel [(const_int 1)
8961 (const_int 15)]))))))]
8963 "pmaddubsw\t{%2, %0|%0, %2}"
8964 [(set_attr "type" "sseiadd")
8965 (set_attr "atom_unit" "simul")
8966 (set_attr "prefix_data16" "1")
8967 (set_attr "prefix_extra" "1")
8968 (set_attr "mode" "TI")])
;; 64-bit variant of the byte multiply-add; emits "pmaddubsw" on
;; registers of the "y" constraint class (MMX registers in the i386
;; port).
;; Takes two V8QI inputs and produces a V4HI result.  Operand 1 is tied
;; to the destination ("0"); operand 2 is register or memory.  Each input
;; is selected twice: one lane list starting at lane 0 and one starting
;; at lane 1 and ending at lane 7.
8970 (define_insn "ssse3_pmaddubsw"
8971 [(set (match_operand:V4HI 0 "register_operand" "=y")
8976 (match_operand:V8QI 1 "register_operand" "0")
8977 (parallel [(const_int 0)
8983 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8984 (parallel [(const_int 0)
8990 (vec_select:V8QI (match_dup 1)
8991 (parallel [(const_int 1)
8996 (vec_select:V8QI (match_dup 2)
8997 (parallel [(const_int 1)
9000 (const_int 7)]))))))]
9002 "pmaddubsw\t{%2, %0|%0, %2}"
9003 [(set_attr "type" "sseiadd")
9004 (set_attr "atom_unit" "simul")
9005 (set_attr "prefix_extra" "1")
9006 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9007 (set_attr "mode" "DI")])
;; Named expander for the V8HI pmulhrsw operation.
;; Both sources are accepted as nonimmediate_operand; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy (MULT, V8HImode,
;; operands) before the RTL is generated.  The template includes a V8HI
;; const_vector of all ones.
;; NOTE(review): the all-ones vector is presumably the rounding
;; increment of pmulhrsw -- confirm against the full pattern.
9009 (define_expand "ssse3_pmulhrswv8hi3"
9010 [(set (match_operand:V8HI 0 "register_operand" "")
9017 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9019 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9021 (const_vector:V8HI [(const_int 1) (const_int 1)
9022 (const_int 1) (const_int 1)
9023 (const_int 1) (const_int 1)
9024 (const_int 1) (const_int 1)]))
9027 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX (VEX-encoded) V8HI pmulhrsw; emits the three-operand "vpmulhrsw".
;; Enabled when TARGET_AVX holds and ix86_binary_operator_ok accepts the
;; operands for MULT in V8HImode.  The "%" on operand 1 marks operands 1
;; and 2 as commutative for register allocation; operand 2 may be a
;; register or memory.
9029 (define_insn "*avx_pmulhrswv8hi3"
9030 [(set (match_operand:V8HI 0 "register_operand" "=x")
9037 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9039 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9041 (const_vector:V8HI [(const_int 1) (const_int 1)
9042 (const_int 1) (const_int 1)
9043 (const_int 1) (const_int 1)
9044 (const_int 1) (const_int 1)]))
9046 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9047 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9048 [(set_attr "type" "sseimul")
9049 (set_attr "prefix_extra" "1")
9050 (set_attr "prefix" "vex")
9051 (set_attr "mode" "TI")])
;; SSSE3 V8HI pmulhrsw; emits the two-operand "pmulhrsw".
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts
;; the operands for MULT in V8HImode.  Operand 1 is tied to the
;; destination ("0") and marked commutative with operand 2 ("%");
;; operand 2 may be a register or memory.
9053 (define_insn "*ssse3_pmulhrswv8hi3"
9054 [(set (match_operand:V8HI 0 "register_operand" "=x")
9061 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9063 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9065 (const_vector:V8HI [(const_int 1) (const_int 1)
9066 (const_int 1) (const_int 1)
9067 (const_int 1) (const_int 1)
9068 (const_int 1) (const_int 1)]))
9070 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9071 "pmulhrsw\t{%2, %0|%0, %2}"
9072 [(set_attr "type" "sseimul")
9073 (set_attr "prefix_data16" "1")
9074 (set_attr "prefix_extra" "1")
9075 (set_attr "mode" "TI")])
;; Named expander for the V4HI (64-bit) pmulhrsw operation.
;; Both sources are accepted as nonimmediate_operand; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy (MULT, V4HImode,
;; operands) before the RTL is generated.  The template includes a V4HI
;; const_vector of all ones.
9077 (define_expand "ssse3_pmulhrswv4hi3"
9078 [(set (match_operand:V4HI 0 "register_operand" "")
9085 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9087 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9089 (const_vector:V4HI [(const_int 1) (const_int 1)
9090 (const_int 1) (const_int 1)]))
9093 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; SSSE3 V4HI (64-bit) pmulhrsw; emits "pmulhrsw" on registers of the
;; "y" constraint class (MMX registers in the i386 port).
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts
;; the operands for MULT in V4HImode.  Operand 1 is tied to the
;; destination ("0") and marked commutative with operand 2 ("%").
9095 (define_insn "*ssse3_pmulhrswv4hi3"
9096 [(set (match_operand:V4HI 0 "register_operand" "=y")
9103 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9105 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9107 (const_vector:V4HI [(const_int 1) (const_int 1)
9108 (const_int 1) (const_int 1)]))
9110 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9111 "pmulhrsw\t{%2, %0|%0, %2}"
9112 [(set_attr "type" "sseimul")
9113 (set_attr "prefix_extra" "1")
9114 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9115 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) 128-bit byte shuffle; emits the three-operand
;; "vpshufb".  Modelled as an unspec over two V16QI inputs: operand 1 in
;; a register, operand 2 in a register or memory.
;; The ';' following the output template starts an .md comment and has no
;; effect on the pattern.
9117 (define_insn "*avx_pshufbv16qi3"
9118 [(set (match_operand:V16QI 0 "register_operand" "=x")
9119 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9120 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9123 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9124 [(set_attr "type" "sselog1")
9125 (set_attr "prefix_extra" "1")
9126 (set_attr "prefix" "vex")
9127 (set_attr "mode" "TI")])
;; SSSE3 128-bit byte shuffle; emits the two-operand "pshufb".
;; Modelled as an unspec over two V16QI inputs.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.
;; The ';' following the output template starts an .md comment and has no
;; effect on the pattern.
9129 (define_insn "ssse3_pshufbv16qi3"
9130 [(set (match_operand:V16QI 0 "register_operand" "=x")
9131 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9132 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9135 "pshufb\t{%2, %0|%0, %2}";
9136 [(set_attr "type" "sselog1")
9137 (set_attr "prefix_data16" "1")
9138 (set_attr "prefix_extra" "1")
9139 (set_attr "mode" "TI")])
;; 64-bit (V8QI) byte shuffle; emits "pshufb" on registers of the "y"
;; constraint class (MMX registers in the i386 port).
;; Modelled as an unspec; operand 1 is tied to the destination ("0") and
;; operand 2 may be a register or memory.  prefix_rex is computed per
;; insn by x86_extended_reg_mentioned_p.
9141 (define_insn "ssse3_pshufbv8qi3"
9142 [(set (match_operand:V8QI 0 "register_operand" "=y")
9143 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9144 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9147 "pshufb\t{%2, %0|%0, %2}";
9148 [(set_attr "type" "sselog1")
9149 (set_attr "prefix_extra" "1")
9150 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9151 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) psign, instantiated for every mode of SSEMODE124
;; (V16QI, V8HI, V4SI); emits "vpsign" followed by the <ssevecsize>
;; element-size suffix.
;; Three-operand form: operand 1 is a register, operand 2 a register or
;; memory, and the destination is not tied to either input.
9153 (define_insn "*avx_psign<mode>3"
9154 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9156 [(match_operand:SSEMODE124 1 "register_operand" "x")
9157 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9160 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9161 [(set_attr "type" "sselog1")
9162 (set_attr "prefix_extra" "1")
9163 (set_attr "prefix" "vex")
9164 (set_attr "mode" "TI")])
;; SSSE3 128-bit psign, instantiated for every mode of SSEMODE124
;; (V16QI, V8HI, V4SI); emits "psign" followed by the <ssevecsize>
;; element-size suffix.
;; Two-operand form: operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
9166 (define_insn "ssse3_psign<mode>3"
9167 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9169 [(match_operand:SSEMODE124 1 "register_operand" "0")
9170 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9173 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9174 [(set_attr "type" "sselog1")
9175 (set_attr "prefix_data16" "1")
9176 (set_attr "prefix_extra" "1")
9177 (set_attr "mode" "TI")])
;; psign over the MMXMODEI mode iterator (defined outside this view);
;; emits "psign" followed by the <mmxvecsize> suffix on registers of the
;; "y" constraint class.
;; It shares its base name with the SSEMODE124 pattern; the <mode>
;; substitution gives each instantiation a distinct name.
;; Operand 1 is tied to the destination ("0"); operand 2 is register or
;; memory.
9179 (define_insn "ssse3_psign<mode>3"
9180 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9182 [(match_operand:MMXMODEI 1 "register_operand" "0")
9183 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9186 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9187 [(set_attr "type" "sselog1")
9188 (set_attr "prefix_extra" "1")
9189 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9190 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) 128-bit palignr on TImode values; emits the
;; three-operand "vpalignr".
;; Operand 3 must satisfy const_0_to_255_mul_8_operand.  The output code
;; divides it by 8 before printing, so the value printed in the assembly
;; is one eighth of the value carried in the RTL.
9192 (define_insn "*avx_palignrti"
9193 [(set (match_operand:TI 0 "register_operand" "=x")
9194 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9195 (match_operand:TI 2 "nonimmediate_operand" "xm")
9196 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9200 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9201 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9203 [(set_attr "type" "sseishft")
9204 (set_attr "prefix_extra" "1")
9205 (set_attr "length_immediate" "1")
9206 (set_attr "prefix" "vex")
9207 (set_attr "mode" "TI")])
;; SSSE3 128-bit palignr on TImode values; emits the two-operand
;; "palignr" with operand 1 tied to the destination ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand.  The output code
;; divides it by 8 before printing, so the value printed in the assembly
;; is one eighth of the value carried in the RTL.
9209 (define_insn "ssse3_palignrti"
9210 [(set (match_operand:TI 0 "register_operand" "=x")
9211 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9212 (match_operand:TI 2 "nonimmediate_operand" "xm")
9213 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9217 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9218 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9220 [(set_attr "type" "sseishft")
9221 (set_attr "atom_unit" "sishuf")
9222 (set_attr "prefix_data16" "1")
9223 (set_attr "prefix_extra" "1")
9224 (set_attr "length_immediate" "1")
9225 (set_attr "mode" "TI")])
;; 64-bit palignr on DImode values; emits "palignr" on registers of the
;; "y" constraint class (MMX registers in the i386 port), with operand 1
;; tied to the destination ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand.  The output code
;; divides it by 8 before printing, so the value printed in the assembly
;; is one eighth of the value carried in the RTL.
9227 (define_insn "ssse3_palignrdi"
9228 [(set (match_operand:DI 0 "register_operand" "=y")
9229 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9230 (match_operand:DI 2 "nonimmediate_operand" "ym")
9231 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9235 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9236 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9238 [(set_attr "type" "sseishft")
9239 (set_attr "atom_unit" "sishuf")
9240 (set_attr "prefix_extra" "1")
9241 (set_attr "length_immediate" "1")
9242 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9243 (set_attr "mode" "DI")])
;; Element-wise absolute value for every mode of SSEMODE124 (V16QI,
;; V8HI, V4SI), expressed with the generic "abs" rtx.
;; Emits "pabs" plus the <ssevecsize> suffix; the "%v" in the template
;; together with prefix "maybe_vex" lets one pattern serve both the
;; legacy and the VEX encoding.  The single source may be a register or
;; memory.
9245 (define_insn "abs<mode>2"
9246 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9247 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9249 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9250 [(set_attr "type" "sselog1")
9251 (set_attr "prefix_data16" "1")
9252 (set_attr "prefix_extra" "1")
9253 (set_attr "prefix" "maybe_vex")
9254 (set_attr "mode" "TI")])
;; Element-wise absolute value over the MMXMODEI mode iterator (defined
;; outside this view), expressed with the generic "abs" rtx.
;; Emits "pabs" plus the <mmxvecsize> suffix on registers of the "y"
;; constraint class; the source may be a register or memory.
;; prefix_rep is set explicitly to "0" and prefix_rex is computed per
;; insn by x86_extended_reg_mentioned_p.
9256 (define_insn "abs<mode>2"
9257 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9258 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9260 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9261 [(set_attr "type" "sselog1")
9262 (set_attr "prefix_rep" "0")
9263 (set_attr "prefix_extra" "1")
9264 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9265 (set_attr "mode" "DI")])
9267 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9269 ;; AMD SSE4A instructions
9271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store from an SSE register (operand 1) to memory
;; (operand 0), modelled as an unspec.  Emits "movnts" plus the
;; <ssemodefsuffix> suffix.
;; Instantiated over the MODEF iterator, which is defined outside this
;; view; the insn "mode" attribute follows the iterator mode.
9273 (define_insn "sse4a_movnt<mode>"
9274 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9276 [(match_operand:MODEF 1 "register_operand" "x")]
9279 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9280 [(set_attr "type" "ssemov")
9281 (set_attr "mode" "<MODE>")])
;; Vector-operand form of the SSE4A scalar store: element 0 of the vector
;; register in operand 1 is selected (vec_select with lane 0) and stored
;; to the scalar-mode memory location in operand 0.
;; Emits "movnts" plus the <ssemodesuffixf2c> suffix.  Instantiated over
;; the SSEMODEF2P iterator, which is defined outside this view.
9283 (define_insn "sse4a_vmmovnt<mode>"
9284 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9285 (unspec:<ssescalarmode>
9286 [(vec_select:<ssescalarmode>
9287 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9288 (parallel [(const_int 0)]))]
9291 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9292 [(set_attr "type" "ssemov")
9293 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A "extrq" with two immediate operands, modelled as a V2DI unspec.
;; Operand 1 is tied to the destination ("0"); operands 2 and 3 are
;; integer constants (const_int_operand) printed as the two immediates,
;; which is why length_immediate is "2".
9295 (define_insn "sse4a_extrqi"
9296 [(set (match_operand:V2DI 0 "register_operand" "=x")
9297 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9298 (match_operand 2 "const_int_operand" "")
9299 (match_operand 3 "const_int_operand" "")]
9302 "extrq\t{%3, %2, %0|%0, %2, %3}"
9303 [(set_attr "type" "sse")
9304 (set_attr "prefix_data16" "1")
9305 (set_attr "length_immediate" "2")
9306 (set_attr "mode" "TI")])
;; SSE4A "extrq", register-controlled form, modelled as a V2DI unspec.
;; Operand 1 is tied to the destination ("0"); operand 2 is a V16QI value
;; that must be in an SSE register (no memory alternative).
9308 (define_insn "sse4a_extrq"
9309 [(set (match_operand:V2DI 0 "register_operand" "=x")
9310 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9311 (match_operand:V16QI 2 "register_operand" "x")]
9314 "extrq\t{%2, %0|%0, %2}"
9315 [(set_attr "type" "sse")
9316 (set_attr "prefix_data16" "1")
9317 (set_attr "mode" "TI")])
;; SSE4A "insertq" with two immediate operands, modelled as a V2DI
;; unspec.  Operand 1 is tied to the destination ("0"), operand 2 is a
;; V2DI register, and operands 3 and 4 are integer constants printed as
;; the two immediates (length_immediate "2").
;; prefix_data16 is set explicitly to "0" and prefix_rep to "1".
9319 (define_insn "sse4a_insertqi"
9320 [(set (match_operand:V2DI 0 "register_operand" "=x")
9321 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9322 (match_operand:V2DI 2 "register_operand" "x")
9323 (match_operand 3 "const_int_operand" "")
9324 (match_operand 4 "const_int_operand" "")]
9327 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9328 [(set_attr "type" "sseins")
9329 (set_attr "prefix_data16" "0")
9330 (set_attr "prefix_rep" "1")
9331 (set_attr "length_immediate" "2")
9332 (set_attr "mode" "TI")])
;; SSE4A "insertq", register-controlled form, modelled as a V2DI unspec.
;; Operand 1 is tied to the destination ("0"); operand 2 is a V2DI
;; register (no memory alternative).
;; prefix_data16 is set explicitly to "0" and prefix_rep to "1".
9334 (define_insn "sse4a_insertq"
9335 [(set (match_operand:V2DI 0 "register_operand" "=x")
9336 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9337 (match_operand:V2DI 2 "register_operand" "x")]
9340 "insertq\t{%2, %0|%0, %2}"
9341 [(set_attr "type" "sseins")
9342 (set_attr "prefix_data16" "0")
9343 (set_attr "prefix_rep" "1")
9344 (set_attr "mode" "TI")])
9346 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9348 ;; Intel SSE4.1 instructions
9350 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX immediate-controlled floating-point blend, expressed as a
;; vec_merge over the AVXMODEF2P iterator (defined outside this view).
;; Note the operand order inside vec_merge: operand 2 comes first and
;; operand 1 second, with operand 3 as the constant selection mask
;; (range checked by const_0_to_<blendbits>_operand).
;; Emits "vblendp" plus the <avxmodesuffixf2c> suffix.
9352 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9353 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9354 (vec_merge:AVXMODEF2P
9355 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9356 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9357 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9359 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9360 [(set_attr "type" "ssemov")
9361 (set_attr "prefix_extra" "1")
9362 (set_attr "length_immediate" "1")
9363 (set_attr "prefix" "vex")
9364 (set_attr "mode" "<avxvecmode>")])
;; AVX variable (register-controlled) floating-point blend over the
;; AVXMODEF2P iterator (defined outside this view).
;; Operands 1 and 3 are registers and operand 2 may be a register or
;; memory; all three have the same vector mode as the destination.
;; Emits the four-operand "vblendvp" plus the <avxmodesuffixf2c> suffix.
9366 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9367 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9369 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9370 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9371 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9374 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9375 [(set_attr "type" "ssemov")
9376 (set_attr "prefix_extra" "1")
9377 (set_attr "length_immediate" "1")
9378 (set_attr "prefix" "vex")
9379 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 immediate-controlled floating-point blend, expressed as a
;; vec_merge over the SSEMODEF2P iterator (defined outside this view).
;; Inside vec_merge operand 2 comes first and operand 1 second, with
;; operand 3 as the constant selection mask.  Operand 1 is tied to the
;; destination ("0").
;; Emits the two-operand-plus-immediate "blendp" plus the
;; <ssemodesuffixf2c> suffix.
9381 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9382 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9383 (vec_merge:SSEMODEF2P
9384 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9385 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9386 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9388 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9389 [(set_attr "type" "ssemov")
9390 (set_attr "prefix_data16" "1")
9391 (set_attr "prefix_extra" "1")
9392 (set_attr "length_immediate" "1")
9393 (set_attr "mode" "<MODE>")])
;; SSE4.1 variable floating-point blend over the SSEMODEF2P iterator
;; (defined outside this view); emits "blendvp" plus the
;; <ssemodesuffixf2c> suffix.
;; Operand 3 uses the "Yz" constraint (the first SSE register, xmm0, in
;; the i386 port).  The destination and operands 1 and 2 use the
;; *_not_xmm0_operand predicates, so they cannot collide with that
;; register.  Operand 1 is tied to the destination ("0").
9395 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9396 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9398 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9399 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9400 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9403 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9404 [(set_attr "type" "ssemov")
9405 (set_attr "prefix_data16" "1")
9406 (set_attr "prefix_extra" "1")
9407 (set_attr "mode" "<MODE>")])
;; AVX floating-point dot product over the AVXMODEF2P iterator (defined
;; outside this view); emits "vdpp" plus the <avxmodesuffixf2c> suffix.
;; Operands 1 and 2 are marked commutative ("%"); operand 2 may be a
;; register or memory.  Operand 3 is an immediate in the range accepted
;; by const_0_to_255_operand.
9409 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9410 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9412 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9413 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9414 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9417 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9418 [(set_attr "type" "ssemul")
9419 (set_attr "prefix" "vex")
9420 (set_attr "prefix_extra" "1")
9421 (set_attr "length_immediate" "1")
9422 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 floating-point dot product over the SSEMODEF2P iterator
;; (defined outside this view); emits "dpp" plus the <ssemodesuffixf2c>
;; suffix.
;; Operand 1 is tied to the destination ("0") and marked commutative with
;; operand 2 ("%"); operand 2 may be a register or memory.  Operand 3 is
;; an immediate accepted by const_0_to_255_operand.
9424 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9425 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9427 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9428 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9429 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9432 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9433 [(set_attr "type" "ssemul")
9434 (set_attr "prefix_data16" "1")
9435 (set_attr "prefix_extra" "1")
9436 (set_attr "length_immediate" "1")
9437 (set_attr "mode" "<MODE>")])
;; SSE4.1 "movntdqa" load: reads a V2DI value from memory (operand 1 must
;; be a memory operand) into an SSE register, modelled as an unspec.
;; The "%v" in the template together with prefix "maybe_vex" lets one
;; pattern serve both the legacy and the VEX encoding.
9439 (define_insn "sse4_1_movntdqa"
9440 [(set (match_operand:V2DI 0 "register_operand" "=x")
9441 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9444 "%vmovntdqa\t{%1, %0|%0, %1}"
9445 [(set_attr "type" "ssemov")
9446 (set_attr "prefix_extra" "1")
9447 (set_attr "prefix" "maybe_vex")
9448 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) "vmpsadbw", modelled as a V16QI unspec.
;; Three-operand form plus immediate: operand 1 is a register, operand 2
;; a register or memory, operand 3 an immediate accepted by
;; const_0_to_255_operand.
9450 (define_insn "*avx_mpsadbw"
9451 [(set (match_operand:V16QI 0 "register_operand" "=x")
9452 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9453 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9454 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9457 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9458 [(set_attr "type" "sselog1")
9459 (set_attr "prefix" "vex")
9460 (set_attr "prefix_extra" "1")
9461 (set_attr "length_immediate" "1")
9462 (set_attr "mode" "TI")])
;; SSE4.1 "mpsadbw", modelled as a V16QI unspec.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory; operand 3 is an immediate accepted by
;; const_0_to_255_operand.
9464 (define_insn "sse4_1_mpsadbw"
9465 [(set (match_operand:V16QI 0 "register_operand" "=x")
9466 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9467 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9468 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9471 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9472 [(set_attr "type" "sselog1")
9473 (set_attr "prefix_extra" "1")
9474 (set_attr "length_immediate" "1")
9475 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) "vpackusdw": combines two V4SI inputs into one V8HI
;; result.  Operand 1 is a register, operand 2 a register or memory, and
;; the destination is not tied to either input.
9477 (define_insn "*avx_packusdw"
9478 [(set (match_operand:V8HI 0 "register_operand" "=x")
9481 (match_operand:V4SI 1 "register_operand" "x"))
9483 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9485 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9486 [(set_attr "type" "sselog")
9487 (set_attr "prefix_extra" "1")
9488 (set_attr "prefix" "vex")
9489 (set_attr "mode" "TI")])
;; SSE4.1 "packusdw": combines two V4SI inputs into one V8HI result.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.
9491 (define_insn "sse4_1_packusdw"
9492 [(set (match_operand:V8HI 0 "register_operand" "=x")
9495 (match_operand:V4SI 1 "register_operand" "0"))
9497 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9499 "packusdw\t{%2, %0|%0, %2}"
9500 [(set_attr "type" "sselog")
9501 (set_attr "prefix_extra" "1")
9502 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) variable byte blend "vpblendvb", modelled as a V16QI
;; unspec.  Operands 1 and 3 are registers and operand 2 may be a
;; register or memory; all four operands are printed explicitly.
9504 (define_insn "*avx_pblendvb"
9505 [(set (match_operand:V16QI 0 "register_operand" "=x")
9506 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9507 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9508 (match_operand:V16QI 3 "register_operand" "x")]
9511 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9512 [(set_attr "type" "ssemov")
9513 (set_attr "prefix_extra" "1")
9514 (set_attr "length_immediate" "1")
9515 (set_attr "prefix" "vex")
9516 (set_attr "mode" "TI")])
;; SSE4.1 variable byte blend "pblendvb", modelled as a V16QI unspec.
;; Operand 3 uses the "Yz" constraint (the first SSE register, xmm0, in
;; the i386 port).  The destination and operands 1 and 2 use the
;; *_not_xmm0_operand predicates, so they cannot collide with that
;; register.  Operand 1 is tied to the destination ("0").
9518 (define_insn "sse4_1_pblendvb"
9519 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9520 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9521 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9522 (match_operand:V16QI 3 "register_operand" "Yz")]
9525 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9526 [(set_attr "type" "ssemov")
9527 (set_attr "prefix_extra" "1")
9528 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) immediate-controlled word blend "vpblendw" on V8HI.
;; Operand 2 (register or memory) is listed before operand 1 (register)
;; in the pattern; operand 3 is the immediate mask, accepted by
;; const_0_to_255_operand.  The destination is not tied to either input.
9530 (define_insn "*avx_pblendw"
9531 [(set (match_operand:V8HI 0 "register_operand" "=x")
9533 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9534 (match_operand:V8HI 1 "register_operand" "x")
9535 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9537 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9538 [(set_attr "type" "ssemov")
9539 (set_attr "prefix" "vex")
9540 (set_attr "prefix_extra" "1")
9541 (set_attr "length_immediate" "1")
9542 (set_attr "mode" "TI")])
;; SSE4.1 immediate-controlled word blend "pblendw" on V8HI.
;; Operand 2 (register or memory) is listed before operand 1 in the
;; pattern; operand 1 is tied to the destination ("0").  Operand 3 is the
;; immediate mask, accepted by const_0_to_255_operand.
9544 (define_insn "sse4_1_pblendw"
9545 [(set (match_operand:V8HI 0 "register_operand" "=x")
9547 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9548 (match_operand:V8HI 1 "register_operand" "0")
9549 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9551 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9552 [(set_attr "type" "ssemov")
9553 (set_attr "prefix_extra" "1")
9554 (set_attr "length_immediate" "1")
9555 (set_attr "mode" "TI")])
;; SSE4.1 "phminposuw" on V8HI, modelled as UNSPEC_PHMINPOSUW over a
;; single source that may be a register or memory.
;; The "%v" in the template together with prefix "maybe_vex" lets one
;; pattern serve both the legacy and the VEX encoding.
9557 (define_insn "sse4_1_phminposuw"
9558 [(set (match_operand:V8HI 0 "register_operand" "=x")
9559 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9560 UNSPEC_PHMINPOSUW))]
9562 "%vphminposuw\t{%1, %0|%0, %1}"
9563 [(set_attr "type" "sselog1")
9564 (set_attr "prefix_extra" "1")
9565 (set_attr "prefix" "maybe_vex")
9566 (set_attr "mode" "TI")])
;; Byte-to-word extension producing V8HI from lanes of a V16QI register
;; (the lane list starts at lane 0); emits "pmovsxbw".
;; Register-source form: operand 1 must be an SSE register.  "%v" plus
;; prefix "maybe_vex" covers both the legacy and the VEX encoding.
9568 (define_insn "sse4_1_extendv8qiv8hi2"
9569 [(set (match_operand:V8HI 0 "register_operand" "=x")
9572 (match_operand:V16QI 1 "register_operand" "x")
9573 (parallel [(const_int 0)
9582 "%vpmovsxbw\t{%1, %0|%0, %1}"
9583 [(set_attr "type" "ssemov")
9584 (set_attr "prefix_extra" "1")
9585 (set_attr "prefix" "maybe_vex")
9586 (set_attr "mode" "TI")])
;; Unnamed companion of the byte-to-word extension that accepts a V8QI
;; source in a register or in memory.  The source is wrapped in
;; vec_duplicate:V16QI so the lane selection (starting at lane 0) has the
;; same shape as in the register-only pattern.  Emits "pmovsxbw".
9588 (define_insn "*sse4_1_extendv8qiv8hi2"
9589 [(set (match_operand:V8HI 0 "register_operand" "=x")
9592 (vec_duplicate:V16QI
9593 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9594 (parallel [(const_int 0)
9603 "%vpmovsxbw\t{%1, %0|%0, %1}"
9604 [(set_attr "type" "ssemov")
9605 (set_attr "prefix_extra" "1")
9606 (set_attr "prefix" "maybe_vex")
9607 (set_attr "mode" "TI")])
;; pmovsxbd, register source: widens byte elements taken from V16QI
;; register operand 1 (selector starts at element 0) into V4SI operand 0.
9609 (define_insn "sse4_1_extendv4qiv4si2"
9610 [(set (match_operand:V4SI 0 "register_operand" "=x")
9613 (match_operand:V16QI 1 "register_operand" "x")
9614 (parallel [(const_int 0)
9619 "%vpmovsxbd\t{%1, %0|%0, %1}"
9620 [(set_attr "type" "ssemov")
9621 (set_attr "prefix_extra" "1")
9622 (set_attr "prefix" "maybe_vex")
9623 (set_attr "mode" "TI")])
;; pmovsxbd, narrow source: operand 1 is a V4QI value (register or
;; memory) wrapped in vec_duplicate:V16QI.
9625 (define_insn "*sse4_1_extendv4qiv4si2"
9626 [(set (match_operand:V4SI 0 "register_operand" "=x")
9629 (vec_duplicate:V16QI
9630 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9631 (parallel [(const_int 0)
9636 "%vpmovsxbd\t{%1, %0|%0, %1}"
9637 [(set_attr "type" "ssemov")
9638 (set_attr "prefix_extra" "1")
9639 (set_attr "prefix" "maybe_vex")
9640 (set_attr "mode" "TI")])
;; pmovsxbq, register source: widens byte elements taken from V16QI
;; register operand 1 (selector starts at element 0) into V2DI operand 0.
9642 (define_insn "sse4_1_extendv2qiv2di2"
9643 [(set (match_operand:V2DI 0 "register_operand" "=x")
9646 (match_operand:V16QI 1 "register_operand" "x")
9647 (parallel [(const_int 0)
9650 "%vpmovsxbq\t{%1, %0|%0, %1}"
9651 [(set_attr "type" "ssemov")
9652 (set_attr "prefix_extra" "1")
9653 (set_attr "prefix" "maybe_vex")
9654 (set_attr "mode" "TI")])
;; pmovsxbq, narrow source: operand 1 is a V2QI value (register or
;; memory) wrapped in vec_duplicate:V16QI.
9656 (define_insn "*sse4_1_extendv2qiv2di2"
9657 [(set (match_operand:V2DI 0 "register_operand" "=x")
9660 (vec_duplicate:V16QI
9661 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9662 (parallel [(const_int 0)
9665 "%vpmovsxbq\t{%1, %0|%0, %1}"
9666 [(set_attr "type" "ssemov")
9667 (set_attr "prefix_extra" "1")
9668 (set_attr "prefix" "maybe_vex")
9669 (set_attr "mode" "TI")])
;; pmovsxwd, register source: widens word elements taken from V8HI
;; register operand 1 (selector starts at element 0) into V4SI operand 0.
9671 (define_insn "sse4_1_extendv4hiv4si2"
9672 [(set (match_operand:V4SI 0 "register_operand" "=x")
9675 (match_operand:V8HI 1 "register_operand" "x")
9676 (parallel [(const_int 0)
9681 "%vpmovsxwd\t{%1, %0|%0, %1}"
9682 [(set_attr "type" "ssemov")
9683 (set_attr "prefix_extra" "1")
9684 (set_attr "prefix" "maybe_vex")
9685 (set_attr "mode" "TI")])
;; pmovsxwd, narrow source: sign-extends four word elements into V4SI
;; operand 0.  Operand 1 (register or memory) must be V4HI, i.e. the
;; 64 bits the instruction actually reads; this also keeps the pattern
;; in step with its zero-extending twin *sse4_1_zero_extendv4hiv4si2.
9687 (define_insn "*sse4_1_extendv4hiv4si2"
9688 [(set (match_operand:V4SI 0 "register_operand" "=x")
9692 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9693 (parallel [(const_int 0)
9698 "%vpmovsxwd\t{%1, %0|%0, %1}"
9699 [(set_attr "type" "ssemov")
9700 (set_attr "prefix_extra" "1")
9701 (set_attr "prefix" "maybe_vex")
9702 (set_attr "mode" "TI")])
;; pmovsxwq, register source: widens word elements taken from V8HI
;; register operand 1 (selector starts at element 0) into V2DI operand 0.
9704 (define_insn "sse4_1_extendv2hiv2di2"
9705 [(set (match_operand:V2DI 0 "register_operand" "=x")
9708 (match_operand:V8HI 1 "register_operand" "x")
9709 (parallel [(const_int 0)
9712 "%vpmovsxwq\t{%1, %0|%0, %1}"
9713 [(set_attr "type" "ssemov")
9714 (set_attr "prefix_extra" "1")
9715 (set_attr "prefix" "maybe_vex")
9716 (set_attr "mode" "TI")])
;; pmovsxwq, narrow source: sign-extends two word elements into V2DI
;; operand 0.  Operand 1 (register or memory) must be V2HI, i.e. the
;; 32 bits the instruction actually reads; a full V8HI operand cannot be
;; the narrow source of this form, and V2HI matches the zero-extending
;; twin *sse4_1_zero_extendv2hiv2di2.
9718 (define_insn "*sse4_1_extendv2hiv2di2"
9719 [(set (match_operand:V2DI 0 "register_operand" "=x")
9723 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9724 (parallel [(const_int 0)
9727 "%vpmovsxwq\t{%1, %0|%0, %1}"
9728 [(set_attr "type" "ssemov")
9729 (set_attr "prefix_extra" "1")
9730 (set_attr "prefix" "maybe_vex")
9731 (set_attr "mode" "TI")])
;; pmovsxdq, register source: widens doubleword elements taken from V4SI
;; register operand 1 (selector starts at element 0) into V2DI operand 0.
9733 (define_insn "sse4_1_extendv2siv2di2"
9734 [(set (match_operand:V2DI 0 "register_operand" "=x")
9737 (match_operand:V4SI 1 "register_operand" "x")
9738 (parallel [(const_int 0)
9741 "%vpmovsxdq\t{%1, %0|%0, %1}"
9742 [(set_attr "type" "ssemov")
9743 (set_attr "prefix_extra" "1")
9744 (set_attr "prefix" "maybe_vex")
9745 (set_attr "mode" "TI")])
;; pmovsxdq, narrow source: operand 1 is a V2SI value that may live in a
;; register or in memory.
9747 (define_insn "*sse4_1_extendv2siv2di2"
9748 [(set (match_operand:V2DI 0 "register_operand" "=x")
9752 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9753 (parallel [(const_int 0)
9756 "%vpmovsxdq\t{%1, %0|%0, %1}"
9757 [(set_attr "type" "ssemov")
9758 (set_attr "prefix_extra" "1")
9759 (set_attr "prefix" "maybe_vex")
9760 (set_attr "mode" "TI")])
;; pmovzxbw, register source: widens byte elements taken from V16QI
;; register operand 1 (selector starts at element 0) into V8HI operand 0.
9762 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9763 [(set (match_operand:V8HI 0 "register_operand" "=x")
9766 (match_operand:V16QI 1 "register_operand" "x")
9767 (parallel [(const_int 0)
9776 "%vpmovzxbw\t{%1, %0|%0, %1}"
9777 [(set_attr "type" "ssemov")
9778 (set_attr "prefix_extra" "1")
9779 (set_attr "prefix" "maybe_vex")
9780 (set_attr "mode" "TI")])
;; pmovzxbw, narrow source: operand 1 is a V8QI value (register or
;; memory) wrapped in vec_duplicate:V16QI.
9782 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9783 [(set (match_operand:V8HI 0 "register_operand" "=x")
9786 (vec_duplicate:V16QI
9787 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9788 (parallel [(const_int 0)
9797 "%vpmovzxbw\t{%1, %0|%0, %1}"
9798 [(set_attr "type" "ssemov")
9799 (set_attr "prefix_extra" "1")
9800 (set_attr "prefix" "maybe_vex")
9801 (set_attr "mode" "TI")])
;; pmovzxbd, register source: widens byte elements taken from V16QI
;; register operand 1 (selector starts at element 0) into V4SI operand 0.
9803 (define_insn "sse4_1_zero_extendv4qiv4si2"
9804 [(set (match_operand:V4SI 0 "register_operand" "=x")
9807 (match_operand:V16QI 1 "register_operand" "x")
9808 (parallel [(const_int 0)
9813 "%vpmovzxbd\t{%1, %0|%0, %1}"
9814 [(set_attr "type" "ssemov")
9815 (set_attr "prefix_extra" "1")
9816 (set_attr "prefix" "maybe_vex")
9817 (set_attr "mode" "TI")])
;; pmovzxbd, narrow source: operand 1 is a V4QI value (register or
;; memory) wrapped in vec_duplicate:V16QI.
9819 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9820 [(set (match_operand:V4SI 0 "register_operand" "=x")
9823 (vec_duplicate:V16QI
9824 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9825 (parallel [(const_int 0)
9830 "%vpmovzxbd\t{%1, %0|%0, %1}"
9831 [(set_attr "type" "ssemov")
9832 (set_attr "prefix_extra" "1")
9833 (set_attr "prefix" "maybe_vex")
9834 (set_attr "mode" "TI")])
;; pmovzxbq, register source: widens byte elements taken from V16QI
;; register operand 1 (selector starts at element 0) into V2DI operand 0.
9836 (define_insn "sse4_1_zero_extendv2qiv2di2"
9837 [(set (match_operand:V2DI 0 "register_operand" "=x")
9840 (match_operand:V16QI 1 "register_operand" "x")
9841 (parallel [(const_int 0)
9844 "%vpmovzxbq\t{%1, %0|%0, %1}"
9845 [(set_attr "type" "ssemov")
9846 (set_attr "prefix_extra" "1")
9847 (set_attr "prefix" "maybe_vex")
9848 (set_attr "mode" "TI")])
;; pmovzxbq, narrow source: operand 1 is a V2QI value (register or
;; memory) wrapped in vec_duplicate:V16QI.
9850 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9851 [(set (match_operand:V2DI 0 "register_operand" "=x")
9854 (vec_duplicate:V16QI
9855 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9856 (parallel [(const_int 0)
9859 "%vpmovzxbq\t{%1, %0|%0, %1}"
9860 [(set_attr "type" "ssemov")
9861 (set_attr "prefix_extra" "1")
9862 (set_attr "prefix" "maybe_vex")
9863 (set_attr "mode" "TI")])
;; pmovzxwd, register source: widens word elements taken from V8HI
;; register operand 1 (selector starts at element 0) into V4SI operand 0.
9865 (define_insn "sse4_1_zero_extendv4hiv4si2"
9866 [(set (match_operand:V4SI 0 "register_operand" "=x")
9869 (match_operand:V8HI 1 "register_operand" "x")
9870 (parallel [(const_int 0)
9875 "%vpmovzxwd\t{%1, %0|%0, %1}"
9876 [(set_attr "type" "ssemov")
9877 (set_attr "prefix_extra" "1")
9878 (set_attr "prefix" "maybe_vex")
9879 (set_attr "mode" "TI")])
;; pmovzxwd, narrow source: operand 1 is a V4HI value that may live in a
;; register or in memory.
9881 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9882 [(set (match_operand:V4SI 0 "register_operand" "=x")
9886 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9887 (parallel [(const_int 0)
9892 "%vpmovzxwd\t{%1, %0|%0, %1}"
9893 [(set_attr "type" "ssemov")
9894 (set_attr "prefix_extra" "1")
9895 (set_attr "prefix" "maybe_vex")
9896 (set_attr "mode" "TI")])
;; pmovzxwq, register source: widens word elements taken from V8HI
;; register operand 1 (selector starts at element 0) into V2DI operand 0.
9898 (define_insn "sse4_1_zero_extendv2hiv2di2"
9899 [(set (match_operand:V2DI 0 "register_operand" "=x")
9902 (match_operand:V8HI 1 "register_operand" "x")
9903 (parallel [(const_int 0)
9906 "%vpmovzxwq\t{%1, %0|%0, %1}"
9907 [(set_attr "type" "ssemov")
9908 (set_attr "prefix_extra" "1")
9909 (set_attr "prefix" "maybe_vex")
9910 (set_attr "mode" "TI")])
;; pmovzxwq, narrow source: operand 1 is a V2HI value that may live in a
;; register or in memory.
9912 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9913 [(set (match_operand:V2DI 0 "register_operand" "=x")
9917 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9918 (parallel [(const_int 0)
9921 "%vpmovzxwq\t{%1, %0|%0, %1}"
9922 [(set_attr "type" "ssemov")
9923 (set_attr "prefix_extra" "1")
9924 (set_attr "prefix" "maybe_vex")
9925 (set_attr "mode" "TI")])
;; pmovzxdq, register source: widens doubleword elements taken from V4SI
;; register operand 1 (selector starts at element 0) into V2DI operand 0.
9927 (define_insn "sse4_1_zero_extendv2siv2di2"
9928 [(set (match_operand:V2DI 0 "register_operand" "=x")
9931 (match_operand:V4SI 1 "register_operand" "x")
9932 (parallel [(const_int 0)
9935 "%vpmovzxdq\t{%1, %0|%0, %1}"
9936 [(set_attr "type" "ssemov")
9937 (set_attr "prefix_extra" "1")
9938 (set_attr "prefix" "maybe_vex")
9939 (set_attr "mode" "TI")])
;; pmovzxdq, narrow source: operand 1 is a V2SI value that may live in a
;; register or in memory.
9941 (define_insn "*sse4_1_zero_extendv2siv2di2"
9942 [(set (match_operand:V2DI 0 "register_operand" "=x")
9946 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9947 (parallel [(const_int 0)
9950 "%vpmovzxdq\t{%1, %0|%0, %1}"
9951 [(set_attr "type" "ssemov")
9952 (set_attr "prefix_extra" "1")
9953 (set_attr "prefix" "maybe_vex")
9954 (set_attr "mode" "TI")])
9956 ;; vtestps/vtestpd are very similar to comiss and ucomiss in that they
9957 ;; set FLAGS_REG, but they are not really compare instructions.
;; One pattern per mode in AVXMODEF2P.  Sets FLAGS_REG (CCmode) from an
;; unspec over register operand 0 and register-or-memory operand 1, and
;; emits vtestp with the mode's element suffix.  No vector result is
;; produced.
9958 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9959 [(set (reg:CC FLAGS_REG)
9960 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9961 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9964 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9965 [(set_attr "type" "ssecomi")
9966 (set_attr "prefix_extra" "1")
9967 (set_attr "prefix" "vex")
9968 (set_attr "mode" "<MODE>")])
9970 ;; ptest is very similar to comiss and ucomiss in that it sets FLAGS_REG,
9971 ;; but it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CCmode) from an unspec over V4DI
;; register operand 0 and V4DI register-or-memory operand 1.  Always VEX
;; encoded; insn mode is OI.
9972 (define_insn "avx_ptest256"
9973 [(set (reg:CC FLAGS_REG)
9974 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9975 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9978 "vptest\t{%1, %0|%0, %1}"
9979 [(set_attr "type" "ssecomi")
9980 (set_attr "prefix_extra" "1")
9981 (set_attr "prefix" "vex")
9982 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (CCmode) from an unspec over V2DI
;; register operand 0 and V2DI register-or-memory operand 1.  `%v' and
;; prefix "maybe_vex" allow the VEX spelling vptest.
9984 (define_insn "sse4_1_ptest"
9985 [(set (reg:CC FLAGS_REG)
9986 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9987 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9990 "%vptest\t{%1, %0|%0, %1}"
9991 [(set_attr "type" "ssecomi")
9992 (set_attr "prefix_extra" "1")
9993 (set_attr "prefix" "maybe_vex")
9994 (set_attr "mode" "TI")])
;; 256-bit packed round, one pattern per mode in AVX256MODEF2P.
;; Operand 1 is the source (register or memory); operand 2 is the
;; immediate control, restricted to 0..15 by its predicate.
9996 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9997 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9998 (unspec:AVX256MODEF2P
9999 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
10000 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10003 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
10004 [(set_attr "type" "ssecvt")
10005 (set_attr "prefix_extra" "1")
10006 (set_attr "length_immediate" "1")
10007 (set_attr "prefix" "vex")
10008 (set_attr "mode" "<MODE>")])
;; 128-bit packed round, one pattern per mode in SSEMODEF2P.  Operand 1
;; is the source (register or memory); operand 2 is the immediate
;; control (0..15).  `%v' / "maybe_vex" allow the VEX spelling.
10010 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
10011 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10013 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
10014 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10017 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
10018 [(set_attr "type" "ssecvt")
10019 (set_attr "prefix_data16" "1")
10020 (set_attr "prefix_extra" "1")
10021 (set_attr "length_immediate" "1")
10022 (set_attr "prefix" "maybe_vex")
10023 (set_attr "mode" "<MODE>")])
;; AVX scalar round, expressed as a vec_merge.  Operand 2 is the value
;; that is rounded under immediate control operand 3 (0..15); operand 1
;; is the other vec_merge input.  Both sources are registers and the
;; emitted vrounds is the non-destructive three-operand form.
10025 (define_insn "*avx_rounds<ssemodesuffixf2c>"
10026 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10027 (vec_merge:SSEMODEF2P
10029 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10030 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10032 (match_operand:SSEMODEF2P 1 "register_operand" "x")
10035 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10036 [(set_attr "type" "ssecvt")
10037 (set_attr "prefix_extra" "1")
10038 (set_attr "length_immediate" "1")
10039 (set_attr "prefix" "vex")
10040 (set_attr "mode" "<MODE>")])
;; SSE4.1 scalar round, expressed as a vec_merge.  Operand 2 is rounded
;; under immediate control operand 3 (0..15); operand 1 is tied to the
;; destination (constraint "0"), so the template names only %0 and %2.
10042 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
10043 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10044 (vec_merge:SSEMODEF2P
10046 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10047 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10049 (match_operand:SSEMODEF2P 1 "register_operand" "0")
10052 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
10053 [(set_attr "type" "ssecvt")
10054 (set_attr "prefix_data16" "1")
10055 (set_attr "prefix_extra" "1")
10056 (set_attr "length_immediate" "1")
10057 (set_attr "mode" "<MODE>")])
10059 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10061 ;; Intel SSE4.2 string/text processing instructions
10063 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined explicit-length string compare.  The pattern sets three
;; results at once: an SI index (operand 0, constraint "c"), a V16QI
;; mask (operand 1, constraint "Yz") and FLAGS_REG.  Inputs are the two
;; V16QI strings (operands 2 and 4), their lengths (operands 3 and 5,
;; constraints "a" and "d") and the 8-bit immediate control (operand 6).
;; The split is only taken while pseudos can still be created.  Its C
;; body checks REG_UNUSED notes on the current insn to see which of the
;; three results is live, then emits pcmpestri and/or pcmpestrm
;; (presumably guarded by `ecx' and `xmm0' respectively; the guards are
;; not visible in this excerpt -- confirm).  The flags-only
;; pcmpestr_cconly form is emitted when flags are live but neither the
;; index nor the mask is.
10065 (define_insn_and_split "sse4_2_pcmpestr"
10066 [(set (match_operand:SI 0 "register_operand" "=c,c")
10068 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10069 (match_operand:SI 3 "register_operand" "a,a")
10070 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10071 (match_operand:SI 5 "register_operand" "d,d")
10072 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10074 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10082 (set (reg:CC FLAGS_REG)
10091 && can_create_pseudo_p ()"
10096 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10097 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10098 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10101 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10102 operands[3], operands[4],
10103 operands[5], operands[6]));
10105 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10106 operands[3], operands[4],
10107 operands[5], operands[6]));
10108 if (flags && !(ecx || xmm0))
10109 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10110 operands[2], operands[3],
10111 operands[4], operands[5],
10115 [(set_attr "type" "sselog")
10116 (set_attr "prefix_data16" "1")
10117 (set_attr "prefix_extra" "1")
10118 (set_attr "length_immediate" "1")
10119 (set_attr "memory" "none,load")
10120 (set_attr "mode" "TI")])
;; pcmpestri: index-producing explicit-length string compare.  The SI
;; result goes to the "c" register; string lengths come in the "a" and
;; "d" registers (operands 2 and 4); operand 3 may be memory in the
;; second alternative; operand 5 is the immediate control.  FLAGS_REG is
;; set as well.
10122 (define_insn "sse4_2_pcmpestri"
10123 [(set (match_operand:SI 0 "register_operand" "=c,c")
10125 [(match_operand:V16QI 1 "register_operand" "x,x")
10126 (match_operand:SI 2 "register_operand" "a,a")
10127 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10128 (match_operand:SI 4 "register_operand" "d,d")
10129 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10131 (set (reg:CC FLAGS_REG)
10140 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10141 [(set_attr "type" "sselog")
10142 (set_attr "prefix_data16" "1")
10143 (set_attr "prefix_extra" "1")
10144 (set_attr "prefix" "maybe_vex")
10145 (set_attr "length_immediate" "1")
10146 (set_attr "memory" "none,load")
10147 (set_attr "mode" "TI")])
;; pcmpestrm: mask-producing explicit-length string compare.  The V16QI
;; result is constrained to "Yz"; string lengths come in the "a" and "d"
;; registers (operands 2 and 4); operand 3 may be memory in the second
;; alternative; operand 5 is the immediate control.  FLAGS_REG is set as
;; well.
10149 (define_insn "sse4_2_pcmpestrm"
10150 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10152 [(match_operand:V16QI 1 "register_operand" "x,x")
10153 (match_operand:SI 2 "register_operand" "a,a")
10154 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10155 (match_operand:SI 4 "register_operand" "d,d")
10156 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10158 (set (reg:CC FLAGS_REG)
10167 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10168 [(set_attr "type" "sselog")
10169 (set_attr "prefix_data16" "1")
10170 (set_attr "prefix_extra" "1")
10171 (set_attr "length_immediate" "1")
10172 (set_attr "prefix" "maybe_vex")
10173 (set_attr "memory" "none,load")
10174 (set_attr "mode" "TI")])
;; Flags-only explicit-length string compare.  Only FLAGS_REG is set;
;; the instruction's register result is modelled as a clobbered scratch.
;; Four alternatives: the first two clobber a "Yz" vector scratch and
;; emit pcmpestrm, the last two clobber a "c" integer scratch and emit
;; pcmpestri.  Within each pair, operand 4 is a register or memory.
10176 (define_insn "sse4_2_pcmpestr_cconly"
10177 [(set (reg:CC FLAGS_REG)
10179 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10180 (match_operand:SI 3 "register_operand" "a,a,a,a")
10181 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10182 (match_operand:SI 5 "register_operand" "d,d,d,d")
10183 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10185 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10186 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10189 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10190 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10191 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10192 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10193 [(set_attr "type" "sselog")
10194 (set_attr "prefix_data16" "1")
10195 (set_attr "prefix_extra" "1")
10196 (set_attr "length_immediate" "1")
10197 (set_attr "memory" "none,load,none,load")
10198 (set_attr "prefix" "maybe_vex")
10199 (set_attr "mode" "TI")])
;; Combined implicit-length string compare.  The pattern sets an SI
;; index (operand 0, "c"), a V16QI mask (operand 1, "Yz") and FLAGS_REG
;; from the two V16QI strings (operands 2 and 3) and the 8-bit immediate
;; control (operand 4).  The split is only taken while pseudos can still
;; be created.  Its C body checks REG_UNUSED notes on the current insn
;; to see which results are live, then emits pcmpistri and/or pcmpistrm
;; (presumably guarded by `ecx' and `xmm0'; the guards are not visible
;; in this excerpt -- confirm).  The flags-only pcmpistr_cconly form is
;; emitted when flags are live but neither the index nor the mask is.
10201 (define_insn_and_split "sse4_2_pcmpistr"
10202 [(set (match_operand:SI 0 "register_operand" "=c,c")
10204 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10205 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10206 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10208 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10214 (set (reg:CC FLAGS_REG)
10221 && can_create_pseudo_p ()"
10226 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10227 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10228 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10231 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10232 operands[3], operands[4]));
10234 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10235 operands[3], operands[4]));
10236 if (flags && !(ecx || xmm0))
10237 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10238 operands[2], operands[3],
10242 [(set_attr "type" "sselog")
10243 (set_attr "prefix_data16" "1")
10244 (set_attr "prefix_extra" "1")
10245 (set_attr "length_immediate" "1")
10246 (set_attr "memory" "none,load")
10247 (set_attr "mode" "TI")])
;; pcmpistri: index-producing implicit-length string compare.  The SI
;; result goes to the "c" register; operand 2 may be memory in the
;; second alternative; operand 3 is the immediate control.  FLAGS_REG is
;; set as well.
10249 (define_insn "sse4_2_pcmpistri"
10250 [(set (match_operand:SI 0 "register_operand" "=c,c")
10252 [(match_operand:V16QI 1 "register_operand" "x,x")
10253 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10254 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10256 (set (reg:CC FLAGS_REG)
10263 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10264 [(set_attr "type" "sselog")
10265 (set_attr "prefix_data16" "1")
10266 (set_attr "prefix_extra" "1")
10267 (set_attr "length_immediate" "1")
10268 (set_attr "prefix" "maybe_vex")
10269 (set_attr "memory" "none,load")
10270 (set_attr "mode" "TI")])
;; pcmpistrm: mask-producing implicit-length string compare.  The V16QI
;; result is constrained to "Yz"; operand 2 may be memory in the second
;; alternative; operand 3 is the immediate control.  FLAGS_REG is set as
;; well.
10272 (define_insn "sse4_2_pcmpistrm"
10273 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10275 [(match_operand:V16QI 1 "register_operand" "x,x")
10276 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10277 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10279 (set (reg:CC FLAGS_REG)
10286 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10287 [(set_attr "type" "sselog")
10288 (set_attr "prefix_data16" "1")
10289 (set_attr "prefix_extra" "1")
10290 (set_attr "length_immediate" "1")
10291 (set_attr "prefix" "maybe_vex")
10292 (set_attr "memory" "none,load")
10293 (set_attr "mode" "TI")])
;; Flags-only implicit-length string compare.  Only FLAGS_REG is set;
;; the instruction's register result is modelled as a clobbered scratch.
;; Four alternatives: the first two clobber a "Yz" vector scratch and
;; emit pcmpistrm, the last two clobber a "c" integer scratch and emit
;; pcmpistri.  Within each pair, operand 3 is a register or memory.
10295 (define_insn "sse4_2_pcmpistr_cconly"
10296 [(set (reg:CC FLAGS_REG)
10298 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10299 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10300 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10302 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10303 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10306 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10307 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10308 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10309 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10310 [(set_attr "type" "sselog")
10311 (set_attr "prefix_data16" "1")
10312 (set_attr "prefix_extra" "1")
10313 (set_attr "length_immediate" "1")
10314 (set_attr "memory" "none,load,none,load")
10315 (set_attr "prefix" "maybe_vex")
10316 (set_attr "mode" "TI")])
10318 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10320 ;; XOP instructions
10322 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10324 ;; XOP parallel integer multiply/add instructions.
10325 ;; Note that the XOP multiply/add instructions
10326 ;; a[i] = b[i] * c[i] + d[i];
10327 ;; do not allow the value being added (d) to be a memory operand.
;; vpmacsww: V8HI multiply-accumulate.  Operands 1 and 2 are the
;; multiplicands ("%" marks them commutative; only operand 2 may be in
;; memory) and operand 3 is the addend, constrained to a register.
10328 (define_insn "xop_pmacsww"
10329 [(set (match_operand:V8HI 0 "register_operand" "=x")
10332 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10333 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10334 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10336 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10337 [(set_attr "type" "ssemuladd")
10338 (set_attr "mode" "TI")])
;; vpmacssww: V8HI multiply of operands 1 and 2 combined with register
;; addend operand 3.  The outer combining operator is not visible in
;; this excerpt; the mnemonic suggests a saturating add -- confirm.
10340 (define_insn "xop_pmacssww"
10341 [(set (match_operand:V8HI 0 "register_operand" "=x")
10343 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10344 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10345 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10347 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10348 [(set_attr "type" "ssemuladd")
10349 (set_attr "mode" "TI")])
;; vpmacsdd: V4SI multiply-accumulate.  Operands 1 and 2 are the
;; commutative multiplicands (only operand 2 may be in memory);
;; operand 3 is the register addend.
10351 (define_insn "xop_pmacsdd"
10352 [(set (match_operand:V4SI 0 "register_operand" "=x")
10355 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10356 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10357 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10359 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10360 [(set_attr "type" "ssemuladd")
10361 (set_attr "mode" "TI")])
;; vpmacssdd: V4SI multiply of operands 1 and 2 combined with register
;; addend operand 3.  The outer combining operator is not visible in
;; this excerpt; the mnemonic suggests a saturating add -- confirm.
10363 (define_insn "xop_pmacssdd"
10364 [(set (match_operand:V4SI 0 "register_operand" "=x")
10366 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10367 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10368 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10370 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10371 [(set_attr "type" "ssemuladd")
10372 (set_attr "mode" "TI")])
;; vpmacssdql: elements selected from V4SI operands 1 and 2 (both
;; selectors start at element 1) are multiplied and combined with V2DI
;; register addend operand 3 to give V2DI operand 0.
10374 (define_insn "xop_pmacssdql"
10375 [(set (match_operand:V2DI 0 "register_operand" "=x")
10380 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10381 (parallel [(const_int 1)
10384 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10385 (parallel [(const_int 1)
10387 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10389 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10390 [(set_attr "type" "ssemuladd")
10391 (set_attr "mode" "TI")])
;; vpmacssdqh: elements selected from V4SI operands 1 and 2 (both
;; selectors start at element 0) are multiplied and combined with V2DI
;; register addend operand 3 to give V2DI operand 0.
10393 (define_insn "xop_pmacssdqh"
10394 [(set (match_operand:V2DI 0 "register_operand" "=x")
10399 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10400 (parallel [(const_int 0)
10404 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10405 (parallel [(const_int 0)
10407 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10409 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10410 [(set_attr "type" "ssemuladd")
10411 (set_attr "mode" "TI")])
;; vpmacsdql: elements selected from V4SI operands 1 and 2 (both
;; selectors start at element 1) are multiplied and combined with V2DI
;; register addend operand 3 to give V2DI operand 0.
10413 (define_insn "xop_pmacsdql"
10414 [(set (match_operand:V2DI 0 "register_operand" "=x")
10419 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10420 (parallel [(const_int 1)
10424 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10425 (parallel [(const_int 1)
10427 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10429 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10430 [(set_attr "type" "ssemuladd")
10431 (set_attr "mode" "TI")])
10433 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10434 ;; fake it with a multiply/add whose addend is zero. The split below runs
10435 ;; after reload and writes operand 0 before the multiply/add reads its inputs,
10436 ;; so operand 0 is earlyclobber to keep it distinct from operands 1 and 2.
;; Widening multiply of elements 1 and 3 of V4SI operands 1 and 2 into
;; V2DI operand 0.  It is split once reload has completed; the
;; replacement sequence starts by setting operand 0, and the preparation
;; code makes operands[3] the V2DI zero constant.  Operand 0 is
;; earlyclobber ("=&x").
10437 (define_insn_and_split "xop_mulv2div2di3_low"
10438 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10442 (match_operand:V4SI 1 "register_operand" "%x")
10443 (parallel [(const_int 1)
10447 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10448 (parallel [(const_int 1)
10449 (const_int 3)])))))]
10452 "&& reload_completed"
10453 [(set (match_dup 0)
10461 (parallel [(const_int 1)
10466 (parallel [(const_int 1)
10470 operands[3] = CONST0_RTX (V2DImode);
10472 [(set_attr "type" "ssemul")
10473 (set_attr "mode" "TI")])
;; vpmacsdqh: elements selected from V4SI operands 1 and 2 (both
;; selectors start at element 0) are multiplied and combined with V2DI
;; register addend operand 3 to give V2DI operand 0.
10475 (define_insn "xop_pmacsdqh"
10476 [(set (match_operand:V2DI 0 "register_operand" "=x")
10481 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10482 (parallel [(const_int 0)
10486 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10487 (parallel [(const_int 0)
10489 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10491 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10492 [(set_attr "type" "ssemuladd")
10493 (set_attr "mode" "TI")])
10495 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10496 ;; fake it with a multiply/add whose addend is zero. The split below runs
10497 ;; after reload and writes operand 0 before the multiply/add reads its inputs,
10498 ;; so operand 0 is earlyclobber to keep it distinct from operands[1] and operands[2].
;; Widening multiply of elements 0 and 2 of V4SI operands 1 and 2 into
;; V2DI operand 0.  It is split once reload has completed; the
;; replacement sequence starts by setting operand 0, and the preparation
;; code makes operands[3] the V2DI zero constant.  Operand 0 is
;; earlyclobber ("=&x").
10499 (define_insn_and_split "xop_mulv2div2di3_high"
10500 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10504 (match_operand:V4SI 1 "register_operand" "%x")
10505 (parallel [(const_int 0)
10509 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10510 (parallel [(const_int 0)
10511 (const_int 2)])))))]
10514 "&& reload_completed"
10515 [(set (match_dup 0)
10523 (parallel [(const_int 0)
10528 (parallel [(const_int 0)
10532 operands[3] = CONST0_RTX (V2DImode);
10534 [(set_attr "type" "ssemul")
10535 (set_attr "mode" "TI")])
10537 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: word elements selected from V8HI operands 1 and 2 (both
;; selectors start at element 1) are multiplied and combined with V4SI
;; register addend operand 3 to give V4SI operand 0.
10538 (define_insn "xop_pmacsswd"
10539 [(set (match_operand:V4SI 0 "register_operand" "=x")
10544 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10545 (parallel [(const_int 1)
10551 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10552 (parallel [(const_int 1)
10556 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10558 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10559 [(set_attr "type" "ssemuladd")
10560 (set_attr "mode" "TI")])
;; vpmacswd: word elements selected from V8HI operands 1 and 2 (both
;; selectors start at element 1) are multiplied and combined with V4SI
;; register addend operand 3 to give V4SI operand 0.
10562 (define_insn "xop_pmacswd"
10563 [(set (match_operand:V4SI 0 "register_operand" "=x")
10568 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10569 (parallel [(const_int 1)
10575 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10576 (parallel [(const_int 1)
10580 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10582 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10583 [(set_attr "type" "ssemuladd")
10584 (set_attr "mode" "TI")])
;; vpmadcsswd: uses two groups of word selections from V8HI operands 1
;; and 2 -- one starting at element 0, one starting at element 1 and
;; ending at element 7 -- together with V4SI register addend operand 3
;; to produce V4SI operand 0.
10586 (define_insn "xop_pmadcsswd"
10587 [(set (match_operand:V4SI 0 "register_operand" "=x")
10593 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10594 (parallel [(const_int 0)
10600 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10601 (parallel [(const_int 0)
10609 (parallel [(const_int 1)
10616 (parallel [(const_int 1)
10619 (const_int 7)])))))
10620 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10622 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10623 [(set_attr "type" "ssemuladd")
10624 (set_attr "mode" "TI")])
;; vpmadcswd: uses two groups of word selections from V8HI operands 1
;; and 2 -- one starting at element 0, one starting at element 1 and
;; ending at element 7 -- together with V4SI register addend operand 3
;; to produce V4SI operand 0.
10626 (define_insn "xop_pmadcswd"
10627 [(set (match_operand:V4SI 0 "register_operand" "=x")
10633 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10634 (parallel [(const_int 0)
10640 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10641 (parallel [(const_int 0)
10649 (parallel [(const_int 1)
10656 (parallel [(const_int 1)
10659 (const_int 7)])))))
10660 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10662 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10663 [(set_attr "type" "sse4arg")
10664 (set_attr "mode" "TI")])
10666 ;; XOP parallel XMM conditional moves
;; vpcmov for every 128-bit mode in SSEMODE, written as an if_then_else
;; with operand 3 as the selector and operands 1 and 2 as the two arms.
;; The two alternatives let either operand 2 or operand 3 be in memory,
;; never both.
10667 (define_insn "xop_pcmov_<mode>"
10668 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10669 (if_then_else:SSEMODE
10670 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10671 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10672 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10674 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10675 [(set_attr "type" "sse4arg")])
;; 256-bit vpcmov for every mode in AVX256MODE, written as an
;; if_then_else with operand 3 as the selector and operands 1 and 2 as
;; the two arms.  Either operand 2 or operand 3 may be in memory, never
;; both.
10677 (define_insn "xop_pcmov_<mode>256"
10678 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10679 (if_then_else:AVX256MODE
10680 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10681 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10682 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10684 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10685 [(set_attr "type" "sse4arg")])
10687 ;; XOP horizontal add/subtract instructions
;; vphaddbw: horizontal add of bytes from V16QI operand 1 (register or
;; memory) into V8HI operand 0.  Two selections are combined, one
;; starting at byte 0 and one running from byte 1 to byte 15.
10688 (define_insn "xop_phaddbw"
10689 [(set (match_operand:V8HI 0 "register_operand" "=x")
10693 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10694 (parallel [(const_int 0)
10705 (parallel [(const_int 1)
10712 (const_int 15)])))))]
10714 "vphaddbw\t{%1, %0|%0, %1}"
10715 [(set_attr "type" "sseiadd1")])
;; vphaddbd: horizontal add of bytes from V16QI operand 1 (register or
;; memory) into V4SI operand 0.  Four selections are combined, starting
;; at bytes 0, 1, 2 and 3 respectively.
10717 (define_insn "xop_phaddbd"
10718 [(set (match_operand:V4SI 0 "register_operand" "=x")
10723 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10724 (parallel [(const_int 0)
10731 (parallel [(const_int 1)
10734 (const_int 13)]))))
10739 (parallel [(const_int 2)
10746 (parallel [(const_int 3)
10749 (const_int 15)]))))))]
10751 "vphaddbd\t{%1, %0|%0, %1}"
10752 [(set_attr "type" "sseiadd1")])
;; vphaddbq: horizontal add of bytes from V16QI operand 1 (register or
;; memory) into V2DI operand 0, built from eight two-element selections.
;; NOTE(review): the selector pairs visible here, (3,7), (9,13) and
;; (11,15), put byte 7 in result element 1 and bytes 9 and 11 in result
;; element 0.  If the instruction sums bytes 0-7 into the low quadword
;; and bytes 8-15 into the high quadword, the pairs should be (i, i+8).
;; Confirm against the AMD XOP definition of VPHADDBQ.
10754 (define_insn "xop_phaddbq"
10755 [(set (match_operand:V2DI 0 "register_operand" "=x")
10761 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10762 (parallel [(const_int 0)
10767 (parallel [(const_int 1)
10773 (parallel [(const_int 2)
10778 (parallel [(const_int 3)
10779 (const_int 7)])))))
10785 (parallel [(const_int 8)
10790 (parallel [(const_int 9)
10791 (const_int 13)]))))
10796 (parallel [(const_int 10)
10801 (parallel [(const_int 11)
10802 (const_int 15)])))))))]
10804 "vphaddbq\t{%1, %0|%0, %1}"
10805 [(set_attr "type" "sseiadd1")])
;; vphaddwd: horizontal add of words from V8HI operand 1 (register or
;; memory) into V4SI operand 0.  Two selections are combined, one
;; starting at word 0 and one running from word 1 to word 7.
10807 (define_insn "xop_phaddwd"
10808 [(set (match_operand:V4SI 0 "register_operand" "=x")
10812 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10813 (parallel [(const_int 0)
10820 (parallel [(const_int 1)
10823 (const_int 7)])))))]
10825 "vphaddwd\t{%1, %0|%0, %1}"
10826 [(set_attr "type" "sseiadd1")])
;; vphaddwq: horizontal add of words from V8HI operand 1 (register or
;; memory) into V2DI operand 0.  Four selections are combined, starting
;; at words 0, 1, 2 and 3 respectively.
10828 (define_insn "xop_phaddwq"
10829 [(set (match_operand:V2DI 0 "register_operand" "=x")
10834 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10835 (parallel [(const_int 0)
10840 (parallel [(const_int 1)
10846 (parallel [(const_int 2)
10851 (parallel [(const_int 3)
10852 (const_int 7)]))))))]
10854 "vphaddwq\t{%1, %0|%0, %1}"
10855 [(set_attr "type" "sseiadd1")])
;; vphadddq: horizontal add of doublewords from V4SI operand 1 (register
;; or memory) into V2DI operand 0.  Two selections are combined, one
;; starting at element 0 and one covering elements 1 and 3.
10857 (define_insn "xop_phadddq"
10858 [(set (match_operand:V2DI 0 "register_operand" "=x")
10862 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10863 (parallel [(const_int 0)
10868 (parallel [(const_int 1)
10869 (const_int 3)])))))]
10871 "vphadddq\t{%1, %0|%0, %1}"
10872 [(set_attr "type" "sseiadd1")])
;; vphaddubw: horizontal add of bytes from V16QI operand 1 (register or
;; memory) into V8HI operand 0.  Two selections are combined, one
;; starting at byte 0 and one running from byte 1 to byte 15.
10874 (define_insn "xop_phaddubw"
10875 [(set (match_operand:V8HI 0 "register_operand" "=x")
10879 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10880 (parallel [(const_int 0)
10891 (parallel [(const_int 1)
10898 (const_int 15)])))))]
10900 "vphaddubw\t{%1, %0|%0, %1}"
10901 [(set_attr "type" "sseiadd1")])
;; vphaddubd: horizontal add of bytes from V16QI operand 1 (register or
;; memory) into V4SI operand 0.  Four selections are combined, starting
;; at bytes 0, 1, 2 and 3 respectively.
10903 (define_insn "xop_phaddubd"
10904 [(set (match_operand:V4SI 0 "register_operand" "=x")
10909 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10910 (parallel [(const_int 0)
10917 (parallel [(const_int 1)
10920 (const_int 13)]))))
10925 (parallel [(const_int 2)
10932 (parallel [(const_int 3)
10935 (const_int 15)]))))))]
10937 "vphaddubd\t{%1, %0|%0, %1}"
10938 [(set_attr "type" "sseiadd1")])
;; vphaddubq: horizontal add of bytes from V16QI operand 1 (register or
;; memory) into V2DI operand 0, built from eight two-element selections.
;; NOTE(review): the selector pairs visible here, (3,7), (9,13) and
;; (11,15), put byte 7 in result element 1 and bytes 9 and 11 in result
;; element 0.  If the instruction sums bytes 0-7 into the low quadword
;; and bytes 8-15 into the high quadword, the pairs should be (i, i+8).
;; Confirm against the AMD XOP definition of VPHADDUBQ.
10940 (define_insn "xop_phaddubq"
10941 [(set (match_operand:V2DI 0 "register_operand" "=x")
10947 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10948 (parallel [(const_int 0)
10953 (parallel [(const_int 1)
10959 (parallel [(const_int 2)
10964 (parallel [(const_int 3)
10965 (const_int 7)])))))
10971 (parallel [(const_int 8)
10976 (parallel [(const_int 9)
10977 (const_int 13)]))))
10982 (parallel [(const_int 10)
10987 (parallel [(const_int 11)
10988 (const_int 15)])))))))]
10990 "vphaddubq\t{%1, %0|%0, %1}"
10991 [(set_attr "type" "sseiadd1")])
;; vphadduwd: horizontal add of words from V8HI operand 1 (register or
;; memory) into V4SI operand 0.  Two selections are combined, one
;; starting at word 0 and one running from word 1 to word 7.
10993 (define_insn "xop_phadduwd"
10994 [(set (match_operand:V4SI 0 "register_operand" "=x")
10998 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10999 (parallel [(const_int 0)
11006 (parallel [(const_int 1)
11009 (const_int 7)])))))]
11011 "vphadduwd\t{%1, %0|%0, %1}"
11012 [(set_attr "type" "sseiadd1")])
;; vphadduwq: horizontal add of words from V8HI operand 1 (register or
;; memory) into V2DI operand 0.  Four selections are combined, starting
;; at words 0, 1, 2 and 3 respectively.
11014 (define_insn "xop_phadduwq"
11015 [(set (match_operand:V2DI 0 "register_operand" "=x")
11020 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11021 (parallel [(const_int 0)
11026 (parallel [(const_int 1)
11032 (parallel [(const_int 2)
11037 (parallel [(const_int 3)
11038 (const_int 7)]))))))]
11040 "vphadduwq\t{%1, %0|%0, %1}"
11041 [(set_attr "type" "sseiadd1")])
;; vphaddudq: horizontal add of doublewords from V4SI operand 1
;; (register or memory) into V2DI operand 0.  Two selections are
;; combined, one starting at element 0 and one covering elements 1 and 3.
11043 (define_insn "xop_phaddudq"
11044 [(set (match_operand:V2DI 0 "register_operand" "=x")
11048 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11049 (parallel [(const_int 0)
11054 (parallel [(const_int 1)
11055 (const_int 3)])))))]
11057 "vphaddudq\t{%1, %0|%0, %1}"
11058 [(set_attr "type" "sseiadd1")])
;; vphsubbw: horizontal subtract on bytes of V16QI operand 1 (register
;; or memory), producing V8HI operand 0.  Two selections are involved,
;; one starting at byte 0 and one running from byte 1 to byte 15.
11060 (define_insn "xop_phsubbw"
11061 [(set (match_operand:V8HI 0 "register_operand" "=x")
11065 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11066 (parallel [(const_int 0)
11077 (parallel [(const_int 1)
11084 (const_int 15)])))))]
11086 "vphsubbw\t{%1, %0|%0, %1}"
11087 [(set_attr "type" "sseiadd1")])
;; vphsubwd: horizontal subtract on words of V8HI operand 1 (register or
;; memory), producing V4SI operand 0.  Two selections are involved, one
;; starting at word 0 and one running from word 1 to word 7.
11089 (define_insn "xop_phsubwd"
11090 [(set (match_operand:V4SI 0 "register_operand" "=x")
11094 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11095 (parallel [(const_int 0)
11102 (parallel [(const_int 1)
11105 (const_int 7)])))))]
11107 "vphsubwd\t{%1, %0|%0, %1}"
11108 [(set_attr "type" "sseiadd1")])
;; XOP vphsubdq.  Operand 1 is the V4SI source (register or memory) and
;; operand 0 is the V2DI destination register.
11110 (define_insn "xop_phsubdq"
11111 [(set (match_operand:V2DI 0 "register_operand" "=x")
11115 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11116 (parallel [(const_int 0)
11121 (parallel [(const_int 1)
11122 (const_int 3)])))))]
11124 "vphsubdq\t{%1, %0|%0, %1}"
11125 [(set_attr "type" "sseiadd1")])
11127 ;; XOP permute instructions
;; Generic vpperm, modelled as UNSPEC_XOP_PERMUTE over three V16QI inputs.
;; Operand 1 must be a register; at most one of operands 2 and 3 may be
;; memory (enforced by both the insn condition and the two alternatives).
11128 (define_insn "xop_pperm"
11129 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11131 [(match_operand:V16QI 1 "register_operand" "x,x")
11132 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11133 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11134 UNSPEC_XOP_PERMUTE))]
11135 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11136 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11137 [(set_attr "type" "sse4arg")
11138 (set_attr "mode" "TI")])
11140 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used as a pack: two V2DI inputs (operands 1 and 2) produce one
;; V4SI result.  Operand 3 is the V16QI selector, attached with a USE.
;; Operands 2 and 3 may not both be memory.
11141 (define_insn "xop_pperm_pack_v2di_v4si"
11142 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11145 (match_operand:V2DI 1 "register_operand" "x,x"))
11147 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11148 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11149 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11150 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11151 [(set_attr "type" "sse4arg")
11152 (set_attr "mode" "TI")])
;; vpperm used as a pack: two V4SI inputs (operands 1 and 2) produce one
;; V8HI result.  Operand 3 is the V16QI selector, attached with a USE.
;; Operands 2 and 3 may not both be memory.
11154 (define_insn "xop_pperm_pack_v4si_v8hi"
11155 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11158 (match_operand:V4SI 1 "register_operand" "x,x"))
11160 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11161 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11162 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11163 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11164 [(set_attr "type" "sse4arg")
11165 (set_attr "mode" "TI")])
;; vpperm used as a pack: two V8HI inputs (operands 1 and 2) produce one
;; V16QI result.  Operand 3 is the V16QI selector, attached with a USE.
;; Operands 2 and 3 may not both be memory.
11167 (define_insn "xop_pperm_pack_v8hi_v16qi"
11168 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11171 (match_operand:V8HI 1 "register_operand" "x,x"))
11173 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11174 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11175 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11176 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11177 [(set_attr "type" "sse4arg")
11178 (set_attr "mode" "TI")])
11180 ;; XOP packed rotate instructions
;; Vector rotate-left by an SImode count (operand 2).  When the count is
;; not accepted by const_0_to_<sserotatemax>_operand, it is converted to
;; the element mode if its mode differs, replicated into all
;; <ssescalarnum> lanes of a fresh vector register through vec_init<mode>,
;; and the variable-count xop_vrotl<mode>3 pattern is emitted with it.
11181 (define_expand "rotl<mode>3"
11182 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11183 (rotate:SSEMODE1248
11184 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11185 (match_operand:SI 2 "general_operand")))]
11188 /* If we were given a scalar, convert it to parallel */
11189 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11191 rtvec vs = rtvec_alloc (<ssescalarnum>);
11192 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11193 rtx reg = gen_reg_rtx (<MODE>mode);
11194 rtx op2 = operands[2];
11197 if (GET_MODE (op2) != <ssescalarmode>mode)
11199 op2 = gen_reg_rtx (<ssescalarmode>mode);
11200 convert_move (op2, operands[2], false);
11203 for (i = 0; i < <ssescalarnum>; i++)
11204 RTVEC_ELT (vs, i) = op2;
11206 emit_insn (gen_vec_init<mode> (reg, par));
11207 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Vector rotate-right by an SImode count (operand 2).  When the count is
;; not accepted by const_0_to_<sserotatemax>_operand, it is converted to
;; the element mode if its mode differs and replicated into all lanes of
;; "reg" through vec_init<mode>.  That vector is then negated into "neg",
;; and the rotate is emitted as xop_vrotl<mode>3 with the negated counts.
11212 (define_expand "rotr<mode>3"
11213 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11214 (rotatert:SSEMODE1248
11215 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11216 (match_operand:SI 2 "general_operand")))]
11219 /* If we were given a scalar, convert it to parallel */
11220 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11222 rtvec vs = rtvec_alloc (<ssescalarnum>);
11223 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11224 rtx neg = gen_reg_rtx (<MODE>mode);
11225 rtx reg = gen_reg_rtx (<MODE>mode);
11226 rtx op2 = operands[2];
11229 if (GET_MODE (op2) != <ssescalarmode>mode)
11231 op2 = gen_reg_rtx (<ssescalarmode>mode);
11232 convert_move (op2, operands[2], false);
11235 for (i = 0; i < <ssescalarnum>; i++)
11236 RTVEC_ELT (vs, i) = op2;
11238 emit_insn (gen_vec_init<mode> (reg, par));
11239 emit_insn (gen_neg<mode>2 (neg, reg));
11240 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left by an immediate.  Operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed unchanged as the
;; vprot count; operand 1 may be a register or memory.
11245 (define_insn "xop_rotl<mode>3"
11246 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11247 (rotate:SSEMODE1248
11248 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11249 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11251 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11252 [(set_attr "type" "sseishft")
11253 (set_attr "length_immediate" "1")
11254 (set_attr "mode" "TI")])
;; Rotate-right by an immediate N (operand 2, which must satisfy
;; const_0_to_<sserotatemax>_operand).  The insn is printed as vprot with
;; the rewritten count operands[3] = (element width in bits) - N.
;; The element width is taken from the scalar mode of the vector.  The
;; element count times 8 (<ssescalarnum> * 8) must not be used here: it
;; only equals the element width for V4SI and yields, for example,
;; 16 - N instead of 64 - N for V2DI.
11256 (define_insn "xop_rotr<mode>3"
11257 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11258 (rotatert:SSEMODE1248
11259 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11260 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11263 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11264 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11266 [(set_attr "type" "sseishft")
11267 (set_attr "length_immediate" "1")
11268 (set_attr "mode" "TI")])
;; Per-element variable rotate-right: negates the count vector (operand 2)
;; into a new register and emits xop_vrotl<mode>3 with the negated counts.
11270 (define_expand "vrotr<mode>3"
11271 [(match_operand:SSEMODE1248 0 "register_operand" "")
11272 (match_operand:SSEMODE1248 1 "register_operand" "")
11273 (match_operand:SSEMODE1248 2 "register_operand" "")]
11276 rtx reg = gen_reg_rtx (<MODE>mode);
11277 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11278 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate-left: forwards all three operands unchanged
;; to xop_vrotl<mode>3.
11282 (define_expand "vrotl<mode>3"
11283 [(match_operand:SSEMODE1248 0 "register_operand" "")
11284 (match_operand:SSEMODE1248 1 "register_operand" "")
11285 (match_operand:SSEMODE1248 2 "register_operand" "")]
11288 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot with a vector of per-element counts (operand 2).  The RTL is an
;; if_then_else with a rotate arm and a rotatert arm whose count is
;; (neg operand 2).  Operands 1 and 2 may not both be memory; this is
;; enforced by the insn condition and by the "xm,x" / "x,m" alternatives.
11292 (define_insn "xop_vrotl<mode>3"
11293 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11294 (if_then_else:SSEMODE1248
11296 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11298 (rotate:SSEMODE1248
11299 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11301 (rotatert:SSEMODE1248
11303 (neg:SSEMODE1248 (match_dup 2)))))]
11304 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11305 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11306 [(set_attr "type" "sseishft")
11307 (set_attr "prefix_data16" "0")
11308 (set_attr "prefix_extra" "2")
11309 (set_attr "mode" "TI")])
11311 ;; XOP packed shift instructions.
11312 ;; FIXME: add V2DI back in
;; Per-element logical shift right (SSEMODE124 modes only): negates the
;; count vector (operand 2) and emits xop_lshl<mode>3 with the result.
11313 (define_expand "vlshr<mode>3"
11314 [(match_operand:SSEMODE124 0 "register_operand" "")
11315 (match_operand:SSEMODE124 1 "register_operand" "")
11316 (match_operand:SSEMODE124 2 "register_operand" "")]
11319 rtx neg = gen_reg_rtx (<MODE>mode);
11320 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11321 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right (SSEMODE124 modes only): negates the
;; count vector (operand 2) and emits xop_ashl<mode>3 with the result.
11325 (define_expand "vashr<mode>3"
11326 [(match_operand:SSEMODE124 0 "register_operand" "")
11327 (match_operand:SSEMODE124 1 "register_operand" "")
11328 (match_operand:SSEMODE124 2 "register_operand" "")]
11331 rtx neg = gen_reg_rtx (<MODE>mode);
11332 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11333 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element shift left (SSEMODE124 modes only): forwards all three
;; operands unchanged to xop_ashl<mode>3.
11337 (define_expand "vashl<mode>3"
11338 [(match_operand:SSEMODE124 0 "register_operand" "")
11339 (match_operand:SSEMODE124 1 "register_operand" "")
11340 (match_operand:SSEMODE124 2 "register_operand" "")]
11343 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; vpsha with a vector of per-element counts (operand 2).  The RTL is an
;; if_then_else with an ashift arm and an ashiftrt arm whose count is
;; (neg operand 2).  Operands 1 and 2 may not both be memory.
11347 (define_insn "xop_ashl<mode>3"
11348 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11349 (if_then_else:SSEMODE1248
11351 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11353 (ashift:SSEMODE1248
11354 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11356 (ashiftrt:SSEMODE1248
11358 (neg:SSEMODE1248 (match_dup 2)))))]
11359 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11360 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11361 [(set_attr "type" "sseishft")
11362 (set_attr "prefix_data16" "0")
11363 (set_attr "prefix_extra" "2")
11364 (set_attr "mode" "TI")])
;; vpshl with a vector of per-element counts (operand 2).  The RTL is an
;; if_then_else with an ashift arm and an lshiftrt arm whose count is
;; (neg operand 2).  Operands 1 and 2 may not both be memory.
11366 (define_insn "xop_lshl<mode>3"
11367 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11368 (if_then_else:SSEMODE1248
11370 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11372 (ashift:SSEMODE1248
11373 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11375 (lshiftrt:SSEMODE1248
11377 (neg:SSEMODE1248 (match_dup 2)))))]
11378 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11379 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11380 [(set_attr "type" "sseishft")
11381 (set_attr "prefix_data16" "0")
11382 (set_attr "prefix_extra" "2")
11383 (set_attr "mode" "TI")])
11385 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count (operand 2, register or constant).
;; The count is replicated into all 16 lanes of a V16QI register through
;; vec_initv16qi and the shift is emitted as xop_ashlv16qi3.
11386 (define_expand "ashlv16qi3"
11387 [(match_operand:V16QI 0 "register_operand" "")
11388 (match_operand:V16QI 1 "register_operand" "")
11389 (match_operand:SI 2 "nonmemory_operand" "")]
11392 rtvec vs = rtvec_alloc (16);
11393 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11394 rtx reg = gen_reg_rtx (V16QImode);
11396 for (i = 0; i < 16; i++)
11397 RTVEC_ELT (vs, i) = operands[2];
11399 emit_insn (gen_vec_initv16qi (reg, par));
11400 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar count (operand 2, register or
;; constant).  The count is replicated into all 16 lanes of a V16QI
;; register through vec_initv16qi and emitted as xop_lshlv16qi3.
11404 (define_expand "lshlv16qi3"
11405 [(match_operand:V16QI 0 "register_operand" "")
11406 (match_operand:V16QI 1 "register_operand" "")
11407 (match_operand:SI 2 "nonmemory_operand" "")]
11410 rtvec vs = rtvec_alloc (16);
11411 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11412 rtx reg = gen_reg_rtx (V16QImode);
11414 for (i = 0; i < 16; i++)
11415 RTVEC_ELT (vs, i) = operands[2];
11417 emit_insn (gen_vec_initv16qi (reg, par));
11418 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count (operand 2), emitted as
;; xop_ashlv16qi3 with a negated count.  For a CONST_INT count the
;; negation is folded into the element before it is replicated by
;; vec_initv16qi.  For a non-constant count the replicated vector is
;; negated with negv16qi2 and the negated vector is used instead.
11422 (define_expand "ashrv16qi3"
11423 [(match_operand:V16QI 0 "register_operand" "")
11424 (match_operand:V16QI 1 "register_operand" "")
11425 (match_operand:SI 2 "nonmemory_operand" "")]
11428 rtvec vs = rtvec_alloc (16);
11429 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11430 rtx reg = gen_reg_rtx (V16QImode);
11432 rtx ele = ((CONST_INT_P (operands[2]))
11433 ? GEN_INT (- INTVAL (operands[2]))
11436 for (i = 0; i < 16; i++)
11437 RTVEC_ELT (vs, i) = ele;
11439 emit_insn (gen_vec_initv16qi (reg, par));
11441 if (!CONST_INT_P (operands[2]))
11443 rtx neg = gen_reg_rtx (V16QImode);
11444 emit_insn (gen_negv16qi2 (neg, reg));
11445 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11448 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar count (operand 2), emitted as
;; xop_ashlv2di3 with a negated count.  The count is negated as a scalar:
;; a CONST_INT is folded with GEN_INT, a value whose mode is not DImode is
;; first converted to DImode and then negated with negdi2, and any other
;; value is negated with negdi2 directly.  The negated scalar is placed in
;; both lanes through vec_initv2di.
11453 (define_expand "ashrv2di3"
11454 [(match_operand:V2DI 0 "register_operand" "")
11455 (match_operand:V2DI 1 "register_operand" "")
11456 (match_operand:DI 2 "nonmemory_operand" "")]
11459 rtvec vs = rtvec_alloc (2);
11460 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11461 rtx reg = gen_reg_rtx (V2DImode);
11464 if (CONST_INT_P (operands[2]))
11465 ele = GEN_INT (- INTVAL (operands[2]));
11466 else if (GET_MODE (operands[2]) != DImode)
11468 rtx move = gen_reg_rtx (DImode);
11469 ele = gen_reg_rtx (DImode);
11470 convert_move (move, operands[2], false);
11471 emit_insn (gen_negdi2 (ele, move));
11475 ele = gen_reg_rtx (DImode);
11476 emit_insn (gen_negdi2 (ele, operands[2]));
11479 RTVEC_ELT (vs, 0) = ele;
11480 RTVEC_ELT (vs, 1) = ele;
11481 emit_insn (gen_vec_initv2di (reg, par));
11482 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11486 ;; XOP FRCZ support
;; vfrcz on the SSEMODEF2P modes: operand 1 (register or memory) is the
;; source and operand 0 is the destination register.
11488 (define_insn "xop_frcz<mode>2"
11489 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11491 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11494 "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11495 [(set_attr "type" "ssecvt1")
11496 (set_attr "mode" "<MODE>")])
;; vec_merge form of vfrcz.  Operand 2 (register or memory) is the source
;; of the operation; operand 1 is tied to the destination through the "0"
;; constraint and supplies the other vec_merge input.
11499 (define_insn "xop_vmfrcz<mode>2"
11500 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11501 (vec_merge:SSEMODEF2P
11503 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11505 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11508 "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11509 [(set_attr "type" "ssecvt1")
11510 (set_attr "mode" "<MODE>")])
;; vfrcz on the FMA4MODEF4 modes: operand 1 (register or memory) is the
;; source and operand 0 is the destination register.
11512 (define_insn "xop_frcz<mode>2256"
11513 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11515 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11518 "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
11519 [(set_attr "type" "ssecvt1")
11520 (set_attr "mode" "<MODE>")])
;; XOP integer vector compare.  Operator 1 must satisfy
;; ix86_comparison_int_operator and is spliced into the vpcom mnemonic
;; through the %Y1 operand modifier.  Operand 2 is a register and operand 3
;; may be a register or memory.
11522 (define_insn "xop_maskcmp<mode>3"
11523 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11524 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11525 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11526 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11528 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11529 [(set_attr "type" "sse4arg")
11530 (set_attr "prefix_data16" "0")
11531 (set_attr "prefix_rep" "0")
11532 (set_attr "prefix_extra" "2")
11533 (set_attr "length_immediate" "1")
11534 (set_attr "mode" "TI")])
;; XOP unsigned integer vector compare.  Operator 1 must satisfy
;; ix86_comparison_uns_operator and is spliced into the vpcom...u mnemonic
;; through the %Y1 operand modifier.
11536 (define_insn "xop_maskcmp_uns<mode>3"
11537 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11538 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11539 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11540 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11542 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11543 [(set_attr "type" "ssecmp")
11544 (set_attr "prefix_data16" "0")
11545 (set_attr "prefix_rep" "0")
11546 (set_attr "prefix_extra" "2")
11547 (set_attr "length_immediate" "1")
11548 (set_attr "mode" "TI")])
11550 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11551 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11552 ;; the exact instruction generated for the intrinsic.
;; Same output template as the plain unsigned compare, but the comparison
;; is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
11553 (define_insn "xop_maskcmp_uns2<mode>3"
11554 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11555 (unspec:SSEMODE1248
11556 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11557 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11558 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11559 UNSPEC_XOP_UNSIGNED_CMP))]
11561 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11562 [(set_attr "type" "ssecmp")
11563 (set_attr "prefix_data16" "0")
11564 (set_attr "prefix_extra" "2")
11565 (set_attr "length_immediate" "1")
11566 (set_attr "mode" "TI")])
11568 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11569 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse, modelled as UNSPEC_XOP_TRUEFALSE.  The constant
;; in operand 3 picks the mnemonic: nonzero emits vpcomtrue, zero emits
;; vpcomfalse.
11570 (define_insn "xop_pcom_tf<mode>3"
11571 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11572 (unspec:SSEMODE1248
11573 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11574 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11575 (match_operand:SI 3 "const_int_operand" "n")]
11576 UNSPEC_XOP_TRUEFALSE))]
11579 return ((INTVAL (operands[3]) != 0)
11580 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11581 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11583 [(set_attr "type" "ssecmp")
11584 (set_attr "prefix_data16" "0")
11585 (set_attr "prefix_extra" "2")
11586 (set_attr "length_immediate" "1")
11587 (set_attr "mode" "TI")])
;; vpermil2p: two data inputs (operands 1 and 2), a selector vector of mode
;; <avxpermvecmode> (operand 3, register or memory) and an immediate in
;; the range accepted by const_0_to_3_operand (operand 4).
11589 (define_insn "xop_vpermil2<mode>3"
11590 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11592 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11593 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11594 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11595 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11598 "vpermil2p<avxmodesuffixf2c>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11599 [(set_attr "type" "sse4arg")
11600 (set_attr "length_immediate" "1")
11601 (set_attr "mode" "<MODE>")])
11603 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand vaesenc, enabled when both TARGET_AES and TARGET_AVX
;; hold.  Operand 1 is a register; operand 2 may be register or memory.
11604 (define_insn "*avx_aesenc"
11605 [(set (match_operand:V2DI 0 "register_operand" "=x")
11606 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11607 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11609 "TARGET_AES && TARGET_AVX"
11610 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11611 [(set_attr "type" "sselog1")
11612 (set_attr "prefix_extra" "1")
11613 (set_attr "prefix" "vex")
11614 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination ("0"), and
;; operand 2 may be register or memory.
11616 (define_insn "aesenc"
11617 [(set (match_operand:V2DI 0 "register_operand" "=x")
11618 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11619 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11622 "aesenc\t{%2, %0|%0, %2}"
11623 [(set_attr "type" "sselog1")
11624 (set_attr "prefix_extra" "1")
11625 (set_attr "mode" "TI")])
;; Three-operand vaesenclast (UNSPEC_AESENCLAST), enabled when both
;; TARGET_AES and TARGET_AVX hold.
11627 (define_insn "*avx_aesenclast"
11628 [(set (match_operand:V2DI 0 "register_operand" "=x")
11629 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11630 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11631 UNSPEC_AESENCLAST))]
11632 "TARGET_AES && TARGET_AVX"
11633 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11634 [(set_attr "type" "sselog1")
11635 (set_attr "prefix_extra" "1")
11636 (set_attr "prefix" "vex")
11637 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination ("0"), and operand 2 may be register or memory.
11639 (define_insn "aesenclast"
11640 [(set (match_operand:V2DI 0 "register_operand" "=x")
11641 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11642 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11643 UNSPEC_AESENCLAST))]
11645 "aesenclast\t{%2, %0|%0, %2}"
11646 [(set_attr "type" "sselog1")
11647 (set_attr "prefix_extra" "1")
11648 (set_attr "mode" "TI")])
;; Three-operand vaesdec, enabled when both TARGET_AES and TARGET_AVX
;; hold.  Operand 1 is a register; operand 2 may be register or memory.
11650 (define_insn "*avx_aesdec"
11651 [(set (match_operand:V2DI 0 "register_operand" "=x")
11652 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11653 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11655 "TARGET_AES && TARGET_AVX"
11656 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11657 [(set_attr "type" "sselog1")
11658 (set_attr "prefix_extra" "1")
11659 (set_attr "prefix" "vex")
11660 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination ("0"), and
;; operand 2 may be register or memory.
11662 (define_insn "aesdec"
11663 [(set (match_operand:V2DI 0 "register_operand" "=x")
11664 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11665 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11668 "aesdec\t{%2, %0|%0, %2}"
11669 [(set_attr "type" "sselog1")
11670 (set_attr "prefix_extra" "1")
11671 (set_attr "mode" "TI")])
;; Three-operand vaesdeclast (UNSPEC_AESDECLAST), enabled when both
;; TARGET_AES and TARGET_AVX hold.
11673 (define_insn "*avx_aesdeclast"
11674 [(set (match_operand:V2DI 0 "register_operand" "=x")
11675 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11676 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11677 UNSPEC_AESDECLAST))]
11678 "TARGET_AES && TARGET_AVX"
11679 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11680 [(set_attr "type" "sselog1")
11681 (set_attr "prefix_extra" "1")
11682 (set_attr "prefix" "vex")
11683 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination ("0"), and operand 2 may be register or memory.
11685 (define_insn "aesdeclast"
11686 [(set (match_operand:V2DI 0 "register_operand" "=x")
11687 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11688 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11689 UNSPEC_AESDECLAST))]
11691 "aesdeclast\t{%2, %0|%0, %2}"
11692 [(set_attr "type" "sselog1")
11693 (set_attr "prefix_extra" "1")
11694 (set_attr "mode" "TI")])
;; aesimc with a single source (operand 1, register or memory).  The
;; template begins with %v and the "prefix" attribute is maybe_vex.
11696 (define_insn "aesimc"
11697 [(set (match_operand:V2DI 0 "register_operand" "=x")
11698 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11701 "%vaesimc\t{%1, %0|%0, %1}"
11702 [(set_attr "type" "sselog1")
11703 (set_attr "prefix_extra" "1")
11704 (set_attr "prefix" "maybe_vex")
11705 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST).  Operand 1 is the source
;; (register or memory) and operand 2 is an immediate accepted by
;; const_0_to_255_operand.  The template begins with %v and the "prefix"
;; attribute is maybe_vex.
11707 (define_insn "aeskeygenassist"
11708 [(set (match_operand:V2DI 0 "register_operand" "=x")
11709 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11710 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11711 UNSPEC_AESKEYGENASSIST))]
11713 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11714 [(set_attr "type" "sselog1")
11715 (set_attr "prefix_extra" "1")
11716 (set_attr "length_immediate" "1")
11717 (set_attr "prefix" "maybe_vex")
11718 (set_attr "mode" "TI")])
;; Three-operand vpclmulqdq, enabled when both TARGET_PCLMUL and
;; TARGET_AVX hold.  Operand 3 is an immediate accepted by
;; const_0_to_255_operand.
11720 (define_insn "*vpclmulqdq"
11721 [(set (match_operand:V2DI 0 "register_operand" "=x")
11722 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11723 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11724 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11726 "TARGET_PCLMUL && TARGET_AVX"
11727 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11728 [(set_attr "type" "sselog1")
11729 (set_attr "prefix_extra" "1")
11730 (set_attr "length_immediate" "1")
11731 (set_attr "prefix" "vex")
11732 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination ("0"),
;; operand 2 may be register or memory, and operand 3 is an immediate
;; accepted by const_0_to_255_operand.
11734 (define_insn "pclmulqdq"
11735 [(set (match_operand:V2DI 0 "register_operand" "=x")
11736 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11737 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11738 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11741 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11742 [(set_attr "type" "sselog1")
11743 (set_attr "prefix_extra" "1")
11744 (set_attr "length_immediate" "1")
11745 (set_attr "mode" "TI")])
;; Builds operands[0] as a PARALLEL with nregs + 1 elements, where nregs
;; is 16 for TARGET_64BIT and 8 otherwise.  Element 0 is an
;; UNSPEC_VOLATILE; elements 1..nregs are SETs that assign
;; CONST0_RTX (V8SImode) to each SSE register in V8SImode.
11747 (define_expand "avx_vzeroall"
11748 [(match_par_dup 0 [(const_int 0)])]
11751 int nregs = TARGET_64BIT ? 16 : 8;
11754 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11756 XVECEXP (operands[0], 0, 0)
11757 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11760 for (regno = 0; regno < nregs; regno++)
11761 XVECEXP (operands[0], 0, regno + 1)
11762 = gen_rtx_SET (VOIDmode,
11763 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11764 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by vzeroall_operation whose first element
;; is the UNSPECV_VZEROALL unspec_volatile.  Attributes mark the insn as
;; having no modrm byte and no memory access.
11767 (define_insn "*avx_vzeroall"
11768 [(match_parallel 0 "vzeroall_operation"
11769 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11772 [(set_attr "type" "sse")
11773 (set_attr "modrm" "0")
11774 (set_attr "memory" "none")
11775 (set_attr "prefix" "vex")
11776 (set_attr "mode" "OI")])
11778 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Builds operands[0] as a PARALLEL with nregs + 1 elements, where nregs
;; is 16 for TARGET_64BIT and 8 otherwise.  Element 0 is the
;; UNSPECV_VZEROUPPER unspec_volatile; elements 1..nregs are CLOBBERs of
;; each SSE register in V8SImode.
11779 (define_expand "avx_vzeroupper"
11780 [(match_par_dup 0 [(const_int 0)])]
11783 int nregs = TARGET_64BIT ? 16 : 8;
11786 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11788 XVECEXP (operands[0], 0, 0)
11789 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11790 UNSPECV_VZEROUPPER);
11792 for (regno = 0; regno < nregs; regno++)
11793 XVECEXP (operands[0], 0, regno + 1)
11794 = gen_rtx_CLOBBER (VOIDmode,
11795 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; Matches a PARALLEL accepted by vzeroupper_operation whose first element
;; is the UNSPECV_VZEROUPPER unspec_volatile.  Attributes mark the insn as
;; having no modrm byte and no memory access.
11798 (define_insn "*avx_vzeroupper"
11799 [(match_parallel 0 "vzeroupper_operation"
11800 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11803 [(set_attr "type" "sse")
11804 (set_attr "modrm" "0")
11805 (set_attr "memory" "none")
11806 (set_attr "prefix" "vex")
11807 (set_attr "mode" "OI")])
;; Broadcast a scalar into every element of a 256-bit vector.  The memory
;; alternative is emitted as vbroadcasts<suffix>.  After reload, when the
;; source is a register, the insn is split in two: a vec_duplicate into
;; operands[2], which is the half-width-mode view of the destination
;; register itself, followed by a vec_concat of that half with itself.
11809 (define_insn_and_split "vec_dup<mode>"
11810 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11811 (vec_duplicate:AVX256MODE24P
11812 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11815 vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}
11817 "&& reload_completed && REG_P (operands[1])"
11818 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11819 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11821 operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));
11823 [(set_attr "type" "ssemov")
11824 (set_attr "prefix_extra" "1")
11825 (set_attr "prefix" "vex")
11826 (set_attr "mode" "V8SF")])
;; Duplicate a 128-bit half-vector (operand 1) into a 256-bit register.
;; Three alternatives, in constraint order "m,0,?x":
;;   memory source              -> vbroadcastf128
;;   source tied to destination -> vinsertf128 with immediate 1
;;   other register source      -> vperm2f128 with immediate 0
11828 (define_insn "avx_vbroadcastf128_<mode>"
11829 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11830 (vec_concat:AVX256MODE
11831 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11835 vbroadcastf128\t{%1, %0|%0, %1}
11836 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11837 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11838 [(set_attr "type" "ssemov,sselog1,sselog1")
11839 (set_attr "prefix_extra" "1")
11840 (set_attr "length_immediate" "0,1,1")
11841 (set_attr "prefix" "vex")
11842 (set_attr "mode" "V4SF,V8SF,V8SF")])
11844 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11845 ;; If it so happens that the input is in memory, use vbroadcast.
11846 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element "elt" (the constant in operand 3).  The
;; output code switches on which_alternative and has two emit paths.  One
;; re-addresses operand 1 as the SFmode element at byte offset elt * 4 and
;; emits vbroadcastss.  The other emits vpermilps with the immediate
;; elt * 0x55, which repeats the 2-bit index in all four selector fields.
11847 (define_insn "*avx_vperm_broadcast_v4sf"
11848 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11850 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11851 (match_parallel 2 "avx_vbroadcast_operand"
11852 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11855 int elt = INTVAL (operands[3]);
11856 switch (which_alternative)
11860 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11861 return "vbroadcastss\t{%1, %0|%0, %1}";
11863 operands[2] = GEN_INT (elt * 0x55);
11864 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11866 gcc_unreachable ();
11869 [(set_attr "type" "ssemov,ssemov,sselog1")
11870 (set_attr "prefix_extra" "1")
11871 (set_attr "length_immediate" "0,0,1")
11872 (set_attr "prefix" "vex")
11873 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast of element "elt" (the constant in operand 3), split
;; after reload.  One path emits two instructions: avx_vpermil<mode> to
;; replicate the element within its 128-bit lane, then
;; avx_vperm2f128<mode>3 with mask (elt / (<ssescalarnum> / 2)) * 0x11 to
;; copy that lane into both halves of the destination.  The other path
;; re-addresses operand 1 as the selected scalar element, so that the
;; vec_duplicate in the split template loads it directly.
11875 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11876 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11877 (vec_select:AVX256MODEF2P
11878 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11879 (match_parallel 2 "avx_vbroadcast_operand"
11880 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11883 "&& reload_completed"
11884 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11886 rtx op0 = operands[0], op1 = operands[1];
11887 int elt = INTVAL (operands[3]);
11893 /* Shuffle element we care about into all elements of the 128-bit lane.
11894 The other lane gets shuffled too, but we don't care. */
11895 if (<MODE>mode == V4DFmode)
11896 mask = (elt & 1 ? 15 : 0);
11898 mask = (elt & 3) * 0x55;
11899 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11901 /* Shuffle the lane we care about into both lanes of the dest. */
11902 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11903 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11907 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11908 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; vpermil expander for the AVXMODEFDP modes.  Decodes the immediate mask
;; (operand 2) into an explicit vec_select PARALLEL with one mask bit per
;; element: bits 0 and 1 select within elements 0..1, and for V4DFmode
;; bits 2 and 3 select within elements 2..3 (hence the "+ 2").
11911 (define_expand "avx_vpermil<mode>"
11912 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11913 (vec_select:AVXMODEFDP
11914 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11915 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11918 int mask = INTVAL (operands[2]);
11919 rtx perm[<ssescalarnum>];
11921 perm[0] = GEN_INT (mask & 1);
11922 perm[1] = GEN_INT ((mask >> 1) & 1);
11923 if (<MODE>mode == V4DFmode)
11925 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11926 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11930 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermil expander for the AVXMODEFSP modes.  Decodes the immediate mask
;; (operand 2) into an explicit vec_select PARALLEL with one 2-bit field
;; per element for elements 0..3.  For V8SFmode the same four fields are
;; reused for elements 4..7, offset by 4.
11933 (define_expand "avx_vpermil<mode>"
11934 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11935 (vec_select:AVXMODEFSP
11936 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11937 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11940 int mask = INTVAL (operands[2]);
11941 rtx perm[<ssescalarnum>];
11943 perm[0] = GEN_INT (mask & 3);
11944 perm[1] = GEN_INT ((mask >> 2) & 3);
11945 perm[2] = GEN_INT ((mask >> 4) & 3);
11946 perm[3] = GEN_INT ((mask >> 6) & 3);
11947 if (<MODE>mode == V8SFmode)
11949 perm[4] = GEN_INT ((mask & 3) + 4);
11950 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11951 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11952 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11956 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a vec_select whose PARALLEL is accepted by
;; avx_vpermilp_<mode>_operand.  At output time the immediate is
;; recomputed as avx_vpermilp_parallel (operands[2], <MODE>mode) - 1 and
;; replaces operands[2] before vpermilp is printed.
11959 (define_insn "*avx_vpermilp<mode>"
11960 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11961 (vec_select:AVXMODEF2P
11962 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11963 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11964 [(match_operand 3 "const_int_operand" "")])))]
11967 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11968 operands[2] = GEN_INT (mask);
11969 return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
11971 [(set_attr "type" "sselog")
11972 (set_attr "prefix_extra" "1")
11973 (set_attr "length_immediate" "1")
11974 (set_attr "prefix" "vex")
11975 (set_attr "mode" "<MODE>")])
;; vpermilp with a variable selector: operand 1 is the data register and
;; operand 2 is a selector vector of mode <avxpermvecmode> (register or
;; memory).
11977 (define_insn "avx_vpermilvar<mode>3"
11978 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11980 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11981 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11984 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11985 [(set_attr "type" "sselog")
11986 (set_attr "prefix_extra" "1")
11987 (set_attr "prefix" "vex")
11988 (set_attr "mode" "<MODE>")])
;; vperm2f128 expander.  The template is the UNSPEC_VPERMIL2F128 form.
;; When neither bit 3 nor bit 7 of the immediate is set
;; ((mask & 0x88) == 0), the expander instead builds a vec_select over the
;; vec_concat of operands 1 and 2 with explicit element indices.  The low
;; half of the result takes nelt2 consecutive elements starting at
;; (mask & 3) * nelt2; the high half starts at ((mask >> 4) & 3) * nelt2.
11990 (define_expand "avx_vperm2f128<mode>3"
11991 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11992 (unspec:AVX256MODE2P
11993 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11994 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11995 (match_operand:SI 3 "const_0_to_255_operand" "")]
11996 UNSPEC_VPERMIL2F128))]
11999 int mask = INTVAL (operands[3]);
12000 if ((mask & 0x88) == 0)
12002 rtx perm[<ssescalarnum>], t1, t2;
12003 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12005 base = (mask & 3) * nelt2;
12006 for (i = 0; i < nelt2; ++i)
12007 perm[i] = GEN_INT (base + i);
12009 base = ((mask >> 4) & 3) * nelt2;
12010 for (i = 0; i < nelt2; ++i)
12011 perm[i + nelt2] = GEN_INT (base + i);
12013 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
12014 operands[1], operands[2]);
12015 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12016 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12017 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12023 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12024 ;; means that in order to represent this properly in rtl we'd have to
12025 ;; nest *another* vec_concat with a zero operand and do the select from
12026 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC_VPERMIL2F128 form of vperm2f128: the immediate (operand 3,
;; accepted by const_0_to_255_operand) is printed unchanged.
12027 (define_insn "*avx_vperm2f128<mode>_full"
12028 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12029 (unspec:AVX256MODE2P
12030 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12031 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12032 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12033 UNSPEC_VPERMIL2F128))]
12035 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12036 [(set_attr "type" "sselog")
12037 (set_attr "prefix_extra" "1")
12038 (set_attr "length_immediate" "1")
12039 (set_attr "prefix" "vex")
12040 (set_attr "mode" "V8SF")])
;; vec_select form of vperm2f128: selects from the vec_concat of operands
;; 1 and 2 using a PARALLEL accepted by avx_vperm2f128_<mode>_operand.
;; At output time the immediate is recomputed as
;; avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1 and replaces
;; operands[3] before the instruction is printed.
12042 (define_insn "*avx_vperm2f128<mode>_nozero"
12043 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12044 (vec_select:AVX256MODE2P
12045 (vec_concat:<ssedoublesizemode>
12046 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12047 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12048 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
12049 [(match_operand 4 "const_int_operand" "")])))]
12052 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12053 operands[3] = GEN_INT (mask);
12054 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12056 [(set_attr "type" "sselog")
12057 (set_attr "prefix_extra" "1")
12058 (set_attr "length_immediate" "1")
12059 (set_attr "prefix" "vex")
12060 (set_attr "mode" "V8SF")])
;; vinsertf128 expander: inserts the half-width vector in operand 2 into
;; the 256-bit vector in operand 1.  It switches on the immediate in
;; operand 3 (accepted by const_0_to_1_operand) and emits either
;; vec_set_lo_<mode> or vec_set_hi_<mode>; any other value is unreachable.
12062 (define_expand "avx_vinsertf128<mode>"
12063 [(match_operand:AVX256MODE 0 "register_operand" "")
12064 (match_operand:AVX256MODE 1 "register_operand" "")
12065 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12066 (match_operand:SI 3 "const_0_to_1_operand" "")]
12069 switch (INTVAL (operands[3]))
12072 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12076 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12080 gcc_unreachable ();
;; Replace the low half of a 4-element 256-bit vector.  The result is the
;; vec_concat of operand 2 (new low half, register or memory) with
;; elements 2..3 of operand 1.  Emitted as vinsertf128 with immediate 0.
12085 (define_insn "vec_set_lo_<mode>"
12086 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12087 (vec_concat:AVX256MODE4P
12088 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12089 (vec_select:<avxhalfvecmode>
12090 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12091 (parallel [(const_int 2) (const_int 3)]))))]
12093 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12094 [(set_attr "type" "sselog")
12095 (set_attr "prefix_extra" "1")
12096 (set_attr "length_immediate" "1")
12097 (set_attr "prefix" "vex")
12098 (set_attr "mode" "V8SF")])
;; Replace the high half of a 256-bit vector, for the modes in AVX256MODE4P.
;; The result is the vec_concat of elements 0 and 1 selected from operand 1
;; (the preserved low half) with operand 2 (the new high half, register or
;; memory).  Emitted as vinsertf128 with immediate 0x1.
;; The "mode" attribute is fixed at V8SF regardless of the iterator mode.
;; NOTE(review): AVX256MODE4P is defined outside this view; the two-element
;; select implies four-element 256-bit modes -- confirm.
12100 (define_insn "vec_set_hi_<mode>"
12101 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12102 (vec_concat:AVX256MODE4P
12103 (vec_select:<avxhalfvecmode>
12104 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12105 (parallel [(const_int 0) (const_int 1)]))
12106 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12108 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12109 [(set_attr "type" "sselog")
12110 (set_attr "prefix_extra" "1")
12111 (set_attr "length_immediate" "1")
12112 (set_attr "prefix" "vex")
12113 (set_attr "mode" "V8SF")])
;; Replace the low half of a 256-bit vector, for the modes in AVX256MODE8P.
;; The result is the vec_concat of operand 2 (the new low half, register or
;; memory) with elements 4..7 selected from operand 1 (the preserved high
;; half).  Emitted as vinsertf128 with immediate 0x0.
;; The "mode" attribute is fixed at V8SF regardless of the iterator mode.
;; NOTE(review): AVX256MODE8P is defined outside this view; the four-element
;; select implies eight-element 256-bit modes -- confirm.
12115 (define_insn "vec_set_lo_<mode>"
12116 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12117 (vec_concat:AVX256MODE8P
12118 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12119 (vec_select:<avxhalfvecmode>
12120 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12121 (parallel [(const_int 4) (const_int 5)
12122 (const_int 6) (const_int 7)]))))]
12124 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12125 [(set_attr "type" "sselog")
12126 (set_attr "prefix_extra" "1")
12127 (set_attr "length_immediate" "1")
12128 (set_attr "prefix" "vex")
12129 (set_attr "mode" "V8SF")])
;; Replace the high half of a 256-bit vector, for the modes in AVX256MODE8P.
;; The result is the vec_concat of elements 0..3 selected from operand 1 (the
;; preserved low half) with operand 2 (the new high half, register or memory).
;; Emitted as vinsertf128 with immediate 0x1.
;; The "mode" attribute is fixed at V8SF regardless of the iterator mode.
;; NOTE(review): AVX256MODE8P is defined outside this view; the four-element
;; select implies eight-element 256-bit modes -- confirm.
12131 (define_insn "vec_set_hi_<mode>"
12132 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12133 (vec_concat:AVX256MODE8P
12134 (vec_select:<avxhalfvecmode>
12135 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12136 (parallel [(const_int 0) (const_int 1)
12137 (const_int 2) (const_int 3)]))
12138 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12140 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12141 [(set_attr "type" "sselog")
12142 (set_attr "prefix_extra" "1")
12143 (set_attr "length_immediate" "1")
12144 (set_attr "prefix" "vex")
12145 (set_attr "mode" "V8SF")])
;; V16HI low-half insert, emitted as vinsertf128 with immediate 0x0.
;;   operand 0: V16HI destination register
;;   operand 1: V16HI source register; the parallel names its elements 8..15
;;   operand 2: V8HI value, register or memory
;; NOTE(review): the closing parentheses imply two enclosing forms around
;; operand 2 and around operand 1's parallel that are not visible here;
;; presumably vec_concat:V16HI and vec_select:V8HI, mirroring
;; vec_set_lo_<mode> -- confirm against upstream.
12147 (define_insn "vec_set_lo_v16hi"
12148 [(set (match_operand:V16HI 0 "register_operand" "=x")
12150 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12152 (match_operand:V16HI 1 "register_operand" "x")
12153 (parallel [(const_int 8) (const_int 9)
12154 (const_int 10) (const_int 11)
12155 (const_int 12) (const_int 13)
12156 (const_int 14) (const_int 15)]))))]
12158 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12159 [(set_attr "type" "sselog")
12160 (set_attr "prefix_extra" "1")
12161 (set_attr "length_immediate" "1")
12162 (set_attr "prefix" "vex")
12163 (set_attr "mode" "V8SF")])
;; V16HI high-half insert, emitted as vinsertf128 with immediate 0x1.
;;   operand 0: V16HI destination register
;;   operand 1: V16HI source register; the parallel names its elements 0..7
;;   operand 2: V8HI value, register or memory
;; NOTE(review): the closing parentheses imply two enclosing forms ahead of
;; operand 1 that are not visible here; presumably vec_concat:V16HI and
;; vec_select:V8HI, mirroring vec_set_hi_<mode> -- confirm against upstream.
12165 (define_insn "vec_set_hi_v16hi"
12166 [(set (match_operand:V16HI 0 "register_operand" "=x")
12169 (match_operand:V16HI 1 "register_operand" "x")
12170 (parallel [(const_int 0) (const_int 1)
12171 (const_int 2) (const_int 3)
12172 (const_int 4) (const_int 5)
12173 (const_int 6) (const_int 7)]))
12174 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12176 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12177 [(set_attr "type" "sselog")
12178 (set_attr "prefix_extra" "1")
12179 (set_attr "length_immediate" "1")
12180 (set_attr "prefix" "vex")
12181 (set_attr "mode" "V8SF")])
;; V32QI low-half insert, emitted as vinsertf128 with immediate 0x0.
;;   operand 0: V32QI destination register
;;   operand 1: V32QI source register; the parallel names its elements 16..31
;;   operand 2: V16QI value, register or memory
;; NOTE(review): the closing parentheses imply two enclosing forms around
;; operand 2 and around operand 1's parallel that are not visible here;
;; presumably vec_concat:V32QI and vec_select:V16QI, mirroring
;; vec_set_lo_<mode> -- confirm against upstream.
12183 (define_insn "vec_set_lo_v32qi"
12184 [(set (match_operand:V32QI 0 "register_operand" "=x")
12186 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12188 (match_operand:V32QI 1 "register_operand" "x")
12189 (parallel [(const_int 16) (const_int 17)
12190 (const_int 18) (const_int 19)
12191 (const_int 20) (const_int 21)
12192 (const_int 22) (const_int 23)
12193 (const_int 24) (const_int 25)
12194 (const_int 26) (const_int 27)
12195 (const_int 28) (const_int 29)
12196 (const_int 30) (const_int 31)]))))]
12198 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12199 [(set_attr "type" "sselog")
12200 (set_attr "prefix_extra" "1")
12201 (set_attr "length_immediate" "1")
12202 (set_attr "prefix" "vex")
12203 (set_attr "mode" "V8SF")])
;; V32QI high-half insert, emitted as vinsertf128 with immediate 0x1.
;;   operand 0: V32QI destination register
;;   operand 1: V32QI source register; the parallel names its elements 0..15
;;   operand 2: V16QI value, register or memory
;; NOTE(review): the closing parentheses imply two enclosing forms ahead of
;; operand 1 that are not visible here; presumably vec_concat:V32QI and
;; vec_select:V16QI, mirroring vec_set_hi_<mode> -- confirm against upstream.
12205 (define_insn "vec_set_hi_v32qi"
12206 [(set (match_operand:V32QI 0 "register_operand" "=x")
12209 (match_operand:V32QI 1 "register_operand" "x")
12210 (parallel [(const_int 0) (const_int 1)
12211 (const_int 2) (const_int 3)
12212 (const_int 4) (const_int 5)
12213 (const_int 6) (const_int 7)
12214 (const_int 8) (const_int 9)
12215 (const_int 10) (const_int 11)
12216 (const_int 12) (const_int 13)
12217 (const_int 14) (const_int 15)]))
12218 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12220 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12221 [(set_attr "type" "sselog")
12222 (set_attr "prefix_extra" "1")
12223 (set_attr "length_immediate" "1")
12224 (set_attr "prefix" "vex")
12225 (set_attr "mode" "V8SF")])
;; Masked vector load, emitted as vmaskmovp<suffix> with a register
;; destination.
;;   operand 0: destination register
;;   operand 1: memory source
;;   operand 2: register printed in the middle position of the template, which
;;              is the mask position of the load form of the instruction
;; NOTE(review): the form wrapping operands 1 and 2 and its closing lines are
;; not visible here; presumably an unspec, mirroring UNSPEC_MASKSTORE in
;; avx_maskstorep -- confirm against upstream.
12227 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12228 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12230 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12231 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12235 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
12236 [(set_attr "type" "sselog1")
12237 (set_attr "prefix_extra" "1")
12238 (set_attr "prefix" "vex")
12239 (set_attr "mode" "<MODE>")])
;; Masked vector store (UNSPEC_MASKSTORE), emitted as vmaskmovp<suffix> with a
;; memory destination.
;;   operand 0: memory destination
;;   operand 1: register printed in the middle position of the template, which
;;              is the mask position of the store form of the instruction
;;   operand 2: register printed in the source position (the data to store)
;; NOTE(review): the opening of the form that carries UNSPEC_MASKSTORE is not
;; visible here, so the complete unspec operand list cannot be checked --
;; confirm against upstream.
12241 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12242 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12244 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12245 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12247 UNSPEC_MASKSTORE))]
12249 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12250 [(set_attr "type" "sselog1")
12251 (set_attr "prefix_extra" "1")
12252 (set_attr "prefix" "vex")
12253 (set_attr "mode" "<MODE>")])
;; Half-width to 256-bit conversion kept opaque as an unspec: operand 0 is a
;; 256-bit register, operand 1 the half-width source.
;;   alternative 0 ("0"):  source tied to the destination register
;;   alternative 1 ("xm"): source in another register or in memory
;; The output code switches on which_alternative and then on the insn's "mode"
;; attribute to choose vmovaps, vmovapd or vmovdqa; the destination is printed
;; with the %x modifier.
;; The "length" attribute is overridden for alternative 0 only; alternative 1
;; keeps the default ("*").
;; NOTE(review): the unspec name, the case labels and the alternative-0 length
;; value are not visible here; presumably alternative 0 emits no code and has
;; length 0 -- confirm against upstream.
12255 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12256 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
12257 (unspec:AVX256MODE2P
12258 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
12262 switch (which_alternative)
12267 switch (get_attr_mode (insn))
12270 return "vmovaps\t{%1, %x0|%x0, %1}";
12272 return "vmovapd\t{%1, %x0|%x0, %1}";
12274 return "vmovdqa\t{%1, %x0|%x0, %1}";
12281 gcc_unreachable ();
12283 [(set_attr "type" "ssemov")
12284 (set_attr "prefix" "vex")
12285 (set_attr "mode" "<avxvecmode>")
12286 (set (attr "length")
12287 (if_then_else (eq_attr "alternative" "0")
12289 (const_string "*")))])
;; 256-bit to half-width conversion kept opaque as an unspec: operand 0 is a
;; half-width register, operand 1 the 256-bit source.
;;   alternative 0 ("0"):  source tied to the destination register
;;   alternative 1 ("xm"): source in another register or in memory
;; The output code switches on which_alternative and then on the insn's "mode"
;; attribute to choose vmovaps, vmovapd or vmovdqa; the source is printed with
;; the %x modifier.
;; The "length" attribute is overridden for alternative 0 only; alternative 1
;; keeps the default ("*").
;; NOTE(review): the unspec name, the case labels and the alternative-0 length
;; value are not visible here; presumably alternative 0 emits no code and has
;; length 0 -- confirm against upstream.
12291 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
12292 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
12293 (unspec:<avxhalfvecmode>
12294 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
12298 switch (which_alternative)
12303 switch (get_attr_mode (insn))
12306 return "vmovaps\t{%x1, %0|%0, %x1}";
12308 return "vmovapd\t{%x1, %0|%0, %x1}";
12310 return "vmovdqa\t{%x1, %0|%0, %x1}";
12317 gcc_unreachable ();
12319 [(set_attr "type" "ssemov")
12320 (set_attr "prefix" "vex")
12321 (set_attr "mode" "<avxvecmode>")
12322 (set (attr "length")
12323 (if_then_else (eq_attr "alternative" "0")
12325 (const_string "*")))])
;; Vector initialization expander for every mode in AVX256MODE.
;;   operand 0: destination register
;;   operand 1: initializer, matched with no predicate and no mode
;; All the work is handed to ix86_expand_vector_init, called with false as its
;; first argument followed by operands 0 and 1.
;; NOTE(review): the meaning of that leading false flag comes from the callee,
;; which is defined outside this file -- confirm there.
12327 (define_expand "vec_init<mode>"
12328 [(match_operand:AVX256MODE 0 "register_operand" "")
12329 (match_operand 1 "" "")]
12332 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Build a 256-bit vector as the vec_concat of two half-width values:
;; operand 1 (register) is the low half, operand 2 the high half.
;;   alternative 0 (operand 2 "xm"): vinsertf128 with immediate 0x1, placing
;;     operand 2 in the upper lane; operand 1 is printed with the %t modifier.
;;   alternative 1 (operand 2 "C"): a single vmovaps/vmovapd/vmovdqa of
;;     operand 1 into the %x form of operand 0, chosen by the "mode" attribute.
;; The per-alternative attributes follow that split: type sselog vs. ssemov,
;; and prefix_extra / length_immediate set only for alternative 0.
;; NOTE(review): "C" is presumably the zero-constant constraint, and the move
;; presumably relies on a VEX-encoded 128-bit move clearing the upper lane --
;; confirm against constraints.md and the instruction reference.
12336 (define_insn "*vec_concat<mode>_avx"
12337 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12338 (vec_concat:AVX256MODE
12339 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12340 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12343 switch (which_alternative)
12346 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12348 switch (get_attr_mode (insn))
12351 return "vmovaps\t{%1, %x0|%x0, %1}";
12353 return "vmovapd\t{%1, %x0|%x0, %1}";
12355 return "vmovdqa\t{%1, %x0|%x0, %1}";
12358 gcc_unreachable ();
12361 [(set_attr "type" "sselog,ssemov")
12362 (set_attr "prefix_extra" "1,*")
12363 (set_attr "length_immediate" "1,*")
12364 (set_attr "prefix" "vex")
12365 (set_attr "mode" "<avxvecmode>")])