1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE
23 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
25 ;; All 16-byte vector modes handled by SSE
26 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
27 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
;; AVX256MODEI lists the four 32-byte integer vector modes.
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
;; AVX256MODE is the AVX256MODEI list followed by V8SF and V4DF.  In this
;; file it drives the 32-byte "mov<mode>", "push<mode>1" and
;; "movmisalign<mode>" expanders.
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
;; Used by "avx_movdqu<avxmodesuffix>" and "avx_lddqu<avxmodesuffix>".
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
;; Used by the integer variant of "avx_movnt<mode>" (vmovntdq).
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
;; AVXMODE lists the six 16-byte modes (same list as SSEMODE) followed by
;; the six 32-byte modes (same list as AVX256MODE).
42 (define_mode_iterator AVXMODE
43 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; AVXMODE16 is AVXMODE with V1TI added among the 16-byte modes; it is the
;; iterator used by "*avx_mov<mode>_internal".
44 (define_mode_iterator AVXMODE16
45 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte modes for patterns that only exist for some
;; element sizes.  In the SSEMODE<digits> names, the digits line up with the
;; element sizes of the listed integer modes (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI), e.g. SSEMODE248 = V8HI, V4SI, V2DI.
48 (define_mode_iterator SSEMODE12 [V16QI V8HI])
49 (define_mode_iterator SSEMODE24 [V8HI V4SI])
50 (define_mode_iterator SSEMODE14 [V16QI V4SI])
51 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
52 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
;; SSEMODE1248 has the same mode list as SSEMODEI.
53 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar SF/DF together with the 16-byte packed float modes.
54 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; The two 32-byte packed float modes (same list as AVX256MODEF2P).
55 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
;; The two 16-byte packed float modes.  This is the iterator used by the
;; SSE packed and "vm" (scalar-in-vector) arithmetic patterns in this file.
56 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; AVX iterators over float modes and over same-sized int/float pairs.
;; AVX256MODEF2P: the two 32-byte packed float modes; used by the 256-bit
;; add/sub, mul and min/max expanders in this file.
58 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
;; 32-byte modes selected by element count: AVX256MODE8P has the 8-element
;; modes, AVX256MODE4P the 4-element modes, AVX256MODE24P all four of them.
;; AVX256MODE2P is AVX256MODE24P without V4DI.
59 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
60 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
61 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
62 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; AVXMODEF2P: all packed float modes, 16-byte then 32-byte; used by the
;; "avx_movu", "avx_movnt", "*avx_<plusminus_insn>", "*avx_mul" and
;; "avx_div" insns in this file.
63 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
;; The two 4-element float modes.
64 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
;; Double-precision only / single-precision only, both vector sizes.
65 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
66 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; Mode lists for conversion patterns: AVXMODEDCVTDQ2PS has the same list as
;; AVXMODEFSP; AVXMODEDCVTPS2DQ has the SI vectors with matching element
;; counts.
;; NOTE(review): the conversion patterns themselves are outside this
;; section -- confirm which operand each iterator describes.
67 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
68 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
70 ;; Int-float size matches
;; Each iterator pairs the float and the integer 16-byte mode that have the
;; same element count (4 x SF/SI, 2 x DF/DI).
71 (define_mode_iterator SSEMODE4S [V4SF V4SI])
72 (define_mode_iterator SSEMODE2D [V2DF V2DI])
74 ;; Modes handled by integer vcond pattern
;; V16QI, V8HI and V4SI are listed unconditionally; the V2DI entry carries
;; the condition string "TARGET_SSE4_2", so it is only enabled under that
;; target flag.
75 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
76 (V2DI "TARGET_SSE4_2")])
78 ;; Modes handled by vec_extract_even/odd pattern.
79 (define_mode_iterator SSEMODE_EO
82 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
83 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
84 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
86 ;; Mapping from float mode to required SSE level
;; <sse> expands to "sse" for the single-precision modes and "sse2" for the
;; double-precision modes; it forms the name prefix of patterns such as
;; "<sse>_movu<ssemodesuffix>" and "<sse>_movnt<mode>".
87 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
89 ;; Mapping from integer vector mode to mnemonic suffix
;; One letter per element size of the 16-byte integer modes: b, w, d, q.
90 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
92 ;; Mapping of the insn mnemonic suffix
;; Scalar modes give "ss"/"sd"; packed float modes of either vector size
;; give "ps"/"pd".  Note that the integer modes V8SI and V4DI are also
;; mapped to the float suffixes "ps" and "pd" respectively.
93 (define_mode_attr ssemodesuffix
94 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
95 (V8SI "ps") (V4DI "pd")])
96 (define_mode_attr ssescalarmodesuffix
97 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V4DF "sd")
100 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (QI 7, HI 15, SI 31,
;; DI 63), i.e. the largest rotate count for that element size.
101 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
103 ;; Mapping of vector modes back to the scalar modes
;; Only 16-byte vector modes have entries here.  The value is printed
;; without a "mode" suffix and is used as the "mode" attribute of the
;; scalar-in-vector ("vm") arithmetic insns in this file.
104 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
105 (V16QI "QI") (V8HI "HI")
106 (V4SI "SI") (V2DI "DI")])
108 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count is doubled.  Entries
;; exist for both the 16-byte and the 32-byte modes, so the 32-byte modes
;; map to 64-byte mode names (e.g. V8SF -> V16SF).
109 (define_mode_attr ssedoublesizemode
110 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
111 (V8HI "V16HI") (V16QI "V32QI")
112 (V4DF "V8DF") (V8SF "V16SF")
113 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
115 ;; Number of scalar elements in each vector type
;; The value is the element count that already appears in the mode name,
;; given as a decimal string, for all 16-byte and 32-byte modes.
116 (define_mode_attr ssescalarnum
117 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
118 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Mode attributes used by the AVX patterns.
;;
;; avxvecmode: value for the insn "mode" attribute.  16-byte integer modes
;; (including V1TI) map to TI, 32-byte integer modes map to OI, and the
;; float vector modes map to themselves.
121 (define_mode_attr avxvecmode
122 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
123 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
124 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; avxvecpsmode: maps each integer vector mode to the single-precision
;; float vector mode of the same total size (V4SF for 16 bytes, V8SF for
;; 32 bytes).
125 (define_mode_attr avxvecpsmode
126 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
127 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: same element type, half the element count.  All six
;; 32-byte modes have entries; among the 16-byte modes only V16QI, V8HI,
;; V4SI and V4SF do (there is no entry for V2DI or V2DF).
128 (define_mode_attr avxhalfvecmode
129 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
130 (V8SF "V4SF") (V4DF "V2DF")
131 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; avxscalarmode: element mode of each vector mode.  Unlike ssescalarmode
;; this covers the 32-byte modes as well as the 16-byte ones.
132 (define_mode_attr avxscalarmode
133 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
134 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: swaps between the SF vector and the SI vector with the
;; same element count, in both directions.
135 (define_mode_attr avxcvtvecmode
136 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: maps each float vector mode to the integer vector mode
;; with the same element count and element size.
;; NOTE(review): by its name this is presumably the mode of a permute
;; control operand -- the patterns using it are outside this section.
137 (define_mode_attr avxpermvecmode
138 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
139 (define_mode_attr avxmodesuffixp
140 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; avxmodesuffix: pattern-name suffix that separates the two vector sizes.
;; It is empty for the listed 16-byte modes and "256" for the listed
;; 32-byte modes, e.g. in "avx_movdqu<avxmodesuffix>" and
;; "avx_movu<ssemodesuffix><avxmodesuffix>".
142 (define_mode_attr avxmodesuffix
143 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
144 (V8SI "256") (V8SF "256") (V4DF "256")])
146 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for a mode with N elements (8 -> 255, 4 -> 15,
;; 2 -> 3), i.e. a mask with one bit set per vector element.
147 (define_mode_attr blendbits
148 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
150 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 2^(N-1) for a mode with N elements (16 -> 32768, 8 -> 128,
;; 4 -> 8), i.e. the single bit that corresponds to the last element.
151 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
153 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
155 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
159 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
161 (define_expand "mov<mode>"
162 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
163 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
166 ix86_expand_vector_move (<MODE>mode, operands);
170 (define_insn "*avx_mov<mode>_internal"
171 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
172 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
177 switch (which_alternative)
180 return standard_sse_constant_opcode (insn, operands[1]);
183 switch (get_attr_mode (insn))
187 return "vmovaps\t{%1, %0|%0, %1}";
190 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
191 return "vmovaps\t{%1, %0|%0, %1}";
193 return "vmovapd\t{%1, %0|%0, %1}";
195 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
196 return "vmovaps\t{%1, %0|%0, %1}";
198 return "vmovdqa\t{%1, %0|%0, %1}";
204 [(set_attr "type" "sselog1,ssemov,ssemov")
205 (set_attr "prefix" "vex")
206 (set_attr "mode" "<avxvecmode>")])
208 ;; All of these patterns are enabled for SSE1 as well as SSE2.
209 ;; This is essential for maintaining stable calling conventions.
211 (define_expand "mov<mode>"
212 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
213 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
216 ix86_expand_vector_move (<MODE>mode, operands);
220 (define_insn "*mov<mode>_internal"
221 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
222 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
224 && (register_operand (operands[0], <MODE>mode)
225 || register_operand (operands[1], <MODE>mode))"
227 switch (which_alternative)
230 return standard_sse_constant_opcode (insn, operands[1]);
233 switch (get_attr_mode (insn))
236 return "movaps\t{%1, %0|%0, %1}";
238 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
239 return "movaps\t{%1, %0|%0, %1}";
241 return "movapd\t{%1, %0|%0, %1}";
243 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
244 return "movaps\t{%1, %0|%0, %1}";
246 return "movdqa\t{%1, %0|%0, %1}";
252 [(set_attr "type" "sselog1,ssemov,ssemov")
254 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
255 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
256 (and (eq_attr "alternative" "2")
257 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
259 (const_string "V4SF")
260 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
261 (const_string "V4SF")
262 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
263 (const_string "V2DF")
265 (const_string "TI")))])
267 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
268 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
269 ;; from memory, we'd prefer to load the memory directly into the %xmm
270 ;; register. To facilitate this happy circumstance, this pattern won't
271 ;; split until after register allocation. If the 64-bit value didn't
272 ;; come from memory, this is the best we can do. This is much better
273 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register.
276 (define_insn_and_split "movdi_to_sse"
278 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
279 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
280 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
281 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
283 "&& reload_completed"
286 if (register_operand (operands[1], DImode))
288 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
289 Assemble the 64-bit DImode value in an xmm register. */
290 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
291 gen_rtx_SUBREG (SImode, operands[1], 0)));
292 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
293 gen_rtx_SUBREG (SImode, operands[1], 4)));
294 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
297 else if (memory_operand (operands[1], DImode))
298 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
299 operands[1], const0_rtx));
305 [(set (match_operand:V4SF 0 "register_operand" "")
306 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
307 "TARGET_SSE && reload_completed"
310 (vec_duplicate:V4SF (match_dup 1))
314 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
315 operands[2] = CONST0_RTX (V4SFmode);
319 [(set (match_operand:V2DF 0 "register_operand" "")
320 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
321 "TARGET_SSE2 && reload_completed"
322 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
324 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
325 operands[2] = CONST0_RTX (DFmode);
328 (define_expand "push<mode>1"
329 [(match_operand:AVX256MODE 0 "register_operand" "")]
332 ix86_expand_push (<MODE>mode, operands[0]);
336 (define_expand "push<mode>1"
337 [(match_operand:SSEMODE16 0 "register_operand" "")]
340 ix86_expand_push (<MODE>mode, operands[0]);
344 (define_expand "movmisalign<mode>"
345 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
346 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
349 ix86_expand_vector_move_misalign (<MODE>mode, operands);
353 (define_expand "movmisalign<mode>"
354 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
355 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
358 ix86_expand_vector_move_misalign (<MODE>mode, operands);
362 (define_insn "avx_movu<ssemodesuffix><avxmodesuffix>"
363 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
365 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
367 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
368 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
369 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
370 [(set_attr "type" "ssemov")
371 (set_attr "movu" "1")
372 (set_attr "prefix" "vex")
373 (set_attr "mode" "<MODE>")])
375 (define_insn "sse2_movq128"
376 [(set (match_operand:V2DI 0 "register_operand" "=x")
379 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
380 (parallel [(const_int 0)]))
383 "%vmovq\t{%1, %0|%0, %1}"
384 [(set_attr "type" "ssemov")
385 (set_attr "prefix" "maybe_vex")
386 (set_attr "mode" "TI")])
388 (define_insn "<sse>_movu<ssemodesuffix>"
389 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
391 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
393 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
394 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
395 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
396 [(set_attr "type" "ssemov")
397 (set_attr "movu" "1")
398 (set_attr "mode" "<MODE>")])
400 (define_insn "avx_movdqu<avxmodesuffix>"
401 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
403 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
405 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
406 "vmovdqu\t{%1, %0|%0, %1}"
407 [(set_attr "type" "ssemov")
408 (set_attr "movu" "1")
409 (set_attr "prefix" "vex")
410 (set_attr "mode" "<avxvecmode>")])
412 (define_insn "sse2_movdqu"
413 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
414 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
416 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
417 "movdqu\t{%1, %0|%0, %1}"
418 [(set_attr "type" "ssemov")
419 (set_attr "movu" "1")
420 (set_attr "prefix_data16" "1")
421 (set_attr "mode" "TI")])
423 (define_insn "avx_movnt<mode>"
424 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
426 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
428 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
429 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
430 [(set_attr "type" "ssemov")
431 (set_attr "prefix" "vex")
432 (set_attr "mode" "<MODE>")])
434 (define_insn "<sse>_movnt<mode>"
435 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
437 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
439 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
440 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssemov")
442 (set_attr "mode" "<MODE>")])
444 (define_insn "avx_movnt<mode>"
445 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
447 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
450 "vmovntdq\t{%1, %0|%0, %1}"
451 [(set_attr "type" "ssecvt")
452 (set_attr "prefix" "vex")
453 (set_attr "mode" "<avxvecmode>")])
455 (define_insn "sse2_movntv2di"
456 [(set (match_operand:V2DI 0 "memory_operand" "=m")
457 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
460 "movntdq\t{%1, %0|%0, %1}"
461 [(set_attr "type" "ssemov")
462 (set_attr "prefix_data16" "1")
463 (set_attr "mode" "TI")])
465 (define_insn "sse2_movntsi"
466 [(set (match_operand:SI 0 "memory_operand" "=m")
467 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
470 "movnti\t{%1, %0|%0, %1}"
471 [(set_attr "type" "ssemov")
472 (set_attr "prefix_data16" "0")
473 (set_attr "mode" "V2DF")])
475 (define_insn "avx_lddqu<avxmodesuffix>"
476 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
478 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
481 "vlddqu\t{%1, %0|%0, %1}"
482 [(set_attr "type" "ssecvt")
483 (set_attr "movu" "1")
484 (set_attr "prefix" "vex")
485 (set_attr "mode" "<avxvecmode>")])
487 (define_insn "sse3_lddqu"
488 [(set (match_operand:V16QI 0 "register_operand" "=x")
489 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
492 "lddqu\t{%1, %0|%0, %1}"
493 [(set_attr "type" "ssemov")
494 (set_attr "movu" "1")
495 (set_attr "prefix_data16" "0")
496 (set_attr "prefix_rep" "1")
497 (set_attr "mode" "TI")])
499 ; Expand patterns for non-temporal stores. At the moment, only those
500 ; that directly map to insns are defined; it would be possible to
501 ; define patterns for other modes that would expand to several insns.
503 (define_expand "storent<mode>"
504 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
506 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
508 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
511 (define_expand "storent<mode>"
512 [(set (match_operand:MODEF 0 "memory_operand" "")
514 [(match_operand:MODEF 1 "register_operand" "")]
519 (define_expand "storentv2di"
520 [(set (match_operand:V2DI 0 "memory_operand" "")
521 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
526 (define_expand "storentsi"
527 [(set (match_operand:SI 0 "memory_operand" "")
528 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
533 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
535 ;; Parallel floating point arithmetic
537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
539 (define_expand "<code><mode>2"
540 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
542 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
543 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
544 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Named expander "<plusminus_insn><mode>3" for the 32-byte packed float
;; modes in AVX256MODEF2P (V8SF, V4DF).
;;   operand 0: destination, must be a register.
;;   operands 1, 2: sources, register or memory (nonimmediate_operand).
;; The expander is enabled only when AVX256_VEC_FLOAT_MODE_P holds for the
;; mode.  Its preparation statement passes the operands array to
;; ix86_fixup_binary_operands_no_copy; there is no DONE, so the RTL template
;; is emitted afterwards with whatever operands that call left in place.
;; NOTE(review): plusminus, <plusminus_insn> and <CODE> come from a code
;; iterator defined outside this section -- presumably plus/minus; confirm.
546 (define_expand "<plusminus_insn><mode>3"
547 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
548 (plusminus:AVX256MODEF2P
549 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
550 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
551 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
552 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX insn for packed float add/sub over AVXMODEF2P (V4SF, V2DF, V8SF,
;; V4DF).  This is the three-operand form: the destination (operand 0, "=x")
;; is independent of both sources, and the output template prints all three
;; operands.
;;   operand 1: constraint "<comm>x", so it must end up in an SSE register.
;;   operand 2: constraint "xm", SSE register or memory.
;; The insn condition requires AVX_VEC_FLOAT_MODE_P for the mode and
;; ix86_binary_operator_ok for the operand combination.
;; NOTE(review): <comm> and <plusminus_mnemonic> are code attributes defined
;; outside this section; <comm> presumably supplies the "%" commutative
;; marker for plus only -- confirm.
554 (define_insn "*avx_<plusminus_insn><mode>3"
555 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
556 (plusminus:AVXMODEF2P
557 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
558 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
559 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
560 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
;; Mnemonic is "v" + <plusminus_mnemonic> + the ps/pd suffix for the mode.
561 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
562 [(set_attr "type" "sseadd")
563 (set_attr "prefix" "vex")
564 (set_attr "mode" "<avxvecmode>")])
;; Named expander "<plusminus_insn><mode>3" for the 16-byte packed float
;; modes in SSEMODEF2P (V4SF, V2DF).
;;   operand 0: destination, must be a register.
;;   operands 1, 2: sources, register or memory (nonimmediate_operand).
;; Enabled only when SSE_VEC_FLOAT_MODE_P holds for the mode.  The
;; preparation statement passes the operands array to
;; ix86_fixup_binary_operands_no_copy; there is no DONE, so the RTL template
;; is emitted afterwards.
;; NOTE(review): plusminus, <plusminus_insn> and <CODE> come from a code
;; iterator defined outside this section -- presumably plus/minus; confirm.
566 (define_expand "<plusminus_insn><mode>3"
567 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
568 (plusminus:SSEMODEF2P
569 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
570 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
571 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
572 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE insn for packed float add/sub over SSEMODEF2P (V4SF, V2DF).  This is
;; the two-operand, destructive form: operand 1 is tied to the destination
;; by the matching constraint "0", and the output template prints only
;; operands 2 and 0.
;;   operand 1: constraint "<comm>0", must be the same register as operand 0.
;;   operand 2: constraint "xm", SSE register or memory.
;; The insn condition requires SSE_VEC_FLOAT_MODE_P for the mode and
;; ix86_binary_operator_ok for the operand combination.
;; NOTE(review): <comm> and <plusminus_mnemonic> are code attributes defined
;; outside this section; <comm> presumably supplies the "%" commutative
;; marker for plus only -- confirm.
574 (define_insn "*<plusminus_insn><mode>3"
575 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
576 (plusminus:SSEMODEF2P
577 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
578 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
579 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
580 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
;; No "v" prefix and no "prefix" attribute here, unlike the AVX variant.
581 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
582 [(set_attr "type" "sseadd")
583 (set_attr "mode" "<MODE>")])
585 (define_insn "*avx_vm<plusminus_insn><mode>3"
586 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
587 (vec_merge:SSEMODEF2P
588 (plusminus:SSEMODEF2P
589 (match_operand:SSEMODEF2P 1 "register_operand" "x")
590 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
593 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
594 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
595 [(set_attr "type" "sseadd")
596 (set_attr "prefix" "vex")
597 (set_attr "mode" "<ssescalarmode>")])
599 (define_insn "<sse>_vm<plusminus_insn><mode>3"
600 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
601 (vec_merge:SSEMODEF2P
602 (plusminus:SSEMODEF2P
603 (match_operand:SSEMODEF2P 1 "register_operand" "0")
604 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
607 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
608 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
609 [(set_attr "type" "sseadd")
610 (set_attr "mode" "<ssescalarmode>")])
612 (define_expand "mul<mode>3"
613 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
615 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
616 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
617 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
618 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
620 (define_insn "*avx_mul<mode>3"
621 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
623 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
624 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
625 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
626 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
627 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
628 [(set_attr "type" "ssemul")
629 (set_attr "prefix" "vex")
630 (set_attr "mode" "<avxvecmode>")])
632 (define_expand "mul<mode>3"
633 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
635 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
636 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
637 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
638 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
640 (define_insn "*mul<mode>3"
641 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
643 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
644 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
645 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
646 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
647 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
648 [(set_attr "type" "ssemul")
649 (set_attr "mode" "<MODE>")])
651 (define_insn "*avx_vmmul<mode>3"
652 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
653 (vec_merge:SSEMODEF2P
655 (match_operand:SSEMODEF2P 1 "register_operand" "x")
656 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
659 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
660 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
661 [(set_attr "type" "ssemul")
662 (set_attr "prefix" "vex")
663 (set_attr "mode" "<ssescalarmode>")])
665 (define_insn "<sse>_vmmul<mode>3"
666 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
667 (vec_merge:SSEMODEF2P
669 (match_operand:SSEMODEF2P 1 "register_operand" "0")
670 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
673 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
674 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
675 [(set_attr "type" "ssemul")
676 (set_attr "mode" "<ssescalarmode>")])
678 (define_expand "divv8sf3"
679 [(set (match_operand:V8SF 0 "register_operand" "")
680 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
681 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
684 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
686 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
687 && flag_finite_math_only && !flag_trapping_math
688 && flag_unsafe_math_optimizations)
690 ix86_emit_swdivsf (operands[0], operands[1],
691 operands[2], V8SFmode);
696 (define_expand "divv4df3"
697 [(set (match_operand:V4DF 0 "register_operand" "")
698 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
699 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
701 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
703 (define_insn "avx_div<mode>3"
704 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
706 (match_operand:AVXMODEF2P 1 "register_operand" "x")
707 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
708 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
709 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
710 [(set_attr "type" "ssediv")
711 (set_attr "prefix" "vex")
712 (set_attr "mode" "<MODE>")])
714 (define_expand "divv4sf3"
715 [(set (match_operand:V4SF 0 "register_operand" "")
716 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
717 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
720 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
721 && flag_finite_math_only && !flag_trapping_math
722 && flag_unsafe_math_optimizations)
724 ix86_emit_swdivsf (operands[0], operands[1],
725 operands[2], V4SFmode);
730 (define_expand "divv2df3"
731 [(set (match_operand:V2DF 0 "register_operand" "")
732 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
733 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
737 (define_insn "*avx_div<mode>3"
738 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
740 (match_operand:SSEMODEF2P 1 "register_operand" "x")
741 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
742 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
743 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
744 [(set_attr "type" "ssediv")
745 (set_attr "prefix" "vex")
746 (set_attr "mode" "<MODE>")])
748 (define_insn "<sse>_div<mode>3"
749 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
751 (match_operand:SSEMODEF2P 1 "register_operand" "0")
752 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
753 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
754 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
755 [(set_attr "type" "ssediv")
756 (set_attr "mode" "<MODE>")])
758 (define_insn "*avx_vmdiv<mode>3"
759 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
760 (vec_merge:SSEMODEF2P
762 (match_operand:SSEMODEF2P 1 "register_operand" "x")
763 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
766 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
767 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
768 [(set_attr "type" "ssediv")
769 (set_attr "prefix" "vex")
770 (set_attr "mode" "<ssescalarmode>")])
772 (define_insn "<sse>_vmdiv<mode>3"
773 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
774 (vec_merge:SSEMODEF2P
776 (match_operand:SSEMODEF2P 1 "register_operand" "0")
777 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
780 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
781 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
782 [(set_attr "type" "ssediv")
783 (set_attr "mode" "<ssescalarmode>")])
785 (define_insn "avx_rcpv8sf2"
786 [(set (match_operand:V8SF 0 "register_operand" "=x")
788 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
790 "vrcpps\t{%1, %0|%0, %1}"
791 [(set_attr "type" "sse")
792 (set_attr "prefix" "vex")
793 (set_attr "mode" "V8SF")])
795 (define_insn "sse_rcpv4sf2"
796 [(set (match_operand:V4SF 0 "register_operand" "=x")
798 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
800 "%vrcpps\t{%1, %0|%0, %1}"
801 [(set_attr "type" "sse")
802 (set_attr "atom_sse_attr" "rcp")
803 (set_attr "prefix" "maybe_vex")
804 (set_attr "mode" "V4SF")])
806 (define_insn "*avx_vmrcpv4sf2"
807 [(set (match_operand:V4SF 0 "register_operand" "=x")
809 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
811 (match_operand:V4SF 2 "register_operand" "x")
814 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
815 [(set_attr "type" "sse")
816 (set_attr "prefix" "vex")
817 (set_attr "mode" "SF")])
819 (define_insn "sse_vmrcpv4sf2"
820 [(set (match_operand:V4SF 0 "register_operand" "=x")
822 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
824 (match_operand:V4SF 2 "register_operand" "0")
827 "rcpss\t{%1, %0|%0, %1}"
828 [(set_attr "type" "sse")
829 (set_attr "atom_sse_attr" "rcp")
830 (set_attr "mode" "SF")])
832 (define_expand "sqrtv8sf2"
833 [(set (match_operand:V8SF 0 "register_operand" "")
834 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
837 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
838 && flag_finite_math_only && !flag_trapping_math
839 && flag_unsafe_math_optimizations)
841 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
846 (define_insn "avx_sqrtv8sf2"
847 [(set (match_operand:V8SF 0 "register_operand" "=x")
848 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
850 "vsqrtps\t{%1, %0|%0, %1}"
851 [(set_attr "type" "sse")
852 (set_attr "prefix" "vex")
853 (set_attr "mode" "V8SF")])
855 (define_expand "sqrtv4sf2"
856 [(set (match_operand:V4SF 0 "register_operand" "")
857 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
860 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
861 && flag_finite_math_only && !flag_trapping_math
862 && flag_unsafe_math_optimizations)
864 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
869 (define_insn "sse_sqrtv4sf2"
870 [(set (match_operand:V4SF 0 "register_operand" "=x")
871 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
873 "%vsqrtps\t{%1, %0|%0, %1}"
874 [(set_attr "type" "sse")
875 (set_attr "atom_sse_attr" "sqrt")
876 (set_attr "prefix" "maybe_vex")
877 (set_attr "mode" "V4SF")])
879 (define_insn "sqrtv4df2"
880 [(set (match_operand:V4DF 0 "register_operand" "=x")
881 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
883 "vsqrtpd\t{%1, %0|%0, %1}"
884 [(set_attr "type" "sse")
885 (set_attr "prefix" "vex")
886 (set_attr "mode" "V4DF")])
888 (define_insn "sqrtv2df2"
889 [(set (match_operand:V2DF 0 "register_operand" "=x")
890 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
892 "%vsqrtpd\t{%1, %0|%0, %1}"
893 [(set_attr "type" "sse")
894 (set_attr "prefix" "maybe_vex")
895 (set_attr "mode" "V2DF")])
897 (define_insn "*avx_vmsqrt<mode>2"
898 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
899 (vec_merge:SSEMODEF2P
901 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
902 (match_operand:SSEMODEF2P 2 "register_operand" "x")
904 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
905 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
906 [(set_attr "type" "sse")
907 (set_attr "prefix" "vex")
908 (set_attr "mode" "<ssescalarmode>")])
910 (define_insn "<sse>_vmsqrt<mode>2"
911 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
912 (vec_merge:SSEMODEF2P
914 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
915 (match_operand:SSEMODEF2P 2 "register_operand" "0")
917 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
918 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
919 [(set_attr "type" "sse")
920 (set_attr "atom_sse_attr" "sqrt")
921 (set_attr "mode" "<ssescalarmode>")])
923 (define_expand "rsqrtv8sf2"
924 [(set (match_operand:V8SF 0 "register_operand" "")
926 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
927 "TARGET_AVX && TARGET_SSE_MATH"
929 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
933 (define_insn "avx_rsqrtv8sf2"
934 [(set (match_operand:V8SF 0 "register_operand" "=x")
936 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
938 "vrsqrtps\t{%1, %0|%0, %1}"
939 [(set_attr "type" "sse")
940 (set_attr "prefix" "vex")
941 (set_attr "mode" "V8SF")])
943 (define_expand "rsqrtv4sf2"
944 [(set (match_operand:V4SF 0 "register_operand" "")
946 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
949 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
953 (define_insn "sse_rsqrtv4sf2"
954 [(set (match_operand:V4SF 0 "register_operand" "=x")
956 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
958 "%vrsqrtps\t{%1, %0|%0, %1}"
959 [(set_attr "type" "sse")
960 (set_attr "prefix" "maybe_vex")
961 (set_attr "mode" "V4SF")])
963 (define_insn "*avx_vmrsqrtv4sf2"
964 [(set (match_operand:V4SF 0 "register_operand" "=x")
966 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
968 (match_operand:V4SF 2 "register_operand" "x")
971 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
972 [(set_attr "type" "sse")
973 (set_attr "prefix" "vex")
974 (set_attr "mode" "SF")])
976 (define_insn "sse_vmrsqrtv4sf2"
977 [(set (match_operand:V4SF 0 "register_operand" "=x")
979 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
981 (match_operand:V4SF 2 "register_operand" "0")
984 "rsqrtss\t{%1, %0|%0, %1}"
985 [(set_attr "type" "sse")
986 (set_attr "mode" "SF")])
988 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
989 ;; isn't really correct, as those rtl operators aren't defined when
990 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for smax/smin on the 256-bit float vector modes
;; (AVX256MODEF2P), enabled when AVX256_VEC_FLOAT_MODE_P holds.
;; Unless flag_finite_math_only is set, operand 1 is first forced into a
;; register; the operands are then passed to
;; ix86_fixup_binary_operands_no_copy.
992 (define_expand "<code><mode>3"
993 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
994 (smaxmin:AVX256MODEF2P
995 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
996 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
997 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
999 if (!flag_finite_math_only)
1000 operands[1] = force_reg (<MODE>mode, operands[1]);
1001 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander for the SSEMODEF2P float vector modes, enabled when
;; SSE_VEC_FLOAT_MODE_P holds.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then passed
;; to ix86_fixup_binary_operands_no_copy.
1004 (define_expand "<code><mode>3"
1005 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1007 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1008 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1009 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1011 if (!flag_finite_math_only)
1012 operands[1] = force_reg (<MODE>mode, operands[1]);
1013 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; VEX three-operand max/min that is only available when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 carries the '%' modifier, i.e. operands 1 and 2 are
;; treated as commutative here.
1016 (define_insn "*avx_<code><mode>3_finite"
1017 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1019 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1020 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1021 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1022 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1023 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1024 [(set_attr "type" "sseadd")
1025 (set_attr "prefix" "vex")
1026 (set_attr "mode" "<MODE>")])
;; Legacy two-operand max/min that is only available when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is tied to the destination and marked commutative
;; ("%0"), so only operands 0 and 2 appear in the template.
1028 (define_insn "*<code><mode>3_finite"
1029 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1031 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1032 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1033 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1034 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1035 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1036 [(set_attr "type" "sseadd")
1037 (set_attr "mode" "<MODE>")])
;; VEX three-operand max/min for the case where finite-math-only is NOT
;; assumed (the condition has no flag_finite_math_only test).
;; Operand 1 must be a plain register and must not carry the '%'
;; (commutative) modifier: the hardware min/max return the second source
;; when an input is NaN or when comparing -0.0 with +0.0, so letting the
;; register allocator swap the inputs would change the result.  This
;; matches the non-VEX "*<code><mode>3" pattern, whose operand 1 is
;; register_operand "0", and the expander, which forces operand 1 into a
;; register when flag_finite_math_only is off.
1039 (define_insn "*avx_<code><mode>3"
1040 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1042 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1043 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1044 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1045 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1046 [(set_attr "type" "sseadd")
1047 (set_attr "prefix" "vex")
1048 (set_attr "mode" "<avxvecmode>")])
;; Legacy two-operand max/min with no finite-math requirement in its
;; condition.  Operand 1 must be a register tied to the destination ("0")
;; and is not marked commutative; operand 2 may be a register or memory.
1050 (define_insn "*<code><mode>3"
1051 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1053 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1054 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1055 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1056 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1057 [(set_attr "type" "sseadd")
1058 (set_attr "mode" "<MODE>")])
;; Scalar max/min inside a vec_merge, VEX three-operand form, for 128-bit
;; float vector modes (AVX128_VEC_FLOAT_MODE_P).  The instruction uses the
;; scalar mode suffix and the insn mode is <ssescalarmode>.
;; NOTE(review): "type" is "sse" here while the non-VEX counterpart
;; <sse>_vm<code><mode>3 uses "sseadd" -- confirm which is intended.
1060 (define_insn "*avx_vm<code><mode>3"
1061 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1062 (vec_merge:SSEMODEF2P
1064 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1065 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1068 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1069 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1070 [(set_attr "type" "sse")
1071 (set_attr "prefix" "vex")
1072 (set_attr "mode" "<ssescalarmode>")])
;; Scalar max/min inside a vec_merge, legacy two-operand form.  Operand 1
;; is tied to the destination ("0"); operand 2 may be a register or memory.
;; The instruction uses the scalar mode suffix.
1074 (define_insn "<sse>_vm<code><mode>3"
1075 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1076 (vec_merge:SSEMODEF2P
1078 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1079 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1082 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1083 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1084 [(set_attr "type" "sseadd")
1085 (set_attr "mode" "<ssescalarmode>")])
1087 ;; These versions of the min/max patterns implement exactly the operations
1088 ;; min = (op1 < op2 ? op1 : op2)
1089 ;; max = (!(op1 < op2) ? op1 : op2)
1090 ;; Their operands are not commutative, and thus they may be used in the
1091 ;; presence of -0.0 and NaN.
;; VEX three-operand IEEE-ordered minimum.  Operand 1 must be a register,
;; operand 2 may be a register or memory, and neither is marked
;; commutative, so the source order in the vmin template is fixed.
1093 (define_insn "*avx_ieee_smin<mode>3"
1094 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1096 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1097 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1099 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1100 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1101 [(set_attr "type" "sseadd")
1102 (set_attr "prefix" "vex")
1103 (set_attr "mode" "<avxvecmode>")])
;; VEX three-operand IEEE-ordered maximum.  Operand 1 must be a register,
;; operand 2 may be a register or memory, and neither is marked
;; commutative, so the source order in the vmax template is fixed.
1105 (define_insn "*avx_ieee_smax<mode>3"
1106 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1108 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1109 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1111 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1112 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1113 [(set_attr "type" "sseadd")
1114 (set_attr "prefix" "vex")
1115 (set_attr "mode" "<avxvecmode>")])
;; Legacy two-operand IEEE-ordered minimum.  Operand 1 is tied to the
;; destination ("0") and is not marked commutative; operand 2 may be a
;; register or memory.
1117 (define_insn "*ieee_smin<mode>3"
1118 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1120 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1121 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1123 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1124 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1125 [(set_attr "type" "sseadd")
1126 (set_attr "mode" "<MODE>")])
;; Legacy two-operand IEEE-ordered maximum.  Operand 1 is tied to the
;; destination ("0") and is not marked commutative; operand 2 may be a
;; register or memory.
1128 (define_insn "*ieee_smax<mode>3"
1129 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1131 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1132 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1134 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1135 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1136 [(set_attr "type" "sseadd")
1137 (set_attr "mode" "<MODE>")])
;; 256-bit single-precision add/subtract (vaddsubps) in VEX three-operand
;; form.  The visible RTL contains (minus op1 op2) built from match_dups of
;; the two source operands; operand 2 may be a register or memory.
1139 (define_insn "avx_addsubv8sf3"
1140 [(set (match_operand:V8SF 0 "register_operand" "=x")
1143 (match_operand:V8SF 1 "register_operand" "x")
1144 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1145 (minus:V8SF (match_dup 1) (match_dup 2))
1148 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1149 [(set_attr "type" "sseadd")
1150 (set_attr "prefix" "vex")
1151 (set_attr "mode" "V8SF")])
;; 256-bit double-precision add/subtract (vaddsubpd) in VEX three-operand
;; form.  The visible RTL contains (minus op1 op2) built from match_dups of
;; the two source operands; operand 2 may be a register or memory.
1153 (define_insn "avx_addsubv4df3"
1154 [(set (match_operand:V4DF 0 "register_operand" "=x")
1157 (match_operand:V4DF 1 "register_operand" "x")
1158 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1159 (minus:V4DF (match_dup 1) (match_dup 2))
1162 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1163 [(set_attr "type" "sseadd")
1164 (set_attr "prefix" "vex")
1165 (set_attr "mode" "V4DF")])
;; 128-bit single-precision add/subtract (vaddsubps) in VEX three-operand
;; form; operand 1 is a separate source register rather than being tied to
;; the destination.
1167 (define_insn "*avx_addsubv4sf3"
1168 [(set (match_operand:V4SF 0 "register_operand" "=x")
1171 (match_operand:V4SF 1 "register_operand" "x")
1172 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1173 (minus:V4SF (match_dup 1) (match_dup 2))
1176 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1177 [(set_attr "type" "sseadd")
1178 (set_attr "prefix" "vex")
1179 (set_attr "mode" "V4SF")])
;; 128-bit single-precision add/subtract (addsubps) in legacy two-operand
;; form: operand 1 is tied to the destination ("0").  The encoding is
;; declared to carry a rep prefix (prefix_rep "1").
1181 (define_insn "sse3_addsubv4sf3"
1182 [(set (match_operand:V4SF 0 "register_operand" "=x")
1185 (match_operand:V4SF 1 "register_operand" "0")
1186 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1187 (minus:V4SF (match_dup 1) (match_dup 2))
1190 "addsubps\t{%2, %0|%0, %2}"
1191 [(set_attr "type" "sseadd")
1192 (set_attr "prefix_rep" "1")
1193 (set_attr "mode" "V4SF")])
;; 128-bit double-precision add/subtract (vaddsubpd) in VEX three-operand
;; form; operand 1 is a separate source register rather than being tied to
;; the destination.
1195 (define_insn "*avx_addsubv2df3"
1196 [(set (match_operand:V2DF 0 "register_operand" "=x")
1199 (match_operand:V2DF 1 "register_operand" "x")
1200 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1201 (minus:V2DF (match_dup 1) (match_dup 2))
1204 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1205 [(set_attr "type" "sseadd")
1206 (set_attr "prefix" "vex")
1207 (set_attr "mode" "V2DF")])
;; 128-bit double-precision add/subtract (addsubpd) in legacy two-operand
;; form: operand 1 is tied to the destination ("0").  Marked with
;; atom_unit "complex" for scheduling.
1209 (define_insn "sse3_addsubv2df3"
1210 [(set (match_operand:V2DF 0 "register_operand" "=x")
1213 (match_operand:V2DF 1 "register_operand" "0")
1214 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1215 (minus:V2DF (match_dup 1) (match_dup 2))
1218 "addsubpd\t{%2, %0|%0, %2}"
1219 [(set_attr "type" "sseadd")
1220 (set_attr "atom_unit" "complex")
1221 (set_attr "mode" "V2DF")])
;; 256-bit double-precision horizontal operation; the pattern name and
;; mnemonic are filled in from <plusminus_insn> / <plusminus_mnemonic>.
;; The RTL pairs elements 0/1 and 2/3 of operand 1, and elements 0/1 and
;; 2/3 of operand 2 (register or memory).  VEX three-operand form.
1223 (define_insn "avx_h<plusminus_insn>v4df3"
1224 [(set (match_operand:V4DF 0 "register_operand" "=x")
1229 (match_operand:V4DF 1 "register_operand" "x")
1230 (parallel [(const_int 0)]))
1231 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1233 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1234 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1238 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1239 (parallel [(const_int 0)]))
1240 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1242 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1243 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1245 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1246 [(set_attr "type" "sseadd")
1247 (set_attr "prefix" "vex")
1248 (set_attr "mode" "V4DF")])
;; 256-bit single-precision horizontal operation; the pattern name and
;; mnemonic are filled in from <plusminus_insn> / <plusminus_mnemonic>.
;; The RTL pairs adjacent elements: 0/1 and 2/3 of operand 1, 0/1 and 2/3
;; of operand 2, then 4/5 and 6/7 of operand 1, and 4/5 and 6/7 of
;; operand 2.  VEX three-operand form.
1250 (define_insn "avx_h<plusminus_insn>v8sf3"
1251 [(set (match_operand:V8SF 0 "register_operand" "=x")
1257 (match_operand:V8SF 1 "register_operand" "x")
1258 (parallel [(const_int 0)]))
1259 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1261 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1262 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1266 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1267 (parallel [(const_int 0)]))
1268 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1270 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1271 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1275 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1276 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1278 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1279 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1282 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1283 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1285 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1286 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1288 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1289 [(set_attr "type" "sseadd")
1290 (set_attr "prefix" "vex")
1291 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal operation in VEX three-operand
;; form.  The RTL pairs elements 0/1 and 2/3 of operand 1, then elements
;; 0/1 and 2/3 of operand 2 (register or memory).
1293 (define_insn "*avx_h<plusminus_insn>v4sf3"
1294 [(set (match_operand:V4SF 0 "register_operand" "=x")
1299 (match_operand:V4SF 1 "register_operand" "x")
1300 (parallel [(const_int 0)]))
1301 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1303 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1304 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1308 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1309 (parallel [(const_int 0)]))
1310 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1312 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1313 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1315 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1316 [(set_attr "type" "sseadd")
1317 (set_attr "prefix" "vex")
1318 (set_attr "mode" "V4SF")])
;; 128-bit single-precision horizontal operation in legacy two-operand
;; form: operand 1 is tied to the destination ("0").  The RTL pairs
;; elements 0/1 and 2/3 of operand 1, then elements 0/1 and 2/3 of
;; operand 2.  Declared with a rep prefix and atom_unit "complex".
1320 (define_insn "sse3_h<plusminus_insn>v4sf3"
1321 [(set (match_operand:V4SF 0 "register_operand" "=x")
1326 (match_operand:V4SF 1 "register_operand" "0")
1327 (parallel [(const_int 0)]))
1328 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1330 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1331 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1335 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1336 (parallel [(const_int 0)]))
1337 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1339 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1340 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1342 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1343 [(set_attr "type" "sseadd")
1344 (set_attr "atom_unit" "complex")
1345 (set_attr "prefix_rep" "1")
1346 (set_attr "mode" "V4SF")])
;; 128-bit double-precision horizontal operation in VEX three-operand
;; form.  The RTL pairs elements 0/1 of operand 1 and elements 0/1 of
;; operand 2 (register or memory).
1348 (define_insn "*avx_h<plusminus_insn>v2df3"
1349 [(set (match_operand:V2DF 0 "register_operand" "=x")
1353 (match_operand:V2DF 1 "register_operand" "x")
1354 (parallel [(const_int 0)]))
1355 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1358 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1359 (parallel [(const_int 0)]))
1360 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1362 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1363 [(set_attr "type" "sseadd")
1364 (set_attr "prefix" "vex")
1365 (set_attr "mode" "V2DF")])
;; 128-bit double-precision horizontal operation in legacy two-operand
;; form: operand 1 is tied to the destination ("0").  The RTL pairs
;; elements 0/1 of operand 1 and elements 0/1 of operand 2.
1367 (define_insn "sse3_h<plusminus_insn>v2df3"
1368 [(set (match_operand:V2DF 0 "register_operand" "=x")
1372 (match_operand:V2DF 1 "register_operand" "0")
1373 (parallel [(const_int 0)]))
1374 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1377 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1378 (parallel [(const_int 0)]))
1379 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1381 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1382 [(set_attr "type" "sseadd")
1383 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF vector (operand 1) into operand 0.
;; Two code paths are visible: two chained sse3 haddv4sf3 insns through a
;; fresh temporary, or a call to ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the test selecting between the two paths is not visible
;; here -- presumably it checks for SSE3 availability; confirm.
1385 (define_expand "reduc_splus_v4sf"
1386 [(match_operand:V4SF 0 "register_operand" "")
1387 (match_operand:V4SF 1 "register_operand" "")]
1392 rtx tmp = gen_reg_rtx (V4SFmode);
1393 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1394 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1397 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF vector: emits a single sse3 haddv2df3 with
;; operand 1 supplied as both sources and operand 0 as the destination.
1401 (define_expand "reduc_splus_v2df"
1402 [(match_operand:V2DF 0 "register_operand" "")
1403 (match_operand:V2DF 1 "register_operand" "")]
1406 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf,
;; passing gen_smaxv4sf3 as the combining operation.
1410 (define_expand "reduc_smax_v4sf"
1411 [(match_operand:V4SF 0 "register_operand" "")
1412 (match_operand:V4SF 1 "register_operand" "")]
1415 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf,
;; passing gen_sminv4sf3 as the combining operation.
1419 (define_expand "reduc_smin_v4sf"
1420 [(match_operand:V4SF 0 "register_operand" "")
1421 (match_operand:V4SF 1 "register_operand" "")]
1424 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1428 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1430 ;; Parallel floating point comparisons
1432 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed VEX compare with an explicit predicate immediate.  Operand 3 is
;; a constant in the range 0..31 and is printed as the first AT&T operand
;; of vcmp; it contributes one immediate byte to the encoding
;; (length_immediate "1").
1434 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1435 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1437 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1438 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1439 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1442 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1443 [(set_attr "type" "ssecmp")
1444 (set_attr "length_immediate" "1")
1445 (set_attr "prefix" "vex")
1446 (set_attr "mode" "<MODE>")])
;; Scalar VEX compare with an explicit predicate immediate, wrapped in a
;; vec_merge.  Operand 3 is a constant in the range 0..31 and contributes
;; one immediate byte (length_immediate "1").
;; The destination carries the "=x" constraint: a define_insn output needs
;; the '=' modifier and a register class, otherwise register allocation is
;; not restricted to SSE registers for operand 0.
1448 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1449 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1450 (vec_merge:SSEMODEF2P
1452 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1453 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1454 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1459 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1460 [(set_attr "type" "ssecmp")
1461 (set_attr "length_immediate" "1")
1462 (set_attr "prefix" "vex")
1463 (set_attr "mode" "<ssescalarmode>")])
1465 ;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1466 ;; may generate 256-bit vector compare instructions.
;; VEX three-operand mask compare: the comparison code comes from the
;; matched operator (operand 3, predicate avx_comparison_float_operator)
;; and is printed into the mnemonic via %D3.
1467 (define_insn "*avx_maskcmp<mode>3"
1468 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1469 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1470 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1471 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1472 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1473 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1474 [(set_attr "type" "ssecmp")
1475 (set_attr "prefix" "vex")
1476 (set_attr "length_immediate" "1")
1477 (set_attr "mode" "<avxvecmode>")])
;; Legacy two-operand mask compare on SSEMODEF4 modes.  Operand 1 is tied
;; to the destination ("0"); the comparison code comes from the matched
;; operator (operand 3, predicate sse_comparison_operator) and is printed
;; via %D3.  The visible part of the condition accepts scalar or vector
;; SSE float modes.
1479 (define_insn "<sse>_maskcmp<mode>3"
1480 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1481 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1482 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1483 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1485 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1486 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1487 [(set_attr "type" "ssecmp")
1488 (set_attr "length_immediate" "1")
1489 (set_attr "mode" "<MODE>")])
;; Scalar VEX three-operand mask compare inside a vec_merge.  The
;; comparison code comes from the matched operator (operand 3) and is
;; printed into the mnemonic via %D3.
;; length_immediate is declared as "1" so that this pattern agrees with
;; "*avx_maskcmp<mode>3" and "<sse>_vmmaskcmp<mode>3", which use the same
;; %D3 template form and both account for the predicate immediate byte.
1491 (define_insn "*avx_vmmaskcmp<mode>3"
1492 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1493 (vec_merge:SSEMODEF2P
1494 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1495 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1496 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1499 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1500 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1501 [(set_attr "type" "ssecmp")
1502 (set_attr "prefix" "vex")
(set_attr "length_immediate" "1")
1503 (set_attr "mode" "<ssescalarmode>")])
;; Scalar legacy two-operand mask compare inside a vec_merge.  Operand 1
;; is tied to the destination ("0"); the comparison code comes from the
;; matched operator (operand 3) and is printed via %D3.
1505 (define_insn "<sse>_vmmaskcmp<mode>3"
1506 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1507 (vec_merge:SSEMODEF2P
1508 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1509 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1510 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1513 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1514 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1515 [(set_attr "type" "ssecmp")
1516 (set_attr "length_immediate" "1")
1517 (set_attr "mode" "<ssescalarmode>")])
;; Scalar ordered compare that sets FLAGS_REG in CCFP mode from element 0
;; of operand 0 (register) and element 0 of operand 1 (register or
;; memory).  "%v" plus prefix "maybe_vex" allows legacy or VEX emission.
;; prefix_data16 is computed from the insn mode (tested against "DF").
1519 (define_insn "<sse>_comi"
1520 [(set (reg:CCFP FLAGS_REG)
1523 (match_operand:<ssevecmode> 0 "register_operand" "x")
1524 (parallel [(const_int 0)]))
1526 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1527 (parallel [(const_int 0)]))))]
1528 "SSE_FLOAT_MODE_P (<MODE>mode)"
1529 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1530 [(set_attr "type" "ssecomi")
1531 (set_attr "prefix" "maybe_vex")
1532 (set_attr "prefix_rep" "0")
1533 (set (attr "prefix_data16")
1534 (if_then_else (eq_attr "mode" "DF")
1536 (const_string "0")))
1537 (set_attr "mode" "<MODE>")])
;; Scalar unordered compare that sets FLAGS_REG in CCFPU mode from element
;; 0 of operand 0 (register) and element 0 of operand 1 (register or
;; memory).  "%v" plus prefix "maybe_vex" allows legacy or VEX emission.
;; prefix_data16 is computed from the insn mode (tested against "DF").
1539 (define_insn "<sse>_ucomi"
1540 [(set (reg:CCFPU FLAGS_REG)
1543 (match_operand:<ssevecmode> 0 "register_operand" "x")
1544 (parallel [(const_int 0)]))
1546 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1547 (parallel [(const_int 0)]))))]
1548 "SSE_FLOAT_MODE_P (<MODE>mode)"
1549 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1550 [(set_attr "type" "ssecomi")
1551 (set_attr "prefix" "maybe_vex")
1552 (set_attr "prefix_rep" "0")
1553 (set (attr "prefix_data16")
1554 (if_then_else (eq_attr "mode" "DF")
1556 (const_string "0")))
1557 (set_attr "mode" "<MODE>")])
;; Vector conditional select for float vector modes: operand 0 receives
;; operand 1 or operand 2 depending on the comparison (operator 3) of
;; operands 4 and 5.  Enabled for both SSE and AVX float vector modes.
;; The expansion is delegated to ix86_expand_fp_vcond, whose boolean
;; result is captured in "ok".
1559 (define_expand "vcond<mode>"
1560 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1561 (if_then_else:AVXMODEF2P
1562 (match_operator 3 ""
1563 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1564 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1565 (match_operand:AVXMODEF2P 1 "general_operand" "")
1566 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1567 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1568 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1570 bool ok = ix86_expand_fp_vcond (operands);
1575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1577 ;; Parallel floating point logical operations
1579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not on AVX float vector modes, VEX three-operand form
;; (vandn).  Operand 1 must be a register; operand 2 may be a register or
;; memory.
1581 (define_insn "avx_andnot<mode>3"
1582 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1585 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1586 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1587 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1588 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1589 [(set_attr "type" "sselog")
1590 (set_attr "prefix" "vex")
1591 (set_attr "mode" "<avxvecmode>")])
;; Packed and-not on SSE float vector modes, legacy two-operand form
;; (andn).  Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.
1593 (define_insn "<sse>_andnot<mode>3"
1594 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1597 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1598 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1599 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1600 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1601 [(set_attr "type" "sselog")
1602 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic operations on the 256-bit float vector
;; modes (AVX256MODEF2P).  The only preparation step is
;; ix86_fixup_binary_operands_no_copy on the operands.
1604 (define_expand "<code><mode>3"
1605 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1606 (any_logic:AVX256MODEF2P
1607 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1608 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1609 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1610 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; VEX three-operand any_logic operation on AVX float vector modes;
;; operands 1 and 2 are commutative ('%').  When
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" form of the
;; mnemonic is emitted regardless of mode; the other return uses the
;; mode's own suffix.
1612 (define_insn "*avx_<code><mode>3"
1613 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1614 (any_logic:AVXMODEF2P
1615 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1616 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1617 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1618 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1620 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1621 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1623 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1625 [(set_attr "type" "sselog")
1626 (set_attr "prefix" "vex")
1627 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic operations on the SSE float vector modes
;; (SSEMODEF2P).  The only preparation step is
;; ix86_fixup_binary_operands_no_copy on the operands.
1629 (define_expand "<code><mode>3"
1630 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1631 (any_logic:SSEMODEF2P
1632 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1633 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1634 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1635 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Legacy two-operand any_logic operation on SSE float vector modes;
;; operand 1 is tied to the destination and commutative ("%0").  When
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" form of the
;; mnemonic is emitted regardless of mode; the other return uses the
;; mode's own suffix.
1637 (define_insn "*<code><mode>3"
1638 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1639 (any_logic:SSEMODEF2P
1640 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1641 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1642 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1643 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1645 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1646 return "<logic>ps\t{%2, %0|%0, %2}";
1648 return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
1650 [(set_attr "type" "sselog")
1651 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  Operand 3 is a sign-bit mask built by
;; ix86_build_signbit_mask for the scalar element mode; operands 4 and 5
;; are fresh temporaries.  The visible RTL combines (not mask) with
;; operand 1, (and mask operand 2), and finally ORs the two temporaries
;; into operand 0.
;; NOTE(review): presumably this yields the magnitude of operand 1 with
;; the sign of operand 2 -- confirm against the full pattern.
1653 (define_expand "copysign<mode>3"
1656 (not:SSEMODEF2P (match_dup 3))
1657 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1659 (and:SSEMODEF2P (match_dup 3)
1660 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1661 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1662 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1663 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1665 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1667 operands[4] = gen_reg_rtx (<MODE>mode);
1668 operands[5] = gen_reg_rtx (<MODE>mode);
1671 ;; Also define scalar versions. These are used for abs, neg, and
1672 ;; conditional move. Using subregs into vector modes causes register
1673 ;; allocation lossage. These patterns do not allow memory operands
1674 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) and-not implemented with the packed vandnp
;; instruction in VEX three-operand form.  All operands must be
;; registers; the insn mode is the containing vector mode (<ssevecmode>).
1676 (define_insn "*avx_andnot<mode>3"
1677 [(set (match_operand:MODEF 0 "register_operand" "=x")
1680 (match_operand:MODEF 1 "register_operand" "x"))
1681 (match_operand:MODEF 2 "register_operand" "x")))]
1682 "AVX_FLOAT_MODE_P (<MODE>mode)"
1683 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1684 [(set_attr "type" "sselog")
1685 (set_attr "prefix" "vex")
1686 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) and-not implemented with the packed andnp
;; instruction in legacy two-operand form.  Operand 1 is tied to the
;; destination ("0"); all operands must be registers.
1688 (define_insn "*andnot<mode>3"
1689 [(set (match_operand:MODEF 0 "register_operand" "=x")
1692 (match_operand:MODEF 1 "register_operand" "0"))
1693 (match_operand:MODEF 2 "register_operand" "x")))]
1694 "SSE_FLOAT_MODE_P (<MODE>mode)"
1695 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1696 [(set_attr "type" "sselog")
1697 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logic operation implemented with the packed
;; instruction in VEX three-operand form; all operands must be registers.
;; When TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" form is
;; emitted; the other return uses "p" plus the mode's suffix.
1699 (define_insn "*avx_<code><mode>3"
1700 [(set (match_operand:MODEF 0 "register_operand" "=x")
1702 (match_operand:MODEF 1 "register_operand" "x")
1703 (match_operand:MODEF 2 "register_operand" "x")))]
1704 "AVX_FLOAT_MODE_P (<MODE>mode)"
1706 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1707 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1709 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1711 [(set_attr "type" "sselog")
1712 (set_attr "prefix" "vex")
1713 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logic operation implemented with the packed
;; instruction in legacy two-operand form; operand 1 is tied to the
;; destination ("0") and all operands must be registers.
;; When TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" form is
;; emitted; the other return uses "p" plus the mode's suffix.
1715 (define_insn "*<code><mode>3"
1716 [(set (match_operand:MODEF 0 "register_operand" "=x")
1718 (match_operand:MODEF 1 "register_operand" "0")
1719 (match_operand:MODEF 2 "register_operand" "x")))]
1720 "SSE_FLOAT_MODE_P (<MODE>mode)"
1722 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1723 return "<logic>ps\t{%2, %0|%0, %2}";
1725 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1727 [(set_attr "type" "sselog")
1728 (set_attr "mode" "<ssevecmode>")])
1730 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1732 ;; FMA4 floating point multiply/accumulate instructions. This includes
1733 ;; both the scalar and the packed (vector) versions of the instructions.
1736 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1738 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1739 ;; combine to generate a multiply/add with two memory references. We then
1740 ;; split this insn by first loading one of the memory operands into the
1741 ;; destination register. If we don't manage to split the insn, reload will
1742 ;; generate the appropriate moves. This is needed because combine
1743 ;; has already folded one of the memory references into both the multiply and
1744 ;; add insns, and it can't generate a new pseudo. I.e.:
1745 ;; (set (reg1) (mem (addr1)))
1746 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1747 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; FMA4 multiply-add on the FMA4MODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Two constraint alternatives:
;; operands 1..3 as (x, x, xm) or (x, m, x); operand 1 is marked
;; commutative ('%').
1749 (define_insn "fma4_fmadd<mode>4256"
1750 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1753 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1754 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1755 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1756 "TARGET_FMA4 && TARGET_FUSED_MADD"
1757 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1758 [(set_attr "type" "ssemuladd")
1759 (set_attr "mode" "<MODE>")])
1761 ;; Floating multiply and subtract.
;; FMA4MODEF4 variant, enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; Two constraint alternatives: operands 1..3 as (x, x, xm) or (x, m, x).
1762 (define_insn "fma4_fmsub<mode>4256"
1763 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1766 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1767 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1768 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1769 "TARGET_FMA4 && TARGET_FUSED_MADD"
1770 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1771 [(set_attr "type" "ssemuladd")
1772 (set_attr "mode" "<MODE>")])
1774 ;; Floating point negative multiply and add.
1775 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4MODEF4 variant: operand 3 (the addend c) appears first in the RTL,
;; followed by the product operands 1 and 2.  Alternatives for operands
;; 1..3 are (x, x, xm) or (x, m, x).
1776 (define_insn "fma4_fnmadd<mode>4256"
1777 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1779 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1781 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1782 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1783 "TARGET_FMA4 && TARGET_FUSED_MADD"
1784 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1785 [(set_attr "type" "ssemuladd")
1786 (set_attr "mode" "<MODE>")])
1788 ;; Floating point negative multiply and subtract.
;; FMA4MODEF4 variant, enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; Two constraint alternatives: operands 1..3 as (x, x, xm) or (x, m, x).
1789 (define_insn "fma4_fnmsub<mode>4256"
1790 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1794 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1795 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1796 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1797 "TARGET_FMA4 && TARGET_FUSED_MADD"
1798 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1799 [(set_attr "type" "ssemuladd")
1800 (set_attr "mode" "<MODE>")])
;; FMA4 multiply-add on the SSEMODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Two constraint alternatives:
;; operands 1..3 as (x, x, xm) or (x, m, x); operand 1 is marked
;; commutative ('%').
1802 (define_insn "fma4_fmadd<mode>4"
1803 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1806 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1807 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1808 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1809 "TARGET_FMA4 && TARGET_FUSED_MADD"
1810 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1811 [(set_attr "type" "ssemuladd")
1812 (set_attr "mode" "<MODE>")])
1814 ;; For the scalar operations, use operand1 for the upper words that aren't
1815 ;; modified, so restrict the forms that are generated.
1816 ;; Scalar version of fmadd.
;; The operation sits inside a vec_merge and the instruction uses the
;; scalar mode suffix.  Alternatives for operands 1..3 are (x, x, xm) or
;; (x, m, x).
1817 (define_insn "fma4_vmfmadd<mode>4"
1818 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1819 (vec_merge:SSEMODEF2P
1822 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1823 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1824 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1827 "TARGET_FMA4 && TARGET_FUSED_MADD"
1828 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1829 [(set_attr "type" "ssemuladd")
1830 (set_attr "mode" "<MODE>")])
1832 ;; Floating multiply and subtract.
1833 ;; Allow two memory operands the same as fmadd.
;; SSEMODEF4 variant; constraint alternatives for operands 1..3 are
;; (x, x, xm) or (x, m, x).
1834 (define_insn "fma4_fmsub<mode>4"
1835 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1838 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1839 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1840 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1841 "TARGET_FMA4 && TARGET_FUSED_MADD"
1842 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1843 [(set_attr "type" "ssemuladd")
1844 (set_attr "mode" "<MODE>")])
1846 ;; For the scalar operations, use operand1 for the upper words that aren't
1847 ;; modified, so restrict the forms that are generated.
1848 ;; Scalar version of fmsub.
;; The operation sits inside a vec_merge and the instruction uses the
;; scalar mode suffix.  Alternatives for operands 1..3 are (x, x, xm) or
;; (x, m, x).
1849 (define_insn "fma4_vmfmsub<mode>4"
1850 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1851 (vec_merge:SSEMODEF2P
1854 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1855 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1856 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1859 "TARGET_FMA4 && TARGET_FUSED_MADD"
1860 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1861 [(set_attr "type" "ssemuladd")
1862 (set_attr "mode" "<MODE>")])
1864 ;; Floating point negative multiply and add.
1865 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; SSEMODEF4 variant: operand 3 (the addend c) appears first in the RTL,
;; followed by the product operands 1 and 2.  Alternatives for operands
;; 1..3 are (x, x, xm) or (x, m, x).
1866 (define_insn "fma4_fnmadd<mode>4"
1867 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1869 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1871 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1872 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1873 "TARGET_FMA4 && TARGET_FUSED_MADD"
1874 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1875 [(set_attr "type" "ssemuladd")
1876 (set_attr "mode" "<MODE>")])
1878 ;; For the scalar operations, use operand1 for the upper words that aren't
1879 ;; modified, so restrict the forms that are generated.
1880 ;; Scalar version of fnmadd.
;; The operation sits inside a vec_merge with operand 3 listed before the
;; product operands; the instruction uses the scalar mode suffix.
1881 (define_insn "fma4_vmfnmadd<mode>4"
1882 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1883 (vec_merge:SSEMODEF2P
1885 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1887 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1888 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1891 "TARGET_FMA4 && TARGET_FUSED_MADD"
1892 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1893 [(set_attr "type" "ssemuladd")
1894 (set_attr "mode" "<MODE>")])
1896 ;; Floating point negative multiply and subtract.
1897 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; SSEMODEF4 variant; constraint alternatives for operands 1..3 are
;; (x, x, xm) or (x, m, x).
1898 (define_insn "fma4_fnmsub<mode>4"
1899 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1903 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1904 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1905 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1906 "TARGET_FMA4 && TARGET_FUSED_MADD"
1907 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1908 [(set_attr "type" "ssemuladd")
1909 (set_attr "mode" "<MODE>")])
1911 ;; For the scalar operations, use operand1 for the upper words that aren't
1912 ;; modified, so restrict the forms that are generated.
1913 ;; Scalar version of fnmsub.
;; The operation sits inside a vec_merge and the instruction uses the
;; scalar mode suffix.  Alternatives for operands 1..3 are (x, x, xm) or
;; (x, m, x).
1914 (define_insn "fma4_vmfnmsub<mode>4"
1915 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1916 (vec_merge:SSEMODEF2P
1920 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1921 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1922 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1925 "TARGET_FMA4 && TARGET_FUSED_MADD"
1926 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1927 [(set_attr "type" "ssemuladd")
1928 (set_attr "mode" "<MODE>")])
;; fma4i_fmadd<mode>4256: pattern over the FMA4MODEF4 modes whose source
;; expression is tagged UNSPEC_FMA4_INTRINSIC; emits vfmadd<ssemodesuffix>.
;; Operand 0 is the register destination, operands 1-3 the sources; a
;; memory operand may appear in operand 3 or operand 2, one per alternative.
1930 (define_insn "fma4i_fmadd<mode>4256"
1931 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1935 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1936 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1937 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1938 UNSPEC_FMA4_INTRINSIC))]
1940 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1941 [(set_attr "type" "ssemuladd")
1942 (set_attr "mode" "<MODE>")])
;; fma4i_fmsub<mode>4256: pattern over the FMA4MODEF4 modes whose source
;; expression is tagged UNSPEC_FMA4_INTRINSIC; emits vfmsub<ssemodesuffix>.
;; Operand 0 is the register destination, operands 1-3 the sources; a
;; memory operand may appear in operand 3 or operand 2, one per alternative.
1944 (define_insn "fma4i_fmsub<mode>4256"
1945 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1949 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1950 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1951 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1952 UNSPEC_FMA4_INTRINSIC))]
1954 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1955 [(set_attr "type" "ssemuladd")
1956 (set_attr "mode" "<MODE>")])
;; fma4i_fnmadd<mode>4256: pattern over the FMA4MODEF4 modes whose source
;; expression is tagged UNSPEC_FMA4_INTRINSIC; emits vfnmadd<ssemodesuffix>.
;; Operand 3 is listed first in the RTL, followed by operands 1 and 2.
;; A memory operand may appear in operand 3 or operand 2, one per
;; alternative.
1958 (define_insn "fma4i_fnmadd<mode>4256"
1959 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1962 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1964 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1965 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1966 UNSPEC_FMA4_INTRINSIC))]
1968 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1969 [(set_attr "type" "ssemuladd")
1970 (set_attr "mode" "<MODE>")])
;; fma4i_fnmsub<mode>4256: pattern over the FMA4MODEF4 modes whose source
;; expression is tagged UNSPEC_FMA4_INTRINSIC; emits vfnmsub<ssemodesuffix>.
;; Operand 0 is the register destination, operands 1-3 the sources; a
;; memory operand may appear in operand 3 or operand 2, one per alternative.
1972 (define_insn "fma4i_fnmsub<mode>4256"
1973 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1978 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1979 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1980 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1981 UNSPEC_FMA4_INTRINSIC))]
1983 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1984 [(set_attr "type" "ssemuladd")
1985 (set_attr "mode" "<MODE>")])
;; fma4i_fmadd<mode>4: pattern over the SSEMODEF2P modes whose source
;; expression is tagged UNSPEC_FMA4_INTRINSIC; emits vfmadd<ssemodesuffix>.
;; Operand 0 is the register destination, operands 1-3 the sources; a
;; memory operand may appear in operand 3 or operand 2, one per alternative.
1987 (define_insn "fma4i_fmadd<mode>4"
1988 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1992 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1993 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1994 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1995 UNSPEC_FMA4_INTRINSIC))]
1997 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1998 [(set_attr "type" "ssemuladd")
1999 (set_attr "mode" "<MODE>")])
;; fma4i_fmsub<mode>4: pattern over the SSEMODEF2P modes whose source
;; expression is tagged UNSPEC_FMA4_INTRINSIC; emits vfmsub<ssemodesuffix>.
;; Operand 0 is the register destination, operands 1-3 the sources; a
;; memory operand may appear in operand 3 or operand 2, one per alternative.
2001 (define_insn "fma4i_fmsub<mode>4"
2002 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2006 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2007 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2008 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2009 UNSPEC_FMA4_INTRINSIC))]
2011 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2012 [(set_attr "type" "ssemuladd")
2013 (set_attr "mode" "<MODE>")])
;; fma4i_fnmadd<mode>4: pattern over the SSEMODEF2P modes whose source
;; expression is tagged UNSPEC_FMA4_INTRINSIC; emits vfnmadd<ssemodesuffix>.
;; Operand 3 is listed first in the RTL, followed by operands 1 and 2.
;; A memory operand may appear in operand 3 or operand 2, one per
;; alternative.
2015 (define_insn "fma4i_fnmadd<mode>4"
2016 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2019 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2021 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2022 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
2023 UNSPEC_FMA4_INTRINSIC))]
2025 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2026 [(set_attr "type" "ssemuladd")
2027 (set_attr "mode" "<MODE>")])
;; fma4i_fnmsub<mode>4: pattern over the SSEMODEF2P modes whose source
;; expression is tagged UNSPEC_FMA4_INTRINSIC; emits vfnmsub<ssemodesuffix>.
;; Operand 0 is the register destination, operands 1-3 the sources; a
;; memory operand may appear in operand 3 or operand 2, one per alternative.
2029 (define_insn "fma4i_fnmsub<mode>4"
2030 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2035 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2036 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2037 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2038 UNSPEC_FMA4_INTRINSIC))]
2040 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2041 [(set_attr "type" "ssemuladd")
2042 (set_attr "mode" "<MODE>")])
2044 ;; For the scalar operations, use operand1 for the upper words that aren't
2045 ;; modified, so restrict the forms that are accepted.
;; fma4i_vmfmadd<mode>4: pattern over the SSEMODEF2P modes: a vec_merge
;; wrapped in UNSPEC_FMA4_INTRINSIC that emits
;; vfmadd<ssescalarmodesuffix>.  The "mode" attribute is the scalar mode
;; <ssescalarmode>.  A memory operand may appear in operand 3 or operand 2,
;; one per alternative.
2046 (define_insn "fma4i_vmfmadd<mode>4"
2047 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2049 [(vec_merge:SSEMODEF2P
2052 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2053 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2054 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2057 UNSPEC_FMA4_INTRINSIC))]
2059 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2060 [(set_attr "type" "ssemuladd")
2061 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfmsub<mode>4: pattern over the SSEMODEF2P modes: a vec_merge
;; wrapped in UNSPEC_FMA4_INTRINSIC that emits
;; vfmsub<ssescalarmodesuffix>.  The "mode" attribute is the scalar mode
;; <ssescalarmode>.  A memory operand may appear in operand 3 or operand 2,
;; one per alternative.
2063 (define_insn "fma4i_vmfmsub<mode>4"
2064 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2066 [(vec_merge:SSEMODEF2P
2069 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2070 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2071 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2074 UNSPEC_FMA4_INTRINSIC))]
2076 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2077 [(set_attr "type" "ssemuladd")
2078 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfnmadd<mode>4: pattern over the SSEMODEF2P modes: a vec_merge
;; wrapped in UNSPEC_FMA4_INTRINSIC that emits
;; vfnmadd<ssescalarmodesuffix>.  Operand 3 is listed first in the RTL,
;; followed by operands 1 and 2.  The "mode" attribute is the scalar mode
;; <ssescalarmode>.
2080 (define_insn "fma4i_vmfnmadd<mode>4"
2081 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2083 [(vec_merge:SSEMODEF2P
2085 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2087 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2088 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2091 UNSPEC_FMA4_INTRINSIC))]
2093 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2094 [(set_attr "type" "ssemuladd")
2095 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfnmsub<mode>4: pattern over the SSEMODEF2P modes: a vec_merge
;; wrapped in UNSPEC_FMA4_INTRINSIC that emits
;; vfnmsub<ssescalarmodesuffix>.  The "mode" attribute is the scalar mode
;; <ssescalarmode>.  A memory operand may appear in operand 3 or operand 2,
;; one per alternative.
2097 (define_insn "fma4i_vmfnmsub<mode>4"
2098 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2100 [(vec_merge:SSEMODEF2P
2104 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2105 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2106 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2109 UNSPEC_FMA4_INTRINSIC))]
2111 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2112 [(set_attr "type" "ssemuladd")
2113 (set_attr "mode" "<ssescalarmode>")])
2115 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2117 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2119 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; fma4_fmaddsubv8sf4: V8SF pattern emitting vfmaddsubps.  Operand 0 is the
;; register destination and operands 1-3 the sources; a memory operand is
;; allowed in operand 3 (alternative 0) or operand 2 (alternative 1).
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
2121 (define_insn "fma4_fmaddsubv8sf4"
2122 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2126 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2127 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2128 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2135 "TARGET_FMA4 && TARGET_FUSED_MADD"
2136 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2137 [(set_attr "type" "ssemuladd")
2138 (set_attr "mode" "V8SF")])
;; fma4_fmaddsubv4df4: V4DF pattern emitting vfmaddsubpd.  Operand 0 is the
;; register destination and operands 1-3 the sources; a memory operand is
;; allowed in operand 3 (alternative 0) or operand 2 (alternative 1).
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
2140 (define_insn "fma4_fmaddsubv4df4"
2141 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2145 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2146 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2147 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2154 "TARGET_FMA4 && TARGET_FUSED_MADD"
2155 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2156 [(set_attr "type" "ssemuladd")
2157 (set_attr "mode" "V4DF")])
;; fma4_fmaddsubv4sf4: V4SF pattern emitting vfmaddsubps.  Operand 0 is the
;; register destination and operands 1-3 the sources; a memory operand is
;; allowed in operand 3 (alternative 0) or operand 2 (alternative 1).
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
2159 (define_insn "fma4_fmaddsubv4sf4"
2160 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2164 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2165 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2166 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2173 "TARGET_FMA4 && TARGET_FUSED_MADD"
2174 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2175 [(set_attr "type" "ssemuladd")
2176 (set_attr "mode" "V4SF")])
;; fma4_fmaddsubv2df4: V2DF pattern emitting vfmaddsubpd.  Operand 0 is the
;; register destination and operands 1-3 the sources; a memory operand is
;; allowed in operand 3 (alternative 0) or operand 2 (alternative 1).
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
2178 (define_insn "fma4_fmaddsubv2df4"
2179 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2183 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2184 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2185 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2192 "TARGET_FMA4 && TARGET_FUSED_MADD"
2193 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2194 [(set_attr "type" "ssemuladd")
2195 (set_attr "mode" "V2DF")])
;; fma4_fmsubaddv8sf4: V8SF pattern emitting vfmsubaddps.  Operand 0 is the
;; register destination and operands 1-3 the sources; a memory operand is
;; allowed in operand 3 (alternative 0) or operand 2 (alternative 1).
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
2197 (define_insn "fma4_fmsubaddv8sf4"
2198 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2202 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2203 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2204 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2211 "TARGET_FMA4 && TARGET_FUSED_MADD"
2212 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2213 [(set_attr "type" "ssemuladd")
2214 (set_attr "mode" "V8SF")])
;; fma4_fmsubaddv4df4: V4DF pattern emitting vfmsubaddpd.  Operand 0 is the
;; register destination and operands 1-3 the sources; a memory operand is
;; allowed in operand 3 (alternative 0) or operand 2 (alternative 1).
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
2216 (define_insn "fma4_fmsubaddv4df4"
2217 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2221 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2222 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2223 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2230 "TARGET_FMA4 && TARGET_FUSED_MADD"
2231 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2232 [(set_attr "type" "ssemuladd")
2233 (set_attr "mode" "V4DF")])
;; fma4_fmsubaddv4sf4: V4SF pattern emitting vfmsubaddps.  Operand 0 is the
;; register destination and operands 1-3 the sources; a memory operand is
;; allowed in operand 3 (alternative 0) or operand 2 (alternative 1).
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
2235 (define_insn "fma4_fmsubaddv4sf4"
2236 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2240 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2241 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2242 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2249 "TARGET_FMA4 && TARGET_FUSED_MADD"
2250 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2251 [(set_attr "type" "ssemuladd")
2252 (set_attr "mode" "V4SF")])
;; fma4_fmsubaddv2df4: V2DF pattern emitting vfmsubaddpd.  Operand 0 is the
;; register destination and operands 1-3 the sources; a memory operand is
;; allowed in operand 3 (alternative 0) or operand 2 (alternative 1).
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
2254 (define_insn "fma4_fmsubaddv2df4"
2255 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2259 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2260 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2261 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2268 "TARGET_FMA4 && TARGET_FUSED_MADD"
2269 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2270 [(set_attr "type" "ssemuladd")
2271 (set_attr "mode" "V2DF")])
;; fma4i_fmaddsubv8sf4: V8SF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmaddsubps.  Operand 0 is the register destination and operands 1-3 the
;; sources; a memory operand is allowed in operand 3 or operand 2, one per
;; alternative.
2273 (define_insn "fma4i_fmaddsubv8sf4"
2274 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2279 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2280 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2281 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2288 UNSPEC_FMA4_INTRINSIC))]
2290 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2291 [(set_attr "type" "ssemuladd")
2292 (set_attr "mode" "V8SF")])
;; fma4i_fmaddsubv4df4: V4DF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmaddsubpd.  Operand 0 is the register destination and operands 1-3 the
;; sources; a memory operand is allowed in operand 3 or operand 2, one per
;; alternative.
2294 (define_insn "fma4i_fmaddsubv4df4"
2295 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2300 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2301 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2302 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2309 UNSPEC_FMA4_INTRINSIC))]
2311 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2312 [(set_attr "type" "ssemuladd")
2313 (set_attr "mode" "V4DF")])
;; fma4i_fmaddsubv4sf4: V4SF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmaddsubps.  Operand 0 is the register destination and operands 1-3 the
;; sources; a memory operand is allowed in operand 3 or operand 2, one per
;; alternative.
2315 (define_insn "fma4i_fmaddsubv4sf4"
2316 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2321 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2322 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2323 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2330 UNSPEC_FMA4_INTRINSIC))]
2332 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2333 [(set_attr "type" "ssemuladd")
2334 (set_attr "mode" "V4SF")])
;; fma4i_fmaddsubv2df4: V2DF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmaddsubpd.  Operand 0 is the register destination and operands 1-3 the
;; sources; a memory operand is allowed in operand 3 or operand 2, one per
;; alternative.
2336 (define_insn "fma4i_fmaddsubv2df4"
2337 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2342 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2343 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2344 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2351 UNSPEC_FMA4_INTRINSIC))]
2353 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2354 [(set_attr "type" "ssemuladd")
2355 (set_attr "mode" "V2DF")])
;; fma4i_fmsubaddv8sf4: V8SF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmsubaddps.  Operand 0 is the register destination and operands 1-3 the
;; sources; a memory operand is allowed in operand 3 or operand 2, one per
;; alternative.
2357 (define_insn "fma4i_fmsubaddv8sf4"
2358 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2363 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2364 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2365 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2372 UNSPEC_FMA4_INTRINSIC))]
2374 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2375 [(set_attr "type" "ssemuladd")
2376 (set_attr "mode" "V8SF")])
;; fma4i_fmsubaddv4df4: V4DF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmsubaddpd.  Operand 0 is the register destination and operands 1-3 the
;; sources; a memory operand is allowed in operand 3 or operand 2, one per
;; alternative.
2378 (define_insn "fma4i_fmsubaddv4df4"
2379 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2384 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2385 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2386 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2393 UNSPEC_FMA4_INTRINSIC))]
2395 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2396 [(set_attr "type" "ssemuladd")
2397 (set_attr "mode" "V4DF")])
;; fma4i_fmsubaddv4sf4: V4SF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmsubaddps.  Operand 0 is the register destination and operands 1-3 the
;; sources; a memory operand is allowed in operand 3 or operand 2, one per
;; alternative.
2399 (define_insn "fma4i_fmsubaddv4sf4"
2400 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2405 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2406 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2407 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2414 UNSPEC_FMA4_INTRINSIC))]
2416 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2417 [(set_attr "type" "ssemuladd")
2418 (set_attr "mode" "V4SF")])
;; fma4i_fmsubaddv2df4: V2DF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmsubaddpd.  Operand 0 is the register destination and operands 1-3 the
;; sources; a memory operand is allowed in operand 3 or operand 2, one per
;; alternative.
2420 (define_insn "fma4i_fmsubaddv2df4"
2421 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2426 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2427 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2428 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2435 UNSPEC_FMA4_INTRINSIC))]
2437 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2438 [(set_attr "type" "ssemuladd")
2439 (set_attr "mode" "V2DF")])
2441 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2443 ;; Parallel single-precision floating point conversion operations
2445 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse_cvtpi2ps: emits cvtpi2ps.  Operand 2 (V2SI, constraint "ym") is
;; converted with float:V2SF.  Operand 1 (V4SF) uses the matching constraint
;; "0", so it shares the destination register, operand 0.
2447 (define_insn "sse_cvtpi2ps"
2448 [(set (match_operand:V4SF 0 "register_operand" "=x")
2451 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2452 (match_operand:V4SF 1 "register_operand" "0")
2455 "cvtpi2ps\t{%2, %0|%0, %2}"
2456 [(set_attr "type" "ssecvt")
2457 (set_attr "mode" "V4SF")])
;; sse_cvtps2pi: emits cvtps2pi.  The V4SF source (operand 1, register or
;; memory) goes through a V4SI unspec, and elements 0 and 1 are selected
;; into the V2SI destination (constraint "=y").  Carries unit "mmx".
2459 (define_insn "sse_cvtps2pi"
2460 [(set (match_operand:V2SI 0 "register_operand" "=y")
2462 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2464 (parallel [(const_int 0) (const_int 1)])))]
2466 "cvtps2pi\t{%1, %0|%0, %1}"
2467 [(set_attr "type" "ssecvt")
2468 (set_attr "unit" "mmx")
2469 (set_attr "mode" "DI")])
;; sse_cvttps2pi: emits cvttps2pi.  The V4SF source (operand 1) is converted
;; with fix:V4SI, and elements 0 and 1 are selected into the V2SI
;; destination (constraint "=y").  Carries unit "mmx" and prefix_rep "0".
2471 (define_insn "sse_cvttps2pi"
2472 [(set (match_operand:V2SI 0 "register_operand" "=y")
2474 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2475 (parallel [(const_int 0) (const_int 1)])))]
2477 "cvttps2pi\t{%1, %0|%0, %1}"
2478 [(set_attr "type" "ssecvt")
2479 (set_attr "unit" "mmx")
2480 (set_attr "prefix_rep" "0")
2481 (set_attr "mode" "SF")])
;; *avx_cvtsi2ss: three-operand vcvtsi2ss with prefix attribute "vex".
;; Operand 2 (SI, register or memory) is converted with float:SF; operand 1
;; (V4SF register) is a separate source, not tied to the destination.
;; The leading "*" means no gen_ function is generated for this pattern.
2483 (define_insn "*avx_cvtsi2ss"
2484 [(set (match_operand:V4SF 0 "register_operand" "=x")
2487 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2488 (match_operand:V4SF 1 "register_operand" "x")
2491 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2492 [(set_attr "type" "sseicvt")
2493 (set_attr "prefix" "vex")
2494 (set_attr "mode" "SF")])
;; sse_cvtsi2ss: two-operand cvtsi2ss.  Operand 2 (SI) is converted with
;; float:SF; operand 1 is tied to the destination by the matching constraint
;; "0".  The register and memory source alternatives are kept separate so
;; the athlon/amdfam10 decode attributes can differ between them.
2496 (define_insn "sse_cvtsi2ss"
2497 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2500 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2501 (match_operand:V4SF 1 "register_operand" "0,0")
2504 "cvtsi2ss\t{%2, %0|%0, %2}"
2505 [(set_attr "type" "sseicvt")
2506 (set_attr "athlon_decode" "vector,double")
2507 (set_attr "amdfam10_decode" "vector,double")
2508 (set_attr "mode" "SF")])
;; *avx_cvtsi2ssq: three-operand vcvtsi2ssq taking a DI source (operand 2,
;; register or memory) converted with float:SF.  Operand 1 is a separate
;; V4SF register source.  Enabled by TARGET_AVX && TARGET_64BIT; sets
;; length_vex to 4.
2510 (define_insn "*avx_cvtsi2ssq"
2511 [(set (match_operand:V4SF 0 "register_operand" "=x")
2514 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2515 (match_operand:V4SF 1 "register_operand" "x")
2517 "TARGET_AVX && TARGET_64BIT"
2518 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2519 [(set_attr "type" "sseicvt")
2520 (set_attr "length_vex" "4")
2521 (set_attr "prefix" "vex")
2522 (set_attr "mode" "SF")])
;; sse_cvtsi2ssq: two-operand cvtsi2ssq taking a DI source (operand 2)
;; converted with float:SF; operand 1 is tied to the destination by the
;; matching constraint "0".  Enabled by TARGET_SSE && TARGET_64BIT; sets
;; prefix_rex to 1.
;; NOTE(review): the second alternative of operand 2 is "rm" rather than
;; plain "m" as in sse_cvtsi2ss -- confirm this is intentional.
2524 (define_insn "sse_cvtsi2ssq"
2525 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2528 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2529 (match_operand:V4SF 1 "register_operand" "0,0")
2531 "TARGET_SSE && TARGET_64BIT"
2532 "cvtsi2ssq\t{%2, %0|%0, %2}"
2533 [(set_attr "type" "sseicvt")
2534 (set_attr "prefix_rex" "1")
2535 (set_attr "athlon_decode" "vector,double")
2536 (set_attr "amdfam10_decode" "vector,double")
2537 (set_attr "mode" "SF")])
;; sse_cvtss2si: emits %vcvtss2si.  Element 0 of the V4SF operand 1
;; (register or memory) is converted to the SI register operand 0 under
;; UNSPEC_FIX_NOTRUNC.  The prefix attribute is "maybe_vex".
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse_cvtss2si_2 -- confirm whether that is intended.
2539 (define_insn "sse_cvtss2si"
2540 [(set (match_operand:SI 0 "register_operand" "=r,r")
2543 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2544 (parallel [(const_int 0)]))]
2545 UNSPEC_FIX_NOTRUNC))]
2547 "%vcvtss2si\t{%1, %0|%0, %1}"
2548 [(set_attr "type" "sseicvt")
2549 (set_attr "athlon_decode" "double,vector")
2550 (set_attr "prefix_rep" "1")
2551 (set_attr "prefix" "maybe_vex")
2552 (set_attr "mode" "SI")])
;; sse_cvtss2si_2: variant of the cvtss2si pattern whose source is a scalar
;; SF operand (register or memory) instead of a vector element.  Produces an
;; SI register result under UNSPEC_FIX_NOTRUNC and emits %vcvtss2si.
2554 (define_insn "sse_cvtss2si_2"
2555 [(set (match_operand:SI 0 "register_operand" "=r,r")
2556 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2557 UNSPEC_FIX_NOTRUNC))]
2559 "%vcvtss2si\t{%1, %0|%0, %1}"
2560 [(set_attr "type" "sseicvt")
2561 (set_attr "athlon_decode" "double,vector")
2562 (set_attr "amdfam10_decode" "double,double")
2563 (set_attr "prefix_rep" "1")
2564 (set_attr "prefix" "maybe_vex")
2565 (set_attr "mode" "SI")])
;; sse_cvtss2siq: 64-bit form: element 0 of the V4SF operand 1 is converted
;; to the DI register operand 0 under UNSPEC_FIX_NOTRUNC.  Enabled by
;; TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the "{q}" suffix in the template is presumably emitted
;; only for the AT&T dialect -- confirm against the output-template rules.
2567 (define_insn "sse_cvtss2siq"
2568 [(set (match_operand:DI 0 "register_operand" "=r,r")
2571 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2572 (parallel [(const_int 0)]))]
2573 UNSPEC_FIX_NOTRUNC))]
2574 "TARGET_SSE && TARGET_64BIT"
2575 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2576 [(set_attr "type" "sseicvt")
2577 (set_attr "athlon_decode" "double,vector")
2578 (set_attr "prefix_rep" "1")
2579 (set_attr "prefix" "maybe_vex")
2580 (set_attr "mode" "DI")])
;; sse_cvtss2siq_2: 64-bit variant whose source is a scalar SF operand
;; (register or memory).  Produces a DI register result under
;; UNSPEC_FIX_NOTRUNC.  Enabled by TARGET_SSE && TARGET_64BIT.
2582 (define_insn "sse_cvtss2siq_2"
2583 [(set (match_operand:DI 0 "register_operand" "=r,r")
2584 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2585 UNSPEC_FIX_NOTRUNC))]
2586 "TARGET_SSE && TARGET_64BIT"
2587 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2588 [(set_attr "type" "sseicvt")
2589 (set_attr "athlon_decode" "double,vector")
2590 (set_attr "amdfam10_decode" "double,double")
2591 (set_attr "prefix_rep" "1")
2592 (set_attr "prefix" "maybe_vex")
2593 (set_attr "mode" "DI")])
;; sse_cvttss2si: emits %vcvttss2si.  The source is element 0 of the V4SF
;; operand 1 (register or memory); the result is the SI register operand 0.
;; The prefix attribute is "maybe_vex".
2595 (define_insn "sse_cvttss2si"
2596 [(set (match_operand:SI 0 "register_operand" "=r,r")
2599 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2600 (parallel [(const_int 0)]))))]
2602 "%vcvttss2si\t{%1, %0|%0, %1}"
2603 [(set_attr "type" "sseicvt")
2604 (set_attr "athlon_decode" "double,vector")
2605 (set_attr "amdfam10_decode" "double,double")
2606 (set_attr "prefix_rep" "1")
2607 (set_attr "prefix" "maybe_vex")
2608 (set_attr "mode" "SI")])
;; sse_cvttss2siq: 64-bit form emitting %vcvttss2si{q}.  The source is
;; element 0 of the V4SF operand 1; the result is the DI register operand 0.
;; Enabled by TARGET_SSE && TARGET_64BIT.
2610 (define_insn "sse_cvttss2siq"
2611 [(set (match_operand:DI 0 "register_operand" "=r,r")
2614 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2615 (parallel [(const_int 0)]))))]
2616 "TARGET_SSE && TARGET_64BIT"
2617 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2618 [(set_attr "type" "sseicvt")
2619 (set_attr "athlon_decode" "double,vector")
2620 (set_attr "amdfam10_decode" "double,double")
2621 (set_attr "prefix_rep" "1")
2622 (set_attr "prefix" "maybe_vex")
2623 (set_attr "mode" "DI")])
;; avx_cvtdq2ps<avxmodesuffix>: float conversion over the AVXMODEDCVTDQ2PS
;; modes, emitting vcvtdq2ps with prefix attribute "vex".  The source,
;; operand 1, has mode <avxcvtvecmode> and may be a register or memory.
2625 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2626 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2627 (float:AVXMODEDCVTDQ2PS
2628 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2630 "vcvtdq2ps\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "ssecvt")
2632 (set_attr "prefix" "vex")
2633 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvtdq2ps: float conversion of a V4SI source (register or memory)
;; into a V4SF register, emitting cvtdq2ps.
2635 (define_insn "sse2_cvtdq2ps"
2636 [(set (match_operand:V4SF 0 "register_operand" "=x")
2637 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2639 "cvtdq2ps\t{%1, %0|%0, %1}"
2640 [(set_attr "type" "ssecvt")
2641 (set_attr "mode" "V4SF")])
;; sse2_cvtudq2ps: expander for a V4SI -> V4SF conversion built from several
;; steps: a float:V4SF conversion of operand 1, an lt comparison of
;; (match_dup 5) against (match_dup 3), an AND of (match_dup 6) with
;; (match_dup 4), and a final plus of (match_dup 5) and (match_dup 7) stored
;; into operand 0.  The preparation code sets operands[3] to a zero V4SF
;; vector in a register and operands[4] to a V4SF constant built from 2**32
;; (real_ldexp of 1.0 by 32), and allocates fresh V4SF pseudos for
;; operands[5..7].
;; NOTE(review): presumably this corrects lanes whose signed conversion came
;; out negative by adding 2**32 -- confirm against the full template.
2643 (define_expand "sse2_cvtudq2ps"
2645 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2647 (lt:V4SF (match_dup 5) (match_dup 3)))
2649 (and:V4SF (match_dup 6) (match_dup 4)))
2650 (set (match_operand:V4SF 0 "register_operand" "")
2651 (plus:V4SF (match_dup 5) (match_dup 7)))]
2654 REAL_VALUE_TYPE TWO32r;
2658 real_ldexp (&TWO32r, &dconst1, 32);
2659 x = const_double_from_real_value (TWO32r, SFmode);
2661 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2662 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2664 for (i = 5; i < 8; i++)
2665 operands[i] = gen_reg_rtx (V4SFmode);
;; avx_cvtps2dq<avxmodesuffix>: conversion over the AVXMODEDCVTPS2DQ modes
;; expressed as an UNSPEC_FIX_NOTRUNC unspec; emits vcvtps2dq with prefix
;; attribute "vex".  The source, operand 1, has mode <avxcvtvecmode>.
2668 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2669 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2670 (unspec:AVXMODEDCVTPS2DQ
2671 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2672 UNSPEC_FIX_NOTRUNC))]
2674 "vcvtps2dq\t{%1, %0|%0, %1}"
2675 [(set_attr "type" "ssecvt")
2676 (set_attr "prefix" "vex")
2677 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvtps2dq: V4SF -> V4SI conversion expressed as an
;; UNSPEC_FIX_NOTRUNC unspec; emits cvtps2dq.  Sets prefix_data16 to 1.
2679 (define_insn "sse2_cvtps2dq"
2680 [(set (match_operand:V4SI 0 "register_operand" "=x")
2681 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2682 UNSPEC_FIX_NOTRUNC))]
2684 "cvtps2dq\t{%1, %0|%0, %1}"
2685 [(set_attr "type" "ssecvt")
2686 (set_attr "prefix_data16" "1")
2687 (set_attr "mode" "TI")])
;; avx_cvttps2dq<avxmodesuffix>: fix conversion over the AVXMODEDCVTPS2DQ
;; modes; emits vcvttps2dq with prefix attribute "vex".  The source,
;; operand 1, has mode <avxcvtvecmode> and may be a register or memory.
2689 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2690 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2691 (fix:AVXMODEDCVTPS2DQ
2692 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2694 "vcvttps2dq\t{%1, %0|%0, %1}"
2695 [(set_attr "type" "ssecvt")
2696 (set_attr "prefix" "vex")
2697 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvttps2dq: fix:V4SI conversion of a V4SF source (register or
;; memory); emits cvttps2dq.  Sets prefix_rep to 1 and prefix_data16 to 0.
2699 (define_insn "sse2_cvttps2dq"
2700 [(set (match_operand:V4SI 0 "register_operand" "=x")
2701 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2703 "cvttps2dq\t{%1, %0|%0, %1}"
2704 [(set_attr "type" "ssecvt")
2705 (set_attr "prefix_rep" "1")
2706 (set_attr "prefix_data16" "0")
2707 (set_attr "mode" "TI")])
2709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2711 ;; Parallel double-precision floating point conversion operations
2713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: float:V2DF conversion of a V2SI source; emits cvtpi2pd.
;; Alternative 0 takes the source with constraint "y" and is tagged unit
;; "mmx" with prefix_data16 "1"; alternative 1 takes a memory source and
;; leaves both attributes at their defaults ("*").
2715 (define_insn "sse2_cvtpi2pd"
2716 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2717 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2719 "cvtpi2pd\t{%1, %0|%0, %1}"
2720 [(set_attr "type" "ssecvt")
2721 (set_attr "unit" "mmx,*")
2722 (set_attr "prefix_data16" "1,*")
2723 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: V2DF -> V2SI conversion expressed as an
;; UNSPEC_FIX_NOTRUNC unspec; emits cvtpd2pi.  The destination uses
;; constraint "=y"; carries unit "mmx" and prefix_data16 "1".
2725 (define_insn "sse2_cvtpd2pi"
2726 [(set (match_operand:V2SI 0 "register_operand" "=y")
2727 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2728 UNSPEC_FIX_NOTRUNC))]
2730 "cvtpd2pi\t{%1, %0|%0, %1}"
2731 [(set_attr "type" "ssecvt")
2732 (set_attr "unit" "mmx")
2733 (set_attr "prefix_data16" "1")
2734 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: fix:V2SI conversion of a V2DF source (register or
;; memory); emits cvttpd2pi.  The destination uses constraint "=y"; carries
;; unit "mmx" and prefix_data16 "1".
;; NOTE(review): the mode attribute is "TI" here but "DI" on sse2_cvtpd2pi
;; -- confirm which is intended.
2736 (define_insn "sse2_cvttpd2pi"
2737 [(set (match_operand:V2SI 0 "register_operand" "=y")
2738 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2740 "cvttpd2pi\t{%1, %0|%0, %1}"
2741 [(set_attr "type" "ssecvt")
2742 (set_attr "unit" "mmx")
2743 (set_attr "prefix_data16" "1")
2744 (set_attr "mode" "TI")])
;; *avx_cvtsi2sd: three-operand vcvtsi2sd with prefix attribute "vex".
;; Operand 2 (SI, register or memory) is converted with float:DF; operand 1
;; (V2DF register) is a separate source, not tied to the destination.
2746 (define_insn "*avx_cvtsi2sd"
2747 [(set (match_operand:V2DF 0 "register_operand" "=x")
2750 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2751 (match_operand:V2DF 1 "register_operand" "x")
2754 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2755 [(set_attr "type" "sseicvt")
2756 (set_attr "prefix" "vex")
2757 (set_attr "mode" "DF")])
;; sse2_cvtsi2sd: two-operand cvtsi2sd.  Operand 2 (SI) is converted with
;; float:DF; operand 1 is tied to the destination by the matching constraint
;; "0".  Register and memory sources are separate alternatives with
;; different athlon/amdfam10 decode attributes.
2759 (define_insn "sse2_cvtsi2sd"
2760 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2763 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2764 (match_operand:V2DF 1 "register_operand" "0,0")
2767 "cvtsi2sd\t{%2, %0|%0, %2}"
2768 [(set_attr "type" "sseicvt")
2769 (set_attr "mode" "DF")
2770 (set_attr "athlon_decode" "double,direct")
2771 (set_attr "amdfam10_decode" "vector,double")])
;; *avx_cvtsi2sdq: three-operand vcvtsi2sdq taking a DI source (operand 2,
;; register or memory) converted with float:DF.  Operand 1 is a separate
;; V2DF register source.  Enabled by TARGET_AVX && TARGET_64BIT; sets
;; length_vex to 4.
2773 (define_insn "*avx_cvtsi2sdq"
2774 [(set (match_operand:V2DF 0 "register_operand" "=x")
2777 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2778 (match_operand:V2DF 1 "register_operand" "x")
2780 "TARGET_AVX && TARGET_64BIT"
2781 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2782 [(set_attr "type" "sseicvt")
2783 (set_attr "length_vex" "4")
2784 (set_attr "prefix" "vex")
2785 (set_attr "mode" "DF")])
;; sse2_cvtsi2sdq: two-operand cvtsi2sdq taking a DI source (operand 2)
;; converted with float:DF; operand 1 is tied to the destination by the
;; matching constraint "0".  Enabled by TARGET_SSE2 && TARGET_64BIT; sets
;; prefix_rex to 1.
2787 (define_insn "sse2_cvtsi2sdq"
2788 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2791 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2792 (match_operand:V2DF 1 "register_operand" "0,0")
2794 "TARGET_SSE2 && TARGET_64BIT"
2795 "cvtsi2sdq\t{%2, %0|%0, %2}"
2796 [(set_attr "type" "sseicvt")
2797 (set_attr "prefix_rex" "1")
2798 (set_attr "mode" "DF")
2799 (set_attr "athlon_decode" "double,direct")
2800 (set_attr "amdfam10_decode" "vector,double")])
;; sse2_cvtsd2si: emits %vcvtsd2si.  Element 0 of the V2DF operand 1
;; (register or memory) is converted to the SI register operand 0 under
;; UNSPEC_FIX_NOTRUNC.  The prefix attribute is "maybe_vex".
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse2_cvtsd2si_2 -- confirm whether that is intended.
2802 (define_insn "sse2_cvtsd2si"
2803 [(set (match_operand:SI 0 "register_operand" "=r,r")
2806 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2807 (parallel [(const_int 0)]))]
2808 UNSPEC_FIX_NOTRUNC))]
2810 "%vcvtsd2si\t{%1, %0|%0, %1}"
2811 [(set_attr "type" "sseicvt")
2812 (set_attr "athlon_decode" "double,vector")
2813 (set_attr "prefix_rep" "1")
2814 (set_attr "prefix" "maybe_vex")
2815 (set_attr "mode" "SI")])
;; sse2_cvtsd2si_2: variant of the cvtsd2si pattern whose source is a scalar
;; DF operand (register or memory) instead of a vector element.  Produces an
;; SI register result under UNSPEC_FIX_NOTRUNC and emits %vcvtsd2si.
2817 (define_insn "sse2_cvtsd2si_2"
2818 [(set (match_operand:SI 0 "register_operand" "=r,r")
2819 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2820 UNSPEC_FIX_NOTRUNC))]
2822 "%vcvtsd2si\t{%1, %0|%0, %1}"
2823 [(set_attr "type" "sseicvt")
2824 (set_attr "athlon_decode" "double,vector")
2825 (set_attr "amdfam10_decode" "double,double")
2826 (set_attr "prefix_rep" "1")
2827 (set_attr "prefix" "maybe_vex")
2828 (set_attr "mode" "SI")])
;; sse2_cvtsd2siq: 64-bit form emitting %vcvtsd2siq.  Element 0 of the V2DF
;; operand 1 is converted to the DI register operand 0 under
;; UNSPEC_FIX_NOTRUNC.  Enabled by TARGET_SSE2 && TARGET_64BIT.
2830 (define_insn "sse2_cvtsd2siq"
2831 [(set (match_operand:DI 0 "register_operand" "=r,r")
2834 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2835 (parallel [(const_int 0)]))]
2836 UNSPEC_FIX_NOTRUNC))]
2837 "TARGET_SSE2 && TARGET_64BIT"
2838 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2839 [(set_attr "type" "sseicvt")
2840 (set_attr "athlon_decode" "double,vector")
2841 (set_attr "prefix_rep" "1")
2842 (set_attr "prefix" "maybe_vex")
2843 (set_attr "mode" "DI")])
;; sse2_cvtsd2siq_2: 64-bit variant whose source is a scalar DF operand
;; (register or memory).  Produces a DI register result under
;; UNSPEC_FIX_NOTRUNC.  Enabled by TARGET_SSE2 && TARGET_64BIT.
2845 (define_insn "sse2_cvtsd2siq_2"
2846 [(set (match_operand:DI 0 "register_operand" "=r,r")
2847 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2848 UNSPEC_FIX_NOTRUNC))]
2849 "TARGET_SSE2 && TARGET_64BIT"
2850 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2851 [(set_attr "type" "sseicvt")
2852 (set_attr "athlon_decode" "double,vector")
2853 (set_attr "amdfam10_decode" "double,double")
2854 (set_attr "prefix_rep" "1")
2855 (set_attr "prefix" "maybe_vex")
2856 (set_attr "mode" "DI")])
;; sse2_cvttsd2si: emits %vcvttsd2si.  The source is element 0 of the V2DF
;; operand 1 (register or memory); the result is the SI register operand 0.
;; The prefix attribute is "maybe_vex".
2858 (define_insn "sse2_cvttsd2si"
2859 [(set (match_operand:SI 0 "register_operand" "=r,r")
2862 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2863 (parallel [(const_int 0)]))))]
2865 "%vcvttsd2si\t{%1, %0|%0, %1}"
2866 [(set_attr "type" "sseicvt")
2867 (set_attr "prefix_rep" "1")
2868 (set_attr "prefix" "maybe_vex")
2869 (set_attr "mode" "SI")
2870 (set_attr "athlon_decode" "double,vector")
2871 (set_attr "amdfam10_decode" "double,double")])
;; sse2_cvttsd2siq: 64-bit form emitting %vcvttsd2siq.  The source is
;; element 0 of the V2DF operand 1; the result is the DI register operand 0.
;; Enabled by TARGET_SSE2 && TARGET_64BIT.
2873 (define_insn "sse2_cvttsd2siq"
2874 [(set (match_operand:DI 0 "register_operand" "=r,r")
2877 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2878 (parallel [(const_int 0)]))))]
2879 "TARGET_SSE2 && TARGET_64BIT"
2880 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2881 [(set_attr "type" "sseicvt")
2882 (set_attr "prefix_rep" "1")
2883 (set_attr "prefix" "maybe_vex")
2884 (set_attr "mode" "DI")
2885 (set_attr "athlon_decode" "double,vector")
2886 (set_attr "amdfam10_decode" "double,double")])
;; avx_cvtdq2pd256: float:V4DF conversion of a V4SI source (register or
;; memory); emits vcvtdq2pd with prefix attribute "vex".
2888 (define_insn "avx_cvtdq2pd256"
2889 [(set (match_operand:V4DF 0 "register_operand" "=x")
2890 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2892 "vcvtdq2pd\t{%1, %0|%0, %1}"
2893 [(set_attr "type" "ssecvt")
2894 (set_attr "prefix" "vex")
2895 (set_attr "mode" "V4DF")])
;; sse2_cvtdq2pd: produces a V2DF register from elements 0 and 1 of the V4SI
;; operand 1 (register or memory); emits %vcvtdq2pd with prefix attribute
;; "maybe_vex".
2897 (define_insn "sse2_cvtdq2pd"
2898 [(set (match_operand:V2DF 0 "register_operand" "=x")
2901 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2902 (parallel [(const_int 0) (const_int 1)]))))]
2904 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2905 [(set_attr "type" "ssecvt")
2906 (set_attr "prefix" "maybe_vex")
2907 (set_attr "mode" "V2DF")])
;; avx_cvtpd2dq256: V4DF -> V4SI conversion expressed as an
;; UNSPEC_FIX_NOTRUNC unspec; emits vcvtpd2dq{y} with prefix attribute "vex".
;; NOTE(review): the "{y}" suffix is presumably emitted only for the AT&T
;; dialect -- confirm against the output-template rules.
2909 (define_insn "avx_cvtpd2dq256"
2910 [(set (match_operand:V4SI 0 "register_operand" "=x")
2911 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2912 UNSPEC_FIX_NOTRUNC))]
2914 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2915 [(set_attr "type" "ssecvt")
2916 (set_attr "prefix" "vex")
2917 (set_attr "mode" "OI")])
;; sse2_cvtpd2dq: expander with a V4SI destination and a V2DF source wrapped
;; in a V2SI unspec.  The preparation statement sets operands[2] to the
;; V2SI zero constant (CONST0_RTX).
2919 (define_expand "sse2_cvtpd2dq"
2920 [(set (match_operand:V4SI 0 "register_operand" "")
2922 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2926 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvtpd2dq: matches a V2DF source wrapped in a V2SI unspec, paired
;; with a V2SI operand 2 that must satisfy const0_operand.  The output is a
;; C fragment: it returns the vcvtpd2dq{x} template when TARGET_AVX is set
;; and the cvtpd2dq template otherwise.
2928 (define_insn "*sse2_cvtpd2dq"
2929 [(set (match_operand:V4SI 0 "register_operand" "=x")
2931 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2933 (match_operand:V2SI 2 "const0_operand" "")))]
2935 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2936 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2937 [(set_attr "type" "ssecvt")
2938 (set_attr "prefix_rep" "1")
2939 (set_attr "prefix_data16" "0")
2940 (set_attr "prefix" "maybe_vex")
2941 (set_attr "mode" "TI")
2942 (set_attr "amdfam10_decode" "double")])
;; avx_cvttpd2dq256: fix:V4SI conversion of a V4DF source (register or
;; memory); emits vcvttpd2dq{y} with prefix attribute "vex".
2944 (define_insn "avx_cvttpd2dq256"
2945 [(set (match_operand:V4SI 0 "register_operand" "=x")
2946 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2948 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2949 [(set_attr "type" "ssecvt")
2950 (set_attr "prefix" "vex")
2951 (set_attr "mode" "OI")])
;; sse2_cvttpd2dq: expander with a V4SI destination and a fix:V2SI
;; conversion of the V2DF operand 1.  The preparation statement sets
;; operands[2] to the V2SI zero constant (CONST0_RTX).
2953 (define_expand "sse2_cvttpd2dq"
2954 [(set (match_operand:V4SI 0 "register_operand" "")
2956 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2959 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvttpd2dq: matches a fix:V2SI conversion of the V2DF operand 1,
;; paired with a V2SI operand 2 that must satisfy const0_operand.  The
;; output is a C fragment: it returns the vcvttpd2dq{x} template when
;; TARGET_AVX is set and the cvttpd2dq template otherwise.
2961 (define_insn "*sse2_cvttpd2dq"
2962 [(set (match_operand:V4SI 0 "register_operand" "=x")
2964 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2965 (match_operand:V2SI 2 "const0_operand" "")))]
2967 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2968 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2969 [(set_attr "type" "ssecvt")
2970 (set_attr "prefix" "maybe_vex")
2971 (set_attr "mode" "TI")
2972 (set_attr "amdfam10_decode" "double")])
;; *avx_cvtsd2ss: three-operand vcvtsd2ss with prefix attribute "vex".
;; Operand 2 (V2DF, register or memory) goes through float_truncate:V2SF;
;; operand 1 (V4SF register) is a separate source, not tied to the
;; destination.
2974 (define_insn "*avx_cvtsd2ss"
2975 [(set (match_operand:V4SF 0 "register_operand" "=x")
2978 (float_truncate:V2SF
2979 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2980 (match_operand:V4SF 1 "register_operand" "x")
2983 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2984 [(set_attr "type" "ssecvt")
2985 (set_attr "prefix" "vex")
2986 (set_attr "mode" "SF")])
;; sse2_cvtsd2ss: two-operand cvtsd2ss.  Operand 2 (V2DF) goes through
;; float_truncate:V2SF; operand 1 is tied to the destination by the matching
;; constraint "0".  Register and memory sources are separate alternatives
;; with different athlon/amdfam10 decode attributes.
2988 (define_insn "sse2_cvtsd2ss"
2989 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2992 (float_truncate:V2SF
2993 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2994 (match_operand:V4SF 1 "register_operand" "0,0")
2997 "cvtsd2ss\t{%2, %0|%0, %2}"
2998 [(set_attr "type" "ssecvt")
2999 (set_attr "athlon_decode" "vector,double")
3000 (set_attr "amdfam10_decode" "vector,double")
3001 (set_attr "mode" "SF")])
;; *avx_cvtss2sd: three-operand vcvtss2sd with prefix attribute "vex".
;; Elements 0 and 1 of the V4SF operand 2 (register or memory) feed the
;; conversion; operand 1 (V2DF register) is a separate source, not tied to
;; the destination.
3003 (define_insn "*avx_cvtss2sd"
3004 [(set (match_operand:V2DF 0 "register_operand" "=x")
3008 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3009 (parallel [(const_int 0) (const_int 1)])))
3010 (match_operand:V2DF 1 "register_operand" "x")
3013 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3014 [(set_attr "type" "ssecvt")
3015 (set_attr "prefix" "vex")
3016 (set_attr "mode" "DF")])
;; sse2_cvtss2sd: two-operand cvtss2sd.  Elements 0 and 1 of the V4SF
;; operand 2 feed the conversion; operand 1 is tied to the destination by
;; the matching constraint "0".  Register and memory sources are separate
;; alternatives with different amdfam10_decode values.
3018 (define_insn "sse2_cvtss2sd"
3019 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3023 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3024 (parallel [(const_int 0) (const_int 1)])))
3025 (match_operand:V2DF 1 "register_operand" "0,0")
3028 "cvtss2sd\t{%2, %0|%0, %2}"
3029 [(set_attr "type" "ssecvt")
3030 (set_attr "amdfam10_decode" "vector,double")
3031 (set_attr "mode" "DF")])
;; avx_cvtpd2ps256: float_truncate:V4SF of a V4DF source (register or
;; memory); emits vcvtpd2ps{y} with prefix attribute "vex".
3033 (define_insn "avx_cvtpd2ps256"
3034 [(set (match_operand:V4SF 0 "register_operand" "=x")
3035 (float_truncate:V4SF
3036 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3038 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3039 [(set_attr "type" "ssecvt")
3040 (set_attr "prefix" "vex")
3041 (set_attr "mode" "V4SF")])
;; sse2_cvtpd2ps: expander with a V4SF destination and a
;; float_truncate:V2SF of the V2DF operand 1.  The preparation statement
;; sets operands[2] to the V2SF zero constant (CONST0_RTX).
3043 (define_expand "sse2_cvtpd2ps"
3044 [(set (match_operand:V4SF 0 "register_operand" "")
3046 (float_truncate:V2SF
3047 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3050 "operands[2] = CONST0_RTX (V2SFmode);")
;; *sse2_cvtpd2ps: matches a float_truncate:V2SF of the V2DF operand 1,
;; paired with a V2SF operand 2 that must satisfy const0_operand.  The
;; output is a C fragment: it returns the vcvtpd2ps{x} template when
;; TARGET_AVX is set and the cvtpd2ps template otherwise.
3052 (define_insn "*sse2_cvtpd2ps"
3053 [(set (match_operand:V4SF 0 "register_operand" "=x")
3055 (float_truncate:V2SF
3056 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3057 (match_operand:V2SF 2 "const0_operand" "")))]
3059 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3060 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3061 [(set_attr "type" "ssecvt")
3062 (set_attr "prefix_data16" "1")
3063 (set_attr "prefix" "maybe_vex")
3064 (set_attr "mode" "V4SF")
3065 (set_attr "amdfam10_decode" "double")])
;; 256-bit packed single -> double conversion: a V4SF source (register or
;; memory) is converted into a V4DF register with vcvtps2pd.
3067 (define_insn "avx_cvtps2pd256"
3068 [(set (match_operand:V4DF 0 "register_operand" "=x")
3070 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3072 "vcvtps2pd\t{%1, %0|%0, %1}"
3073 [(set_attr "type" "ssecvt")
3074 (set_attr "prefix" "vex")
3075 (set_attr "mode" "V4DF")])
;; Packed single -> double conversion of elements 0 and 1 of V4SF operand 1
;; into a V2DF register.  The "%v" in the template and the "maybe_vex"
;; prefix attribute let one pattern serve both legacy and VEX encodings.
3077 (define_insn "sse2_cvtps2pd"
3078 [(set (match_operand:V2DF 0 "register_operand" "=x")
3081 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3082 (parallel [(const_int 0) (const_int 1)]))))]
3084 "%vcvtps2pd\t{%1, %0|%0, %1}"
3085 [(set_attr "type" "ssecvt")
3086 (set_attr "prefix" "maybe_vex")
3087 (set_attr "mode" "V2DF")
3088 (set_attr "prefix_data16" "0")
3089 (set_attr "amdfam10_decode" "direct")])
;; Widen V4SF operand 1 into V2DF operand 0 (the "hi" variant of the
;; unpack).  A fresh V4SF pseudo is allocated as operands[2]; the final
;; set converts elements 0 and 1 of its source.
3091 (define_expand "vec_unpacks_hi_v4sf"
3096 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3097 (parallel [(const_int 6)
3101 (set (match_operand:V2DF 0 "register_operand" "")
3105 (parallel [(const_int 0) (const_int 1)]))))]
3108 operands[2] = gen_reg_rtx (V4SFmode);
;; Widen V4SF operand 1 into V2DF operand 0 (the "lo" variant of the
;; unpack): elements 0 and 1 of operand 1 are selected directly.
3111 (define_expand "vec_unpacks_lo_v4sf"
3112 [(set (match_operand:V2DF 0 "register_operand" "")
3115 (match_operand:V4SF 1 "nonimmediate_operand" "")
3116 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF: unpack operand 1 into a V4SI temporary with
;; vec_unpacks_hi_v8hi, then convert that temporary with cvtdq2ps.
3119 (define_expand "vec_unpacks_float_hi_v8hi"
3120 [(match_operand:V4SF 0 "register_operand" "")
3121 (match_operand:V8HI 1 "register_operand" "")]
3124 rtx tmp = gen_reg_rtx (V4SImode);
3126 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3127 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack operand 1 into a V4SI temporary with
;; vec_unpacks_lo_v8hi, then convert that temporary with cvtdq2ps.
3131 (define_expand "vec_unpacks_float_lo_v8hi"
3132 [(match_operand:V4SF 0 "register_operand" "")
3133 (match_operand:V8HI 1 "register_operand" "")]
3136 rtx tmp = gen_reg_rtx (V4SImode);
3138 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3139 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack operand 1 into a V4SI temporary with
;; vec_unpacku_hi_v8hi, then convert that temporary with cvtdq2ps.
3143 (define_expand "vec_unpacku_float_hi_v8hi"
3144 [(match_operand:V4SF 0 "register_operand" "")
3145 (match_operand:V8HI 1 "register_operand" "")]
3148 rtx tmp = gen_reg_rtx (V4SImode);
3150 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3151 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack operand 1 into a V4SI temporary with
;; vec_unpacku_lo_v8hi, then convert that temporary with cvtdq2ps.
3155 (define_expand "vec_unpacku_float_lo_v8hi"
3156 [(match_operand:V4SF 0 "register_operand" "")
3157 (match_operand:V8HI 1 "register_operand" "")]
3160 rtx tmp = gen_reg_rtx (V4SImode);
3162 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3163 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V4SI -> V2DF, "hi" variant.  operands[2] is a fresh V4SI pseudo used as
;; a temporary; the final set converts elements 0 and 1 of its source.
3167 (define_expand "vec_unpacks_float_hi_v4si"
3170 (match_operand:V4SI 1 "nonimmediate_operand" "")
3171 (parallel [(const_int 2)
3175 (set (match_operand:V2DF 0 "register_operand" "")
3179 (parallel [(const_int 0) (const_int 1)]))))]
3181 "operands[2] = gen_reg_rtx (V4SImode);")
;; V4SI -> V2DF, "lo" variant: elements 0 and 1 of operand 1 are selected
;; directly as the source of the conversion.
3183 (define_expand "vec_unpacks_float_lo_v4si"
3184 [(set (match_operand:V2DF 0 "register_operand" "")
3187 (match_operand:V4SI 1 "nonimmediate_operand" "")
3188 (parallel [(const_int 0) (const_int 1)]))))]
;; V4SI -> V2DF conversion, unsigned "hi" variant.  Visible steps: the
;; converted value (operand 6) is compared with "lt" against a zero vector
;; (operand 3), the result (operand 7) is ANDed with a vector built from
;; 2^32 (operand 4), and operand 8 is added to operand 6 to form the result.
;; operands[5] is a V4SI temporary and operands[6..8] are V2DF temporaries.
;; NOTE(review): presumably operand 8 is the AND result, i.e. 2^32 is added
;; only where the signed conversion came out negative -- confirm.
3191 (define_expand "vec_unpacku_float_hi_v4si"
3194 (match_operand:V4SI 1 "nonimmediate_operand" "")
3195 (parallel [(const_int 2)
3203 (parallel [(const_int 0) (const_int 1)]))))
3205 (lt:V2DF (match_dup 6) (match_dup 3)))
3207 (and:V2DF (match_dup 7) (match_dup 4)))
3208 (set (match_operand:V2DF 0 "register_operand" "")
3209 (plus:V2DF (match_dup 6) (match_dup 8)))]
3212 REAL_VALUE_TYPE TWO32r;
3216 real_ldexp (&TWO32r, &dconst1, 32);
3217 x = const_double_from_real_value (TWO32r, DFmode);
3219 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3220 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3222 operands[5] = gen_reg_rtx (V4SImode);
3224 for (i = 6; i < 9; i++)
3225 operands[i] = gen_reg_rtx (V2DFmode);
;; V4SI -> V2DF conversion, unsigned "lo" variant.  Visible steps: the
;; converted value (operand 5) is compared with "lt" against a zero vector
;; (operand 3), the result (operand 6) is ANDed with a vector built from
;; 2^32 (operand 4), and operand 7 is added to operand 5 to form the result.
;; operands[5..7] are V2DF temporaries.
;; NOTE(review): presumably operand 7 is the AND result, i.e. 2^32 is added
;; only where the signed conversion came out negative -- confirm.
3228 (define_expand "vec_unpacku_float_lo_v4si"
3232 (match_operand:V4SI 1 "nonimmediate_operand" "")
3233 (parallel [(const_int 0) (const_int 1)]))))
3235 (lt:V2DF (match_dup 5) (match_dup 3)))
3237 (and:V2DF (match_dup 6) (match_dup 4)))
3238 (set (match_operand:V2DF 0 "register_operand" "")
3239 (plus:V2DF (match_dup 5) (match_dup 7)))]
3242 REAL_VALUE_TYPE TWO32r;
3246 real_ldexp (&TWO32r, &dconst1, 32);
3247 x = const_double_from_real_value (TWO32r, DFmode);
3249 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3250 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3252 for (i = 5; i < 8; i++)
3253 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V2DF operands into one V4SF result: each input is converted
;; with cvtpd2ps into its own V4SF temporary (r1, r2), and movlhps then
;; combines the two temporaries into operand 0.
3256 (define_expand "vec_pack_trunc_v2df"
3257 [(match_operand:V4SF 0 "register_operand" "")
3258 (match_operand:V2DF 1 "nonimmediate_operand" "")
3259 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3264 r1 = gen_reg_rtx (V4SFmode);
3265 r2 = gen_reg_rtx (V4SFmode);
3267 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3268 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3269 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI result using cvttpd2dq.  Each input
;; is converted into its own V4SI temporary; the temporaries and the
;; destination are then viewed as V2DI and combined with
;; vec_interleave_lowv2di.
3273 (define_expand "vec_pack_sfix_trunc_v2df"
3274 [(match_operand:V4SI 0 "register_operand" "")
3275 (match_operand:V2DF 1 "nonimmediate_operand" "")
3276 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3281 r1 = gen_reg_rtx (V4SImode);
3282 r2 = gen_reg_rtx (V4SImode);
3284 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3285 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3286 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3287 gen_lowpart (V2DImode, r1),
3288 gen_lowpart (V2DImode, r2)));
;; Pack two V2DF operands into one V4SI result using cvtpd2dq.  Each input
;; is converted into its own V4SI temporary; the temporaries and the
;; destination are then viewed as V2DI and combined with
;; vec_interleave_lowv2di.
3292 (define_expand "vec_pack_sfix_v2df"
3293 [(match_operand:V4SI 0 "register_operand" "")
3294 (match_operand:V2DF 1 "nonimmediate_operand" "")
3295 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3300 r1 = gen_reg_rtx (V4SImode);
3301 r2 = gen_reg_rtx (V4SImode);
3303 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3304 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3305 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3306 gen_lowpart (V2DImode, r1),
3307 gen_lowpart (V2DImode, r2)));
3311 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3313 ;; Parallel single-precision floating point element swizzling
3315 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the movhlps selection on two V4SF operands.  All operands
;; may be register or memory here; ix86_fixup_binary_operands is called to
;; adjust them before the insn is generated.
3317 (define_expand "sse_movhlps_exp"
3318 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3321 (match_operand:V4SF 1 "nonimmediate_operand" "")
3322 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3323 (parallel [(const_int 6)
3328 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps pattern with three alternatives: register/register uses
;; vmovhlps, an offsettable memory operand 2 uses vmovlps on its %H half,
;; and a memory destination (operand 1 tied to it) uses vmovhps as a store.
;; The condition rejects the case where operands 1 and 2 are both memory.
3330 (define_insn "*avx_movhlps"
3331 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3334 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3335 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3336 (parallel [(const_int 6)
3340 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3342 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3343 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3344 vmovhps\t{%2, %0|%0, %2}"
3345 [(set_attr "type" "ssemov")
3346 (set_attr "prefix" "vex")
3347 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movhlps pattern; operand 1 is tied to the destination in
;; every alternative.  Register/register uses movhlps, an offsettable
;; memory operand 2 uses movlps on its %H half, and a memory destination
;; uses movhps as a store.  Operands 1 and 2 may not both be memory.
3349 (define_insn "sse_movhlps"
3350 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3353 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3354 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3355 (parallel [(const_int 6)
3359 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3361 movhlps\t{%2, %0|%0, %2}
3362 movlps\t{%H2, %0|%0, %H2}
3363 movhps\t{%2, %0|%0, %2}"
3364 [(set_attr "type" "ssemov")
3365 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander for the movlhps selection on two V4SF operands.  All operands
;; may be register or memory here; ix86_fixup_binary_operands is called to
;; adjust them before the insn is generated.
3367 (define_expand "sse_movlhps_exp"
3368 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3371 (match_operand:V4SF 1 "nonimmediate_operand" "")
3372 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3373 (parallel [(const_int 0)
3378 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps pattern with three alternatives: register/register uses
;; vmovlhps, a memory operand 2 uses vmovhps as a load, and an offsettable
;; memory destination (operand 1 tied to it) uses vmovlps to store into
;; the %H half of operand 0.  Operand legality is checked with
;; ix86_binary_operator_ok.
3380 (define_insn "*avx_movlhps"
3381 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3384 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3385 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3386 (parallel [(const_int 0)
3390 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3392 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3393 vmovhps\t{%2, %1, %0|%0, %1, %2}
3394 vmovlps\t{%2, %H0|%H0, %2}"
3395 [(set_attr "type" "ssemov")
3396 (set_attr "prefix" "vex")
3397 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movlhps pattern; operand 1 is tied to the destination in
;; every alternative.  Register/register uses movlhps, a memory operand 2
;; uses movhps as a load, and an offsettable memory destination uses
;; movlps to store into the %H half of operand 0.
3399 (define_insn "sse_movlhps"
3400 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3403 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3404 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3405 (parallel [(const_int 0)
3409 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3411 movlhps\t{%2, %0|%0, %2}
3412 movhps\t{%2, %0|%0, %2}
3413 movlps\t{%2, %H0|%H0, %2}"
3414 [(set_attr "type" "ssemov")
3415 (set_attr "mode" "V4SF,V2SF,V2SF")])
3417 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps.  The selector list indexes the pair of V8SF operands
;; (operand 1 = indices 0-7, operand 2 = 8-15) and takes elements 2,3 and
;; 6,7 of each, interleaved: {2,10,3,11} then {6,14,7,15}.
3418 (define_insn "avx_unpckhps256"
3419 [(set (match_operand:V8SF 0 "register_operand" "=x")
3422 (match_operand:V8SF 1 "register_operand" "x")
3423 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3424 (parallel [(const_int 2) (const_int 10)
3425 (const_int 3) (const_int 11)
3426 (const_int 6) (const_int 14)
3427 (const_int 7) (const_int 15)])))]
3429 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3430 [(set_attr "type" "sselog")
3431 (set_attr "prefix" "vex")
3432 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpckhps: interleaves elements 2 and 3 of operand 1 with
;; elements 2 and 3 of operand 2 (selectors 2,6,3,7).  Three-operand form,
;; so operand 1 is not tied to the destination.
3434 (define_insn "*avx_interleave_highv4sf"
3435 [(set (match_operand:V4SF 0 "register_operand" "=x")
3438 (match_operand:V4SF 1 "register_operand" "x")
3439 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3440 (parallel [(const_int 2) (const_int 6)
3441 (const_int 3) (const_int 7)])))]
3443 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3444 [(set_attr "type" "sselog")
3445 (set_attr "prefix" "vex")
3446 (set_attr "mode" "V4SF")])
;; Legacy SSE unpckhps: interleaves elements 2 and 3 of operand 1 with
;; elements 2 and 3 of operand 2 (selectors 2,6,3,7).  Operand 1 is tied
;; to the destination.
3448 (define_insn "vec_interleave_highv4sf"
3449 [(set (match_operand:V4SF 0 "register_operand" "=x")
3452 (match_operand:V4SF 1 "register_operand" "0")
3453 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3454 (parallel [(const_int 2) (const_int 6)
3455 (const_int 3) (const_int 7)])))]
3457 "unpckhps\t{%2, %0|%0, %2}"
3458 [(set_attr "type" "sselog")
3459 (set_attr "mode" "V4SF")])
3461 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  The selector list indexes the pair of V8SF operands
;; (operand 1 = indices 0-7, operand 2 = 8-15) and takes elements 0,1 and
;; 4,5 of each, interleaved: {0,8,1,9} then {4,12,5,13}.
3462 (define_insn "avx_unpcklps256"
3463 [(set (match_operand:V8SF 0 "register_operand" "=x")
3466 (match_operand:V8SF 1 "register_operand" "x")
3467 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3468 (parallel [(const_int 0) (const_int 8)
3469 (const_int 1) (const_int 9)
3470 (const_int 4) (const_int 12)
3471 (const_int 5) (const_int 13)])))]
3473 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3474 [(set_attr "type" "sselog")
3475 (set_attr "prefix" "vex")
3476 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpcklps: interleaves elements 0 and 1 of operand 1 with
;; elements 0 and 1 of operand 2 (selectors 0,4,1,5).  Three-operand form,
;; so operand 1 is not tied to the destination.
3478 (define_insn "*avx_interleave_lowv4sf"
3479 [(set (match_operand:V4SF 0 "register_operand" "=x")
3482 (match_operand:V4SF 1 "register_operand" "x")
3483 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3484 (parallel [(const_int 0) (const_int 4)
3485 (const_int 1) (const_int 5)])))]
3487 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3488 [(set_attr "type" "sselog")
3489 (set_attr "prefix" "vex")
3490 (set_attr "mode" "V4SF")])
;; Legacy SSE unpcklps: interleaves elements 0 and 1 of operand 1 with
;; elements 0 and 1 of operand 2 (selectors 0,4,1,5).  Operand 1 is tied
;; to the destination.
3492 (define_insn "vec_interleave_lowv4sf"
3493 [(set (match_operand:V4SF 0 "register_operand" "=x")
3496 (match_operand:V4SF 1 "register_operand" "0")
3497 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3498 (parallel [(const_int 0) (const_int 4)
3499 (const_int 1) (const_int 5)])))]
3501 "unpcklps\t{%2, %0|%0, %2}"
3502 [(set_attr "type" "sselog")
3503 (set_attr "mode" "V4SF")])
3505 ;; These are modeled with the same vec_concat as the others so that we
3506 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: every odd-indexed element of V8SF operand 1 is
;; duplicated into the adjacent even slot (selectors 1,1,3,3,5,5,7,7).
3507 (define_insn "avx_movshdup256"
3508 [(set (match_operand:V8SF 0 "register_operand" "=x")
3511 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3513 (parallel [(const_int 1) (const_int 1)
3514 (const_int 3) (const_int 3)
3515 (const_int 5) (const_int 5)
3516 (const_int 7) (const_int 7)])))]
3518 "vmovshdup\t{%1, %0|%0, %1}"
3519 [(set_attr "type" "sse")
3520 (set_attr "prefix" "vex")
3521 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on a V4SF source (register or memory).  The "%v" in
;; the template and the "maybe_vex" prefix attribute let one pattern serve
;; both the legacy and the VEX encoding.
3523 (define_insn "sse3_movshdup"
3524 [(set (match_operand:V4SF 0 "register_operand" "=x")
3527 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3529 (parallel [(const_int 1)
3534 "%vmovshdup\t{%1, %0|%0, %1}"
3535 [(set_attr "type" "sse")
3536 (set_attr "prefix_rep" "1")
3537 (set_attr "prefix" "maybe_vex")
3538 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: every even-indexed element of V8SF operand 1 is
;; duplicated into the adjacent odd slot (selectors 0,0,2,2,4,4,6,6).
3540 (define_insn "avx_movsldup256"
3541 [(set (match_operand:V8SF 0 "register_operand" "=x")
3544 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3546 (parallel [(const_int 0) (const_int 0)
3547 (const_int 2) (const_int 2)
3548 (const_int 4) (const_int 4)
3549 (const_int 6) (const_int 6)])))]
3551 "vmovsldup\t{%1, %0|%0, %1}"
3552 [(set_attr "type" "sse")
3553 (set_attr "prefix" "vex")
3554 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on a V4SF source (register or memory).  The "%v" in
;; the template and the "maybe_vex" prefix attribute let one pattern serve
;; both the legacy and the VEX encoding.
3556 (define_insn "sse3_movsldup"
3557 [(set (match_operand:V4SF 0 "register_operand" "=x")
3560 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3562 (parallel [(const_int 0)
3567 "%vmovsldup\t{%1, %0|%0, %1}"
3568 [(set_attr "type" "sse")
3569 (set_attr "prefix_rep" "1")
3570 (set_attr "prefix" "maybe_vex")
3571 (set_attr "mode" "V4SF")])
;; Expand the 8-bit shuffle immediate (operand 3) into the eight explicit
;; element selectors expected by avx_shufps256_1.  Each 2-bit field of the
;; mask is used twice: once as-is for the first four result elements and
;; once offset by 4 for the last four.  Fields 2 and 3 are additionally
;; offset by 8 so that they index into operand 2.
3573 (define_expand "avx_shufps256"
3574 [(match_operand:V8SF 0 "register_operand" "")
3575 (match_operand:V8SF 1 "register_operand" "")
3576 (match_operand:V8SF 2 "nonimmediate_operand" "")
3577 (match_operand:SI 3 "const_int_operand" "")]
3580 int mask = INTVAL (operands[3]);
3581 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3582 GEN_INT ((mask >> 0) & 3),
3583 GEN_INT ((mask >> 2) & 3),
3584 GEN_INT (((mask >> 4) & 3) + 8),
3585 GEN_INT (((mask >> 6) & 3) + 8),
3586 GEN_INT (((mask >> 0) & 3) + 4),
3587 GEN_INT (((mask >> 2) & 3) + 4),
3588 GEN_INT (((mask >> 4) & 3) + 12),
3589 GEN_INT (((mask >> 6) & 3) + 12)));
3593 ;; Each 2-bit field in the mask selects 2 elements (one per 128-bit lane).
;; 256-bit vshufps with the shuffle spelled out as eight element selectors
;; (operands 3-10).  The insn condition requires selectors 7-10 to equal
;; selectors 3-6 plus 4, i.e. both halves use the same pattern.  The output
;; code rebuilds the 8-bit immediate from operands 3-6 (removing the +8
;; bias of operands 5 and 6) and stores it back in operands[3].
3594 (define_insn "avx_shufps256_1"
3595 [(set (match_operand:V8SF 0 "register_operand" "=x")
3598 (match_operand:V8SF 1 "register_operand" "x")
3599 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3600 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3601 (match_operand 4 "const_0_to_3_operand" "")
3602 (match_operand 5 "const_8_to_11_operand" "")
3603 (match_operand 6 "const_8_to_11_operand" "")
3604 (match_operand 7 "const_4_to_7_operand" "")
3605 (match_operand 8 "const_4_to_7_operand" "")
3606 (match_operand 9 "const_12_to_15_operand" "")
3607 (match_operand 10 "const_12_to_15_operand" "")])))]
3609 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3610 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3611 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3612 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3615 mask = INTVAL (operands[3]);
3616 mask |= INTVAL (operands[4]) << 2;
3617 mask |= (INTVAL (operands[5]) - 8) << 4;
3618 mask |= (INTVAL (operands[6]) - 8) << 6;
3619 operands[3] = GEN_INT (mask);
3621 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3623 [(set_attr "type" "sselog")
3624 (set_attr "length_immediate" "1")
3625 (set_attr "prefix" "vex")
3626 (set_attr "mode" "V8SF")])
;; Expand the 8-bit shuffle immediate (operand 3) into four explicit
;; element selectors for sse_shufps_v4sf.  Fields 0 and 1 are passed as-is;
;; fields 2 and 3 are offset by 4 so that they index into operand 2.
3628 (define_expand "sse_shufps"
3629 [(match_operand:V4SF 0 "register_operand" "")
3630 (match_operand:V4SF 1 "register_operand" "")
3631 (match_operand:V4SF 2 "nonimmediate_operand" "")
3632 (match_operand:SI 3 "const_int_operand" "")]
3635 int mask = INTVAL (operands[3]);
3636 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3637 GEN_INT ((mask >> 0) & 3),
3638 GEN_INT ((mask >> 2) & 3),
3639 GEN_INT (((mask >> 4) & 3) + 4),
3640 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX three-operand vshufps over the SSEMODE4S modes.  Operands 3-6 are
;; element selectors into the concatenation of operands 1 and 2; the output
;; code packs them back into the 8-bit immediate (removing the +4 bias of
;; operands 5 and 6) and stores it in operands[3].
3644 (define_insn "*avx_shufps_<mode>"
3645 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3646 (vec_select:SSEMODE4S
3647 (vec_concat:<ssedoublesizemode>
3648 (match_operand:SSEMODE4S 1 "register_operand" "x")
3649 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3650 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3651 (match_operand 4 "const_0_to_3_operand" "")
3652 (match_operand 5 "const_4_to_7_operand" "")
3653 (match_operand 6 "const_4_to_7_operand" "")])))]
3657 mask |= INTVAL (operands[3]) << 0;
3658 mask |= INTVAL (operands[4]) << 2;
3659 mask |= (INTVAL (operands[5]) - 4) << 4;
3660 mask |= (INTVAL (operands[6]) - 4) << 6;
3661 operands[3] = GEN_INT (mask);
3663 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3665 [(set_attr "type" "sselog")
3666 (set_attr "length_immediate" "1")
3667 (set_attr "prefix" "vex")
3668 (set_attr "mode" "V4SF")])
;; Legacy two-operand shufps over the SSEMODE4S modes; operand 1 is tied to
;; the destination.  Operands 3-6 are element selectors into the
;; concatenation of operands 1 and 2; the output code packs them back into
;; the 8-bit immediate (removing the +4 bias of operands 5 and 6).
3670 (define_insn "sse_shufps_<mode>"
3671 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3672 (vec_select:SSEMODE4S
3673 (vec_concat:<ssedoublesizemode>
3674 (match_operand:SSEMODE4S 1 "register_operand" "0")
3675 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3676 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3677 (match_operand 4 "const_0_to_3_operand" "")
3678 (match_operand 5 "const_4_to_7_operand" "")
3679 (match_operand 6 "const_4_to_7_operand" "")])))]
3683 mask |= INTVAL (operands[3]) << 0;
3684 mask |= INTVAL (operands[4]) << 2;
3685 mask |= (INTVAL (operands[5]) - 4) << 4;
3686 mask |= (INTVAL (operands[6]) - 4) << 6;
3687 operands[3] = GEN_INT (mask);
3689 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3691 [(set_attr "type" "sselog")
3692 (set_attr "length_immediate" "1")
3693 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store to memory with movhps, register-to-register with
;; movhlps, or load the %H half of an offsettable memory source with
;; movlps.  "%v" selects the VEX mnemonic when applicable.
3695 (define_insn "sse_storehps"
3696 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3698 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3699 (parallel [(const_int 2) (const_int 3)])))]
3702 %vmovhps\t{%1, %0|%0, %1}
3703 %vmovhlps\t{%1, %d0|%d0, %1}
3704 %vmovlps\t{%H1, %d0|%d0, %H1}"
3705 [(set_attr "type" "ssemov")
3706 (set_attr "prefix" "maybe_vex")
3707 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for loadhps: elements 0 and 1 of V4SF operand 1 are combined
;; with V2SF operand 2.  ix86_fixup_binary_operands is called to adjust the
;; operands before the insn is generated.
3709 (define_expand "sse_loadhps_exp"
3710 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3713 (match_operand:V4SF 1 "nonimmediate_operand" "")
3714 (parallel [(const_int 0) (const_int 1)]))
3715 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3717 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps: keeps elements 0 and 1 of operand 1 and takes the rest from
;; V2SF operand 2.  Alternatives: memory operand 2 via vmovhps, register
;; operand 2 via vmovlhps, and an offsettable memory destination (operand 1
;; tied to it) via a vmovlps store into the %H half of operand 0.
3719 (define_insn "*avx_loadhps"
3720 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3723 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3724 (parallel [(const_int 0) (const_int 1)]))
3725 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3728 vmovhps\t{%2, %1, %0|%0, %1, %2}
3729 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3730 vmovlps\t{%2, %H0|%H0, %2}"
3731 [(set_attr "type" "ssemov")
3732 (set_attr "prefix" "vex")
3733 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE loadhps; operand 1 is tied to the destination in every
;; alternative.  Memory operand 2 uses movhps, register operand 2 uses
;; movlhps, and an offsettable memory destination uses a movlps store into
;; the %H half of operand 0.
3735 (define_insn "sse_loadhps"
3736 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3739 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3740 (parallel [(const_int 0) (const_int 1)]))
3741 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3744 movhps\t{%2, %0|%0, %2}
3745 movlhps\t{%2, %0|%0, %2}
3746 movlps\t{%2, %H0|%H0, %2}"
3747 [(set_attr "type" "ssemov")
3748 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX storelps: extracts elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.  Alternatives: store to memory with vmovlps, register copy
;; with vmovaps, or load from memory with three-operand vmovlps (using the
;; destination as its own first source).
;; NOTE(review): the "mode" attribute for the vmovaps alternative is
;; "V2DF" here, whereas sse_storelps uses "V4SF" for movaps -- confirm
;; whether this difference is intentional.
3750 (define_insn "*avx_storelps"
3751 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3753 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3754 (parallel [(const_int 0) (const_int 1)])))]
3757 vmovlps\t{%1, %0|%0, %1}
3758 vmovaps\t{%1, %0|%0, %1}
3759 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3760 [(set_attr "type" "ssemov")
3761 (set_attr "prefix" "vex")
3762 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Legacy SSE storelps: extracts elements 0 and 1 of V4SF operand 1 into
;; V2SF operand 0.  Alternatives: store to memory with movlps, register
;; copy with movaps, or load from memory with movlps.
3764 (define_insn "sse_storelps"
3765 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3767 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3768 (parallel [(const_int 0) (const_int 1)])))]
3771 movlps\t{%1, %0|%0, %1}
3772 movaps\t{%1, %0|%0, %1}
3773 movlps\t{%1, %0|%0, %1}"
3774 [(set_attr "type" "ssemov")
3775 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for loadlps: V2SF operand 2 is combined with elements 2 and 3
;; of V4SF operand 1.  ix86_fixup_binary_operands is called to adjust the
;; operands before the insn is generated.
3777 (define_expand "sse_loadlps_exp"
3778 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3780 (match_operand:V2SF 2 "nonimmediate_operand" "")
3782 (match_operand:V4SF 1 "nonimmediate_operand" "")
3783 (parallel [(const_int 2) (const_int 3)]))))]
3785 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps: V2SF operand 2 supplies the low two elements and elements
;; 2 and 3 of V4SF operand 1 supply the high two.  Alternatives:
;;   0: all registers  -> vshufps with immediate 0xe4 (low half from
;;      operand 2, high half from operand 1);
;;   1: memory operand 2 -> three-operand vmovlps load;
;;   2: memory destination (operand 1 tied to it) -> vmovlps store.
;; Every alternative is VEX-encoded ("prefix" is "vex"), so alternative 0
;; must use the "v"-prefixed mnemonic: the legacy shufps mnemonic does not
;; accept the three-register operand list written here.
3787 (define_insn "*avx_loadlps"
3788 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3790 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3792 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3793 (parallel [(const_int 2) (const_int 3)]))))]
3796 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3797 vmovlps\t{%2, %1, %0|%0, %1, %2}
3798 vmovlps\t{%2, %0|%0, %2}"
3799 [(set_attr "type" "sselog,ssemov,ssemov")
3800 (set_attr "length_immediate" "1,*,*")
3801 (set_attr "prefix" "vex")
3802 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE loadlps.  Alternative 0 ties operand 2 to the destination and
;; uses shufps with immediate 0xe4 against register operand 1; alternative
;; 1 ties operand 1 and loads memory operand 2 with movlps; alternative 2
;; stores register operand 2 into a memory destination with movlps.
3804 (define_insn "sse_loadlps"
3805 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3807 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3809 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3810 (parallel [(const_int 2) (const_int 3)]))))]
3813 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3814 movlps\t{%2, %0|%0, %2}
3815 movlps\t{%2, %0|%0, %2}"
3816 [(set_attr "type" "sselog,ssemov,ssemov")
3817 (set_attr "length_immediate" "1,*,*")
3818 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX three-operand vmovss between V4SF registers: operands 1 and 2 are
;; both register sources and neither is tied to the destination.
3820 (define_insn "*avx_movss"
3821 [(set (match_operand:V4SF 0 "register_operand" "=x")
3823 (match_operand:V4SF 2 "register_operand" "x")
3824 (match_operand:V4SF 1 "register_operand" "x")
3827 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3828 [(set_attr "type" "ssemov")
3829 (set_attr "prefix" "vex")
3830 (set_attr "mode" "SF")])
;; Legacy two-operand movss between V4SF registers: operand 1 is tied to
;; the destination and operand 2 is the register source.
3832 (define_insn "sse_movss"
3833 [(set (match_operand:V4SF 0 "register_operand" "=x")
3835 (match_operand:V4SF 2 "register_operand" "x")
3836 (match_operand:V4SF 1 "register_operand" "0")
3839 "movss\t{%2, %0|%0, %2}"
3840 [(set_attr "type" "ssemov")
3841 (set_attr "mode" "SF")])
;; Expander that broadcasts SF operand 1 into V4SF operand 0.  The
;; preparation code may force operand 1 into a register; since operand 1 is
;; declared with mode SF, force_reg must be given SFmode (passing V4SFmode
;; would create a pseudo whose mode does not match the value being moved).
3843 (define_expand "vec_dupv4sf"
3844 [(set (match_operand:V4SF 0 "register_operand" "")
3846 (match_operand:SF 1 "nonimmediate_operand" "")))]
3850 operands[1] = force_reg (SFmode, operands[1]);
;; AVX broadcast of SF operand 1 into a V4SF register: a register source
;; uses vshufps with immediate 0 on itself, a memory source uses
;; vbroadcastss.
3853 (define_insn "*vec_dupv4sf_avx"
3854 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3856 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3859 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3860 vbroadcastss\t{%1, %0|%0, %1}"
3861 [(set_attr "type" "sselog1,ssemov")
3862 (set_attr "length_immediate" "1,0")
3863 (set_attr "prefix_extra" "0,1")
3864 (set_attr "prefix" "vex")
3865 (set_attr "mode" "V4SF")])
;; Legacy SSE broadcast of SF operand 1 into a V4SF register: operand 1 is
;; tied to the destination and shufps with immediate 0 replicates it.
3867 (define_insn "*vec_dupv4sf"
3868 [(set (match_operand:V4SF 0 "register_operand" "=x")
3870 (match_operand:SF 1 "register_operand" "0")))]
3872 "shufps\t{$0, %0, %0|%0, %0, 0}"
3873 [(set_attr "type" "sselog1")
3874 (set_attr "length_immediate" "1")
3875 (set_attr "mode" "V4SF")])
;; Build a V2SF from two SF operands, AVX variant.  Alternatives 0-2 target
;; SSE registers (vunpcklps, vinsertps with a memory operand 2, vmovss when
;; operand 2 is the zero constant "C"); alternatives 3-4 target MMX
;; registers (punpckldq, movd) and keep the original, non-VEX prefix.
3877 (define_insn "*vec_concatv2sf_avx"
3878 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3880 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3881 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3884 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3885 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3886 vmovss\t{%1, %0|%0, %1}
3887 punpckldq\t{%2, %0|%0, %2}
3888 movd\t{%1, %0|%0, %1}"
3889 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3890 (set_attr "length_immediate" "*,1,*,*,*")
3891 (set_attr "prefix_extra" "*,1,*,*,*")
3892 (set (attr "prefix")
3893 (if_then_else (eq_attr "alternative" "3,4")
3894 (const_string "orig")
3895 (const_string "vex")))
3896 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3898 ;; Although insertps takes register source, we prefer
3899 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF operands, SSE4.1 variant.  Alternatives 0-2
;; target SSE registers (unpcklps for a register operand 2, insertps for a
;; memory operand 2, movss when operand 2 is the zero constant "C");
;; alternatives 3-4 target MMX registers (punpckldq, movd).
3900 (define_insn "*vec_concatv2sf_sse4_1"
3901 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3903 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3904 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3907 unpcklps\t{%2, %0|%0, %2}
3908 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3909 movss\t{%1, %0|%0, %1}
3910 punpckldq\t{%2, %0|%0, %2}
3911 movd\t{%1, %0|%0, %1}"
3912 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3913 (set_attr "prefix_data16" "*,1,*,*,*")
3914 (set_attr "prefix_extra" "*,1,*,*,*")
3915 (set_attr "length_immediate" "*,1,*,*,*")
3916 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3918 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3919 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3920 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF operands, baseline SSE variant.  Operand 2 is
;; restricted to a register or zero: alternatives 0-1 target SSE registers
;; (unpcklps, movss) and alternatives 2-3 target MMX registers (punpckldq,
;; movd).
3921 (define_insn "*vec_concatv2sf_sse"
3922 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3924 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3925 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3928 unpcklps\t{%2, %0|%0, %2}
3929 movss\t{%1, %0|%0, %1}
3930 punpckldq\t{%2, %0|%0, %2}
3931 movd\t{%1, %0|%0, %1}"
3932 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3933 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF halves, AVX variant: a register operand 2
;; uses vmovlhps and a memory operand 2 uses vmovhps.
3935 (define_insn "*vec_concatv4sf_avx"
3936 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3938 (match_operand:V2SF 1 "register_operand" " x,x")
3939 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3942 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3943 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3944 [(set_attr "type" "ssemov")
3945 (set_attr "prefix" "vex")
3946 (set_attr "mode" "V4SF,V2SF")])
;; Build a V4SF from two V2SF halves, legacy SSE variant: operand 1 is tied
;; to the destination; a register operand 2 uses movlhps and a memory
;; operand 2 uses movhps.
3948 (define_insn "*vec_concatv4sf_sse"
3949 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3951 (match_operand:V2SF 1 "register_operand" " 0,0")
3952 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3955 movlhps\t{%2, %0|%0, %2}
3956 movhps\t{%2, %0|%0, %2}"
3957 [(set_attr "type" "ssemov")
3958 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialisation for every SSEMODE mode: all the work is delegated
;; to ix86_expand_vector_init, called with "false" as its first argument.
3960 (define_expand "vec_init<mode>"
3961 [(match_operand:SSEMODE 0 "register_operand" "")
3962 (match_operand 1 "" "")]
3965 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set element 0 of an SSEMODE4S vector from scalar operand 2, AVX variant.
;; The first three alternatives take a zero operand 1 ("C") and load the
;; scalar from an SSE register, memory, or a general register; the next two
;; merge into register operand 1 (vmovss, vpinsrd); the last alternative has
;; a memory destination and "*" attributes.
3969 (define_insn "*vec_set<mode>_0_avx"
3970 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3971 (vec_merge:SSEMODE4S
3972 (vec_duplicate:SSEMODE4S
3973 (match_operand:<ssescalarmode> 2
3974 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3975 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3979 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3980 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3981 vmovd\t{%2, %0|%0, %2}
3982 vmovss\t{%2, %1, %0|%0, %1, %2}
3983 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3985 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3986 (set_attr "prefix_extra" "*,*,*,*,1,*")
3987 (set_attr "length_immediate" "*,*,*,*,1,*")
3988 (set_attr "prefix" "vex")
3989 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Set element 0 of an SSEMODE4S vector from scalar operand 2, SSE4.1
;; variant.  Same alternative layout as the AVX pattern, but with two-
;; operand mnemonics: in the merging alternatives operand 1 is tied to the
;; destination (movss, pinsrd).
3991 (define_insn "*vec_set<mode>_0_sse4_1"
3992 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3993 (vec_merge:SSEMODE4S
3994 (vec_duplicate:SSEMODE4S
3995 (match_operand:<ssescalarmode> 2
3996 "general_operand" " x,m,*r,x,*rm,*rfF"))
3997 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
4001 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
4002 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4003 movd\t{%2, %0|%0, %2}
4004 movss\t{%2, %0|%0, %2}
4005 pinsrd\t{$0, %2, %0|%0, %2, 0}
4007 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4008 (set_attr "prefix_extra" "*,*,*,*,1,*")
4009 (set_attr "length_immediate" "*,*,*,*,1,*")
4010 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Set element 0 of an SSEMODE4S vector from scalar operand 2, SSE2
;; variant.  Alternatives: load from memory or from a general register into
;; a zeroed vector ("C"), merge a register scalar into the tied operand 1
;; with movss, or a memory destination (mode attribute "*").
4012 (define_insn "*vec_set<mode>_0_sse2"
4013 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
4014 (vec_merge:SSEMODE4S
4015 (vec_duplicate:SSEMODE4S
4016 (match_operand:<ssescalarmode> 2
4017 "general_operand" " m,*r,x,x*rfF"))
4018 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4022 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4023 movd\t{%2, %0|%0, %2}
4024 movss\t{%2, %0|%0, %2}
4026 [(set_attr "type" "ssemov")
4027 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Set element 0 of an SSEMODE4S vector from scalar operand 2, baseline
;; variant.  Alternatives: load from memory into a zeroed vector ("C"),
;; merge a register scalar into the tied operand 1, or a memory
;; destination; the register alternatives both emit movss.
4029 (define_insn "vec_set<mode>_0"
4030 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4031 (vec_merge:SSEMODE4S
4032 (vec_duplicate:SSEMODE4S
4033 (match_operand:<ssescalarmode> 2
4034 "general_operand" " m,x,x*rfF"))
4035 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4039 movss\t{%2, %0|%0, %2}
4040 movss\t{%2, %0|%0, %2}
4042 [(set_attr "type" "ssemov")
4043 (set_attr "mode" "SF")])
4045 ;; A subset is vec_setv4sf.
;; Insert SF operand 2 into one element of V4SF operand 1 with vinsertps.
;; Operand 3 is a one-hot element mask (a power of two from 1 to 8); the
;; output code converts it to an element index with exact_log2 and shifts
;; it left by 4 to form the vinsertps immediate.
4046 (define_insn "*vec_setv4sf_avx"
4047 [(set (match_operand:V4SF 0 "register_operand" "=x")
4050 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4051 (match_operand:V4SF 1 "register_operand" "x")
4052 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4055 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4056 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4058 [(set_attr "type" "sselog")
4059 (set_attr "prefix_extra" "1")
4060 (set_attr "length_immediate" "1")
4061 (set_attr "prefix" "vex")
4062 (set_attr "mode" "V4SF")])
;; Insert SF operand 2 into one element of V4SF operand 1 (tied to the
;; destination) with insertps.  Operand 3 is a one-hot element mask (a
;; power of two from 1 to 8); the output code converts it to an element
;; index with exact_log2 and shifts it left by 4 to form the immediate.
4064 (define_insn "*vec_setv4sf_sse4_1"
4065 [(set (match_operand:V4SF 0 "register_operand" "=x")
4068 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4069 (match_operand:V4SF 1 "register_operand" "0")
4070 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4073 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4074 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4076 [(set_attr "type" "sselog")
4077 (set_attr "prefix_data16" "1")
4078 (set_attr "prefix_extra" "1")
4079 (set_attr "length_immediate" "1")
4080 (set_attr "mode" "V4SF")])
;; AVX vinsertps expressed as an unspec: operand 2 (register or memory) is
;; inserted into register operand 1 under control of the 8-bit immediate in
;; operand 3, which is passed to the instruction unchanged.
4082 (define_insn "*avx_insertps"
4083 [(set (match_operand:V4SF 0 "register_operand" "=x")
4084 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4085 (match_operand:V4SF 1 "register_operand" "x")
4086 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4089 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4090 [(set_attr "type" "sselog")
4091 (set_attr "prefix" "vex")
4092 (set_attr "prefix_extra" "1")
4093 (set_attr "length_immediate" "1")
4094 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps expressed as an unspec: register operand 2 is inserted
;; into operand 1 (tied to the destination) under control of the 8-bit
;; immediate in operand 3, which is passed to the instruction unchanged.
4096 (define_insn "sse4_1_insertps"
4097 [(set (match_operand:V4SF 0 "register_operand" "=x")
4098 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4099 (match_operand:V4SF 1 "register_operand" "0")
4100 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4103 "insertps\t{%3, %2, %0|%0, %2, %3}";
4104 [(set_attr "type" "sselog")
4105 (set_attr "prefix_data16" "1")
4106 (set_attr "prefix_extra" "1")
4107 (set_attr "length_immediate" "1")
4108 (set_attr "mode" "V4SF")])
;; After reload, a set whose destination is a V4SF memory operand and whose
;; source is a non-memory SF operand 1 is rewritten as a plain SF move to
;; offset 0 of that memory.
;; NOTE(review): presumably this is the memory-destination case of the
;; element-0 vec_set patterns -- confirm against the full pattern.
4111 [(set (match_operand:V4SF 0 "memory_operand" "")
4114 (match_operand:SF 1 "nonmemory_operand" ""))
4117 "TARGET_SSE && reload_completed"
4120 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Generic element store for every SSEMODE mode: operand 1 is the scalar
;; and operand 2 the constant element index.  All the work is delegated to
;; ix86_expand_vector_set, called with "false" as its first argument.
4124 (define_expand "vec_set<mode>"
4125 [(match_operand:SSEMODE 0 "register_operand" "")
4126 (match_operand:<ssescalarmode> 1 "register_operand" "")
4127 (match_operand 2 "const_int_operand" "")]
4130 ix86_expand_vector_set (false, operands[0], operands[1],
4131 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 into SF operand 0 (SSE register,
;; memory, x87 register or general register).  Source and destination may
;; not both be memory.  After reload the pattern is split into an ordinary
;; SF move: the source is either re-typed as an SF register with the same
;; register number or narrowed with gen_lowpart.
4135 (define_insn_and_split "*vec_extractv4sf_0"
4136 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4138 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4139 (parallel [(const_int 0)])))]
4140 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4142 "&& reload_completed"
4145 rtx op1 = operands[1];
4147 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4149 op1 = gen_lowpart (SFmode, op1);
4150 emit_move_insn (operands[0], op1);
;; Extract one 128-bit half of a 256-bit AVX256MODE vector.  Operand 2 is a
;; constant 0 or 1; the switch on it dispatches to vec_extract_lo_<mode> or
;; vec_extract_hi_<mode>.
4154 (define_expand "avx_vextractf128<mode>"
4155 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4156 (match_operand:AVX256MODE 1 "register_operand" "")
4157 (match_operand:SI 2 "const_0_to_1_operand" "")]
4160 switch (INTVAL (operands[2]))
4163 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4166 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low half (elements 0 and 1) of an AVX256MODE4P vector.  After reload the
;; pattern is split into an ordinary half-width move: the source is either
;; re-typed as a half-mode register with the same register number or
;; narrowed with gen_lowpart.
4174 (define_insn_and_split "vec_extract_lo_<mode>"
4175 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4176 (vec_select:<avxhalfvecmode>
4177 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
4178 (parallel [(const_int 0) (const_int 1)])))]
4181 "&& reload_completed"
4184 rtx op1 = operands[1];
4186 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4188 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4189 emit_move_insn (operands[0], op1);
;; Selects elements 2 and 3 of an AVX256MODE4P register with
;; vextractf128 $1; the result goes to a register or directly to memory.
4193 (define_insn "vec_extract_hi_<mode>"
4194 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4195 (vec_select:<avxhalfvecmode>
4196 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4197 (parallel [(const_int 2) (const_int 3)])))]
4199 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4200 [(set_attr "type" "sselog")
4201 (set_attr "prefix_extra" "1")
4202 (set_attr "length_immediate" "1")
4203 (set_attr "memory" "none,store")
4204 (set_attr "prefix" "vex")
4205 (set_attr "mode" "V8SF")])
;; Selects elements 0..3 of an AVX256MODE8P vector.  After reload this is
;; split into a plain half-width move from the low part of operand 1.
4207 (define_insn_and_split "vec_extract_lo_<mode>"
4208 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4209 (vec_select:<avxhalfvecmode>
4210 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
4211 (parallel [(const_int 0) (const_int 1)
4212 (const_int 2) (const_int 3)])))]
4215 "&& reload_completed"
4218 rtx op1 = operands[1];
4220 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4222 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4223 emit_move_insn (operands[0], op1);
;; Selects elements 4..7 of an AVX256MODE8P register with vextractf128 $1;
;; the result goes to a register or directly to memory.
4227 (define_insn "vec_extract_hi_<mode>"
4228 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4229 (vec_select:<avxhalfvecmode>
4230 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4231 (parallel [(const_int 4) (const_int 5)
4232 (const_int 6) (const_int 7)])))]
4234 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4235 [(set_attr "type" "sselog")
4236 (set_attr "prefix_extra" "1")
4237 (set_attr "length_immediate" "1")
4238 (set_attr "memory" "none,store")
4239 (set_attr "prefix" "vex")
4240 (set_attr "mode" "V8SF")])
;; Selects elements 0..7 of a V16HI as a V8HI.  After reload this is split
;; into a plain V8HImode move from the low part of operand 1.
4242 (define_insn_and_split "vec_extract_lo_v16hi"
4243 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4245 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4246 (parallel [(const_int 0) (const_int 1)
4247 (const_int 2) (const_int 3)
4248 (const_int 4) (const_int 5)
4249 (const_int 6) (const_int 7)])))]
4252 "&& reload_completed"
4255 rtx op1 = operands[1];
4257 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4259 op1 = gen_lowpart (V8HImode, op1);
4260 emit_move_insn (operands[0], op1);
;; Selects elements 8..15 of a V16HI register as a V8HI with
;; vextractf128 $1; the result goes to a register or directly to memory.
4264 (define_insn "vec_extract_hi_v16hi"
4265 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4267 (match_operand:V16HI 1 "register_operand" "x,x")
4268 (parallel [(const_int 8) (const_int 9)
4269 (const_int 10) (const_int 11)
4270 (const_int 12) (const_int 13)
4271 (const_int 14) (const_int 15)])))]
4273 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4274 [(set_attr "type" "sselog")
4275 (set_attr "prefix_extra" "1")
4276 (set_attr "length_immediate" "1")
4277 (set_attr "memory" "none,store")
4278 (set_attr "prefix" "vex")
4279 (set_attr "mode" "V8SF")])
;; Selects elements 0..15 of a V32QI as a V16QI.  After reload this is split
;; into a plain V16QImode move from the low part of operand 1.
4281 (define_insn_and_split "vec_extract_lo_v32qi"
4282 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4284 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4285 (parallel [(const_int 0) (const_int 1)
4286 (const_int 2) (const_int 3)
4287 (const_int 4) (const_int 5)
4288 (const_int 6) (const_int 7)
4289 (const_int 8) (const_int 9)
4290 (const_int 10) (const_int 11)
4291 (const_int 12) (const_int 13)
4292 (const_int 14) (const_int 15)])))]
4295 "&& reload_completed"
4298 rtx op1 = operands[1];
4300 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4302 op1 = gen_lowpart (V16QImode, op1);
4303 emit_move_insn (operands[0], op1);
;; Selects elements 16..31 of a V32QI register as a V16QI with
;; vextractf128 $1; the result goes to a register or directly to memory.
4307 (define_insn "vec_extract_hi_v32qi"
4308 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4310 (match_operand:V32QI 1 "register_operand" "x,x")
4311 (parallel [(const_int 16) (const_int 17)
4312 (const_int 18) (const_int 19)
4313 (const_int 20) (const_int 21)
4314 (const_int 22) (const_int 23)
4315 (const_int 24) (const_int 25)
4316 (const_int 26) (const_int 27)
4317 (const_int 28) (const_int 29)
4318 (const_int 30) (const_int 31)])))]
4320 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4321 [(set_attr "type" "sselog")
4322 (set_attr "prefix_extra" "1")
4323 (set_attr "length_immediate" "1")
4324 (set_attr "memory" "none,store")
4325 (set_attr "prefix" "vex")
4326 (set_attr "mode" "V8SF")])
;; extractps: copies the V4SF element chosen by operand 2 (constant 0..3)
;; from register operand 1 to a general register or memory ("=rm").
4328 (define_insn "*sse4_1_extractps"
4329 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4331 (match_operand:V4SF 1 "register_operand" "x")
4332 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4334 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4335 [(set_attr "type" "sselog")
4336 (set_attr "prefix_data16" "1")
4337 (set_attr "prefix_extra" "1")
4338 (set_attr "length_immediate" "1")
4339 (set_attr "prefix" "maybe_vex")
4340 (set_attr "mode" "V4SF")])
;; Element extraction from a V4SF held in offsettable memory ("o"): the
;; split replaces it with an SFmode load from byte offset 4 * index.
4342 (define_insn_and_split "*vec_extract_v4sf_mem"
4343 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4345 (match_operand:V4SF 1 "memory_operand" "o")
4346 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4352 int i = INTVAL (operands[2]);
4354 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for reading one element of an SSEMODE vector: operand 0 is the
;; scalar result, operand 1 the vector register and operand 2 the constant
;; element index.  The work is delegated to ix86_expand_vector_extract.
4358 (define_expand "vec_extract<mode>"
4359 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4360 (match_operand:SSEMODE 1 "register_operand" "")
4361 (match_operand 2 "const_int_operand" "")]
4364 ix86_expand_vector_extract (false, operands[0], operands[1],
4365 INTVAL (operands[2]));
4369 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4371 ;; Parallel double-precision floating point element swizzling
4373 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4375 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on V4DF: element indices 1, 5, 3, 7 are selected from
;; operands 1 and 2 (operand 2 may be memory).
4376 (define_insn "avx_unpckhpd256"
4377 [(set (match_operand:V4DF 0 "register_operand" "=x")
4380 (match_operand:V4DF 1 "register_operand" "x")
4381 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4382 (parallel [(const_int 1) (const_int 5)
4383 (const_int 3) (const_int 7)])))]
4385 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4386 [(set_attr "type" "sselog")
4387 (set_attr "prefix" "vex")
4388 (set_attr "mode" "V4DF")])
;; Expander for the high-element V2DF interleave.  If the operand
;; combination fails ix86_vec_interleave_v2df_operator_ok (operands, 1),
;; operand 2 is forced into a register.
4390 (define_expand "vec_interleave_highv2df"
4391 [(set (match_operand:V2DF 0 "register_operand" "")
4394 (match_operand:V2DF 1 "nonimmediate_operand" "")
4395 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4396 (parallel [(const_int 1)
4400 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4401 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX high interleave of two V2DF values.  Alternatives, in order:
;;   0: vunpckhpd on two registers;
;;   1: vmovddup of the high half (%H1) of memory operand 1 when operand 2
;;      matches operand 1;
;;   2: vmovlpd loading %H1 and combining it with register operand 2;
;;   3: vmovhpd store into a memory destination that matches operand 2.
4404 (define_insn "*avx_interleave_highv2df"
4405 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4408 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4409 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4410 (parallel [(const_int 1)
4412 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4414 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4415 vmovddup\t{%H1, %0|%0, %H1}
4416 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4417 vmovhpd\t{%1, %0|%0, %1}"
4418 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4419 (set_attr "prefix" "vex")
4420 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 high interleave of two V2DF values (two-operand encodings, so one
;; input is tied to the destination).  Alternatives, in order:
;;   0: unpckhpd with operand 1 tied to the destination;
;;   1: movddup of the high half (%H1) of memory operand 1 when operand 2
;;      matches operand 1;
;;   2: movlpd loading %H1 into a destination tied to operand 2;
;;   3: movhpd store into a memory destination tied to operand 2.
4422 (define_insn "*sse3_interleave_highv2df"
4423 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4426 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4427 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4428 (parallel [(const_int 1)
4430 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4432 unpckhpd\t{%2, %0|%0, %2}
4433 movddup\t{%H1, %0|%0, %H1}
4434 movlpd\t{%H1, %0|%0, %H1}
4435 movhpd\t{%1, %0|%0, %1}"
4436 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4437 (set_attr "prefix_data16" "*,*,1,1")
4438 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 high interleave of two V2DF values; same as the SSE3 form but
;; without a movddup alternative.  Alternatives, in order: unpckhpd
;; (operand 1 tied to the destination), movlpd load of %H1 (operand 2 tied),
;; movhpd store into a memory destination tied to operand 2.
4440 (define_insn "*sse2_interleave_highv2df"
4441 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4444 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4445 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4446 (parallel [(const_int 1)
4448 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4450 unpckhpd\t{%2, %0|%0, %2}
4451 movlpd\t{%H1, %0|%0, %H1}
4452 movhpd\t{%1, %0|%0, %1}"
4453 [(set_attr "type" "sselog,ssemov,ssemov")
4454 (set_attr "prefix_data16" "*,1,1")
4455 (set_attr "mode" "V2DF,V1DF,V1DF")])
4457 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single V4DF input (operand 1, register or memory) and
;; selection indices 0, 4, 2, 6.
4458 (define_expand "avx_movddup256"
4459 [(set (match_operand:V4DF 0 "register_operand" "")
4462 (match_operand:V4DF 1 "nonimmediate_operand" "")
4464 (parallel [(const_int 0) (const_int 4)
4465 (const_int 2) (const_int 6)])))]
;; Expander for the V4DF low unpack: selection indices 0, 4, 2, 6 over
;; register operand 1 and register-or-memory operand 2.
4469 (define_expand "avx_unpcklpd256"
4470 [(set (match_operand:V4DF 0 "register_operand" "")
4473 (match_operand:V4DF 1 "register_operand" "")
4474 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4475 (parallel [(const_int 0) (const_int 4)
4476 (const_int 2) (const_int 6)])))]
;; Two alternatives: vmovddup when operand 2 matches operand 1 (the only
;; case in which operand 1 may be memory, per the insn condition), otherwise
;; vunpcklpd with operand 1 in a register.
4480 (define_insn "*avx_unpcklpd256"
4481 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4484 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4485 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4486 (parallel [(const_int 0) (const_int 4)
4487 (const_int 2) (const_int 6)])))]
4489 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4491 vmovddup\t{%1, %0|%0, %1}
4492 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4493 [(set_attr "type" "sselog")
4494 (set_attr "prefix" "vex")
4495 (set_attr "mode" "V4DF")])
;; Expander for the low-element V2DF interleave.  If the operand
;; combination fails ix86_vec_interleave_v2df_operator_ok (operands, 0),
;; operand 1 is forced into a register.
4497 (define_expand "vec_interleave_lowv2df"
4498 [(set (match_operand:V2DF 0 "register_operand" "")
4501 (match_operand:V2DF 1 "nonimmediate_operand" "")
4502 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4503 (parallel [(const_int 0)
4507 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4508 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX low interleave of two V2DF values.  Alternatives, in order:
;;   0: vunpcklpd on two registers;
;;   1: vmovddup of memory operand 1 when operand 2 matches operand 1;
;;   2: vmovhpd loading memory operand 2 on top of register operand 1;
;;   3: vmovlpd store of operand 2 into the high half (%H0) of a memory
;;      destination tied to operand 1.
4511 (define_insn "*avx_interleave_lowv2df"
4512 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4515 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4516 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4517 (parallel [(const_int 0)
4519 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4521 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4522 vmovddup\t{%1, %0|%0, %1}
4523 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4524 vmovlpd\t{%2, %H0|%H0, %2}"
4525 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4526 (set_attr "prefix" "vex")
4527 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 low interleave of two V2DF values.  Alternatives, in order:
;;   0: unpcklpd with operand 1 tied to the destination;
;;   1: movddup of memory operand 1 when operand 2 matches operand 1;
;;   2: movhpd loading memory operand 2 (operand 1 tied to the destination);
;;   3: movlpd store of operand 2 into the high half (%H0) of a memory
;;      destination tied to operand 1.
4529 (define_insn "*sse3_interleave_lowv2df"
4530 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4533 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4534 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4535 (parallel [(const_int 0)
4537 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4539 unpcklpd\t{%2, %0|%0, %2}
4540 movddup\t{%1, %0|%0, %1}
4541 movhpd\t{%2, %0|%0, %2}
4542 movlpd\t{%2, %H0|%H0, %2}"
4543 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4544 (set_attr "prefix_data16" "*,*,1,1")
4545 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 low interleave of two V2DF values; operand 1 is tied to the
;; destination in every alternative.  Alternatives, in order: unpcklpd,
;; movhpd load of memory operand 2, movlpd store of operand 2 into the high
;; half (%H0) of a memory destination.
4547 (define_insn "*sse2_interleave_lowv2df"
4548 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4551 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4552 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4553 (parallel [(const_int 0)
4555 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4557 unpcklpd\t{%2, %0|%0, %2}
4558 movhpd\t{%2, %0|%0, %2}
4559 movlpd\t{%2, %H0|%H0, %2}"
4560 [(set_attr "type" "sselog,ssemov,ssemov")
4561 (set_attr "prefix_data16" "*,1,1")
4562 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination: the low
;; DF of register operand 1 is stored to both halves of the destination
;; (byte offsets 0 and 8).
4565 [(set (match_operand:V2DF 0 "memory_operand" "")
4568 (match_operand:V2DF 1 "register_operand" "")
4570 (parallel [(const_int 0)
4572 "TARGET_SSE3 && reload_completed"
4575 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4576 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4577 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Applies (TARGET_SSE3) when selection index operand 3 equals operand 2 + 2.
;; The pattern is rewritten as a vec_duplicate of the DF found at byte
;; offset 8 * operand 2 of memory operand 1.
4582 [(set (match_operand:V2DF 0 "register_operand" "")
4585 (match_operand:V2DF 1 "memory_operand" "")
4587 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4588 (match_operand:SI 3 "const_int_operand" "")])))]
4589 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4590 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4592 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander: decodes the immediate in operand 3 into explicit element
;; indices for avx_shufpd256_1.  Bits 1, 2 and 3 of the mask pick 5 or 4,
;; 3 or 2, and 7 or 6 respectively.
4595 (define_expand "avx_shufpd256"
4596 [(match_operand:V4DF 0 "register_operand" "")
4597 (match_operand:V4DF 1 "register_operand" "")
4598 (match_operand:V4DF 2 "nonimmediate_operand" "")
4599 (match_operand:SI 3 "const_int_operand" "")]
4602 int mask = INTVAL (operands[3]);
4603 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4605 GEN_INT (mask & 2 ? 5 : 4),
4606 GEN_INT (mask & 4 ? 3 : 2),
4607 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the selection spelled out as four index operands
;; (3..6).  The output code folds those indices back into a 4-bit immediate
;; and overwrites operands[3] with it before the template is printed.
4611 (define_insn "avx_shufpd256_1"
4612 [(set (match_operand:V4DF 0 "register_operand" "=x")
4615 (match_operand:V4DF 1 "register_operand" "x")
4616 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4617 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4618 (match_operand 4 "const_4_to_5_operand" "")
4619 (match_operand 5 "const_2_to_3_operand" "")
4620 (match_operand 6 "const_6_to_7_operand" "")])))]
4624 mask = INTVAL (operands[3]);
4625 mask |= (INTVAL (operands[4]) - 4) << 1;
4626 mask |= (INTVAL (operands[5]) - 2) << 2;
4627 mask |= (INTVAL (operands[6]) - 6) << 3;
4628 operands[3] = GEN_INT (mask);
4630 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4632 [(set_attr "type" "sselog")
4633 (set_attr "length_immediate" "1")
4634 (set_attr "prefix" "vex")
4635 (set_attr "mode" "V4DF")])
;; Expander: forwards to sse2_shufpd_v2df, turning bit 1 of the immediate
;; in operand 3 into element index 3 or 2.
4637 (define_expand "sse2_shufpd"
4638 [(match_operand:V2DF 0 "register_operand" "")
4639 (match_operand:V2DF 1 "register_operand" "")
4640 (match_operand:V2DF 2 "nonimmediate_operand" "")
4641 (match_operand:SI 3 "const_int_operand" "")]
4644 int mask = INTVAL (operands[3]);
4645 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4647 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander taking a destination and two source registers, all SSEMODE_EO;
;; delegates to ix86_expand_vec_extract_even_odd with last argument 0.
4651 (define_expand "vec_extract_even<mode>"
4652 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4653 (match_operand:SSEMODE_EO 1 "register_operand" "")
4654 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4657 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander taking a destination and two source registers, all SSEMODE_EO;
;; delegates to ix86_expand_vec_extract_even_odd with last argument 1.
4661 (define_expand "vec_extract_odd<mode>"
4662 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4663 (match_operand:SSEMODE_EO 1 "register_operand" "")
4664 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4667 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4671 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; Three-operand vpunpckhqdq on V2DI; operand 2 may be memory.
4672 (define_insn "*avx_interleave_highv2di"
4673 [(set (match_operand:V2DI 0 "register_operand" "=x")
4676 (match_operand:V2DI 1 "register_operand" "x")
4677 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4678 (parallel [(const_int 1)
4681 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4682 [(set_attr "type" "sselog")
4683 (set_attr "prefix" "vex")
4684 (set_attr "mode" "TI")])
;; Two-operand punpckhqdq on V2DI: operand 1 is tied to the destination and
;; operand 2 may be memory.
4686 (define_insn "vec_interleave_highv2di"
4687 [(set (match_operand:V2DI 0 "register_operand" "=x")
4690 (match_operand:V2DI 1 "register_operand" "0")
4691 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4692 (parallel [(const_int 1)
4695 "punpckhqdq\t{%2, %0|%0, %2}"
4696 [(set_attr "type" "sselog")
4697 (set_attr "prefix_data16" "1")
4698 (set_attr "mode" "TI")])
;; Three-operand vpunpcklqdq on V2DI; operand 2 may be memory.
4700 (define_insn "*avx_interleave_lowv2di"
4701 [(set (match_operand:V2DI 0 "register_operand" "=x")
4704 (match_operand:V2DI 1 "register_operand" "x")
4705 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4706 (parallel [(const_int 0)
4709 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4710 [(set_attr "type" "sselog")
4711 (set_attr "prefix" "vex")
4712 (set_attr "mode" "TI")])
;; Two-operand punpcklqdq on V2DI: operand 1 is tied to the destination and
;; operand 2 may be memory.
4714 (define_insn "vec_interleave_lowv2di"
4715 [(set (match_operand:V2DI 0 "register_operand" "=x")
4718 (match_operand:V2DI 1 "register_operand" "0")
4719 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4720 (parallel [(const_int 0)
4723 "punpcklqdq\t{%2, %0|%0, %2}"
4724 [(set_attr "type" "sselog")
4725 (set_attr "prefix_data16" "1")
4726 (set_attr "mode" "TI")])
;; Three-operand vshufpd for the SSEMODE2D modes.  Index operand 3 (0..1)
;; selects from operand 1 and index operand 4 (2..3) from operand 2; the
;; output code packs both into a 2-bit immediate stored back in operands[3].
4728 (define_insn "*avx_shufpd_<mode>"
4729 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4730 (vec_select:SSEMODE2D
4731 (vec_concat:<ssedoublesizemode>
4732 (match_operand:SSEMODE2D 1 "register_operand" "x")
4733 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4734 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4735 (match_operand 4 "const_2_to_3_operand" "")])))]
4739 mask = INTVAL (operands[3]);
4740 mask |= (INTVAL (operands[4]) - 2) << 1;
4741 operands[3] = GEN_INT (mask);
4743 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4745 [(set_attr "type" "sselog")
4746 (set_attr "length_immediate" "1")
4747 (set_attr "prefix" "vex")
4748 (set_attr "mode" "V2DF")])
;; Two-operand shufpd for the SSEMODE2D modes; operand 1 is tied to the
;; destination.  Index operand 3 (0..1) selects from operand 1 and index
;; operand 4 (2..3) from operand 2; the output code packs both into a 2-bit
;; immediate stored back in operands[3].
4750 (define_insn "sse2_shufpd_<mode>"
4751 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4752 (vec_select:SSEMODE2D
4753 (vec_concat:<ssedoublesizemode>
4754 (match_operand:SSEMODE2D 1 "register_operand" "0")
4755 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4756 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4757 (match_operand 4 "const_2_to_3_operand" "")])))]
4761 mask = INTVAL (operands[3]);
4762 mask |= (INTVAL (operands[4]) - 2) << 1;
4763 operands[3] = GEN_INT (mask);
4765 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4767 [(set_attr "type" "sselog")
4768 (set_attr "length_immediate" "1")
4769 (set_attr "mode" "V2DF")])
4771 ;; Avoid combining registers from different units in a single alternative,
4772 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF into a DF.  Alternative 0 stores
;; with vmovhpd; alternative 1 is a register form using vunpckhpd with
;; operand 1 as both sources.  Memory-to-memory is rejected.
4773 (define_insn "*avx_storehpd"
4774 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4776 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4777 (parallel [(const_int 1)])))]
4778 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4780 vmovhpd\t{%1, %0|%0, %1}
4781 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4785 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4786 (set_attr "prefix" "vex")
4787 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF into a DF.  Alternative 0 stores
;; with movhpd; in alternative 1 the source is tied to the destination.
;; Memory-to-memory is rejected.
4789 (define_insn "sse2_storehpd"
4790 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4792 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4793 (parallel [(const_int 1)])))]
4794 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4796 movhpd\t{%1, %0|%0, %1}
4801 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4802 (set_attr "prefix_data16" "1,*,*,*,*")
4803 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload, extracting element 1 from a V2DF in memory becomes a plain
;; DFmode load from byte offset 8 of that memory.
4806 [(set (match_operand:DF 0 "register_operand" "")
4808 (match_operand:V2DF 1 "memory_operand" "")
4809 (parallel [(const_int 1)])))]
4810 "TARGET_SSE2 && reload_completed"
4811 [(set (match_dup 0) (match_dup 1))]
4813 operands[1] = adjust_address (operands[1], DFmode, 8);
4816 ;; Avoid combining registers from different units in a single alternative,
4817 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF into a DF.  Alternative 0 stores with
;; %vmovlpd.  Memory-to-memory is rejected.
4818 (define_insn "sse2_storelpd"
4819 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4821 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4822 (parallel [(const_int 0)])))]
4823 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4825 %vmovlpd\t{%1, %0|%0, %1}
4830 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4831 (set_attr "prefix_data16" "1,*,*,*,*")
4832 (set_attr "prefix" "maybe_vex")
4833 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload, extracting element 0 of a V2DF becomes a DFmode move from
;; the low part of operand 1.
4836 [(set (match_operand:DF 0 "register_operand" "")
4838 (match_operand:V2DF 1 "nonimmediate_operand" "")
4839 (parallel [(const_int 0)])))]
4840 "TARGET_SSE2 && reload_completed"
4843 rtx op1 = operands[1];
4845 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4847 op1 = gen_lowpart (DFmode, op1);
4848 emit_move_insn (operands[0], op1);
;; Expander combining element 0 of V2DF operand 1 with DF operand 2.  The
;; operands are legitimized by ix86_fixup_binary_operands.
4852 (define_expand "sse2_loadhpd_exp"
4853 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4856 (match_operand:V2DF 1 "nonimmediate_operand" "")
4857 (parallel [(const_int 0)]))
4858 (match_operand:DF 2 "nonimmediate_operand" "")))]
4860 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4862 ;; Avoid combining registers from different units in a single alternative,
4863 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form: element 0 of operand 1 combined with DF operand 2.
;; Alternative 0 loads operand 2 from memory with vmovhpd, alternative 1
;; uses vunpcklpd on registers.  The remaining alternatives have a memory
;; destination tied to operand 1.
4864 (define_insn "*avx_loadhpd"
4865 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4868 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4869 (parallel [(const_int 0)]))
4870 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4871 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4873 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4874 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4878 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4879 (set_attr "prefix" "vex")
4880 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 form: the result keeps element 0 of operand 1 and takes DF operand 2
;; as its high element.  Alternative 0 loads operand 2 from memory with
;; movhpd, alternative 1 uses unpcklpd on registers; the remaining
;; alternatives have a memory destination tied to operand 1.
;;
;; There is deliberately no alternative with operand 2 tied to the
;; destination.  shufpd always takes its low result element from the
;; destination register and its high element from the source, so with the
;; destination holding operand 2 it cannot place operand 1's low element in
;; the low half.  The former "shufpd $1, %1, %0" alternative produced
;; { high (dest), low (operand 1) } and has been removed.
4882 (define_insn "sse2_loadhpd"
4883 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4886 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4887 (parallel [(const_int 0)]))
4888 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4889 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4891 movhpd\t{%2, %0|%0, %2}
4892 unpcklpd\t{%2, %0|%0, %2}
4897 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4898 (set_attr "prefix_data16" "1,*,*,*,*")
4900 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload, replacing the high element of a V2DF in memory (element 0
;; of the destination is kept) becomes a DFmode store of register operand 1
;; at byte offset 8.
4903 [(set (match_operand:V2DF 0 "memory_operand" "")
4905 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4906 (match_operand:DF 1 "register_operand" "")))]
4907 "TARGET_SSE2 && reload_completed"
4908 [(set (match_dup 0) (match_dup 1))]
4910 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander combining DF operand 2 with element 1 of V2DF operand 1.  The
;; operands are legitimized by ix86_fixup_binary_operands.
4913 (define_expand "sse2_loadlpd_exp"
4914 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4916 (match_operand:DF 2 "nonimmediate_operand" "")
4918 (match_operand:V2DF 1 "nonimmediate_operand" "")
4919 (parallel [(const_int 1)]))))]
4921 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4923 ;; Avoid combining registers from different units in a single alternative,
4924 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form: DF operand 2 becomes the low element and element 1 of
;; operand 1 the high element.  Register-destination alternatives, in order:
;;   0: vmovsd load when operand 1 satisfies constraint "C";
;;   1: vmovlpd loading operand 2 from memory over register operand 1;
;;   2: vmovsd register merge;
;;   3: vmovhpd loading the high half (%H1) of memory operand 1 on top of
;;      register operand 2.
;; The remaining alternatives have a memory destination tied to operand 1.
4925 (define_insn "*avx_loadlpd"
4926 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4928 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4930 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4931 (parallel [(const_int 1)]))))]
4932 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4934 vmovsd\t{%2, %0|%0, %2}
4935 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4936 vmovsd\t{%2, %1, %0|%0, %1, %2}
4937 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4941 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4942 (set_attr "prefix" "vex")
4943 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 form: DF operand 2 becomes the low element and element 1 of
;; operand 1 the high element.  Register-destination alternatives, in order:
;;   0: movsd load when operand 1 satisfies constraint "C";
;;   1: movlpd load of operand 2 (operand 1 tied to the destination);
;;   2: movsd register merge (operand 1 tied to the destination);
;;   3: shufpd $2 with operand 2 tied to the destination and operand 1 in
;;      another register;
;;   4: movhpd load of the high half (%H1) of memory operand 1 (operand 2
;;      tied to the destination).
;; The remaining alternatives have a memory destination tied to operand 1.
;;
;; In alternative 3, shufpd $2 keeps the destination's low element and
;; copies the source's high element, so the source must be operand 1.  The
;; template used to name %2, which is the destination itself, so operand 1
;; was never read.
4945 (define_insn "sse2_loadlpd"
4946 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4948 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4950 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4951 (parallel [(const_int 1)]))))]
4952 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4954 movsd\t{%2, %0|%0, %2}
4955 movlpd\t{%2, %0|%0, %2}
4956 movsd\t{%2, %0|%0, %2}
4957 shufpd\t{$2, %1, %0|%0, %1, 2}
4958 movhpd\t{%H1, %0|%0, %H1}
4962 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4963 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4964 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4965 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; After reload, replacing the LOW element of a V2DF in memory (element 1 of
;; the destination is kept) becomes a DFmode store of register operand 1.
;; The low element lives at byte offset 0.  Offset 8 is the high element
;; that this pattern preserves, so storing there (as the code used to)
;; overwrote the wrong half.
4968 [(set (match_operand:V2DF 0 "memory_operand" "")
4970 (match_operand:DF 1 "register_operand" "")
4971 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4972 "TARGET_SSE2 && reload_completed"
4973 [(set (match_dup 0) (match_dup 1))]
4975 operands[0] = adjust_address (operands[0], DFmode, 0);
4978 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 extraction of element 1 of a V2DF.  Alternatives, in
;; order: movhps store to memory, movhlps register to register, movlps load
;; of the high half (%H1) of an offsettable memory source.
4980 (define_insn "*vec_extractv2df_1_sse"
4981 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4983 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4984 (parallel [(const_int 1)])))]
4985 "!TARGET_SSE2 && TARGET_SSE
4986 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4988 movhps\t{%1, %0|%0, %1}
4989 movhlps\t{%1, %0|%0, %1}
4990 movlps\t{%H1, %0|%0, %H1}"
4991 [(set_attr "type" "ssemov")
4992 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF.  Alternatives, in
;; order: movlps store to memory, movaps register copy, movlps load from
;; memory.
4994 (define_insn "*vec_extractv2df_0_sse"
4995 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4997 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4998 (parallel [(const_int 0)])))]
4999 "!TARGET_SSE2 && TARGET_SSE
5000 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5002 movlps\t{%1, %0|%0, %1}
5003 movaps\t{%1, %0|%0, %1}
5004 movlps\t{%1, %0|%0, %1}"
5005 [(set_attr "type" "ssemov")
5006 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX merge of two V2DF operands.  Alternatives, in order:
;;   0: vmovsd on registers;
;;   1: vmovlpd loading memory operand 2 over register operand 1;
;;   2: vmovlpd store of operand 2 into a memory destination tied to
;;      operand 1;
;;   3: vmovhps loading the high half (%H1) of memory operand 1 on top of
;;      register operand 2;
;;   4: vmovhps store of operand 1 into the high half (%H0) of a memory
;;      destination tied to operand 2.
5008 (define_insn "*avx_movsd"
5009 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5011 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5012 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5016 vmovsd\t{%2, %1, %0|%0, %1, %2}
5017 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5018 vmovlpd\t{%2, %0|%0, %2}
5019 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5020 vmovhps\t{%1, %H0|%H0, %1}"
5021 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5022 (set_attr "prefix" "vex")
5023 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 merge of two V2DF operands (two-operand encodings, so one input is
;; always tied to the destination).  Alternatives, in order:
;;   0: movsd on registers (operand 1 tied);
;;   1: movlpd load of memory operand 2 (operand 1 tied);
;;   2: movlpd store of operand 2 into a memory destination tied to
;;      operand 1;
;;   3: shufpd $2 with operand 2 tied to the destination and operand 1 in
;;      another register;
;;   4: movhps load of the high half (%H1) of memory operand 1 (operand 2
;;      tied);
;;   5: movhps store of operand 1 into the high half (%H0) of a memory
;;      destination tied to operand 2.
;;
;; In alternative 3, shufpd $2 keeps the destination's low element and
;; copies the source's high element, so the source must be operand 1.  The
;; template used to name %2, which is the destination itself, so operand 1
;; was never read.
5025 (define_insn "sse2_movsd"
5026 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5028 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5029 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5033 movsd\t{%2, %0|%0, %2}
5034 movlpd\t{%2, %0|%0, %2}
5035 movlpd\t{%2, %0|%0, %2}
5036 shufpd\t{$2, %1, %0|%0, %1, 2}
5037 movhps\t{%H1, %0|%0, %H1}
5038 movhps\t{%1, %H0|%H0, %1}"
5039 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5040 (set_attr "prefix_data16" "*,1,1,*,*,*")
5041 (set_attr "length_immediate" "*,*,*,1,*,*")
5042 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF from a single DF operand (register or memory) with
;; %vmovddup.
5044 (define_insn "*vec_dupv2df_sse3"
5045 [(set (match_operand:V2DF 0 "register_operand" "=x")
5047 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5049 "%vmovddup\t{%1, %0|%0, %1}"
5050 [(set_attr "type" "sselog1")
5051 (set_attr "prefix" "maybe_vex")
5052 (set_attr "mode" "DF")])
;; Builds a V2DF from a DF register operand that is tied to the destination
;; (constraint "0").
5054 (define_insn "vec_dupv2df"
5055 [(set (match_operand:V2DF 0 "register_operand" "=x")
5057 (match_operand:DF 1 "register_operand" "0")))]
5060 [(set_attr "type" "sselog1")
5061 (set_attr "mode" "V2DF")])
;; Builds a V2DF from DF operand 1 (register or memory) using %vmovddup.
5063 (define_insn "*vec_concatv2df_sse3"
5064 [(set (match_operand:V2DF 0 "register_operand" "=x")
5066 (match_operand:DF 1 "nonimmediate_operand" "xm")
5069 "%vmovddup\t{%1, %0|%0, %1}"
5070 [(set_attr "type" "sselog1")
5071 (set_attr "prefix" "maybe_vex")
5072 (set_attr "mode" "DF")])
;; AVX construction of a V2DF from two DF operands.  Alternatives, in order:
;; vunpcklpd on two registers, vmovhpd with operand 2 in memory, and vmovsd
;; loading operand 1 from memory when operand 2 satisfies constraint "C".
5074 (define_insn "*vec_concatv2df_avx"
5075 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5077 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5078 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5081 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5082 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5083 vmovsd\t{%1, %0|%0, %1}"
5084 [(set_attr "type" "ssemov")
5085 (set_attr "prefix" "vex")
5086 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX construction of a V2DF from two DF operands.  The first three
;; alternatives use the Y2 register constraint (unpcklpd, movhpd, movsd);
;; the last two use plain "x" registers with movlhps / movhps.  Except for
;; the movsd load, operand 1 is tied to the destination.
5088 (define_insn "*vec_concatv2df"
5089 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5091 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5092 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5095 unpcklpd\t{%2, %0|%0, %2}
5096 movhpd\t{%2, %0|%0, %2}
5097 movsd\t{%1, %0|%0, %1}
5098 movlhps\t{%2, %0|%0, %2}
5099 movhps\t{%2, %0|%0, %2}"
5100 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5101 (set_attr "prefix_data16" "*,1,*,*,*")
5102 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5104 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5106 ;; Parallel integral arithmetic
5108 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the SSEMODEI modes.  The preparation statement
;; sets operand 2 to a register holding the zero vector (CONST0_RTX).
5110 (define_expand "neg<mode>2"
5111 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5114 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5116 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for the SSEMODEI modes; operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
5118 (define_expand "<plusminus_insn><mode>3"
5119 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5121 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5122 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5124 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer add/subtract (vp<op><size>) for the SSEMODEI
;; modes; operand 2 may be memory.
5126 (define_insn "*avx_<plusminus_insn><mode>3"
5127 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5129 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5130 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5131 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5132 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5133 [(set_attr "type" "sseiadd")
5134 (set_attr "prefix" "vex")
5135 (set_attr "mode" "TI")])
;; SSE2 two-operand integer add/subtract (p<op><size>) for the SSEMODEI
;; modes; operand 1 is tied to the destination and operand 2 may be memory.
5137 (define_insn "*<plusminus_insn><mode>3"
5138 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5140 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5141 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5142 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5143 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5144 [(set_attr "type" "sseiadd")
5145 (set_attr "prefix_data16" "1")
5146 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus operations on the SSEMODE12 modes
;; (V16QI, V8HI); operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
5148 (define_expand "sse2_<plusminus_insn><mode>3"
5149 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5150 (sat_plusminus:SSEMODE12
5151 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5152 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5154 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the sat_plusminus operations on the SSEMODE12
;; modes; operand 2 may be memory.
5156 (define_insn "*avx_<plusminus_insn><mode>3"
5157 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5158 (sat_plusminus:SSEMODE12
5159 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5160 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5161 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5162 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5163 [(set_attr "type" "sseiadd")
5164 (set_attr "prefix" "vex")
5165 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the sat_plusminus operations on the SSEMODE12
;; modes; operand 1 is tied to the destination and operand 2 may be memory.
5167 (define_insn "*sse2_<plusminus_insn><mode>3"
5168 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5169 (sat_plusminus:SSEMODE12
5170 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5171 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5172 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5173 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5174 [(set_attr "type" "sseiadd")
5175 (set_attr "prefix_data16" "1")
5176 (set_attr "mode" "TI")])
;; V16QI multiply synthesized from word multiplies; only usable while new
;; pseudos can still be created.  The split:
;;   1. allocates six V16QI temporaries;
;;   2. interleaves each input with itself (high and low halves) so every
;;      word carries a source byte in its low byte;
;;   3. multiplies the word views pairwise with mulv8hi3;
;;   4. extracts the even bytes of the two products into operand 0.
5178 (define_insn_and_split "mulv16qi3"
5179 [(set (match_operand:V16QI 0 "register_operand" "")
5180 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5181 (match_operand:V16QI 2 "register_operand" "")))]
5183 && can_create_pseudo_p ()"
5191 for (i = 0; i < 6; ++i)
5192 t[i] = gen_reg_rtx (V16QImode);
5194 /* Unpack data such that we've got a source byte in each low byte of
5195 each word. We don't care what goes into the high byte of each word.
5196 Rather than trying to get zero in there, most convenient is to let
5197 it be a copy of the low byte. */
5198 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5199 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5200 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5201 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5203 /* Multiply words. The end-of-line annotations here give a picture of what
5204 the output of that instruction looks like. Dot means don't care; the
5205 letters are the bytes of the result with A being the most significant. */
5206 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5207 gen_lowpart (V8HImode, t[0]),
5208 gen_lowpart (V8HImode, t[1])));
5209 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5210 gen_lowpart (V8HImode, t[2]),
5211 gen_lowpart (V8HImode, t[3])));
5213 /* Extract the even bytes and merge them back together. */
5214 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Expander for the V8HI multiply.  The preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy with code MULT.
5218 (define_expand "mulv8hi3"
5219 [(set (match_operand:V8HI 0 "register_operand" "")
5220 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5221 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5223 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI multiply: three-operand vpmullw.  Operand 1 is
;; marked commutative ("%") and operand 2 may be a register or memory.
5225 (define_insn "*avx_mulv8hi3"
5226 [(set (match_operand:V8HI 0 "register_operand" "=x")
5227 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5228 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5229 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5230 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5231 [(set_attr "type" "sseimul")
5232 (set_attr "prefix" "vex")
5233 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI multiply: two-operand pmullw with operand 1 tied
;; to the destination ("%0").
5235 (define_insn "*mulv8hi3"
5236 [(set (match_operand:V8HI 0 "register_operand" "=x")
5237 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5238 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5239 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5240 "pmullw\t{%2, %0|%0, %2}"
5241 [(set_attr "type" "sseimul")
5242 (set_attr "prefix_data16" "1")
5243 (set_attr "mode" "TI")])
;; Expander for smulv8hi3_highpart on V8HI operands.  The preparation
;; statement passes the operands to ix86_fixup_binary_operands_no_copy
;; with code MULT.
5245 (define_expand "smulv8hi3_highpart"
5246 [(set (match_operand:V8HI 0 "register_operand" "")
5251 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5253 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5256 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX insn for the smulv8hi3_highpart expander: three-operand vpmulhw.
;; Operand 1 is marked commutative ("%") and operand 2 may be a register or
;; memory.  The pattern name follows the *avx_<base name> scheme used by
;; *avx_umulv8hi3_highpart; names starting with '*' generate no gen_
;; function, so the name only shows up in dumps.
5258 (define_insn "*avx_smulv8hi3_highpart"
5259 [(set (match_operand:V8HI 0 "register_operand" "=x")
5264 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5266 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5268 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5269 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5270 [(set_attr "type" "sseimul")
5271 (set_attr "prefix" "vex")
5272 (set_attr "mode" "TI")])
;; SSE2 insn for the smulv8hi3_highpart expander: two-operand pmulhw with
;; operand 1 tied to the destination ("%0").
5274 (define_insn "*smulv8hi3_highpart"
5275 [(set (match_operand:V8HI 0 "register_operand" "=x")
5280 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5282 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5284 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5285 "pmulhw\t{%2, %0|%0, %2}"
5286 [(set_attr "type" "sseimul")
5287 (set_attr "prefix_data16" "1")
5288 (set_attr "mode" "TI")])
;; Expander for umulv8hi3_highpart on V8HI operands.  The preparation
;; statement passes the operands to ix86_fixup_binary_operands_no_copy
;; with code MULT.
5290 (define_expand "umulv8hi3_highpart"
5291 [(set (match_operand:V8HI 0 "register_operand" "")
5296 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5298 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5301 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX insn for the umulv8hi3_highpart expander: three-operand vpmulhuw.
5303 (define_insn "*avx_umulv8hi3_highpart"
5304 [(set (match_operand:V8HI 0 "register_operand" "=x")
5309 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5311 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5313 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5314 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5315 [(set_attr "type" "sseimul")
5316 (set_attr "prefix" "vex")
5317 (set_attr "mode" "TI")])
;; SSE2 insn for the umulv8hi3_highpart expander: two-operand pmulhuw with
;; operand 1 tied to the destination ("%0").
5319 (define_insn "*umulv8hi3_highpart"
5320 [(set (match_operand:V8HI 0 "register_operand" "=x")
5325 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5327 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5329 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5330 "pmulhuw\t{%2, %0|%0, %2}"
5331 [(set_attr "type" "sseimul")
5332 (set_attr "prefix_data16" "1")
5333 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from two V4SI inputs; the template
;; selects elements 0 and 2 of each input.  The preparation statement
;; passes the operands to ix86_fixup_binary_operands_no_copy.
5335 (define_expand "sse2_umulv2siv2di3"
5336 [(set (match_operand:V2DI 0 "register_operand" "")
5340 (match_operand:V4SI 1 "nonimmediate_operand" "")
5341 (parallel [(const_int 0) (const_int 2)])))
5344 (match_operand:V4SI 2 "nonimmediate_operand" "")
5345 (parallel [(const_int 0) (const_int 2)])))))]
5347 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn for sse2_umulv2siv2di3: three-operand vpmuludq on elements 0
;; and 2 of the V4SI inputs, giving a V2DI result.
5349 (define_insn "*avx_umulv2siv2di3"
5350 [(set (match_operand:V2DI 0 "register_operand" "=x")
5354 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5355 (parallel [(const_int 0) (const_int 2)])))
5358 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5359 (parallel [(const_int 0) (const_int 2)])))))]
5360 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5361 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5362 [(set_attr "type" "sseimul")
5363 (set_attr "prefix" "vex")
5364 (set_attr "mode" "TI")])
;; SSE2 insn for sse2_umulv2siv2di3: two-operand pmuludq on elements 0 and
;; 2 of the V4SI inputs, with operand 1 tied to the destination.
5366 (define_insn "*sse2_umulv2siv2di3"
5367 [(set (match_operand:V2DI 0 "register_operand" "=x")
5371 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5372 (parallel [(const_int 0) (const_int 2)])))
5375 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5376 (parallel [(const_int 0) (const_int 2)])))))]
5377 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5378 "pmuludq\t{%2, %0|%0, %2}"
5379 [(set_attr "type" "sseimul")
5380 (set_attr "prefix_data16" "1")
5381 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI
;; inputs; matched by the pmuldq/vpmuldq insns that follow.  The
;; preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy.
5383 (define_expand "sse4_1_mulv2siv2di3"
5384 [(set (match_operand:V2DI 0 "register_operand" "")
5388 (match_operand:V4SI 1 "nonimmediate_operand" "")
5389 (parallel [(const_int 0) (const_int 2)])))
5392 (match_operand:V4SI 2 "nonimmediate_operand" "")
5393 (parallel [(const_int 0) (const_int 2)])))))]
5395 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn for sse4_1_mulv2siv2di3: three-operand vpmuldq on elements 0
;; and 2 of the V4SI inputs, giving a V2DI result.
5397 (define_insn "*avx_mulv2siv2di3"
5398 [(set (match_operand:V2DI 0 "register_operand" "=x")
5402 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5403 (parallel [(const_int 0) (const_int 2)])))
5406 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5407 (parallel [(const_int 0) (const_int 2)])))))]
5408 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5409 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5410 [(set_attr "type" "sseimul")
5411 (set_attr "prefix_extra" "1")
5412 (set_attr "prefix" "vex")
5413 (set_attr "mode" "TI")])
;; SSE4.1 insn for sse4_1_mulv2siv2di3: two-operand pmuldq on elements 0
;; and 2 of the V4SI inputs, with operand 1 tied to the destination.
5415 (define_insn "*sse4_1_mulv2siv2di3"
5416 [(set (match_operand:V2DI 0 "register_operand" "=x")
5420 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5421 (parallel [(const_int 0) (const_int 2)])))
5424 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5425 (parallel [(const_int 0) (const_int 2)])))))]
5426 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5427 "pmuldq\t{%2, %0|%0, %2}"
5428 [(set_attr "type" "sseimul")
5429 (set_attr "prefix_extra" "1")
5430 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V4SI result from two V8HI inputs.  The template
;; takes V4HI selections of operands 1 and 2 twice (the second pair via
;; match_dup, with index lists starting at 1 and ending at 7).  The
;; preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy.
5432 (define_expand "sse2_pmaddwd"
5433 [(set (match_operand:V4SI 0 "register_operand" "")
5438 (match_operand:V8HI 1 "nonimmediate_operand" "")
5439 (parallel [(const_int 0)
5445 (match_operand:V8HI 2 "nonimmediate_operand" "")
5446 (parallel [(const_int 0)
5452 (vec_select:V4HI (match_dup 1)
5453 (parallel [(const_int 1)
5458 (vec_select:V4HI (match_dup 2)
5459 (parallel [(const_int 1)
5462 (const_int 7)]))))))]
5464 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX insn for sse2_pmaddwd: three-operand vpmaddwd, V8HI inputs and a
;; V4SI result.  Operand 2 may be a register or memory.
5466 (define_insn "*avx_pmaddwd"
5467 [(set (match_operand:V4SI 0 "register_operand" "=x")
5472 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5473 (parallel [(const_int 0)
5479 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5480 (parallel [(const_int 0)
5486 (vec_select:V4HI (match_dup 1)
5487 (parallel [(const_int 1)
5492 (vec_select:V4HI (match_dup 2)
5493 (parallel [(const_int 1)
5496 (const_int 7)]))))))]
5497 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5498 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5499 [(set_attr "type" "sseiadd")
5500 (set_attr "prefix" "vex")
5501 (set_attr "mode" "TI")])
;; SSE2 insn for sse2_pmaddwd: two-operand pmaddwd with operand 1 tied to
;; the destination.  Also sets the atom_unit attribute to "simul".
5503 (define_insn "*sse2_pmaddwd"
5504 [(set (match_operand:V4SI 0 "register_operand" "=x")
5509 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5510 (parallel [(const_int 0)
5516 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5517 (parallel [(const_int 0)
5523 (vec_select:V4HI (match_dup 1)
5524 (parallel [(const_int 1)
5529 (vec_select:V4HI (match_dup 2)
5530 (parallel [(const_int 1)
5533 (const_int 7)]))))))]
5534 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5535 "pmaddwd\t{%2, %0|%0, %2}"
5536 [(set_attr "type" "sseiadd")
5537 (set_attr "atom_unit" "simul")
5538 (set_attr "prefix_data16" "1")
5539 (set_attr "mode" "TI")])
;; Expander for the V4SI multiply on register operands.  When
;; TARGET_SSE4_1 or TARGET_AVX is set, the operands are passed to
;; ix86_fixup_binary_operands_no_copy.
5541 (define_expand "mulv4si3"
5542 [(set (match_operand:V4SI 0 "register_operand" "")
5543 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5544 (match_operand:V4SI 2 "register_operand" "")))]
5547 if (TARGET_SSE4_1 || TARGET_AVX)
5548 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX form of the V4SI multiply: three-operand vpmulld.
5551 (define_insn "*avx_mulv4si3"
5552 [(set (match_operand:V4SI 0 "register_operand" "=x")
5553 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5554 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5555 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5556 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5557 [(set_attr "type" "sseimul")
5558 (set_attr "prefix_extra" "1")
5559 (set_attr "prefix" "vex")
5560 (set_attr "mode" "TI")])
;; SSE4.1 form of the V4SI multiply: two-operand pmulld with operand 1
;; tied to the destination.
5562 (define_insn "*sse4_1_mulv4si3"
5563 [(set (match_operand:V4SI 0 "register_operand" "=x")
5564 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5565 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5566 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5567 "pmulld\t{%2, %0|%0, %2}"
5568 [(set_attr "type" "sseimul")
5569 (set_attr "prefix_extra" "1")
5570 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 when neither SSE4.1 nor AVX is enabled (see the
;; condition), split while new pseudos can still be created.  The split
;; uses gen_sse2_umulv2siv2di3 twice -- once on the inputs as given and once
;; on copies shifted with gen_sse2_lshrv1ti3 -- then shuffles each product
;; with pshufd and interleaves the two results into op0.
5572 (define_insn_and_split "*sse2_mulv4si3"
5573 [(set (match_operand:V4SI 0 "register_operand" "")
5574 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5575 (match_operand:V4SI 2 "register_operand" "")))]
5576 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5577 && can_create_pseudo_p ()"
5582 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5588 t1 = gen_reg_rtx (V4SImode);
5589 t2 = gen_reg_rtx (V4SImode);
5590 t3 = gen_reg_rtx (V4SImode);
5591 t4 = gen_reg_rtx (V4SImode);
5592 t5 = gen_reg_rtx (V4SImode);
5593 t6 = gen_reg_rtx (V4SImode);
5594 thirtytwo = GEN_INT (32);
5596 /* Multiply elements 2 and 0. */
5597 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5600 /* Shift both input vectors down one element, so that elements 3
5601 and 1 are now in the slots for elements 2 and 0. For K8, at
5602 least, this is faster than using a shuffle. */
5603 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5604 gen_lowpart (V1TImode, op1),
5606 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5607 gen_lowpart (V1TImode, op2),
5609 /* Multiply elements 3 and 1. */
5610 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5613 /* Move the results in element 2 down to element 1; we don't care
5614 what goes in elements 2 and 3. */
5615 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5616 const0_rtx, const0_rtx));
5617 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5618 const0_rtx, const0_rtx));
5620 /* Merge the parts back together. */
5621 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply on register operands, split while new pseudos can still be
;; created.  Two expansion sequences follow: one built from the XOP helpers
;; gen_xop_phadddq / gen_xop_pmacsdql, and one built from three
;; gen_sse2_umulv2siv2di3 partial products combined with shifts and adds.
5625 (define_insn_and_split "mulv2di3"
5626 [(set (match_operand:V2DI 0 "register_operand" "")
5627 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5628 (match_operand:V2DI 2 "register_operand" "")))]
5630 && can_create_pseudo_p ()"
5635 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5644 /* op1: A,B,C,D, op2: E,F,G,H */
5645 op1 = gen_lowpart (V4SImode, op1);
5646 op2 = gen_lowpart (V4SImode, op2);
5648 t1 = gen_reg_rtx (V4SImode);
5649 t2 = gen_reg_rtx (V4SImode);
5650 t3 = gen_reg_rtx (V2DImode);
5651 t4 = gen_reg_rtx (V2DImode);
5654 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5660 /* t2: (B*E),(A*F),(D*G),(C*H) */
5661 emit_insn (gen_mulv4si3 (t2, t1, op2));
5663 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5664 emit_insn (gen_xop_phadddq (t3, t2));
5666 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5667 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5669 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5670 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5674 t1 = gen_reg_rtx (V2DImode);
5675 t2 = gen_reg_rtx (V2DImode);
5676 t3 = gen_reg_rtx (V2DImode);
5677 t4 = gen_reg_rtx (V2DImode);
5678 t5 = gen_reg_rtx (V2DImode);
5679 t6 = gen_reg_rtx (V2DImode);
5680 thirtytwo = GEN_INT (32);
5682 /* Multiply low parts. */
5683 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5684 gen_lowpart (V4SImode, op2)));
5686 /* Shift input vectors right 32 bits so we can multiply high parts. */
5687 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5688 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5690 /* Multiply high parts by low parts. */
5691 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5692 gen_lowpart (V4SImode, t3)));
5693 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5694 gen_lowpart (V4SImode, t2)));
5696 /* Shift them back. */
5697 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5698 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5700 /* Add the three parts together. */
5701 emit_insn (gen_addv2di3 (t6, t1, t4));
5702 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening multiply V8HI x V8HI -> V4SI, "hi" variant.  Computes
;; gen_mulv8hi3 into t1 and gen_smulv8hi3_highpart into t2, then
;; interleaves the high halves of t1 and t2 into operand 0 viewed as V8HI.
5707 (define_expand "vec_widen_smult_hi_v8hi"
5708 [(match_operand:V4SI 0 "register_operand" "")
5709 (match_operand:V8HI 1 "register_operand" "")
5710 (match_operand:V8HI 2 "register_operand" "")]
5713 rtx op1, op2, t1, t2, dest;
5717 t1 = gen_reg_rtx (V8HImode);
5718 t2 = gen_reg_rtx (V8HImode);
5719 dest = gen_lowpart (V8HImode, operands[0]);
5721 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5722 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5723 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply V8HI x V8HI -> V4SI, "lo" variant.  Computes
;; gen_mulv8hi3 into t1 and gen_smulv8hi3_highpart into t2, then
;; interleaves the low halves of t1 and t2 into operand 0 viewed as V8HI.
5727 (define_expand "vec_widen_smult_lo_v8hi"
5728 [(match_operand:V4SI 0 "register_operand" "")
5729 (match_operand:V8HI 1 "register_operand" "")
5730 (match_operand:V8HI 2 "register_operand" "")]
5733 rtx op1, op2, t1, t2, dest;
5737 t1 = gen_reg_rtx (V8HImode);
5738 t2 = gen_reg_rtx (V8HImode);
5739 dest = gen_lowpart (V8HImode, operands[0]);
5741 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5742 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5743 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply V8HI x V8HI -> V4SI, unsigned "hi" variant.  Computes
;; gen_mulv8hi3 into t1 and gen_umulv8hi3_highpart into t2, then
;; interleaves the high halves of t1 and t2 into operand 0 viewed as V8HI.
5747 (define_expand "vec_widen_umult_hi_v8hi"
5748 [(match_operand:V4SI 0 "register_operand" "")
5749 (match_operand:V8HI 1 "register_operand" "")
5750 (match_operand:V8HI 2 "register_operand" "")]
5753 rtx op1, op2, t1, t2, dest;
5757 t1 = gen_reg_rtx (V8HImode);
5758 t2 = gen_reg_rtx (V8HImode);
5759 dest = gen_lowpart (V8HImode, operands[0]);
5761 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5762 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5763 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply V8HI x V8HI -> V4SI, unsigned "lo" variant.  Computes
;; gen_mulv8hi3 into t1 and gen_umulv8hi3_highpart into t2, then
;; interleaves the low halves of t1 and t2 into operand 0 viewed as V8HI.
5767 (define_expand "vec_widen_umult_lo_v8hi"
5768 [(match_operand:V4SI 0 "register_operand" "")
5769 (match_operand:V8HI 1 "register_operand" "")
5770 (match_operand:V8HI 2 "register_operand" "")]
5773 rtx op1, op2, t1, t2, dest;
5777 t1 = gen_reg_rtx (V8HImode);
5778 t2 = gen_reg_rtx (V8HImode);
5779 dest = gen_lowpart (V8HImode, operands[0]);
5781 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5782 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5783 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, signed "hi" variant.  Each input
;; is shuffled with gen_sse2_pshufd_1 into a fresh V4SI temporary, and the
;; temporaries are multiplied with gen_xop_mulv2div2di3_high.
5787 (define_expand "vec_widen_smult_hi_v4si"
5788 [(match_operand:V2DI 0 "register_operand" "")
5789 (match_operand:V4SI 1 "register_operand" "")
5790 (match_operand:V4SI 2 "register_operand" "")]
5795 t1 = gen_reg_rtx (V4SImode);
5796 t2 = gen_reg_rtx (V4SImode);
5798 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5803 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5808 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, signed "lo" variant.  Each input
;; is shuffled with gen_sse2_pshufd_1 into a fresh V4SI temporary, and the
;; temporaries are multiplied with gen_xop_mulv2div2di3_low.
5812 (define_expand "vec_widen_smult_lo_v4si"
5813 [(match_operand:V2DI 0 "register_operand" "")
5814 (match_operand:V4SI 1 "register_operand" "")
5815 (match_operand:V4SI 2 "register_operand" "")]
5820 t1 = gen_reg_rtx (V4SImode);
5821 t2 = gen_reg_rtx (V4SImode);
5823 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5828 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5833 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, unsigned "hi" variant.  Each
;; input is high-interleaved with itself, and the two temporaries are
;; multiplied with gen_sse2_umulv2siv2di3.
5837 (define_expand "vec_widen_umult_hi_v4si"
5838 [(match_operand:V2DI 0 "register_operand" "")
5839 (match_operand:V4SI 1 "register_operand" "")
5840 (match_operand:V4SI 2 "register_operand" "")]
5843 rtx op1, op2, t1, t2;
5847 t1 = gen_reg_rtx (V4SImode);
5848 t2 = gen_reg_rtx (V4SImode);
5850 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5851 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5852 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, unsigned "lo" variant.  Each
;; input is low-interleaved with itself, and the two temporaries are
;; multiplied with gen_sse2_umulv2siv2di3.
5856 (define_expand "vec_widen_umult_lo_v4si"
5857 [(match_operand:V2DI 0 "register_operand" "")
5858 (match_operand:V4SI 1 "register_operand" "")
5859 (match_operand:V4SI 2 "register_operand" "")]
5862 rtx op1, op2, t1, t2;
5866 t1 = gen_reg_rtx (V4SImode);
5867 t2 = gen_reg_rtx (V4SImode);
5869 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5870 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5871 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product expander: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes through a fresh V4SI temporary.
5875 (define_expand "sdot_prodv8hi"
5876 [(match_operand:V4SI 0 "register_operand" "")
5877 (match_operand:V8HI 1 "register_operand" "")
5878 (match_operand:V8HI 2 "register_operand" "")
5879 (match_operand:V4SI 3 "register_operand" "")]
5882 rtx t = gen_reg_rtx (V4SImode);
5883 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5884 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander on V4SI inputs with a V2DI accumulator (operand 3).
;; t1 = umulv2siv2di3 (op1, op2) + op3; t2 and t3 are op1 and op2 shifted
;; right as V1TI values; t4 = umulv2siv2di3 (t2, t3); operand 0 = t1 + t4.
5888 (define_expand "udot_prodv4si"
5889 [(match_operand:V2DI 0 "register_operand" "")
5890 (match_operand:V4SI 1 "register_operand" "")
5891 (match_operand:V4SI 2 "register_operand" "")
5892 (match_operand:V2DI 3 "register_operand" "")]
5897 t1 = gen_reg_rtx (V2DImode);
5898 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5899 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5901 t2 = gen_reg_rtx (V4SImode);
5902 t3 = gen_reg_rtx (V4SImode);
5903 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5904 gen_lowpart (V1TImode, operands[1]),
5906 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5907 gen_lowpart (V1TImode, operands[2]),
5910 t4 = gen_reg_rtx (V2DImode);
5911 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5913 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX shift insn for the SSEMODE24 modes (V8HI, V4SI): three-operand
;; vpsra<ssevecsize>.  The SImode count (operand 2) uses constraint "xN";
;; length_immediate depends on whether it is a const_int.
5917 (define_insn "*avx_ashr<mode>3"
5918 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5920 (match_operand:SSEMODE24 1 "register_operand" "x")
5921 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5923 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5924 [(set_attr "type" "sseishft")
5925 (set_attr "prefix" "vex")
5926 (set (attr "length_immediate")
5927 (if_then_else (match_operand 2 "const_int_operand" "")
5929 (const_string "0")))
5930 (set_attr "mode" "TI")])
;; Shift insn for the SSEMODE24 modes (V8HI, V4SI): two-operand
;; psra<ssevecsize> with operand 1 tied to the destination.
;; length_immediate depends on whether the count is a const_int.
5932 (define_insn "ashr<mode>3"
5933 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5935 (match_operand:SSEMODE24 1 "register_operand" "0")
5936 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5938 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5939 [(set_attr "type" "sseishft")
5940 (set_attr "prefix_data16" "1")
5941 (set (attr "length_immediate")
5942 (if_then_else (match_operand 2 "const_int_operand" "")
5944 (const_string "0")))
5945 (set_attr "mode" "TI")])
;; AVX whole-register shift on V1TI: three-operand vpsrldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before it is printed as the instruction's immediate.
5947 (define_insn "*avx_lshrv1ti3"
5948 [(set (match_operand:V1TI 0 "register_operand" "=x")
5950 (match_operand:V1TI 1 "register_operand" "x")
5951 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5954 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5955 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5957 [(set_attr "type" "sseishft")
5958 (set_attr "prefix" "vex")
5959 (set_attr "length_immediate" "1")
5960 (set_attr "mode" "TI")])
;; AVX logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI):
;; three-operand vpsrl<ssevecsize>.  length_immediate depends on whether
;; the count (operand 2) is a const_int.
5962 (define_insn "*avx_lshr<mode>3"
5963 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5964 (lshiftrt:SSEMODE248
5965 (match_operand:SSEMODE248 1 "register_operand" "x")
5966 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5968 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5969 [(set_attr "type" "sseishft")
5970 (set_attr "prefix" "vex")
5971 (set (attr "length_immediate")
5972 (if_then_else (match_operand 2 "const_int_operand" "")
5974 (const_string "0")))
5975 (set_attr "mode" "TI")])
;; SSE2 whole-register shift on V1TI: two-operand psrldq with operand 1
;; tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before it
;; is printed as the instruction's immediate.
5977 (define_insn "sse2_lshrv1ti3"
5978 [(set (match_operand:V1TI 0 "register_operand" "=x")
5980 (match_operand:V1TI 1 "register_operand" "0")
5981 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5984 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5985 return "psrldq\t{%2, %0|%0, %2}";
5987 [(set_attr "type" "sseishft")
5988 (set_attr "prefix_data16" "1")
5989 (set_attr "length_immediate" "1")
5990 (set_attr "atom_unit" "sishuf")
5991 (set_attr "mode" "TI")])
;; Logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI):
;; two-operand psrl<ssevecsize> with operand 1 tied to the destination.
;; length_immediate depends on whether the count is a const_int.
5993 (define_insn "lshr<mode>3"
5994 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5995 (lshiftrt:SSEMODE248
5996 (match_operand:SSEMODE248 1 "register_operand" "0")
5997 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5999 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6000 [(set_attr "type" "sseishft")
6001 (set_attr "prefix_data16" "1")
6002 (set (attr "length_immediate")
6003 (if_then_else (match_operand 2 "const_int_operand" "")
6005 (const_string "0")))
6006 (set_attr "mode" "TI")])
;; AVX whole-register left shift on V1TI: three-operand vpslldq.  Operand 2
;; must satisfy const_0_to_255_mul_8_operand; the output code divides it by
;; 8 before it is printed as the instruction's immediate.
6008 (define_insn "*avx_ashlv1ti3"
6009 [(set (match_operand:V1TI 0 "register_operand" "=x")
6010 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
6011 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6014 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6015 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
6017 [(set_attr "type" "sseishft")
6018 (set_attr "prefix" "vex")
6019 (set_attr "length_immediate" "1")
6020 (set_attr "mode" "TI")])
;; AVX shift insn for the SSEMODE248 modes (V8HI, V4SI, V2DI):
;; three-operand vpsll<ssevecsize>.  length_immediate depends on whether
;; the count (operand 2) is a const_int.
6022 (define_insn "*avx_ashl<mode>3"
6023 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6025 (match_operand:SSEMODE248 1 "register_operand" "x")
6026 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6028 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6029 [(set_attr "type" "sseishft")
6030 (set_attr "prefix" "vex")
6031 (set (attr "length_immediate")
6032 (if_then_else (match_operand 2 "const_int_operand" "")
6034 (const_string "0")))
6035 (set_attr "mode" "TI")])
;; SSE2 whole-register left shift on V1TI: two-operand pslldq with operand
;; 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before it
;; is printed as the instruction's immediate.
6037 (define_insn "sse2_ashlv1ti3"
6038 [(set (match_operand:V1TI 0 "register_operand" "=x")
6039 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
6040 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6043 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6044 return "pslldq\t{%2, %0|%0, %2}";
6046 [(set_attr "type" "sseishft")
6047 (set_attr "prefix_data16" "1")
6048 (set_attr "length_immediate" "1")
6049 (set_attr "mode" "TI")])
;; Shift insn for the SSEMODE248 modes (V8HI, V4SI, V2DI): two-operand
;; psll<ssevecsize> with operand 1 tied to the destination.
;; length_immediate depends on whether the count is a const_int.
6051 (define_insn "ashl<mode>3"
6052 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6054 (match_operand:SSEMODE248 1 "register_operand" "0")
6055 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6057 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6058 [(set_attr "type" "sseishft")
6059 (set_attr "prefix_data16" "1")
6060 (set (attr "length_immediate")
6061 (if_then_else (match_operand 2 "const_int_operand" "")
6063 (const_string "0")))
6064 (set_attr "mode" "TI")])
;; Expander for vec_shl on the SSEMODEI integer vector modes.  Operand 2
;; must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-views operands 0 and 1 as V1TImode with gen_lowpart.
6066 (define_expand "vec_shl_<mode>"
6067 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6069 (match_operand:SSEMODEI 1 "register_operand" "")
6070 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6073 operands[0] = gen_lowpart (V1TImode, operands[0]);
6074 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Expander for vec_shr on the SSEMODEI integer vector modes.  Operand 2
;; must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-views operands 0 and 1 as V1TImode with gen_lowpart.
6077 (define_expand "vec_shr_<mode>"
6078 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6080 (match_operand:SSEMODEI 1 "register_operand" "")
6081 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6084 operands[0] = gen_lowpart (V1TImode, operands[0]);
6085 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX max/min insn for the SSEMODE124 modes (V16QI, V8HI, V4SI):
;; three-operand vp<maxmin_int><ssevecsize>.  prefix_extra is computed from
;; whether <MODE>mode differs from V8HImode (for SMAX/SMIN) or from
;; V16QImode (for the other codes).
6088 (define_insn "*avx_<code><mode>3"
6089 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6091 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6092 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6093 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6094 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6095 [(set_attr "type" "sseiadd")
6096 (set (attr "prefix_extra")
6098 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6101 (const_string "0")))
6102 (set_attr "prefix" "vex")
6103 (set_attr "mode" "TI")])
;; Expander for the <code> operation on V16QI.  The preparation statement
;; passes the operands to ix86_fixup_binary_operands_no_copy.
6105 (define_expand "<code>v16qi3"
6106 [(set (match_operand:V16QI 0 "register_operand" "")
6108 (match_operand:V16QI 1 "nonimmediate_operand" "")
6109 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6111 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 insn for <code>v16qi3: two-operand p<maxmin_int>b with operand 1
;; tied to the destination.
6113 (define_insn "*<code>v16qi3"
6114 [(set (match_operand:V16QI 0 "register_operand" "=x")
6116 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6117 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6118 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6119 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6120 [(set_attr "type" "sseiadd")
6121 (set_attr "prefix_data16" "1")
6122 (set_attr "mode" "TI")])
;; Expander for the <code> operation on V8HI.  The preparation statement
;; passes the operands to ix86_fixup_binary_operands_no_copy.
6124 (define_expand "<code>v8hi3"
6125 [(set (match_operand:V8HI 0 "register_operand" "")
6127 (match_operand:V8HI 1 "nonimmediate_operand" "")
6128 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6130 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 insn for <code>v8hi3: two-operand p<maxmin_int>w with operand 1
;; tied to the destination.
6132 (define_insn "*<code>v8hi3"
6133 [(set (match_operand:V8HI 0 "register_operand" "=x")
6135 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6136 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6137 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6138 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6139 [(set_attr "type" "sseiadd")
6140 (set_attr "prefix_data16" "1")
6141 (set_attr "mode" "TI")])
;; Expander for unsigned max on V8HI.  One path passes the operands to
;; ix86_fixup_binary_operands_no_copy; the other emits
;; op3 = ussub (operand 1, operand 2) followed by op0 = op3 + operand 2,
;; using a fresh pseudo for op3 when op0 and operand 2 are the same rtx.
6143 (define_expand "umaxv8hi3"
6144 [(set (match_operand:V8HI 0 "register_operand" "")
6145 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6146 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6150 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6153 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6154 if (rtx_equal_p (op3, op2))
6155 op3 = gen_reg_rtx (V8HImode);
6156 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6157 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for signed max on the SSEMODE14 modes (V16QI, V4SI).  One path
;; passes the operands to ix86_fixup_binary_operands_no_copy; the other
;; fills xops with (dest, op1, op2, GT (op1, op2), op1, op2) and calls
;; ix86_expand_int_vcond.
6162 (define_expand "smax<mode>3"
6163 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6164 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6165 (match_operand:SSEMODE14 2 "register_operand" "")))]
6169 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6175 xops[0] = operands[0];
6176 xops[1] = operands[1];
6177 xops[2] = operands[2];
6178 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6179 xops[4] = operands[1];
6180 xops[5] = operands[2];
6181 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for the SSEMODE14 modes (V16QI, V4SI): two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination.
6187 (define_insn "*sse4_1_<code><mode>3"
6188 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6190 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6191 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6192 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6193 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6194 [(set_attr "type" "sseiadd")
6195 (set_attr "prefix_extra" "1")
6196 (set_attr "mode" "TI")])
;; Expander for signed max on V2DI.  Fills xops with
;; (dest, op1, op2, GT (op1, op2), op1, op2) and calls
;; ix86_expand_int_vcond.
6198 (define_expand "smaxv2di3"
6199 [(set (match_operand:V2DI 0 "register_operand" "")
6200 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6201 (match_operand:V2DI 2 "register_operand" "")))]
6207 xops[0] = operands[0];
6208 xops[1] = operands[1];
6209 xops[2] = operands[2];
6210 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6211 xops[4] = operands[1];
6212 xops[5] = operands[2];
6213 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned max on V4SI.  One path passes the operands to
;; ix86_fixup_binary_operands_no_copy; the other fills xops with
;; (dest, op1, op2, GTU (op1, op2), op1, op2) and calls
;; ix86_expand_int_vcond.
6218 (define_expand "umaxv4si3"
6219 [(set (match_operand:V4SI 0 "register_operand" "")
6220 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6221 (match_operand:V4SI 2 "register_operand" "")))]
6225 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6231 xops[0] = operands[0];
6232 xops[1] = operands[1];
6233 xops[2] = operands[2];
6234 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6235 xops[4] = operands[1];
6236 xops[5] = operands[2];
6237 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for the SSEMODE24 modes (V8HI, V4SI): two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination.
6243 (define_insn "*sse4_1_<code><mode>3"
6244 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6246 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6247 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6248 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6249 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6250 [(set_attr "type" "sseiadd")
6251 (set_attr "prefix_extra" "1")
6252 (set_attr "mode" "TI")])
;; Expander for unsigned max on V2DI.  Fills xops with
;; (dest, op1, op2, GTU (op1, op2), op1, op2) and calls
;; ix86_expand_int_vcond.
6254 (define_expand "umaxv2di3"
6255 [(set (match_operand:V2DI 0 "register_operand" "")
6256 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6257 (match_operand:V2DI 2 "register_operand" "")))]
6263 xops[0] = operands[0];
6264 xops[1] = operands[1];
6265 xops[2] = operands[2];
6266 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6267 xops[4] = operands[1];
6268 xops[5] = operands[2];
6269 ok = ix86_expand_int_vcond (xops);
;; Expander for signed min on the SSEMODE14 modes (V16QI, V4SI).  One path
;; passes the operands to ix86_fixup_binary_operands_no_copy; the other
;; calls ix86_expand_int_vcond with the same GT (op1, op2) comparison as
;; smax but with xops[1] and xops[2] swapped (op2, then op1).
6274 (define_expand "smin<mode>3"
6275 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6276 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6277 (match_operand:SSEMODE14 2 "register_operand" "")))]
6281 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6287 xops[0] = operands[0];
6288 xops[1] = operands[2];
6289 xops[2] = operands[1];
6290 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6291 xops[4] = operands[1];
6292 xops[5] = operands[2];
6293 ok = ix86_expand_int_vcond (xops);
;; Expander for signed min on V2DI.  Calls ix86_expand_int_vcond with the
;; comparison GT (op1, op2) and xops[1]/xops[2] set to op2/op1.
6299 (define_expand "sminv2di3"
6300 [(set (match_operand:V2DI 0 "register_operand" "")
6301 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6302 (match_operand:V2DI 2 "register_operand" "")))]
6308 xops[0] = operands[0];
6309 xops[1] = operands[2];
6310 xops[2] = operands[1];
6311 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6312 xops[4] = operands[1];
6313 xops[5] = operands[2];
6314 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned min on the SSEMODE24 modes (V8HI, V4SI).  One path
;; passes the operands to ix86_fixup_binary_operands_no_copy; the other
;; calls ix86_expand_int_vcond with the comparison GTU (op1, op2) and
;; xops[1]/xops[2] set to op2/op1.
6319 (define_expand "umin<mode>3"
6320 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6321 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6322 (match_operand:SSEMODE24 2 "register_operand" "")))]
6326 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6332 xops[0] = operands[0];
6333 xops[1] = operands[2];
6334 xops[2] = operands[1];
6335 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6336 xops[4] = operands[1];
6337 xops[5] = operands[2];
6338 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned min on V2DI.  Calls ix86_expand_int_vcond with the
;; comparison GTU (op1, op2) and xops[1]/xops[2] set to op2/op1.
6344 (define_expand "uminv2di3"
6345 [(set (match_operand:V2DI 0 "register_operand" "")
6346 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6347 (match_operand:V2DI 2 "register_operand" "")))]
6353 xops[0] = operands[0];
6354 xops[1] = operands[2];
6355 xops[2] = operands[1];
6356 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6357 xops[4] = operands[1];
6358 xops[5] = operands[2];
6359 ok = ix86_expand_int_vcond (xops);
6364 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6366 ;; Parallel integral comparisons
6368 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the equality comparison on the SSEMODE124 modes (V16QI,
;; V8HI, V4SI), enabled for SSE2 when XOP is not in use.  The preparation
;; statement passes the operands to ix86_fixup_binary_operands_no_copy.
6370 (define_expand "sse2_eq<mode>3"
6371 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6373 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6374 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6375 "TARGET_SSE2 && !TARGET_XOP "
6376 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX equality comparison for the SSEMODE1248 modes: three-operand
;; vpcmpeq<ssevecsize>.  prefix_extra is chosen by whether operand 0 is
;; V2DI.
6378 (define_insn "*avx_eq<mode>3"
6379 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6381 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6382 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6383 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6384 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6385 [(set_attr "type" "ssecmp")
6386 (set (attr "prefix_extra")
6387 (if_then_else (match_operand:V2DI 0 "" "")
6389 (const_string "*")))
6390 (set_attr "prefix" "vex")
6391 (set_attr "mode" "TI")])
;; SSE2 two-operand equality compare (pcmpeq<ssevecsize>) on the SSEMODE124
;; modes.  Operand 1 is tied to the destination ("%0", commutative) and
;; operand 2 may be a register or memory.  Not used when XOP is enabled.
6393 (define_insn "*sse2_eq<mode>3"
6394 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6396 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6397 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6398 "TARGET_SSE2 && !TARGET_XOP
6399 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6400 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6401 [(set_attr "type" "ssecmp")
6402 (set_attr "prefix_data16" "1")
6403 (set_attr "mode" "TI")])
;; Named expander for the V2DI equality compare.  The preparation
;; statement fixes up the two source operands for EQ in V2DImode.
6405 (define_expand "sse4_1_eqv2di3"
6406 [(set (match_operand:V2DI 0 "register_operand" "")
6408 (match_operand:V2DI 1 "nonimmediate_operand" "")
6409 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6411 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand V2DI equality compare (pcmpeqq).  Operand 1 is tied
;; to the destination ("%0", commutative); operand 2 may be in memory.
6413 (define_insn "*sse4_1_eqv2di3"
6414 [(set (match_operand:V2DI 0 "register_operand" "=x")
6416 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6417 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6418 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6419 "pcmpeqq\t{%2, %0|%0, %2}"
6420 [(set_attr "type" "ssecmp")
6421 (set_attr "prefix_extra" "1")
6422 (set_attr "mode" "TI")])
;; AVX three-operand greater-than compare (vpcmpgt<ssevecsize>).  Unlike
;; the equality patterns, operand 1 carries no "%" marker and must be a
;; register; only operand 2 may be in memory.  The prefix_extra attribute
;; is computed from whether operand 0 is V2DI.
6424 (define_insn "*avx_gt<mode>3"
6425 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6427 (match_operand:SSEMODE1248 1 "register_operand" "x")
6428 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6430 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6431 [(set_attr "type" "ssecmp")
6432 (set (attr "prefix_extra")
6433 (if_then_else (match_operand:V2DI 0 "" "")
6435 (const_string "*")))
6436 (set_attr "prefix" "vex")
6437 (set_attr "mode" "TI")])
;; SSE2 two-operand greater-than compare (pcmpgt<ssevecsize>) on the
;; SSEMODE124 modes.  Operand 1 is tied to the destination ("0") and
;; operand 2 may be a register or memory.  Not used when XOP is enabled.
6439 (define_insn "sse2_gt<mode>3"
6440 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6442 (match_operand:SSEMODE124 1 "register_operand" "0")
6443 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6444 "TARGET_SSE2 && !TARGET_XOP"
6445 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6446 [(set_attr "type" "ssecmp")
6447 (set_attr "prefix_data16" "1")
6448 (set_attr "mode" "TI")])
;; Two-operand V2DI greater-than compare (pcmpgtq).  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
6450 (define_insn "sse4_2_gtv2di3"
6451 [(set (match_operand:V2DI 0 "register_operand" "=x")
6453 (match_operand:V2DI 1 "register_operand" "0")
6454 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6456 "pcmpgtq\t{%2, %0|%0, %2}"
6457 [(set_attr "type" "ssecmp")
6458 (set_attr "prefix_extra" "1")
6459 (set_attr "mode" "TI")])
;; Vector conditional select: operand 0 receives operand 1 where the
;; comparison (operator 3 applied to operands 4 and 5) holds and operand 2
;; elsewhere.  The expansion is delegated to ix86_expand_int_vcond, which
;; receives the operands array unchanged.
6461 (define_expand "vcond<mode>"
6462 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6463 (if_then_else:SSEMODE124C8
6464 (match_operator 3 ""
6465 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6466 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6467 (match_operand:SSEMODE124C8 1 "general_operand" "")
6468 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6471 bool ok = ix86_expand_int_vcond (operands);
;; Same operand layout as vcond<mode>; "vcondu" is the standard pattern
;; name GCC uses for vector conditional selects with unsigned comparisons.
;; Expansion is likewise delegated to ix86_expand_int_vcond.
6476 (define_expand "vcondu<mode>"
6477 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6478 (if_then_else:SSEMODE124C8
6479 (match_operator 3 ""
6480 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6481 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6482 (match_operand:SSEMODE124C8 1 "general_operand" "")
6483 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6486 bool ok = ix86_expand_int_vcond (operands);
6491 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6493 ;; Parallel bitwise logical operations
6495 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, expressed as an XOR with an
;; all-ones vector.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx, forces it into a
;; register, and stores it in operands[2] as the second XOR input.
6497 (define_expand "one_cmpl<mode>2"
6498 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6499 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6503 int i, n = GET_MODE_NUNITS (<MODE>mode);
6504 rtvec v = rtvec_alloc (n);
6506 for (i = 0; i < n; ++i)
6507 RTVEC_ELT (v, i) = constm1_rtx;
6509 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not for the 256-bit integer modes (AVX256MODEI), emitted as
;; vandnps.  Operand 1 is the input wrapped in (not ...) and must be a
;; register; operand 2 may be a register or memory.
6512 (define_insn "*avx_andnot<mode>3"
6513 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6515 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6516 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6518 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6519 [(set_attr "type" "sselog")
6520 (set_attr "prefix" "vex")
6521 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the 128-bit integer modes on targets with SSE but without
;; SSE2, emitted as the floating-point form andnps (mode attribute V4SF).
;; The complemented operand 1 is tied to the destination ("0").
6523 (define_insn "*sse_andnot<mode>3"
6524 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6526 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6527 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6528 "(TARGET_SSE && !TARGET_SSE2)"
6529 "andnps\t{%2, %0|%0, %2}"
6530 [(set_attr "type" "sselog")
6531 (set_attr "mode" "V4SF")])
;; AVX and-not for the 128-bit integer modes (SSEMODEI), emitted as the
;; three-operand vpandn.  Operand 1 is the complemented register input;
;; operand 2 may be a register or memory.
6533 (define_insn "*avx_andnot<mode>3"
6534 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6536 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6537 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6539 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6540 [(set_attr "type" "sselog")
6541 (set_attr "prefix" "vex")
6542 (set_attr "mode" "TI")])
;; Two-operand and-not for the 128-bit integer modes, emitted as pandn.
;; The complemented operand 1 is tied to the destination ("0"); operand 2
;; may be a register or memory.
6544 (define_insn "sse2_andnot<mode>3"
6545 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6547 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6548 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6550 "pandn\t{%2, %0|%0, %2}"
6551 [(set_attr "type" "sselog")
6552 (set_attr "prefix_data16" "1")
6553 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers, emitted as pandn.  The
;; complemented operand 1 is tied to the destination ("0"); operand 2 may
;; be a register or memory.
6555 (define_insn "*andnottf3"
6556 [(set (match_operand:TF 0 "register_operand" "=x")
6558 (not:TF (match_operand:TF 1 "register_operand" "0"))
6559 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6561 "pandn\t{%2, %0|%0, %2}"
6562 [(set_attr "type" "sselog")
6563 (set_attr "prefix_data16" "1")
6564 (set_attr "mode" "TI")])
;; Named expander generated for each rtx code substituted for <code> and
;; each SSEMODEI mode.  The preparation statement fixes up the two source
;; operands for the binary operation <CODE>.
6566 (define_expand "<code><mode>3"
6567 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6569 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6570 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6572 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX bitwise logic (any_logic codes) on the 256-bit integer modes,
;; emitted as the floating-point form v<logic>ps.  Operand 1 is marked
;; commutative ("%"); operand 2 may be a register or memory.
6574 (define_insn "*avx_<code><mode>3"
6575 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6576 (any_logic:AVX256MODEI
6577 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6578 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6580 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6581 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6582 [(set_attr "type" "sselog")
6583 (set_attr "prefix" "vex")
6584 (set_attr "mode" "<avxvecpsmode>")])
;; Bitwise logic on the 128-bit integer modes for targets with SSE but
;; without SSE2, emitted as the floating-point form <logic>ps (mode
;; attribute V4SF).  Operand 1 is tied to the destination ("%0").
6586 (define_insn "*sse_<code><mode>3"
6587 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6589 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6590 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6591 "(TARGET_SSE && !TARGET_SSE2)
6592 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6593 "<logic>ps\t{%2, %0|%0, %2}"
6594 [(set_attr "type" "sselog")
6595 (set_attr "mode" "V4SF")])
;; AVX bitwise logic on the 128-bit integer modes (SSEMODEI), emitted as
;; the three-operand integer form vp<logic>.  Operand 1 is marked
;; commutative ("%"); operand 2 may be a register or memory.
6597 (define_insn "*avx_<code><mode>3"
6598 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6600 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6601 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6603 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6604 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6605 [(set_attr "type" "sselog")
6606 (set_attr "prefix" "vex")
6607 (set_attr "mode" "TI")])
;; SSE2 bitwise logic on the 128-bit integer modes, emitted as the
;; two-operand integer form p<logic>.  Operand 1 is tied to the
;; destination ("%0", commutative); operand 2 may be in memory.
6609 (define_insn "*sse2_<code><mode>3"
6610 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6612 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6613 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6614 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6615 "p<logic>\t{%2, %0|%0, %2}"
6616 [(set_attr "type" "sselog")
6617 (set_attr "prefix_data16" "1")
6618 (set_attr "mode" "TI")])
;; Named expander for the bitwise logic codes on TFmode.  The preparation
;; statement fixes up the two source operands for <CODE> in TFmode.
6620 (define_expand "<code>tf3"
6621 [(set (match_operand:TF 0 "register_operand" "")
6623 (match_operand:TF 1 "nonimmediate_operand" "")
6624 (match_operand:TF 2 "nonimmediate_operand" "")))]
6626 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 bitwise logic on TFmode values in SSE registers, emitted as
;; p<logic>.  Operand 1 is tied to the destination ("%0", commutative);
;; operand 2 may be a register or memory.
6628 (define_insn "*<code>tf3"
6629 [(set (match_operand:TF 0 "register_operand" "=x")
6631 (match_operand:TF 1 "nonimmediate_operand" "%0")
6632 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6633 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6634 "p<logic>\t{%2, %0|%0, %2}"
6635 [(set_attr "type" "sselog")
6636 (set_attr "prefix_data16" "1")
6637 (set_attr "mode" "TI")])
6639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6641 ;; Parallel integral element swizzling
6643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI inputs into one V16QI result.  Both inputs are
;; reinterpreted as V16QI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-indexed elements -- confirm
;; against ix86_expand_vec_extract_even_odd.
6645 (define_expand "vec_pack_trunc_v8hi"
6646 [(match_operand:V16QI 0 "register_operand" "")
6647 (match_operand:V8HI 1 "register_operand" "")
6648 (match_operand:V8HI 2 "register_operand" "")]
6651 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6652 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6653 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V4SI inputs into one V8HI result.  Both inputs are
;; reinterpreted as V8HI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-indexed elements -- confirm
;; against ix86_expand_vec_extract_even_odd.
6657 (define_expand "vec_pack_trunc_v4si"
6658 [(match_operand:V8HI 0 "register_operand" "")
6659 (match_operand:V4SI 1 "register_operand" "")
6660 (match_operand:V4SI 2 "register_operand" "")]
6663 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6664 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6665 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V2DI inputs into one V4SI result.  Both inputs are
;; reinterpreted as V4SI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-indexed elements -- confirm
;; against ix86_expand_vec_extract_even_odd.
6669 (define_expand "vec_pack_trunc_v2di"
6670 [(match_operand:V4SI 0 "register_operand" "")
6671 (match_operand:V2DI 1 "register_operand" "")
6672 (match_operand:V2DI 2 "register_operand" "")]
6675 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6676 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6677 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; AVX three-operand vpacksswb: two V8HI sources (operand 2 may be in
;; memory) produce a V16QI result.
6681 (define_insn "*avx_packsswb"
6682 [(set (match_operand:V16QI 0 "register_operand" "=x")
6685 (match_operand:V8HI 1 "register_operand" "x"))
6687 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6689 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6690 [(set_attr "type" "sselog")
6691 (set_attr "prefix" "vex")
6692 (set_attr "mode" "TI")])
;; Two-operand packsswb: two V8HI sources produce a V16QI result.
;; Operand 1 is tied to the destination ("0"); operand 2 may be in memory.
6694 (define_insn "sse2_packsswb"
6695 [(set (match_operand:V16QI 0 "register_operand" "=x")
6698 (match_operand:V8HI 1 "register_operand" "0"))
6700 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6702 "packsswb\t{%2, %0|%0, %2}"
6703 [(set_attr "type" "sselog")
6704 (set_attr "prefix_data16" "1")
6705 (set_attr "mode" "TI")])
;; AVX three-operand vpackssdw: two V4SI sources (operand 2 may be in
;; memory) produce a V8HI result.
6707 (define_insn "*avx_packssdw"
6708 [(set (match_operand:V8HI 0 "register_operand" "=x")
6711 (match_operand:V4SI 1 "register_operand" "x"))
6713 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6715 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6716 [(set_attr "type" "sselog")
6717 (set_attr "prefix" "vex")
6718 (set_attr "mode" "TI")])
;; Two-operand packssdw: two V4SI sources produce a V8HI result.
;; Operand 1 is tied to the destination ("0"); operand 2 may be in memory.
6720 (define_insn "sse2_packssdw"
6721 [(set (match_operand:V8HI 0 "register_operand" "=x")
6724 (match_operand:V4SI 1 "register_operand" "0"))
6726 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6728 "packssdw\t{%2, %0|%0, %2}"
6729 [(set_attr "type" "sselog")
6730 (set_attr "prefix_data16" "1")
6731 (set_attr "mode" "TI")])
;; AVX three-operand vpackuswb: two V8HI sources (operand 2 may be in
;; memory) produce a V16QI result.
6733 (define_insn "*avx_packuswb"
6734 [(set (match_operand:V16QI 0 "register_operand" "=x")
6737 (match_operand:V8HI 1 "register_operand" "x"))
6739 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6741 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6742 [(set_attr "type" "sselog")
6743 (set_attr "prefix" "vex")
6744 (set_attr "mode" "TI")])
;; Two-operand packuswb: two V8HI sources produce a V16QI result.
;; Operand 1 is tied to the destination ("0"); operand 2 may be in memory.
6746 (define_insn "sse2_packuswb"
6747 [(set (match_operand:V16QI 0 "register_operand" "=x")
6750 (match_operand:V8HI 1 "register_operand" "0"))
6752 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6754 "packuswb\t{%2, %0|%0, %2}"
6755 [(set_attr "type" "sselog")
6756 (set_attr "prefix_data16" "1")
6757 (set_attr "mode" "TI")])
;; AVX three-operand vpunpckhbw on V16QI.  The selection list pairs index
;; i with index i+16 for i = 8..15, i.e. it interleaves the upper eight
;; bytes of the two inputs.
6759 (define_insn "*avx_interleave_highv16qi"
6760 [(set (match_operand:V16QI 0 "register_operand" "=x")
6763 (match_operand:V16QI 1 "register_operand" "x")
6764 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6765 (parallel [(const_int 8) (const_int 24)
6766 (const_int 9) (const_int 25)
6767 (const_int 10) (const_int 26)
6768 (const_int 11) (const_int 27)
6769 (const_int 12) (const_int 28)
6770 (const_int 13) (const_int 29)
6771 (const_int 14) (const_int 30)
6772 (const_int 15) (const_int 31)])))]
6774 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6775 [(set_attr "type" "sselog")
6776 (set_attr "prefix" "vex")
6777 (set_attr "mode" "TI")])
;; Two-operand punpckhbw on V16QI; operand 1 is tied to the destination.
;; The selection list pairs index i with index i+16 for i = 8..15, i.e. it
;; interleaves the upper eight bytes of the two inputs.
6779 (define_insn "vec_interleave_highv16qi"
6780 [(set (match_operand:V16QI 0 "register_operand" "=x")
6783 (match_operand:V16QI 1 "register_operand" "0")
6784 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6785 (parallel [(const_int 8) (const_int 24)
6786 (const_int 9) (const_int 25)
6787 (const_int 10) (const_int 26)
6788 (const_int 11) (const_int 27)
6789 (const_int 12) (const_int 28)
6790 (const_int 13) (const_int 29)
6791 (const_int 14) (const_int 30)
6792 (const_int 15) (const_int 31)])))]
6794 "punpckhbw\t{%2, %0|%0, %2}"
6795 [(set_attr "type" "sselog")
6796 (set_attr "prefix_data16" "1")
6797 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklbw on V16QI.  The selection list pairs index
;; i with index i+16 for i = 0..7, i.e. it interleaves the lower eight
;; bytes of the two inputs.
6799 (define_insn "*avx_interleave_lowv16qi"
6800 [(set (match_operand:V16QI 0 "register_operand" "=x")
6803 (match_operand:V16QI 1 "register_operand" "x")
6804 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6805 (parallel [(const_int 0) (const_int 16)
6806 (const_int 1) (const_int 17)
6807 (const_int 2) (const_int 18)
6808 (const_int 3) (const_int 19)
6809 (const_int 4) (const_int 20)
6810 (const_int 5) (const_int 21)
6811 (const_int 6) (const_int 22)
6812 (const_int 7) (const_int 23)])))]
6814 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6815 [(set_attr "type" "sselog")
6816 (set_attr "prefix" "vex")
6817 (set_attr "mode" "TI")])
;; Two-operand punpcklbw on V16QI; operand 1 is tied to the destination.
;; The selection list pairs index i with index i+16 for i = 0..7, i.e. it
;; interleaves the lower eight bytes of the two inputs.
6819 (define_insn "vec_interleave_lowv16qi"
6820 [(set (match_operand:V16QI 0 "register_operand" "=x")
6823 (match_operand:V16QI 1 "register_operand" "0")
6824 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6825 (parallel [(const_int 0) (const_int 16)
6826 (const_int 1) (const_int 17)
6827 (const_int 2) (const_int 18)
6828 (const_int 3) (const_int 19)
6829 (const_int 4) (const_int 20)
6830 (const_int 5) (const_int 21)
6831 (const_int 6) (const_int 22)
6832 (const_int 7) (const_int 23)])))]
6834 "punpcklbw\t{%2, %0|%0, %2}"
6835 [(set_attr "type" "sselog")
6836 (set_attr "prefix_data16" "1")
6837 (set_attr "mode" "TI")])
;; AVX three-operand vpunpckhwd on V8HI.  The selection list pairs index
;; i with index i+8 for i = 4..7 (upper four words of each input).
6839 (define_insn "*avx_interleave_highv8hi"
6840 [(set (match_operand:V8HI 0 "register_operand" "=x")
6843 (match_operand:V8HI 1 "register_operand" "x")
6844 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6845 (parallel [(const_int 4) (const_int 12)
6846 (const_int 5) (const_int 13)
6847 (const_int 6) (const_int 14)
6848 (const_int 7) (const_int 15)])))]
6850 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6851 [(set_attr "type" "sselog")
6852 (set_attr "prefix" "vex")
6853 (set_attr "mode" "TI")])
;; Two-operand punpckhwd on V8HI; operand 1 is tied to the destination.
;; The selection list pairs index i with index i+8 for i = 4..7 (upper
;; four words of each input).
6855 (define_insn "vec_interleave_highv8hi"
6856 [(set (match_operand:V8HI 0 "register_operand" "=x")
6859 (match_operand:V8HI 1 "register_operand" "0")
6860 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6861 (parallel [(const_int 4) (const_int 12)
6862 (const_int 5) (const_int 13)
6863 (const_int 6) (const_int 14)
6864 (const_int 7) (const_int 15)])))]
6866 "punpckhwd\t{%2, %0|%0, %2}"
6867 [(set_attr "type" "sselog")
6868 (set_attr "prefix_data16" "1")
6869 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklwd on V8HI.  The selection list pairs index
;; i with index i+8 for i = 0..3 (lower four words of each input).
6871 (define_insn "*avx_interleave_lowv8hi"
6872 [(set (match_operand:V8HI 0 "register_operand" "=x")
6875 (match_operand:V8HI 1 "register_operand" "x")
6876 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6877 (parallel [(const_int 0) (const_int 8)
6878 (const_int 1) (const_int 9)
6879 (const_int 2) (const_int 10)
6880 (const_int 3) (const_int 11)])))]
6882 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6883 [(set_attr "type" "sselog")
6884 (set_attr "prefix" "vex")
6885 (set_attr "mode" "TI")])
;; Two-operand punpcklwd on V8HI; operand 1 is tied to the destination.
;; The selection list pairs index i with index i+8 for i = 0..3 (lower
;; four words of each input).
6887 (define_insn "vec_interleave_lowv8hi"
6888 [(set (match_operand:V8HI 0 "register_operand" "=x")
6891 (match_operand:V8HI 1 "register_operand" "0")
6892 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6893 (parallel [(const_int 0) (const_int 8)
6894 (const_int 1) (const_int 9)
6895 (const_int 2) (const_int 10)
6896 (const_int 3) (const_int 11)])))]
6898 "punpcklwd\t{%2, %0|%0, %2}"
6899 [(set_attr "type" "sselog")
6900 (set_attr "prefix_data16" "1")
6901 (set_attr "mode" "TI")])
;; AVX three-operand vpunpckhdq on V4SI.  The selection list pairs index
;; i with index i+4 for i = 2..3 (upper two doublewords of each input).
6903 (define_insn "*avx_interleave_highv4si"
6904 [(set (match_operand:V4SI 0 "register_operand" "=x")
6907 (match_operand:V4SI 1 "register_operand" "x")
6908 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6909 (parallel [(const_int 2) (const_int 6)
6910 (const_int 3) (const_int 7)])))]
6912 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6913 [(set_attr "type" "sselog")
6914 (set_attr "prefix" "vex")
6915 (set_attr "mode" "TI")])
;; Two-operand punpckhdq on V4SI; operand 1 is tied to the destination.
;; The selection list pairs index i with index i+4 for i = 2..3 (upper
;; two doublewords of each input).
6917 (define_insn "vec_interleave_highv4si"
6918 [(set (match_operand:V4SI 0 "register_operand" "=x")
6921 (match_operand:V4SI 1 "register_operand" "0")
6922 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6923 (parallel [(const_int 2) (const_int 6)
6924 (const_int 3) (const_int 7)])))]
6926 "punpckhdq\t{%2, %0|%0, %2}"
6927 [(set_attr "type" "sselog")
6928 (set_attr "prefix_data16" "1")
6929 (set_attr "mode" "TI")])
;; AVX three-operand vpunpckldq on V4SI.  The selection list pairs index
;; i with index i+4 for i = 0..1 (lower two doublewords of each input).
6931 (define_insn "*avx_interleave_lowv4si"
6932 [(set (match_operand:V4SI 0 "register_operand" "=x")
6935 (match_operand:V4SI 1 "register_operand" "x")
6936 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6937 (parallel [(const_int 0) (const_int 4)
6938 (const_int 1) (const_int 5)])))]
6940 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6941 [(set_attr "type" "sselog")
6942 (set_attr "prefix" "vex")
6943 (set_attr "mode" "TI")])
;; Two-operand punpckldq on V4SI; operand 1 is tied to the destination.
;; The selection list pairs index i with index i+4 for i = 0..1 (lower
;; two doublewords of each input).
6945 (define_insn "vec_interleave_lowv4si"
6946 [(set (match_operand:V4SI 0 "register_operand" "=x")
6949 (match_operand:V4SI 1 "register_operand" "0")
6950 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6951 (parallel [(const_int 0) (const_int 4)
6952 (const_int 1) (const_int 5)])))]
6954 "punpckldq\t{%2, %0|%0, %2}"
6955 [(set_attr "type" "sselog")
6956 (set_attr "prefix_data16" "1")
6957 (set_attr "mode" "TI")])
;; AVX element insert (vpinsr<ssevecsize>) for the SSEMODE124 modes.
;; Scalar operand 2 is duplicated and merged into vector operand 1 under
;; mask operand 3, which must be a single power of two.  The output code
;; converts that mask into an element index with exact_log2, then prints
;; operand 2 as-is when it is in memory and with the %k modifier
;; otherwise.  prefix_extra is computed from whether operand 0 is V8HI.
6959 (define_insn "*avx_pinsr<ssevecsize>"
6960 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6961 (vec_merge:SSEMODE124
6962 (vec_duplicate:SSEMODE124
6963 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6964 (match_operand:SSEMODE124 1 "register_operand" "x")
6965 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6968 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6969 if (MEM_P (operands[2]))
6970 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6972 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6974 [(set_attr "type" "sselog")
6975 (set (attr "prefix_extra")
6976 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6978 (const_string "1")))
6979 (set_attr "length_immediate" "1")
6980 (set_attr "prefix" "vex")
6981 (set_attr "mode" "TI")])
;; Byte insert (pinsrb) into V16QI.  QImode operand 2 is duplicated and
;; placed into operand 1 (tied to the destination) at the element chosen
;; by mask operand 3, a power of two from 1 to 32768.  The output code
;; turns the mask into an element index with exact_log2 and uses the %k
;; modifier for operand 2 when it is not in memory.
6983 (define_insn "*sse4_1_pinsrb"
6984 [(set (match_operand:V16QI 0 "register_operand" "=x")
6986 (vec_duplicate:V16QI
6987 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6988 (match_operand:V16QI 1 "register_operand" "0")
6989 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6992 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6993 if (MEM_P (operands[2]))
6994 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6996 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6998 [(set_attr "type" "sselog")
6999 (set_attr "prefix_extra" "1")
7000 (set_attr "length_immediate" "1")
7001 (set_attr "mode" "TI")])
;; Word insert (pinsrw) into V8HI.  HImode operand 2 is placed into
;; operand 1 (tied to the destination) at the element chosen by mask
;; operand 3, a power of two from 1 to 128.  The output code turns the
;; mask into an element index with exact_log2 and uses the %k modifier
;; for operand 2 when it is not in memory.
7003 (define_insn "*sse2_pinsrw"
7004 [(set (match_operand:V8HI 0 "register_operand" "=x")
7007 (match_operand:HI 2 "nonimmediate_operand" "rm"))
7008 (match_operand:V8HI 1 "register_operand" "0")
7009 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
7012 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7013 if (MEM_P (operands[2]))
7014 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
7016 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
7018 [(set_attr "type" "sselog")
7019 (set_attr "prefix_data16" "1")
7020 (set_attr "length_immediate" "1")
7021 (set_attr "mode" "TI")])
7023 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert (pinsrd) into V4SI.  SImode operand 2 is placed into
;; operand 1 (tied to the destination) at the element chosen by mask
;; operand 3, a power of two from 1 to 8, which the output code converts
;; into an element index with exact_log2.
7024 (define_insn "*sse4_1_pinsrd"
7025 [(set (match_operand:V4SI 0 "register_operand" "=x")
7028 (match_operand:SI 2 "nonimmediate_operand" "rm"))
7029 (match_operand:V4SI 1 "register_operand" "0")
7030 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
7033 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7034 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
7036 [(set_attr "type" "sselog")
7037 (set_attr "prefix_extra" "1")
7038 (set_attr "length_immediate" "1")
7039 (set_attr "mode" "TI")])
;; AVX quadword insert (vpinsrq) into V2DI, available only for 64-bit AVX
;; targets.  DImode operand 2 is placed into register operand 1 at the
;; element chosen by mask operand 3 (1 or 2), which the output code
;; converts into an element index with exact_log2.
7041 (define_insn "*avx_pinsrq"
7042 [(set (match_operand:V2DI 0 "register_operand" "=x")
7045 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7046 (match_operand:V2DI 1 "register_operand" "x")
7047 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7048 "TARGET_AVX && TARGET_64BIT"
7050 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7051 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7053 [(set_attr "type" "sselog")
7054 (set_attr "prefix_extra" "1")
7055 (set_attr "length_immediate" "1")
7056 (set_attr "prefix" "vex")
7057 (set_attr "mode" "TI")])
;; SSE4.1 quadword insert (pinsrq) into V2DI, available only for 64-bit
;; targets.  DImode operand 2 is placed into operand 1 (tied to the
;; destination) at the element chosen by mask operand 3 (1 or 2), which
;; the output code converts into an element index with exact_log2.
7059 (define_insn "*sse4_1_pinsrq"
7060 [(set (match_operand:V2DI 0 "register_operand" "=x")
7063 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7064 (match_operand:V2DI 1 "register_operand" "0")
7065 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7066 "TARGET_SSE4_1 && TARGET_64BIT"
7068 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7069 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7071 [(set_attr "type" "sselog")
7072 (set_attr "prefix_rex" "1")
7073 (set_attr "prefix_extra" "1")
7074 (set_attr "length_immediate" "1")
7075 (set_attr "mode" "TI")])
;; Byte extract (pextrb, VEX-prefixed when the "%v" template prefix
;; applies) of element 0..15 from a V16QI register into an SImode general
;; register.
7077 (define_insn "*sse4_1_pextrb"
7078 [(set (match_operand:SI 0 "register_operand" "=r")
7081 (match_operand:V16QI 1 "register_operand" "x")
7082 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7084 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7085 [(set_attr "type" "sselog")
7086 (set_attr "prefix_extra" "1")
7087 (set_attr "length_immediate" "1")
7088 (set_attr "prefix" "maybe_vex")
7089 (set_attr "mode" "TI")])
;; Byte extract (pextrb) of element 0..15 from a V16QI register, stored
;; directly to a QImode memory destination.
7091 (define_insn "*sse4_1_pextrb_memory"
7092 [(set (match_operand:QI 0 "memory_operand" "=m")
7094 (match_operand:V16QI 1 "register_operand" "x")
7095 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7097 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7098 [(set_attr "type" "sselog")
7099 (set_attr "prefix_extra" "1")
7100 (set_attr "length_immediate" "1")
7101 (set_attr "prefix" "maybe_vex")
7102 (set_attr "mode" "TI")])
;; Word extract (pextrw) of element 0..7 from a V8HI register into an
;; SImode general register.
7104 (define_insn "*sse2_pextrw"
7105 [(set (match_operand:SI 0 "register_operand" "=r")
7108 (match_operand:V8HI 1 "register_operand" "x")
7109 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7111 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7112 [(set_attr "type" "sselog")
7113 (set_attr "prefix_data16" "1")
7114 (set_attr "length_immediate" "1")
7115 (set_attr "prefix" "maybe_vex")
7116 (set_attr "mode" "TI")])
;; Word extract (pextrw) of element 0..7 from a V8HI register, stored
;; directly to an HImode memory destination.
7118 (define_insn "*sse4_1_pextrw_memory"
7119 [(set (match_operand:HI 0 "memory_operand" "=m")
7121 (match_operand:V8HI 1 "register_operand" "x")
7122 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7124 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7125 [(set_attr "type" "sselog")
7126 (set_attr "prefix_extra" "1")
7127 (set_attr "length_immediate" "1")
7128 (set_attr "prefix" "maybe_vex")
7129 (set_attr "mode" "TI")])
;; Doubleword extract (pextrd) of element 0..3 from a V4SI register into
;; an SImode destination that may be a general register or memory.
7131 (define_insn "*sse4_1_pextrd"
7132 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7134 (match_operand:V4SI 1 "register_operand" "x")
7135 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7137 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7138 [(set_attr "type" "sselog")
7139 (set_attr "prefix_extra" "1")
7140 (set_attr "length_immediate" "1")
7141 (set_attr "prefix" "maybe_vex")
7142 (set_attr "mode" "TI")])
7144 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Quadword extract (pextrq) of element 0..1 from a V2DI register into a
;; DImode general register or memory.  Requires SSE4.1 on a 64-bit target.
7145 (define_insn "*sse4_1_pextrq"
7146 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7148 (match_operand:V2DI 1 "register_operand" "x")
7149 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7150 "TARGET_SSE4_1 && TARGET_64BIT"
7151 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7152 [(set_attr "type" "sselog")
7153 (set_attr "prefix_rex" "1")
7154 (set_attr "prefix_extra" "1")
7155 (set_attr "length_immediate" "1")
7156 (set_attr "prefix" "maybe_vex")
7157 (set_attr "mode" "TI")])
;; Expander that takes the pshufd immediate as a single integer
;; (operand 2) and splits it into four 2-bit selectors (bits 0-1, 2-3,
;; 4-5 and 6-7), passing them as separate operands to sse2_pshufd_1.
7159 (define_expand "sse2_pshufd"
7160 [(match_operand:V4SI 0 "register_operand" "")
7161 (match_operand:V4SI 1 "nonimmediate_operand" "")
7162 (match_operand:SI 2 "const_int_operand" "")]
7165 int mask = INTVAL (operands[2]);
7166 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7167 GEN_INT ((mask >> 0) & 3),
7168 GEN_INT ((mask >> 2) & 3),
7169 GEN_INT ((mask >> 4) & 3),
7170 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four explicit element selectors
;; (operands 2..5, each 0..3).  The output code packs them back into the
;; 8-bit immediate, two bits per selector with operand 2 in the lowest
;; bits, and stores the result in operands[2] for printing.
7174 (define_insn "sse2_pshufd_1"
7175 [(set (match_operand:V4SI 0 "register_operand" "=x")
7177 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7178 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7179 (match_operand 3 "const_0_to_3_operand" "")
7180 (match_operand 4 "const_0_to_3_operand" "")
7181 (match_operand 5 "const_0_to_3_operand" "")])))]
7185 mask |= INTVAL (operands[2]) << 0;
7186 mask |= INTVAL (operands[3]) << 2;
7187 mask |= INTVAL (operands[4]) << 4;
7188 mask |= INTVAL (operands[5]) << 6;
7189 operands[2] = GEN_INT (mask);
7191 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7193 [(set_attr "type" "sselog1")
7194 (set_attr "prefix_data16" "1")
7195 (set_attr "prefix" "maybe_vex")
7196 (set_attr "length_immediate" "1")
7197 (set_attr "mode" "TI")])
;; Expander that takes the pshuflw immediate as a single integer
;; (operand 2) and splits it into four 2-bit selectors (bits 0-1, 2-3,
;; 4-5 and 6-7), passing them as separate operands to sse2_pshuflw_1.
7199 (define_expand "sse2_pshuflw"
7200 [(match_operand:V8HI 0 "register_operand" "")
7201 (match_operand:V8HI 1 "nonimmediate_operand" "")
7202 (match_operand:SI 2 "const_int_operand" "")]
7205 int mask = INTVAL (operands[2]);
7206 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7207 GEN_INT ((mask >> 0) & 3),
7208 GEN_INT ((mask >> 2) & 3),
7209 GEN_INT ((mask >> 4) & 3),
7210 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with four explicit element selectors (operands 2..5, each
;; 0..3).  The output code packs them into the 8-bit immediate, two bits
;; per selector with operand 2 in the lowest bits, and stores the result
;; in operands[2] for printing.  The instruction uses a rep prefix and no
;; data16 prefix.
7214 (define_insn "sse2_pshuflw_1"
7215 [(set (match_operand:V8HI 0 "register_operand" "=x")
7217 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7218 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7219 (match_operand 3 "const_0_to_3_operand" "")
7220 (match_operand 4 "const_0_to_3_operand" "")
7221 (match_operand 5 "const_0_to_3_operand" "")
7229 mask |= INTVAL (operands[2]) << 0;
7230 mask |= INTVAL (operands[3]) << 2;
7231 mask |= INTVAL (operands[4]) << 4;
7232 mask |= INTVAL (operands[5]) << 6;
7233 operands[2] = GEN_INT (mask);
7235 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7237 [(set_attr "type" "sselog")
7238 (set_attr "prefix_data16" "0")
7239 (set_attr "prefix_rep" "1")
7240 (set_attr "prefix" "maybe_vex")
7241 (set_attr "length_immediate" "1")
7242 (set_attr "mode" "TI")])
;; Expander that takes the pshufhw immediate as a single integer
;; (operand 2), splits it into four 2-bit selectors and adds 4 to each so
;; that they become element indices in the range 4..7 expected by
;; sse2_pshufhw_1.
7244 (define_expand "sse2_pshufhw"
7245 [(match_operand:V8HI 0 "register_operand" "")
7246 (match_operand:V8HI 1 "nonimmediate_operand" "")
7247 (match_operand:SI 2 "const_int_operand" "")]
7250 int mask = INTVAL (operands[2]);
7251 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7252 GEN_INT (((mask >> 0) & 3) + 4),
7253 GEN_INT (((mask >> 2) & 3) + 4),
7254 GEN_INT (((mask >> 4) & 3) + 4),
7255 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with four explicit element selectors (operands 2..5, each an
;; index 4..7).  The output code subtracts 4 from each selector, packs
;; them into the 8-bit immediate (two bits each, operand 2 lowest) and
;; stores the result in operands[2] for printing.
7259 (define_insn "sse2_pshufhw_1"
7260 [(set (match_operand:V8HI 0 "register_operand" "=x")
7262 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7263 (parallel [(const_int 0)
7267 (match_operand 2 "const_4_to_7_operand" "")
7268 (match_operand 3 "const_4_to_7_operand" "")
7269 (match_operand 4 "const_4_to_7_operand" "")
7270 (match_operand 5 "const_4_to_7_operand" "")])))]
7274 mask |= (INTVAL (operands[2]) - 4) << 0;
7275 mask |= (INTVAL (operands[3]) - 4) << 2;
7276 mask |= (INTVAL (operands[4]) - 4) << 4;
7277 mask |= (INTVAL (operands[5]) - 4) << 6;
7278 operands[2] = GEN_INT (mask);
7280 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7282 [(set_attr "type" "sselog")
7283 (set_attr "prefix_rep" "1")
7284 (set_attr "prefix_data16" "0")
7285 (set_attr "prefix" "maybe_vex")
7286 (set_attr "length_immediate" "1")
7287 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1) into a V4SI register.
;; The preparation statement sets operands[2] to the V4SI zero constant.
7289 (define_expand "sse2_loadd"
7290 [(set (match_operand:V4SI 0 "register_operand" "")
7293 (match_operand:SI 1 "nonimmediate_operand" ""))
7297 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX insertion of an SImode scalar (operand 2) into a V4SI value
;; (operand 1, a register or zero).  Three alternatives: vmovd from memory
;; and vmovd from a general register, both with operand 1 matching the
;; "C" constraint, and vmovss when operands 1 and 2 are both SSE
;; registers.
7299 (define_insn "*avx_loadld"
7300 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7303 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7304 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7308 vmovd\t{%2, %0|%0, %2}
7309 vmovd\t{%2, %0|%0, %2}
7310 vmovss\t{%2, %1, %0|%0, %1, %2}"
7311 [(set_attr "type" "ssemov")
7312 (set_attr "prefix" "vex")
7313 (set_attr "mode" "TI,TI,V4SF")])
;; Non-AVX insertion of an SImode scalar (operand 2) into a V4SI value
;; (operand 1, a register or zero).  Four alternatives: movd from memory,
;; movd from a general register, movss from memory (all with operand 1
;; matching the "C" constraint), and movss between SSE registers with
;; operand 1 tied to the destination.
7315 (define_insn "sse2_loadld"
7316 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7319 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7320 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7324 movd\t{%2, %0|%0, %2}
7325 movd\t{%2, %0|%0, %2}
7326 movss\t{%2, %0|%0, %2}
7327 movss\t{%2, %0|%0, %2}"
7328 [(set_attr "type" "ssemov")
7329 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extract element 0 of a V4SI register into an SImode destination.
;; After reload the pattern is split into a plain SImode move, provided
;; inter-unit moves are allowed, the destination is memory, or the
;; destination is not a general register.  The split rewrites operand 1
;; as an SImode register with the same register number.
7331 (define_insn_and_split "sse2_stored"
7332 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7334 (match_operand:V4SI 1 "register_operand" "x,Yi")
7335 (parallel [(const_int 0)])))]
7338 "&& reload_completed
7339 && (TARGET_INTER_UNIT_MOVES
7340 || MEM_P (operands [0])
7341 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7342 [(set (match_dup 0) (match_dup 1))]
7344 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element 0..3 of a V4SI value that lives in offsettable memory
;; into an SImode general register.  The split code replaces the extract
;; with an ordinary SImode load from the vector's address plus 4 * index.
7347 (define_insn_and_split "*vec_ext_v4si_mem"
7348 [(set (match_operand:SI 0 "register_operand" "=r")
7350 (match_operand:V4SI 1 "memory_operand" "o")
7351 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7357 int i = INTVAL (operands[2]);
7359 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander for storing element 0 of a V2DI register into a DImode
;; register or memory destination.
7363 (define_expand "sse_storeq"
7364 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7366 (match_operand:V2DI 1 "register_operand" "")
7367 (parallel [(const_int 0)])))]
;; 64-bit-target variant of the element-0 extract from V2DI into DImode.
;; The condition rejects the case where source and destination are both
;; memory.  There are three alternatives; the last one reads the value
;; from offsettable memory into a general register with an integer move
;; (type imov).
7371 (define_insn "*sse2_storeq_rex64"
7372 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7374 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7375 (parallel [(const_int 0)])))]
7376 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7380 %vmov{q}\t{%1, %0|%0, %1}"
7381 [(set_attr "type" "*,*,imov")
7382 (set_attr "prefix" "*,*,maybe_vex")
7383 (set_attr "mode" "*,*,DI")])
;; Element-0 extract from a V2DI register into a DImode memory or SSE
;; register destination.  It is followed by a second pattern with the same
;; RTL shape and no constraints that rewrites the extract as a plain
;; DImode move: operand 1 is re-created as a DImode register with the same
;; register number.  That rewrite applies when inter-unit moves are
;; allowed, the destination is memory, or the destination is not a general
;; register.
;; NOTE(review): the header of the second pattern is not visible in this
;; listing; it is presumably a define_split -- confirm.
7385 (define_insn "*sse2_storeq"
7386 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7388 (match_operand:V2DI 1 "register_operand" "x")
7389 (parallel [(const_int 0)])))]
7394 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7396 (match_operand:V2DI 1 "register_operand" "")
7397 (parallel [(const_int 0)])))]
7400 && (TARGET_INTER_UNIT_MOVES
7401 || MEM_P (operands [0])
7402 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7403 [(set (match_dup 0) (match_dup 1))]
7405 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; AVX extract of element 1 of a V2DI value into DImode, with a
;; general-register alternative.  Source and destination may not both be
;; memory.  Alternatives: vmovhps (register to memory), vpsrldq by 8
;; bytes (register to register), and vmovq / vmov{q} loading from the
;; memory operand through the %H1 operand form into an SSE or general
;; register respectively.
7408 (define_insn "*vec_extractv2di_1_rex64_avx"
7409 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7411 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7412 (parallel [(const_int 1)])))]
7415 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7417 vmovhps\t{%1, %0|%0, %1}
7418 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7419 vmovq\t{%H1, %0|%0, %H1}
7420 vmov{q}\t{%H1, %0|%0, %H1}"
7421 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7422 (set_attr "length_immediate" "*,1,*,*")
7423 (set_attr "memory" "*,none,*,*")
7424 (set_attr "prefix" "vex")
7425 (set_attr "mode" "V2SF,TI,TI,DI")])
;; 64-bit-target extract of element 1 of a V2DI value into DImode.
;; Source and destination may not both be memory.  Alternatives: movhps
;; (register to memory), psrldq by 8 bytes in place (source tied to the
;; destination), and movq / mov{q} loading from the memory operand through
;; the %H1 operand form into an SSE or general register respectively.
7427 (define_insn "*vec_extractv2di_1_rex64"
7428 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7430 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7431 (parallel [(const_int 1)])))]
7432 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7434 movhps\t{%1, %0|%0, %1}
7435 psrldq\t{$8, %0|%0, 8}
7436 movq\t{%H1, %0|%0, %H1}
7437 mov{q}\t{%H1, %0|%0, %H1}"
7438 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7439 (set_attr "length_immediate" "*,1,*,*")
7440 (set_attr "memory" "*,none,*,*")
7441 (set_attr "mode" "V2SF,TI,TI,DI")])
;; VEX-encoded element-1 extraction from V2DI with three alternatives and
;; no general-register destination:
;;   0: SSE reg -> memory        vmovhps
;;   1: SSE reg -> SSE reg       vpsrldq by 8 bytes
;;   2: offsettable mem -> SSE   vmovq from %H1
;; The visible part of the condition rejects memory-to-memory.
;; NOTE(review): the first line of the condition is not visible in this
;; excerpt -- confirm the target predicates there.
7443 (define_insn "*vec_extractv2di_1_avx"
7444 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7446 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7447 (parallel [(const_int 1)])))]
7450 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7452 vmovhps\t{%1, %0|%0, %1}
7453 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7454 vmovq\t{%H1, %0|%0, %H1}"
7455 [(set_attr "type" "ssemov,sseishft1,ssemov")
7456 (set_attr "length_immediate" "*,1,*")
7457 (set_attr "memory" "*,none,*")
7458 (set_attr "prefix" "vex")
7459 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 (non-VEX) element-1 extraction from V2DI with three alternatives:
;;   0: SSE reg -> memory        movhps
;;   1: SSE reg, in place        psrldq by 8 bytes (source tied via "0")
;;   2: offsettable mem -> SSE   movq from %H1
;; The visible part of the condition requires TARGET_SSE2 and rejects
;; memory-to-memory.
;; NOTE(review): the first line of the condition is not visible here.
7461 (define_insn "*vec_extractv2di_1_sse2"
7462 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7464 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7465 (parallel [(const_int 1)])))]
7467 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7469 movhps\t{%1, %0|%0, %1}
7470 psrldq\t{$8, %0|%0, 8}
7471 movq\t{%H1, %0|%0, %H1}"
7472 [(set_attr "type" "ssemov,sseishft1,ssemov")
7473 (set_attr "length_immediate" "*,1,*")
7474 (set_attr "memory" "*,none,*")
7475 (set_attr "mode" "V2SF,TI,TI")])
7477 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-only fallback (condition: !TARGET_SSE2 && TARGET_SSE, no
;; memory-to-memory) for extracting element 1 of V2DI.  It uses only
;; single-precision move instructions:
;;   0: SSE reg -> memory        movhps
;;   1: SSE reg -> SSE reg       movhlps
;;   2: offsettable mem -> SSE   movlps from %H1
7478 (define_insn "*vec_extractv2di_1_sse"
7479 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7481 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7482 (parallel [(const_int 1)])))]
7483 "!TARGET_SSE2 && TARGET_SSE
7484 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7486 movhps\t{%1, %0|%0, %1}
7487 movhlps\t{%1, %0|%0, %1}
7488 movlps\t{%H1, %0|%0, %H1}"
7489 [(set_attr "type" "ssemov")
7490 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX-encoded pattern producing a V4SI register from one SImode operand.
;; Two alternatives:
;;   0: source in an SSE register -> vpshufd with immediate 0
;;   1: source in memory          -> vbroadcastss
;; NOTE(review): operand 1 uses predicate "register_operand" although the
;; second alternative's constraint is "m" -- confirm this is intended.
;; The wrapping RTL operator and the condition are not visible here.
7492 (define_insn "*vec_dupv4si_avx"
7493 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7495 (match_operand:SI 1 "register_operand" "x,m")))]
7498 vpshufd\t{$0, %1, %0|%0, %1, 0}
7499 vbroadcastss\t{%1, %0|%0, %1}"
7500 [(set_attr "type" "sselog1,ssemov")
7501 (set_attr "length_immediate" "1,0")
7502 (set_attr "prefix_extra" "0,1")
7503 (set_attr "prefix" "vex")
7504 (set_attr "mode" "TI,V4SF")])
;; Pattern producing a V4SI register from one SImode register operand.
;; Two alternatives:
;;   0: "Y2" registers  -> pshufd with immediate 0 (template has a %v)
;;   1: source tied to the destination ("0") -> shufps $0 on operand 0
;; Both alternatives carry a one-byte immediate.
;; NOTE(review): the wrapping RTL operator and the condition are not
;; visible in this excerpt.
7506 (define_insn "*vec_dupv4si"
7507 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7509 (match_operand:SI 1 "register_operand" " Y2,0")))]
7512 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7513 shufps\t{$0, %0, %0|%0, %0, 0}"
7514 [(set_attr "type" "sselog1")
7515 (set_attr "length_immediate" "1")
7516 (set_attr "mode" "TI,V4SF")])
;; VEX-encoded pattern producing a V2DI register from one DImode operand.
;; Two alternatives:
;;   0: source in an SSE register -> vpunpcklqdq with operand 1 used for
;;      both inputs
;;   1: source in memory          -> vmovddup
;; NOTE(review): the wrapping RTL operator and the condition are not
;; visible in this excerpt.
7518 (define_insn "*vec_dupv2di_avx"
7519 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7521 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7524 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7525 vmovddup\t{%1, %0|%0, %1}"
7526 [(set_attr "type" "sselog1")
7527 (set_attr "prefix" "vex")
7528 (set_attr "mode" "TI,DF")])
;; Pattern producing a V2DI register from one DImode operand, with two
;; alternatives: source tied to the destination ("0"), or source in
;; memory.  The only visible template is the second one, movddup.
;; NOTE(review): the wrapping RTL operator, the condition and the first
;; alternative's template are not visible in this excerpt.
7530 (define_insn "*vec_dupv2di_sse3"
7531 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7533 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7537 movddup\t{%1, %0|%0, %1}"
7538 [(set_attr "type" "sselog1")
7539 (set_attr "mode" "TI,DF")])
;; Pattern producing a V2DI register from one DImode register operand
;; that is tied to the destination in both alternatives ("0").  The
;; alternatives differ in destination class ("Y2" vs "x") and in their
;; type/mode attributes (sselog1/TI vs ssemov/V4SF).
;; NOTE(review): the wrapping RTL operator, the condition and both
;; output templates are not visible in this excerpt.
7541 (define_insn "*vec_dupv2di"
7542 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7544 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7549 [(set_attr "type" "sselog1,ssemov")
7550 (set_attr "mode" "TI,V4SF")])
;; Builds a V2SI value from two SImode operands.  Five alternatives:
;;   0: SSE, operand 2 in GPR/mem      vpinsrd with immediate 1
;;   1: SSE, both in SSE registers     vpunpckldq
;;   2: SSE, operand 2 is "C"          vmovd of operand 1
;;   3: MMX ("*y"), operand 1 tied     punpckldq
;;   4: MMX ("*y"), operand 2 is "C"   movd of operand 1
;; The "prefix" attribute is "orig" for the two MMX alternatives (3 and 4)
;; and "vex" for the others.
;; NOTE(review): the wrapping RTL operator and the condition are not
;; visible in this excerpt.
7552 (define_insn "*vec_concatv2si_avx"
7553 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7555 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7556 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7559 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7560 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7561 vmovd\t{%1, %0|%0, %1}
7562 punpckldq\t{%2, %0|%0, %2}
7563 movd\t{%1, %0|%0, %1}"
7564 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7565 (set_attr "prefix_extra" "1,*,*,*,*")
7566 (set_attr "length_immediate" "1,*,*,*,*")
7567 (set (attr "prefix")
7568 (if_then_else (eq_attr "alternative" "3,4")
7569 (const_string "orig")
7570 (const_string "vex")))
7571 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Non-VEX counterpart for building V2SI from two SImode operands.
;; Five alternatives:
;;   0: SSE, operand 1 tied, operand 2 in GPR/mem   pinsrd with immediate 1
;;   1: SSE, operand 1 tied, operand 2 in SSE reg   punpckldq
;;   2: SSE, operand 2 is "C"                       movd of operand 1
;;   3: MMX ("*y"), operand 1 tied                  punpckldq
;;   4: MMX ("*y"), operand 2 is "C"                movd of operand 1
;; NOTE(review): the wrapping RTL operator and the condition are not
;; visible in this excerpt.
7573 (define_insn "*vec_concatv2si_sse4_1"
7574 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7576 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7577 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7580 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7581 punpckldq\t{%2, %0|%0, %2}
7582 movd\t{%1, %0|%0, %1}
7583 punpckldq\t{%2, %0|%0, %2}
7584 movd\t{%1, %0|%0, %1}"
7585 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7586 (set_attr "prefix_extra" "1,*,*,*,*")
7587 (set_attr "length_immediate" "1,*,*,*,*")
7588 (set_attr "mode" "TI,TI,TI,DI,DI")])
7590 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7591 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7592 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds V2SI from two SImode operands; operand 2 is restricted to
;; "reg_or_0_operand".  Four alternatives:
;;   0: SSE, operand 1 tied, operand 2 in SSE reg   punpckldq
;;   1: SSE, operand 2 is "C"                       movd of operand 1
;;   2: MMX ("*y"), operand 1 tied                  punpckldq
;;   3: MMX ("*y"), operand 2 is "C"                movd of operand 1
;; NOTE(review): the wrapping RTL operator and the condition are not
;; visible in this excerpt.
7593 (define_insn "*vec_concatv2si_sse2"
7594 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7596 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7597 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7600 punpckldq\t{%2, %0|%0, %2}
7601 movd\t{%1, %0|%0, %1}
7602 punpckldq\t{%2, %0|%0, %2}
7603 movd\t{%1, %0|%0, %1}"
7604 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7605 (set_attr "mode" "TI,TI,DI,DI")])
;; Builds V2SI from two SImode operands using single-precision SSE
;; instructions for the SSE alternatives.  Four alternatives:
;;   0: SSE, operand 1 tied, operand 2 in SSE reg   unpcklps
;;   1: SSE, operand 1 in memory, operand 2 "C"     movss
;;   2: MMX ("*y"), operand 1 tied                  punpckldq
;;   3: MMX ("*y"), operand 2 is "C"                movd of operand 1
;; NOTE(review): the wrapping RTL operator and the condition are not
;; visible in this excerpt.
7607 (define_insn "*vec_concatv2si_sse"
7608 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7610 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7611 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7614 unpcklps\t{%2, %0|%0, %2}
7615 movss\t{%1, %0|%0, %1}
7616 punpckldq\t{%2, %0|%0, %2}
7617 movd\t{%1, %0|%0, %1}"
7618 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7619 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; VEX-encoded pattern building V4SI from two V2SI operands.
;; Two alternatives:
;;   0: operand 2 in an SSE register -> vpunpcklqdq
;;   1: operand 2 in memory          -> vmovhps
;; NOTE(review): the wrapping RTL operator and the condition are not
;; visible in this excerpt.
7621 (define_insn "*vec_concatv4si_1_avx"
7622 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7624 (match_operand:V2SI 1 "register_operand" " x,x")
7625 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7628 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7629 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7630 [(set_attr "type" "sselog,ssemov")
7631 (set_attr "prefix" "vex")
7632 (set_attr "mode" "TI,V2SF")])
;; Non-VEX pattern building V4SI from two V2SI operands; operand 1 is
;; tied to the destination in every alternative.  Three alternatives:
;;   0: "Y2" registers                 punpcklqdq
;;   1: operand 2 in an SSE register   movlhps
;;   2: operand 2 in memory            movhps
;; NOTE(review): the wrapping RTL operator and the condition are not
;; visible in this excerpt.
7634 (define_insn "*vec_concatv4si_1"
7635 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7637 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7638 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7641 punpcklqdq\t{%2, %0|%0, %2}
7642 movlhps\t{%2, %0|%0, %2}
7643 movhps\t{%2, %0|%0, %2}"
7644 [(set_attr "type" "sselog,ssemov,ssemov")
7645 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX pattern (condition: !TARGET_64BIT && TARGET_AVX) building
;; V2DI from two DImode operands.  Four alternatives:
;;   0: operand 1 in memory, operand 2 "C"      vmovq
;;   1: operand 1 in an MMX reg, operand 2 "C"  movq2dq (prefix "orig")
;;   2: both in SSE registers                   vpunpcklqdq
;;   3: operand 2 in memory                     vmovhps
;; Every alternative except 1 is marked with the "vex" prefix.
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7647 (define_insn "*vec_concatv2di_avx"
7648 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7650 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7651 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7652 "!TARGET_64BIT && TARGET_AVX"
7654 vmovq\t{%1, %0|%0, %1}
7655 movq2dq\t{%1, %0|%0, %1}
7656 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7657 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7658 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7659 (set (attr "prefix")
7660 (if_then_else (eq_attr "alternative" "1")
7661 (const_string "orig")
7662 (const_string "vex")))
7663 (set_attr "mode" "TI,TI,TI,V2SF")])
;; Named 32-bit SSE pattern (condition: !TARGET_64BIT && TARGET_SSE)
;; building V2DI from two DImode operands.  Five alternatives:
;;   0: operand 1 in memory or "Y2" reg, operand 2 "C"   movq
;;   1: operand 1 in an MMX reg, operand 2 "C"           movq2dq
;;   2: operand 1 tied, operand 2 in "Y2" reg            punpcklqdq
;;   3: operand 1 tied, operand 2 in SSE reg             movlhps
;;   4: operand 1 tied, operand 2 in memory              movhps
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7665 (define_insn "vec_concatv2di"
7666 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7668 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7669 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7670 "!TARGET_64BIT && TARGET_SSE"
7672 movq\t{%1, %0|%0, %1}
7673 movq2dq\t{%1, %0|%0, %1}
7674 punpcklqdq\t{%2, %0|%0, %2}
7675 movlhps\t{%2, %0|%0, %2}
7676 movhps\t{%2, %0|%0, %2}"
7677 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7678 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX pattern (condition: TARGET_64BIT && TARGET_AVX) building
;; V2DI from two DImode operands.  Six alternatives:
;;   0: operand 2 in GPR/mem                    vpinsrq with immediate 1
;;   1: operand 1 in memory, operand 2 "C"      vmovq
;;   2: operand 1 in a GPR ("Yi" dest), "C"     vmovq
;;   3: operand 1 in an MMX reg, operand 2 "C"  movq2dq (prefix "orig")
;;   4: both in SSE registers                   vpunpcklqdq
;;   5: operand 2 in memory                     vmovhps
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7680 (define_insn "*vec_concatv2di_rex64_avx"
7681 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7683 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7684 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7685 "TARGET_64BIT && TARGET_AVX"
7687 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7688 vmovq\t{%1, %0|%0, %1}
7689 vmovq\t{%1, %0|%0, %1}
7690 movq2dq\t{%1, %0|%0, %1}
7691 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7692 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7693 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7694 (set_attr "prefix_extra" "1,*,*,*,*,*")
7695 (set_attr "length_immediate" "1,*,*,*,*,*")
7696 (set (attr "prefix")
7697 (if_then_else (eq_attr "alternative" "3")
7698 (const_string "orig")
7699 (const_string "vex")))
7700 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 pattern (condition: TARGET_64BIT && TARGET_SSE4_1)
;; building V2DI from two DImode operands.  Seven alternatives:
;;   0: operand 1 tied, operand 2 in GPR/mem    pinsrq with immediate 1
;;   1: operand 1 in mem/SSE reg, operand 2 "C" movq
;;   2: operand 1 in a GPR ("Yi" dest), "C"     movq
;;   3: operand 1 in an MMX reg, operand 2 "C"  movq2dq
;;   4: operand 1 tied, operand 2 in SSE reg    punpcklqdq
;;   5: operand 1 tied, operand 2 in SSE reg    movlhps
;;   6: operand 1 tied, operand 2 in memory     movhps
;; Alternatives 4 and 5 have identical constraints and differ only in the
;; instruction and mode attribute (TI vs V4SF).  "prefix_rex" is forced to
;; 1 for alternatives 0 and 2.
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7702 (define_insn "*vec_concatv2di_rex64_sse4_1"
7703 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7705 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7706 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7707 "TARGET_64BIT && TARGET_SSE4_1"
7709 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7710 movq\t{%1, %0|%0, %1}
7711 movq\t{%1, %0|%0, %1}
7712 movq2dq\t{%1, %0|%0, %1}
7713 punpcklqdq\t{%2, %0|%0, %2}
7714 movlhps\t{%2, %0|%0, %2}
7715 movhps\t{%2, %0|%0, %2}"
7716 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7717 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7718 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7719 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7720 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit SSE pattern (condition: TARGET_64BIT && TARGET_SSE) building
;; V2DI from two DImode operands.  Six alternatives:
;;   0: operand 1 in memory or "Y2" reg, operand 2 "C"   movq
;;   1: operand 1 in a GPR ("Yi" dest), operand 2 "C"    movq
;;   2: operand 1 in an MMX reg, operand 2 "C"           movq2dq
;;   3: operand 1 tied, operand 2 in "Y2" reg            punpcklqdq
;;   4: operand 1 tied, operand 2 in SSE reg             movlhps
;;   5: operand 1 tied, operand 2 in memory              movhps
;; "prefix_rex" is forced to 1 only for alternative 1.
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7722 (define_insn "*vec_concatv2di_rex64_sse"
7723 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7725 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7726 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7727 "TARGET_64BIT && TARGET_SSE"
7729 movq\t{%1, %0|%0, %1}
7730 movq\t{%1, %0|%0, %1}
7731 movq2dq\t{%1, %0|%0, %1}
7732 punpcklqdq\t{%2, %0|%0, %2}
7733 movlhps\t{%2, %0|%0, %2}
7734 movhps\t{%2, %0|%0, %2}"
7735 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7736 (set_attr "prefix_rex" "*,1,*,*,*,*")
7737 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander taking a V16QI register (operand 1) and producing a V8HI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7739 (define_expand "vec_unpacku_hi_v16qi"
7740 [(match_operand:V8HI 0 "register_operand" "")
7741 (match_operand:V16QI 1 "register_operand" "")]
7745 ix86_expand_sse4_unpack (operands, true, true);
7747 ix86_expand_sse_unpack (operands, true, true);
;; Expander taking a V16QI register (operand 1) and producing a V8HI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7751 (define_expand "vec_unpacks_hi_v16qi"
7752 [(match_operand:V8HI 0 "register_operand" "")
7753 (match_operand:V16QI 1 "register_operand" "")]
7757 ix86_expand_sse4_unpack (operands, false, true);
7759 ix86_expand_sse_unpack (operands, false, true);
;; Expander taking a V16QI register (operand 1) and producing a V8HI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7763 (define_expand "vec_unpacku_lo_v16qi"
7764 [(match_operand:V8HI 0 "register_operand" "")
7765 (match_operand:V16QI 1 "register_operand" "")]
7769 ix86_expand_sse4_unpack (operands, true, false);
7771 ix86_expand_sse_unpack (operands, true, false);
;; Expander taking a V16QI register (operand 1) and producing a V8HI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7775 (define_expand "vec_unpacks_lo_v16qi"
7776 [(match_operand:V8HI 0 "register_operand" "")
7777 (match_operand:V16QI 1 "register_operand" "")]
7781 ix86_expand_sse4_unpack (operands, false, false);
7783 ix86_expand_sse_unpack (operands, false, false);
;; Expander taking a V8HI register (operand 1) and producing a V4SI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7787 (define_expand "vec_unpacku_hi_v8hi"
7788 [(match_operand:V4SI 0 "register_operand" "")
7789 (match_operand:V8HI 1 "register_operand" "")]
7793 ix86_expand_sse4_unpack (operands, true, true);
7795 ix86_expand_sse_unpack (operands, true, true);
;; Expander taking a V8HI register (operand 1) and producing a V4SI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7799 (define_expand "vec_unpacks_hi_v8hi"
7800 [(match_operand:V4SI 0 "register_operand" "")
7801 (match_operand:V8HI 1 "register_operand" "")]
7805 ix86_expand_sse4_unpack (operands, false, true);
7807 ix86_expand_sse_unpack (operands, false, true);
;; Expander taking a V8HI register (operand 1) and producing a V4SI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7811 (define_expand "vec_unpacku_lo_v8hi"
7812 [(match_operand:V4SI 0 "register_operand" "")
7813 (match_operand:V8HI 1 "register_operand" "")]
7817 ix86_expand_sse4_unpack (operands, true, false);
7819 ix86_expand_sse_unpack (operands, true, false);
;; Expander taking a V8HI register (operand 1) and producing a V4SI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7823 (define_expand "vec_unpacks_lo_v8hi"
7824 [(match_operand:V4SI 0 "register_operand" "")
7825 (match_operand:V8HI 1 "register_operand" "")]
7829 ix86_expand_sse4_unpack (operands, false, false);
7831 ix86_expand_sse_unpack (operands, false, false);
;; Expander taking a V4SI register (operand 1) and producing a V2DI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7835 (define_expand "vec_unpacku_hi_v4si"
7836 [(match_operand:V2DI 0 "register_operand" "")
7837 (match_operand:V4SI 1 "register_operand" "")]
7841 ix86_expand_sse4_unpack (operands, true, true);
7843 ix86_expand_sse_unpack (operands, true, true);
;; Expander taking a V4SI register (operand 1) and producing a V2DI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7847 (define_expand "vec_unpacks_hi_v4si"
7848 [(match_operand:V2DI 0 "register_operand" "")
7849 (match_operand:V4SI 1 "register_operand" "")]
7853 ix86_expand_sse4_unpack (operands, false, true);
7855 ix86_expand_sse_unpack (operands, false, true);
;; Expander taking a V4SI register (operand 1) and producing a V2DI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7859 (define_expand "vec_unpacku_lo_v4si"
7860 [(match_operand:V2DI 0 "register_operand" "")
7861 (match_operand:V4SI 1 "register_operand" "")]
7865 ix86_expand_sse4_unpack (operands, true, false);
7867 ix86_expand_sse_unpack (operands, true, false);
;; Expander taking a V4SI register (operand 1) and producing a V2DI
;; register (operand 0).  The preparation code calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false).
;; NOTE(review): the test selecting between the two helpers is not visible
;; in this excerpt; by the pattern name the flags are presumably
;; (unsigned_p, high_p) -- confirm against the helpers' definitions.
7871 (define_expand "vec_unpacks_lo_v4si"
7872 [(match_operand:V2DI 0 "register_operand" "")
7873 (match_operand:V4SI 1 "register_operand" "")]
7877 ix86_expand_sse4_unpack (operands, false, false);
7879 ix86_expand_sse_unpack (operands, false, false);
7883 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7887 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for the V16QI pattern that combines two nonimmediate
;; V16QI operands with a constant vector of sixteen 1s.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy with PLUS and
;; V16QImode on the operands before the pattern is emitted.
;; NOTE(review): the arithmetic operators of the pattern (and the
;; condition) are on lines not visible in this excerpt; by the name this
;; is presumably an unsigned rounding average -- confirm.
7889 (define_expand "sse2_uavgv16qi3"
7890 [(set (match_operand:V16QI 0 "register_operand" "")
7896 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7898 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7899 (const_vector:V16QI [(const_int 1) (const_int 1)
7900 (const_int 1) (const_int 1)
7901 (const_int 1) (const_int 1)
7902 (const_int 1) (const_int 1)
7903 (const_int 1) (const_int 1)
7904 (const_int 1) (const_int 1)
7905 (const_int 1) (const_int 1)
7906 (const_int 1) (const_int 1)]))
7909 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI pattern with the all-ones constant vector; emits
;; the three-operand vpavgb.  Operand 1 carries the "%" constraint
;; modifier and operand 2 may be a register or memory.  Enabled when
;; TARGET_AVX holds and ix86_binary_operator_ok accepts the operands
;; for PLUS.
;; NOTE(review): the arithmetic operators of the pattern are on lines not
;; visible in this excerpt.
7911 (define_insn "*avx_uavgv16qi3"
7912 [(set (match_operand:V16QI 0 "register_operand" "=x")
7918 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7920 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7921 (const_vector:V16QI [(const_int 1) (const_int 1)
7922 (const_int 1) (const_int 1)
7923 (const_int 1) (const_int 1)
7924 (const_int 1) (const_int 1)
7925 (const_int 1) (const_int 1)
7926 (const_int 1) (const_int 1)
7927 (const_int 1) (const_int 1)
7928 (const_int 1) (const_int 1)]))
7930 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7931 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7932 [(set_attr "type" "sseiadd")
7933 (set_attr "prefix" "vex")
7934 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI pattern with the all-ones constant vector; emits
;; the two-operand pavgb, so operand 1 is tied to the destination ("%0").
;; Operand 2 may be a register or memory.  Enabled when TARGET_SSE2 holds
;; and ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the arithmetic operators of the pattern are on lines not
;; visible in this excerpt.
7936 (define_insn "*sse2_uavgv16qi3"
7937 [(set (match_operand:V16QI 0 "register_operand" "=x")
7943 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7945 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7946 (const_vector:V16QI [(const_int 1) (const_int 1)
7947 (const_int 1) (const_int 1)
7948 (const_int 1) (const_int 1)
7949 (const_int 1) (const_int 1)
7950 (const_int 1) (const_int 1)
7951 (const_int 1) (const_int 1)
7952 (const_int 1) (const_int 1)
7953 (const_int 1) (const_int 1)]))
7955 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7956 "pavgb\t{%2, %0|%0, %2}"
7957 [(set_attr "type" "sseiadd")
7958 (set_attr "prefix_data16" "1")
7959 (set_attr "mode" "TI")])
;; Named expander for the V8HI pattern that combines two nonimmediate
;; V8HI operands with a constant vector of eight 1s.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy with PLUS and
;; V8HImode on the operands before the pattern is emitted.
;; NOTE(review): the arithmetic operators of the pattern (and the
;; condition) are on lines not visible in this excerpt; by the name this
;; is presumably an unsigned rounding average -- confirm.
7961 (define_expand "sse2_uavgv8hi3"
7962 [(set (match_operand:V8HI 0 "register_operand" "")
7968 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7970 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7971 (const_vector:V8HI [(const_int 1) (const_int 1)
7972 (const_int 1) (const_int 1)
7973 (const_int 1) (const_int 1)
7974 (const_int 1) (const_int 1)]))
7977 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI pattern with the all-ones constant vector; emits
;; the three-operand vpavgw.  Operand 1 carries the "%" constraint
;; modifier and operand 2 may be a register or memory.  Enabled when
;; TARGET_AVX holds and ix86_binary_operator_ok accepts the operands
;; for PLUS.
;; NOTE(review): the arithmetic operators of the pattern are on lines not
;; visible in this excerpt.
7979 (define_insn "*avx_uavgv8hi3"
7980 [(set (match_operand:V8HI 0 "register_operand" "=x")
7986 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7988 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7989 (const_vector:V8HI [(const_int 1) (const_int 1)
7990 (const_int 1) (const_int 1)
7991 (const_int 1) (const_int 1)
7992 (const_int 1) (const_int 1)]))
7994 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7995 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7996 [(set_attr "type" "sseiadd")
7997 (set_attr "prefix" "vex")
7998 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI pattern with the all-ones constant vector; emits
;; the two-operand pavgw, so operand 1 is tied to the destination ("%0").
;; Operand 2 may be a register or memory.  Enabled when TARGET_SSE2 holds
;; and ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the arithmetic operators of the pattern are on lines not
;; visible in this excerpt.
8000 (define_insn "*sse2_uavgv8hi3"
8001 [(set (match_operand:V8HI 0 "register_operand" "=x")
8007 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8009 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8010 (const_vector:V8HI [(const_int 1) (const_int 1)
8011 (const_int 1) (const_int 1)
8012 (const_int 1) (const_int 1)
8013 (const_int 1) (const_int 1)]))
8015 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8016 "pavgw\t{%2, %0|%0, %2}"
8017 [(set_attr "type" "sseiadd")
8018 (set_attr "prefix_data16" "1")
8019 (set_attr "mode" "TI")])
8021 ;; The correct representation for this is absolutely enormous, and
8022 ;; surely not generally useful.
;; AVX form: the operation is modelled as an opaque unspec over two V16QI
;; inputs (register operand 1, register-or-memory operand 2) yielding a
;; V2DI register, and emits the three-operand vpsadbw.
;; NOTE(review): the unspec code and the condition are on lines not
;; visible in this excerpt.
8023 (define_insn "*avx_psadbw"
8024 [(set (match_operand:V2DI 0 "register_operand" "=x")
8025 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
8026 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8029 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8030 [(set_attr "type" "sseiadd")
8031 (set_attr "prefix" "vex")
8032 (set_attr "mode" "TI")])
;; Named SSE2 form: an unspec over two V16QI inputs yielding a V2DI
;; register.  Operand 1 is tied to the destination ("0") because the
;; emitted psadbw is a two-operand instruction; operand 2 may be a
;; register or memory.
;; NOTE(review): the unspec code and the condition are on lines not
;; visible in this excerpt.
8034 (define_insn "sse2_psadbw"
8035 [(set (match_operand:V2DI 0 "register_operand" "=x")
8036 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8037 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8040 "psadbw\t{%2, %0|%0, %2}"
8041 [(set_attr "type" "sseiadd")
8042 (set_attr "atom_unit" "simul")
8043 (set_attr "prefix_data16" "1")
8044 (set_attr "mode" "TI")])
;; 256-bit pattern iterated over AVX256MODEF2P: takes a vector register
;; (operand 1) and sets an SImode general register (operand 0), emitting
;; vmovmsk<ssemodesuffix>.  Enabled when AVX256_VEC_FLOAT_MODE_P holds for
;; the iterated mode.
;; NOTE(review): the enclosing unspec and its code are not visible here.
;; The type is "ssecvt" while the 128-bit <sse>_movmsk pattern in this
;; file uses "ssemov" -- confirm the difference is intended.
8046 (define_insn "avx_movmsk<ssemodesuffix>256"
8047 [(set (match_operand:SI 0 "register_operand" "=r")
8049 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8051 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8052 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8053 [(set_attr "type" "ssecvt")
8054 (set_attr "prefix" "vex")
8055 (set_attr "mode" "<MODE>")])
;; 128-bit pattern iterated over SSEMODEF2P: takes a vector register
;; (operand 1) and sets an SImode general register (operand 0), emitting
;; movmsk<ssemodesuffix> (template starts with %v; prefix attribute is
;; "maybe_vex").  Enabled when SSE_VEC_FLOAT_MODE_P holds for the
;; iterated mode.
;; NOTE(review): the enclosing unspec and its code are not visible here.
8057 (define_insn "<sse>_movmsk<ssemodesuffix>"
8058 [(set (match_operand:SI 0 "register_operand" "=r")
8060 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8062 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8063 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8064 [(set_attr "type" "ssemov")
8065 (set_attr "prefix" "maybe_vex")
8066 (set_attr "mode" "<MODE>")])
;; Named pattern: an SImode unspec of a V16QI register (operand 1) written
;; to a general register (operand 0), emitting pmovmskb (template starts
;; with %v; prefix attribute is "maybe_vex").
;; NOTE(review): the unspec code and the condition are on lines not
;; visible in this excerpt.
8068 (define_insn "sse2_pmovmskb"
8069 [(set (match_operand:SI 0 "register_operand" "=r")
8070 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8073 "%vpmovmskb\t{%1, %0|%0, %1}"
8074 [(set_attr "type" "ssemov")
8075 (set_attr "prefix_data16" "1")
8076 (set_attr "prefix" "maybe_vex")
8077 (set_attr "mode" "SI")])
;; Named expander: sets a V16QI memory operand (operand 0) from an unspec
;; whose visible inputs are two V16QI registers (operands 1 and 2).
;; NOTE(review): the remaining unspec inputs, the unspec code and the
;; condition are on lines not visible in this excerpt.
8079 (define_expand "sse2_maskmovdqu"
8080 [(set (match_operand:V16QI 0 "memory_operand" "")
8081 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8082 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit variant (condition: TARGET_SSE2 && !TARGET_64BIT).  The
;; destination is V16QI memory addressed by SImode register operand 0
;; (constraint "D"); the unspec reads V16QI registers 1 and 2 and the old
;; contents of that same memory.  Only operands 1 and 2 appear in the
;; maskmovdqu template; the address register is implicit.  "length_vex"
;; is fixed at 3.
;; NOTE(review): the in-pattern comment mentions %rdi, but this variant
;; uses an SImode address, so the register is presumably %edi here.
;; The unspec code is on a line not visible in this excerpt.
8088 (define_insn "*sse2_maskmovdqu"
8089 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8090 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8091 (match_operand:V16QI 2 "register_operand" "x")
8092 (mem:V16QI (match_dup 0))]
8094 "TARGET_SSE2 && !TARGET_64BIT"
8095 ;; @@@ check ordering of operands in intel/nonintel syntax
8096 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8097 [(set_attr "type" "ssemov")
8098 (set_attr "prefix_data16" "1")
8099 ;; The implicit %rdi operand confuses default length_vex computation.
8100 (set_attr "length_vex" "3")
8101 (set_attr "prefix" "maybe_vex")
8102 (set_attr "mode" "TI")])
;; 64-bit variant (condition: TARGET_SSE2 && TARGET_64BIT).  The
;; destination is V16QI memory addressed by DImode register operand 0
;; (constraint "D"); the unspec reads V16QI registers 1 and 2 and the old
;; contents of that same memory.  Only operands 1 and 2 appear in the
;; maskmovdqu template.  "length_vex" is computed explicitly: 3 + 1 when
;; operand 2 is at or above FIRST_REX_SSE_REG, otherwise 2 + 1.
;; NOTE(review): the unspec code is on a line not visible in this excerpt.
8104 (define_insn "*sse2_maskmovdqu_rex64"
8105 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8106 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8107 (match_operand:V16QI 2 "register_operand" "x")
8108 (mem:V16QI (match_dup 0))]
8110 "TARGET_SSE2 && TARGET_64BIT"
8111 ;; @@@ check ordering of operands in intel/nonintel syntax
8112 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8113 [(set_attr "type" "ssemov")
8114 (set_attr "prefix_data16" "1")
8115 ;; The implicit %rdi operand confuses default length_vex computation.
8116 (set (attr "length_vex")
8117 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8118 (set_attr "prefix" "maybe_vex")
8119 (set_attr "mode" "TI")])
;; Named pattern: an unspec_volatile whose only input is an SImode memory
;; operand.  Attributes mark it as a load ("memory" "load") that touches
;; MXCSR ("atom_sse_attr" "mxcsr").
;; NOTE(review): the unspec code, the condition and the output template
;; are on lines not visible in this excerpt.
8121 (define_insn "sse_ldmxcsr"
8122 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8126 [(set_attr "type" "sse")
8127 (set_attr "atom_sse_attr" "mxcsr")
8128 (set_attr "prefix" "maybe_vex")
8129 (set_attr "memory" "load")])
;; Named pattern: sets an SImode memory operand from an unspec_volatile
;; with code UNSPECV_STMXCSR (its input is the placeholder const_int 0).
;; Attributes mark it as a store ("memory" "store") that touches MXCSR.
;; NOTE(review): the condition and the output template are on lines not
;; visible in this excerpt.
8131 (define_insn "sse_stmxcsr"
8132 [(set (match_operand:SI 0 "memory_operand" "=m")
8133 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8136 [(set_attr "type" "sse")
8137 (set_attr "atom_sse_attr" "mxcsr")
8138 (set_attr "prefix" "maybe_vex")
8139 (set_attr "memory" "store")])
;; Named expander for the store fence, enabled for TARGET_SSE or
;; TARGET_3DNOW_A.  The preparation code builds operand 0 itself: a
;; BLKmode MEM whose address is a Pmode SCRATCH, flagged volatile.  That
;; MEM is used as the input of the UNSPEC_SFENCE unspec.
;; NOTE(review): the line holding the set destination is not visible in
;; this excerpt.
8141 (define_expand "sse_sfence"
8143 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8144 "TARGET_SSE || TARGET_3DNOW_A"
8146 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8147 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the store fence: a BLKmode operand (empty predicate and
;; constraint) is set to an UNSPEC_SFENCE unspec of itself.  Enabled for
;; TARGET_SSE or TARGET_3DNOW_A.  "length_address" is forced to 0 and
;; "memory" is "unknown".
;; NOTE(review): the output template is on a line not visible here.
8150 (define_insn "*sse_sfence"
8151 [(set (match_operand:BLK 0 "" "")
8152 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8153 "TARGET_SSE || TARGET_3DNOW_A"
8155 [(set_attr "type" "sse")
8156 (set_attr "length_address" "0")
8157 (set_attr "atom_sse_attr" "fence")
8158 (set_attr "memory" "unknown")])
;; Named pattern: an unspec_volatile whose only input is an address
;; operand (predicate "address_operand", constraint "p").  It is given the
;; "fence" atom_sse_attr and "unknown" memory attribute.
;; NOTE(review): the unspec code, the condition and the output template
;; are on lines not visible in this excerpt.
8160 (define_insn "sse2_clflush"
8161 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8165 [(set_attr "type" "sse")
8166 (set_attr "atom_sse_attr" "fence")
8167 (set_attr "memory" "unknown")])
;; Named expander for the UNSPEC_MFENCE fence.  The preparation code
;; builds operand 0 itself: a BLKmode MEM whose address is a Pmode
;; SCRATCH, flagged volatile.  That MEM is the input of the unspec.
;; NOTE(review): the set destination line and the condition are not
;; visible in this excerpt.
8169 (define_expand "sse2_mfence"
8171 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8174 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8175 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_MFENCE fence: a BLKmode operand (empty
;; predicate and constraint) is set to an unspec of itself.  Enabled for
;; TARGET_64BIT or TARGET_SSE2.  "length_address" is forced to 0 and
;; "memory" is "unknown".
;; NOTE(review): the output template is on a line not visible here.
8178 (define_insn "*sse2_mfence"
8179 [(set (match_operand:BLK 0 "" "")
8180 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8181 "TARGET_64BIT || TARGET_SSE2"
8183 [(set_attr "type" "sse")
8184 (set_attr "length_address" "0")
8185 (set_attr "atom_sse_attr" "fence")
8186 (set_attr "memory" "unknown")])
;; Named expander for the UNSPEC_LFENCE fence.  The preparation code
;; builds operand 0 itself: a BLKmode MEM whose address is a Pmode
;; SCRATCH, flagged volatile.  That MEM is the input of the unspec.
;; NOTE(review): the set destination line and the condition are not
;; visible in this excerpt.
8188 (define_expand "sse2_lfence"
8190 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8193 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8194 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_LFENCE fence: a BLKmode operand (empty
;; predicate and constraint) is set to an unspec of itself.  Unlike the
;; sfence/mfence insns in this file, its atom_sse_attr is "lfence" rather
;; than "fence".
;; NOTE(review): the condition and the output template are on lines not
;; visible in this excerpt.
8197 (define_insn "*sse2_lfence"
8198 [(set (match_operand:BLK 0 "" "")
8199 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8202 [(set_attr "type" "sse")
8203 (set_attr "length_address" "0")
8204 (set_attr "atom_sse_attr" "lfence")
8205 (set_attr "memory" "unknown")])
;; Named pattern: an unspec_volatile over two SImode registers pinned by
;; constraints "a" (operand 0) and "c" (operand 1).  A single pattern
;; serves both 32- and 64-bit targets, as the in-pattern comment explains.
;; The instruction length is fixed at 3 bytes.
;; NOTE(review): the unspec code, the condition and the output template
;; are on lines not visible in this excerpt.
8207 (define_insn "sse3_mwait"
8208 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8209 (match_operand:SI 1 "register_operand" "c")]
8212 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8213 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8214 ;; we only need to set up 32bit registers.
8216 [(set_attr "length" "3")])
;; 32-bit pattern (condition: TARGET_SSE3 && !TARGET_64BIT): an
;; unspec_volatile over three SImode registers pinned by constraints "a",
;; "c" and "d".  Emits "monitor" with all three operands printed
;; explicitly; the length is fixed at 3 bytes.
;; NOTE(review): the unspec code is on a line not visible in this excerpt.
8218 (define_insn "sse3_monitor"
8219 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8220 (match_operand:SI 1 "register_operand" "c")
8221 (match_operand:SI 2 "register_operand" "d")]
8223 "TARGET_SSE3 && !TARGET_64BIT"
8224 "monitor\t%0, %1, %2"
8225 [(set_attr "length" "3")])
;; 64-bit pattern (condition: TARGET_SSE3 && TARGET_64BIT): an
;; unspec_volatile over a DImode register pinned by "a" (operand 0) and
;; two SImode registers pinned by "c" and "d".  Only operand 0 is widened
;; to DImode; the in-pattern comment explains why operands 1 and 2 stay
;; SImode.  The length is fixed at 3 bytes.
;; NOTE(review): the unspec code and the output template are on lines not
;; visible in this excerpt.
8227 (define_insn "sse3_monitor64"
8228 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8229 (match_operand:SI 1 "register_operand" "c")
8230 (match_operand:SI 2 "register_operand" "d")]
8232 "TARGET_SSE3 && TARGET_64BIT"
8233 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8234 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8235 ;; zero extended to 64bit, we only need to set up 32bit registers.
8237 [(set_attr "length" "3")])
8239 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8241 ;; SSSE3 instructions
8243 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX pattern producing a V8HI register.  The RTL spells out every HI
;; element (indices 0..7) of register operand 1 and then of
;; register-or-memory operand 2 through individual vec_select
;; expressions, taken in adjacent pairs.  Emits the three-operand vphaddw.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this excerpt.
8245 (define_insn "*avx_phaddwv8hi3"
8246 [(set (match_operand:V8HI 0 "register_operand" "=x")
8252 (match_operand:V8HI 1 "register_operand" "x")
8253 (parallel [(const_int 0)]))
8254 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8256 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8257 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8260 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8261 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8263 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8264 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8269 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8270 (parallel [(const_int 0)]))
8271 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8273 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8274 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8277 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8278 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8280 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8281 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8283 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8284 [(set_attr "type" "sseiadd")
8285 (set_attr "prefix_extra" "1")
8286 (set_attr "prefix" "vex")
8287 (set_attr "mode" "TI")])
;; Named non-VEX pattern producing a V8HI register.  The RTL spells out
;; every HI element (indices 0..7) of operand 1 and then of
;; register-or-memory operand 2 through individual vec_select
;; expressions, taken in adjacent pairs.  Operand 1 is tied to the
;; destination ("0") because the emitted phaddw has two operands.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this excerpt.
8289 (define_insn "ssse3_phaddwv8hi3"
8290 [(set (match_operand:V8HI 0 "register_operand" "=x")
8296 (match_operand:V8HI 1 "register_operand" "0")
8297 (parallel [(const_int 0)]))
8298 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8300 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8301 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8304 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8305 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8307 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8308 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8313 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8314 (parallel [(const_int 0)]))
8315 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8317 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8318 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8321 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8322 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8324 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8325 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8327 "phaddw\t{%2, %0|%0, %2}"
8328 [(set_attr "type" "sseiadd")
8329 (set_attr "atom_unit" "complex")
8330 (set_attr "prefix_data16" "1")
8331 (set_attr "prefix_extra" "1")
8332 (set_attr "mode" "TI")])
;; Named MMX-register form ("y" constraints) producing a V4HI register.
;; The RTL spells out HI elements 0..3 of operand 1 (tied to the
;; destination) and of register-or-memory operand 2 via vec_select, in
;; adjacent pairs, and emits the two-operand phaddw.  "prefix_rex" is
;; computed at run time by x86_extended_reg_mentioned_p (insn).
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this excerpt.
8334 (define_insn "ssse3_phaddwv4hi3"
8335 [(set (match_operand:V4HI 0 "register_operand" "=y")
8340 (match_operand:V4HI 1 "register_operand" "0")
8341 (parallel [(const_int 0)]))
8342 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8344 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8345 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8349 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8350 (parallel [(const_int 0)]))
8351 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8353 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8354 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8356 "phaddw\t{%2, %0|%0, %2}"
8357 [(set_attr "type" "sseiadd")
8358 (set_attr "atom_unit" "complex")
8359 (set_attr "prefix_extra" "1")
8360 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8361 (set_attr "mode" "DI")])
;; AVX pattern producing a V4SI register.  The RTL spells out SI elements
;; 0..3 of register operand 1 and of register-or-memory operand 2 via
;; vec_select, in adjacent pairs, and emits the three-operand vphaddd.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this excerpt.
8363 (define_insn "*avx_phadddv4si3"
8364 [(set (match_operand:V4SI 0 "register_operand" "=x")
8369 (match_operand:V4SI 1 "register_operand" "x")
8370 (parallel [(const_int 0)]))
8371 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8373 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8374 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8378 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8379 (parallel [(const_int 0)]))
8380 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8382 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8383 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8385 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8386 [(set_attr "type" "sseiadd")
8387 (set_attr "prefix_extra" "1")
8388 (set_attr "prefix" "vex")
8389 (set_attr "mode" "TI")])
;; Named non-VEX pattern producing a V4SI register.  The RTL spells out SI
;; elements 0..3 of operand 1 (tied to the destination) and of
;; register-or-memory operand 2 via vec_select, in adjacent pairs, and
;; emits the two-operand phaddd.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this excerpt.
8391 (define_insn "ssse3_phadddv4si3"
8392 [(set (match_operand:V4SI 0 "register_operand" "=x")
8397 (match_operand:V4SI 1 "register_operand" "0")
8398 (parallel [(const_int 0)]))
8399 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8401 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8402 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8406 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8407 (parallel [(const_int 0)]))
8408 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8410 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8411 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8413 "phaddd\t{%2, %0|%0, %2}"
8414 [(set_attr "type" "sseiadd")
8415 (set_attr "atom_unit" "complex")
8416 (set_attr "prefix_data16" "1")
8417 (set_attr "prefix_extra" "1")
8418 (set_attr "mode" "TI")])
;; Named MMX-register form ("y" constraints) producing a V2SI register.
;; The RTL selects SI elements 0 and 1 of operand 1 (tied to the
;; destination) and of register-or-memory operand 2 via vec_select, and
;; emits the two-operand phaddd.  "prefix_rex" is computed at run time by
;; x86_extended_reg_mentioned_p (insn).
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this excerpt.
8420 (define_insn "ssse3_phadddv2si3"
8421 [(set (match_operand:V2SI 0 "register_operand" "=y")
8425 (match_operand:V2SI 1 "register_operand" "0")
8426 (parallel [(const_int 0)]))
8427 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8430 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8431 (parallel [(const_int 0)]))
8432 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8434 "phaddd\t{%2, %0|%0, %2}"
8435 [(set_attr "type" "sseiadd")
8436 (set_attr "atom_unit" "complex")
8437 (set_attr "prefix_extra" "1")
8438 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8439 (set_attr "mode" "DI")])
;; AVX pattern producing a V8HI register.  The RTL spells out every HI
;; element (indices 0..7) of register operand 1 and then of
;; register-or-memory operand 2 through individual vec_select
;; expressions, taken in adjacent pairs.  Emits the three-operand
;; vphaddsw.
;; NOTE(review): the operators that combine each pair (by the mnemonic
;; presumably a saturating add -- confirm) and the condition are on lines
;; not visible in this excerpt.
8441 (define_insn "*avx_phaddswv8hi3"
8442 [(set (match_operand:V8HI 0 "register_operand" "=x")
8448 (match_operand:V8HI 1 "register_operand" "x")
8449 (parallel [(const_int 0)]))
8450 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8452 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8453 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8456 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8457 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8459 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8460 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8465 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8466 (parallel [(const_int 0)]))
8467 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8469 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8470 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8473 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8474 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8476 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8477 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8479 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8480 [(set_attr "type" "sseiadd")
8481 (set_attr "prefix_extra" "1")
8482 (set_attr "prefix" "vex")
8483 (set_attr "mode" "TI")])
;; Named non-VEX pattern producing a V8HI register.  The RTL spells out
;; every HI element (indices 0..7) of operand 1 (tied to the destination)
;; and then of register-or-memory operand 2 through individual vec_select
;; expressions, taken in adjacent pairs.  Emits the two-operand phaddsw.
;; NOTE(review): the operators that combine each pair (by the mnemonic
;; presumably a saturating add -- confirm) and the condition are on lines
;; not visible in this excerpt.
8485 (define_insn "ssse3_phaddswv8hi3"
8486 [(set (match_operand:V8HI 0 "register_operand" "=x")
8492 (match_operand:V8HI 1 "register_operand" "0")
8493 (parallel [(const_int 0)]))
8494 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8496 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8497 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8500 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8501 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8503 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8504 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8509 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8510 (parallel [(const_int 0)]))
8511 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8513 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8514 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8517 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8518 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8520 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8521 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8523 "phaddsw\t{%2, %0|%0, %2}"
8524 [(set_attr "type" "sseiadd")
8525 (set_attr "atom_unit" "complex")
8526 (set_attr "prefix_data16" "1")
8527 (set_attr "prefix_extra" "1")
8528 (set_attr "mode" "TI")])
;; phaddsw on V4HI using "y" (MMX) registers.  Operand 1 is tied to the
;; destination; operand 2 may be an MMX register or memory ("ym").
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8530 (define_insn "ssse3_phaddswv4hi3"
8531 [(set (match_operand:V4HI 0 "register_operand" "=y")
8536 (match_operand:V4HI 1 "register_operand" "0")
8537 (parallel [(const_int 0)]))
8538 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8540 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8541 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8545 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8546 (parallel [(const_int 0)]))
8547 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8549 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8550 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8552 "phaddsw\t{%2, %0|%0, %2}"
8553 [(set_attr "type" "sseiadd")
8554 (set_attr "atom_unit" "complex")
8555 (set_attr "prefix_extra" "1")
8556 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8557 (set_attr "mode" "DI")])
;; VEX-encoded vphsubw on V8HI (three-operand form).  Operand 1 is any SSE
;; register ("x") and is printed separately from the destination.  The
;; leading "*" makes this an unnamed pattern, identified only in RTL dumps.
8559 (define_insn "*avx_phsubwv8hi3"
8560 [(set (match_operand:V8HI 0 "register_operand" "=x")
8566 (match_operand:V8HI 1 "register_operand" "x")
8567 (parallel [(const_int 0)]))
8568 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8570 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8571 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8574 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8575 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8577 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8578 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8583 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8584 (parallel [(const_int 0)]))
8585 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8587 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8588 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8591 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8592 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8594 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8595 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8597 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8598 [(set_attr "type" "sseiadd")
8599 (set_attr "prefix_extra" "1")
8600 (set_attr "prefix" "vex")
8601 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) phsubw on V8HI.  Operand 1 is tied to the
;; destination by constraint "0"; operand 2 may be an SSE register or
;; memory.  The element selects walk indices 0..7 of each source operand.
8603 (define_insn "ssse3_phsubwv8hi3"
8604 [(set (match_operand:V8HI 0 "register_operand" "=x")
8610 (match_operand:V8HI 1 "register_operand" "0")
8611 (parallel [(const_int 0)]))
8612 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8614 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8615 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8618 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8619 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8621 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8622 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8627 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8628 (parallel [(const_int 0)]))
8629 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8631 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8632 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8635 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8636 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8638 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8639 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8641 "phsubw\t{%2, %0|%0, %2}"
8642 [(set_attr "type" "sseiadd")
8643 (set_attr "atom_unit" "complex")
8644 (set_attr "prefix_data16" "1")
8645 (set_attr "prefix_extra" "1")
8646 (set_attr "mode" "TI")])
;; phsubw on V4HI using "y" (MMX) registers.  Operand 1 is tied to the
;; destination; operand 2 may be an MMX register or memory ("ym").
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8648 (define_insn "ssse3_phsubwv4hi3"
8649 [(set (match_operand:V4HI 0 "register_operand" "=y")
8654 (match_operand:V4HI 1 "register_operand" "0")
8655 (parallel [(const_int 0)]))
8656 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8658 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8659 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8663 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8664 (parallel [(const_int 0)]))
8665 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8667 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8668 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8670 "phsubw\t{%2, %0|%0, %2}"
8671 [(set_attr "type" "sseiadd")
8672 (set_attr "atom_unit" "complex")
8673 (set_attr "prefix_extra" "1")
8674 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8675 (set_attr "mode" "DI")])
;; VEX-encoded vphsubd on V4SI (three-operand form).  Operand 1 is any SSE
;; register; operand 2 may be an SSE register or memory.  Unnamed pattern
;; (leading "*").
8677 (define_insn "*avx_phsubdv4si3"
8678 [(set (match_operand:V4SI 0 "register_operand" "=x")
8683 (match_operand:V4SI 1 "register_operand" "x")
8684 (parallel [(const_int 0)]))
8685 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8687 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8688 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8692 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8693 (parallel [(const_int 0)]))
8694 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8696 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8697 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8699 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8700 [(set_attr "type" "sseiadd")
8701 (set_attr "prefix_extra" "1")
8702 (set_attr "prefix" "vex")
8703 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) phsubd on V4SI.  Operand 1 is tied to the
;; destination by constraint "0"; operand 2 may be an SSE register or
;; memory.
8705 (define_insn "ssse3_phsubdv4si3"
8706 [(set (match_operand:V4SI 0 "register_operand" "=x")
8711 (match_operand:V4SI 1 "register_operand" "0")
8712 (parallel [(const_int 0)]))
8713 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8715 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8716 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8720 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8721 (parallel [(const_int 0)]))
8722 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8724 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8725 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8727 "phsubd\t{%2, %0|%0, %2}"
8728 [(set_attr "type" "sseiadd")
8729 (set_attr "atom_unit" "complex")
8730 (set_attr "prefix_data16" "1")
8731 (set_attr "prefix_extra" "1")
8732 (set_attr "mode" "TI")])
;; phsubd on V2SI using "y" (MMX) registers.  Operand 1 is tied to the
;; destination; elements 0 and 1 of each source operand are selected.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8734 (define_insn "ssse3_phsubdv2si3"
8735 [(set (match_operand:V2SI 0 "register_operand" "=y")
8739 (match_operand:V2SI 1 "register_operand" "0")
8740 (parallel [(const_int 0)]))
8741 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8744 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8745 (parallel [(const_int 0)]))
8746 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8748 "phsubd\t{%2, %0|%0, %2}"
8749 [(set_attr "type" "sseiadd")
8750 (set_attr "atom_unit" "complex")
8751 (set_attr "prefix_extra" "1")
8752 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8753 (set_attr "mode" "DI")])
;; VEX-encoded vphsubsw on V8HI (three-operand form).  Operand 1 is any
;; SSE register; operand 2 may be an SSE register or memory.  Unnamed
;; pattern (leading "*").
8755 (define_insn "*avx_phsubswv8hi3"
8756 [(set (match_operand:V8HI 0 "register_operand" "=x")
8762 (match_operand:V8HI 1 "register_operand" "x")
8763 (parallel [(const_int 0)]))
8764 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8766 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8767 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8770 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8771 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8773 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8774 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8779 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8780 (parallel [(const_int 0)]))
8781 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8783 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8784 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8787 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8788 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8790 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8791 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8793 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8794 [(set_attr "type" "sseiadd")
8795 (set_attr "prefix_extra" "1")
8796 (set_attr "prefix" "vex")
8797 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) phsubsw on V8HI.  Operand 1 is tied to the
;; destination by constraint "0"; operand 2 may be an SSE register or
;; memory.  The element selects walk indices 0..7 of each source operand.
8799 (define_insn "ssse3_phsubswv8hi3"
8800 [(set (match_operand:V8HI 0 "register_operand" "=x")
8806 (match_operand:V8HI 1 "register_operand" "0")
8807 (parallel [(const_int 0)]))
8808 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8810 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8811 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8814 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8815 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8817 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8818 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8823 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8824 (parallel [(const_int 0)]))
8825 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8827 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8828 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8831 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8832 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8834 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8835 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8837 "phsubsw\t{%2, %0|%0, %2}"
8838 [(set_attr "type" "sseiadd")
8839 (set_attr "atom_unit" "complex")
8840 (set_attr "prefix_data16" "1")
8841 (set_attr "prefix_extra" "1")
8842 (set_attr "mode" "TI")])
;; phsubsw on V4HI using "y" (MMX) registers.  Operand 1 is tied to the
;; destination; operand 2 may be an MMX register or memory ("ym").
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8844 (define_insn "ssse3_phsubswv4hi3"
8845 [(set (match_operand:V4HI 0 "register_operand" "=y")
8850 (match_operand:V4HI 1 "register_operand" "0")
8851 (parallel [(const_int 0)]))
8852 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8854 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8855 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8859 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8860 (parallel [(const_int 0)]))
8861 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8863 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8864 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8866 "phsubsw\t{%2, %0|%0, %2}"
8867 [(set_attr "type" "sseiadd")
8868 (set_attr "atom_unit" "complex")
8869 (set_attr "prefix_extra" "1")
8870 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8871 (set_attr "mode" "DI")])
;; VEX-encoded vpmaddubsw: V8HI destination computed from two V16QI
;; sources (operand 1 an SSE register, operand 2 register or memory).
;; Each source is referenced twice: once directly and once via match_dup.
;; Unnamed pattern (leading "*").
8873 (define_insn "*avx_pmaddubsw128"
8874 [(set (match_operand:V8HI 0 "register_operand" "=x")
8879 (match_operand:V16QI 1 "register_operand" "x")
8880 (parallel [(const_int 0)
8890 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8891 (parallel [(const_int 0)
8901 (vec_select:V16QI (match_dup 1)
8902 (parallel [(const_int 1)
8911 (vec_select:V16QI (match_dup 2)
8912 (parallel [(const_int 1)
8919 (const_int 15)]))))))]
8921 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8922 [(set_attr "type" "sseiadd")
8923 (set_attr "prefix_extra" "1")
8924 (set_attr "prefix" "vex")
8925 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) pmaddubsw: V8HI destination from two V16QI
;; sources.  Operand 1 is tied to the destination (constraint "0");
;; operand 2 may be an SSE register or memory.
8927 (define_insn "ssse3_pmaddubsw128"
8928 [(set (match_operand:V8HI 0 "register_operand" "=x")
8933 (match_operand:V16QI 1 "register_operand" "0")
8934 (parallel [(const_int 0)
8944 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8945 (parallel [(const_int 0)
8955 (vec_select:V16QI (match_dup 1)
8956 (parallel [(const_int 1)
8965 (vec_select:V16QI (match_dup 2)
8966 (parallel [(const_int 1)
8973 (const_int 15)]))))))]
8975 "pmaddubsw\t{%2, %0|%0, %2}"
8976 [(set_attr "type" "sseiadd")
8977 (set_attr "atom_unit" "simul")
8978 (set_attr "prefix_data16" "1")
8979 (set_attr "prefix_extra" "1")
8980 (set_attr "mode" "TI")])
;; pmaddubsw on "y" (MMX) registers: V4HI destination from two V8QI
;; sources.  Operand 1 is tied to the destination; operand 2 may be an
;; MMX register or memory.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
8982 (define_insn "ssse3_pmaddubsw"
8983 [(set (match_operand:V4HI 0 "register_operand" "=y")
8988 (match_operand:V8QI 1 "register_operand" "0")
8989 (parallel [(const_int 0)
8995 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8996 (parallel [(const_int 0)
9002 (vec_select:V8QI (match_dup 1)
9003 (parallel [(const_int 1)
9008 (vec_select:V8QI (match_dup 2)
9009 (parallel [(const_int 1)
9012 (const_int 7)]))))))]
9014 "pmaddubsw\t{%2, %0|%0, %2}"
9015 [(set_attr "type" "sseiadd")
9016 (set_attr "atom_unit" "simul")
9017 (set_attr "prefix_extra" "1")
9018 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9019 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement passes them through
;; ix86_fixup_binary_operands_no_copy for a MULT in V8HImode.  The RTL
;; template includes a V8HI const_vector whose eight elements are all 1.
9021 (define_expand "ssse3_pmulhrswv8hi3"
9022 [(set (match_operand:V8HI 0 "register_operand" "")
9029 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9031 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9033 (const_vector:V8HI [(const_int 1) (const_int 1)
9034 (const_int 1) (const_int 1)
9035 (const_int 1) (const_int 1)
9036 (const_int 1) (const_int 1)]))
9039 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; VEX-encoded vpmulhrsw on V8HI.  The "%" in operand 1's constraint marks
;; operands 1 and 2 as commutative.  Enabled only when TARGET_AVX holds and
;; ix86_binary_operator_ok accepts the operands for MULT in V8HImode.
9041 (define_insn "*avx_pmulhrswv8hi3"
9042 [(set (match_operand:V8HI 0 "register_operand" "=x")
9049 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9051 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9053 (const_vector:V8HI [(const_int 1) (const_int 1)
9054 (const_int 1) (const_int 1)
9055 (const_int 1) (const_int 1)
9056 (const_int 1) (const_int 1)]))
9058 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9059 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9060 [(set_attr "type" "sseimul")
9061 (set_attr "prefix_extra" "1")
9062 (set_attr "prefix" "vex")
9063 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) pmulhrsw on V8HI.  Operand 1 is tied to the
;; destination and, via "%", commutative with operand 2.  Enabled when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
9065 (define_insn "*ssse3_pmulhrswv8hi3"
9066 [(set (match_operand:V8HI 0 "register_operand" "=x")
9073 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9075 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9077 (const_vector:V8HI [(const_int 1) (const_int 1)
9078 (const_int 1) (const_int 1)
9079 (const_int 1) (const_int 1)
9080 (const_int 1) (const_int 1)]))
9082 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9083 "pmulhrsw\t{%2, %0|%0, %2}"
9084 [(set_attr "type" "sseimul")
9085 (set_attr "prefix_data16" "1")
9086 (set_attr "prefix_extra" "1")
9087 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw pattern.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy for a
;; MULT in V4HImode.  The RTL template includes a V4HI const_vector whose
;; four elements are all 1.
9089 (define_expand "ssse3_pmulhrswv4hi3"
9090 [(set (match_operand:V4HI 0 "register_operand" "")
9097 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9099 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9101 (const_vector:V4HI [(const_int 1) (const_int 1)
9102 (const_int 1) (const_int 1)]))
9105 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; pmulhrsw on V4HI using "y" (MMX) registers.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").  Enabled when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
9107 (define_insn "*ssse3_pmulhrswv4hi3"
9108 [(set (match_operand:V4HI 0 "register_operand" "=y")
9115 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9117 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9119 (const_vector:V4HI [(const_int 1) (const_int 1)
9120 (const_int 1) (const_int 1)]))
9122 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9123 "pmulhrsw\t{%2, %0|%0, %2}"
9124 [(set_attr "type" "sseimul")
9125 (set_attr "prefix_extra" "1")
9126 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9127 (set_attr "mode" "DI")])
;; VEX-encoded vpshufb on V16QI, modelled as an unspec of operands 1 and 2.
;; Three-operand form: operand 1 is any SSE register, operand 2 register
;; or memory.  Unnamed pattern (leading "*").
9129 (define_insn "*avx_pshufbv16qi3"
9130 [(set (match_operand:V16QI 0 "register_operand" "=x")
9131 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9132 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9135 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9136 [(set_attr "type" "sselog1")
9137 (set_attr "prefix_extra" "1")
9138 (set_attr "prefix" "vex")
9139 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) pshufb on V16QI, modelled as an unspec.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may
;; be an SSE register or memory.
9141 (define_insn "ssse3_pshufbv16qi3"
9142 [(set (match_operand:V16QI 0 "register_operand" "=x")
9143 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9144 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9147 "pshufb\t{%2, %0|%0, %2}";
9148 [(set_attr "type" "sselog1")
9149 (set_attr "prefix_data16" "1")
9150 (set_attr "prefix_extra" "1")
9151 (set_attr "mode" "TI")])
;; pshufb on V8QI using "y" (MMX) registers, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 may be an MMX register
;; or memory.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
9153 (define_insn "ssse3_pshufbv8qi3"
9154 [(set (match_operand:V8QI 0 "register_operand" "=y")
9155 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9156 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9159 "pshufb\t{%2, %0|%0, %2}";
9160 [(set_attr "type" "sselog1")
9161 (set_attr "prefix_extra" "1")
9162 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9163 (set_attr "mode" "DI")])
;; VEX-encoded vpsign{b,w,d}, instantiated for each mode in SSEMODE124
;; (V16QI, V8HI, V4SI); the mnemonic suffix comes from <ssevecsize>.
;; Three-operand form with operand 2 in a register or memory.
9165 (define_insn "*avx_psign<mode>3"
9166 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9168 [(match_operand:SSEMODE124 1 "register_operand" "x")
9169 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9172 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9173 [(set_attr "type" "sselog1")
9174 (set_attr "prefix_extra" "1")
9175 (set_attr "prefix" "vex")
9176 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) psign{b,w,d}, instantiated for each mode in
;; SSEMODE124 (V16QI, V8HI, V4SI).  Operand 1 is tied to the destination;
;; operand 2 may be an SSE register or memory.
9178 (define_insn "ssse3_psign<mode>3"
9179 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9181 [(match_operand:SSEMODE124 1 "register_operand" "0")
9182 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9185 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9186 [(set_attr "type" "sselog1")
9187 (set_attr "prefix_data16" "1")
9188 (set_attr "prefix_extra" "1")
9189 (set_attr "mode" "TI")])
;; psign on "y" (MMX) registers, instantiated over the MMXMODEI iterator;
;; the mnemonic suffix comes from <mmxvecsize>.  Shares its name template
;; with the SSEMODE124 pattern but expands to different <mode> names.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
9191 (define_insn "ssse3_psign<mode>3"
9192 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9194 [(match_operand:MMXMODEI 1 "register_operand" "0")
9195 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9198 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9199 [(set_attr "type" "sselog1")
9200 (set_attr "prefix_extra" "1")
9201 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9202 (set_attr "mode" "DI")])
;; VEX-encoded vpalignr on TI, modelled as an unspec of operands 1-3.
;; The output code rewrites operand 3 to operand 3 / 8 before printing,
;; so the emitted immediate is one eighth of the RTL constant
;; (NOTE(review): presumably a bit count converted to a byte count, per
;; the const_0_to_255_mul_8_operand predicate name -- confirm).
9204 (define_insn "*avx_palignrti"
9205 [(set (match_operand:TI 0 "register_operand" "=x")
9206 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9207 (match_operand:TI 2 "nonimmediate_operand" "xm")
9208 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9212 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9213 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9215 [(set_attr "type" "sseishft")
9216 (set_attr "prefix_extra" "1")
9217 (set_attr "length_immediate" "1")
9218 (set_attr "prefix" "vex")
9219 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) palignr on TI, modelled as an unspec.  Operand 1
;; is tied to the destination.  The output code rewrites operand 3 to
;; operand 3 / 8 before printing, so the emitted immediate is one eighth
;; of the RTL constant.
9221 (define_insn "ssse3_palignrti"
9222 [(set (match_operand:TI 0 "register_operand" "=x")
9223 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9224 (match_operand:TI 2 "nonimmediate_operand" "xm")
9225 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9229 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9230 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9232 [(set_attr "type" "sseishft")
9233 (set_attr "atom_unit" "sishuf")
9234 (set_attr "prefix_data16" "1")
9235 (set_attr "prefix_extra" "1")
9236 (set_attr "length_immediate" "1")
9237 (set_attr "mode" "TI")])
;; palignr on DI using "y" (MMX) registers, modelled as an unspec.
;; Operand 1 is tied to the destination.  The output code rewrites
;; operand 3 to operand 3 / 8 before printing.  prefix_rex is computed
;; per insn via x86_extended_reg_mentioned_p.
9239 (define_insn "ssse3_palignrdi"
9240 [(set (match_operand:DI 0 "register_operand" "=y")
9241 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9242 (match_operand:DI 2 "nonimmediate_operand" "ym")
9243 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9247 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9248 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9250 [(set_attr "type" "sseishft")
9251 (set_attr "atom_unit" "sishuf")
9252 (set_attr "prefix_extra" "1")
9253 (set_attr "length_immediate" "1")
9254 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9255 (set_attr "mode" "DI")])
;; Vector absolute value (RTL "abs") for each mode in SSEMODE124, emitted
;; as pabs{b,w,d}.  The source may be an SSE register or memory.  The
;; "%v" template prefix goes with prefix "maybe_vex" (NOTE(review):
;; presumably prints the "v" mnemonic prefix only for the VEX encoding --
;; confirm against the i386 operand printer).
9257 (define_insn "abs<mode>2"
9258 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9259 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9261 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9262 [(set_attr "type" "sselog1")
9263 (set_attr "prefix_data16" "1")
9264 (set_attr "prefix_extra" "1")
9265 (set_attr "prefix" "maybe_vex")
9266 (set_attr "mode" "TI")])
;; Vector absolute value (RTL "abs") over the MMXMODEI iterator using "y"
;; (MMX) registers, emitted as pabs with the <mmxvecsize> suffix.
;; prefix_rep is forced to 0 and prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
9268 (define_insn "abs<mode>2"
9269 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9270 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9272 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9273 [(set_attr "type" "sselog1")
9274 (set_attr "prefix_rep" "0")
9275 (set_attr "prefix_extra" "1")
9276 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9277 (set_attr "mode" "DI")])
9279 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9281 ;; AMD SSE4A instructions
9283 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store for each MODEF mode: operand 0 must be memory and
;; operand 1 an SSE register.  Emits "movnts" followed by the
;; <ssemodefsuffix> expansion.
9285 (define_insn "sse4a_movnt<mode>"
9286 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9288 [(match_operand:MODEF 1 "register_operand" "x")]
9291 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9292 [(set_attr "type" "ssemov")
9293 (set_attr "mode" "<MODE>")])
;; SSE4A store of element 0 of an SSEMODEF2P vector register to a
;; scalar-mode memory operand.  Emits "movnt" followed by the
;; <ssescalarmodesuffix> expansion.
9295 (define_insn "sse4a_vmmovnt<mode>"
9296 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9297 (unspec:<ssescalarmode>
9298 [(vec_select:<ssescalarmode>
9299 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9300 (parallel [(const_int 0)]))]
9303 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9304 [(set_attr "type" "ssemov")
9305 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form.  Operand 1 is tied to the V2DI destination;
;; operands 2 and 3 are integer constants printed as two immediates
;; (length_immediate is 2).
9307 (define_insn "sse4a_extrqi"
9308 [(set (match_operand:V2DI 0 "register_operand" "=x")
9309 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9310 (match_operand 2 "const_int_operand" "")
9311 (match_operand 3 "const_int_operand" "")]
9314 "extrq\t{%3, %2, %0|%0, %2, %3}"
9315 [(set_attr "type" "sse")
9316 (set_attr "prefix_data16" "1")
9317 (set_attr "length_immediate" "2")
9318 (set_attr "mode" "TI")])
;; SSE4A extrq, register form.  Operand 1 is tied to the V2DI destination;
;; operand 2 is a V16QI SSE register.
9320 (define_insn "sse4a_extrq"
9321 [(set (match_operand:V2DI 0 "register_operand" "=x")
9322 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9323 (match_operand:V16QI 2 "register_operand" "x")]
9326 "extrq\t{%2, %0|%0, %2}"
9327 [(set_attr "type" "sse")
9328 (set_attr "prefix_data16" "1")
9329 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form.  Operand 1 is tied to the V2DI
;; destination, operand 2 is a V2DI SSE register, and operands 3 and 4 are
;; integer constants printed as two immediates (length_immediate is 2).
;; The attributes select a rep prefix and no data16 prefix.
9331 (define_insn "sse4a_insertqi"
9332 [(set (match_operand:V2DI 0 "register_operand" "=x")
9333 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9334 (match_operand:V2DI 2 "register_operand" "x")
9335 (match_operand 3 "const_int_operand" "")
9336 (match_operand 4 "const_int_operand" "")]
9339 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9340 [(set_attr "type" "sseins")
9341 (set_attr "prefix_data16" "0")
9342 (set_attr "prefix_rep" "1")
9343 (set_attr "length_immediate" "2")
9344 (set_attr "mode" "TI")])
;; SSE4A insertq, register form.  Operand 1 is tied to the V2DI
;; destination; operand 2 is a V2DI SSE register.  The attributes select
;; a rep prefix and no data16 prefix.
9346 (define_insn "sse4a_insertq"
9347 [(set (match_operand:V2DI 0 "register_operand" "=x")
9348 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9349 (match_operand:V2DI 2 "register_operand" "x")]
9352 "insertq\t{%2, %0|%0, %2}"
9353 [(set_attr "type" "sseins")
9354 (set_attr "prefix_data16" "0")
9355 (set_attr "prefix_rep" "1")
9356 (set_attr "mode" "TI")])
9358 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9360 ;; Intel SSE4.1 instructions
9362 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded immediate blend over the AVXMODEF2P iterator, expressed as
;; a vec_merge.  Note the operand order inside vec_merge: operand 2
;; (register or memory) comes first, operand 1 second, and operand 3 is
;; the constant mask, whose range is bounded by <blendbits>.
9364 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9365 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9366 (vec_merge:AVXMODEF2P
9367 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9368 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9369 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9371 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9372 [(set_attr "type" "ssemov")
9373 (set_attr "prefix_extra" "1")
9374 (set_attr "length_immediate" "1")
9375 (set_attr "prefix" "vex")
9376 (set_attr "mode" "<avxvecmode>")])
;; VEX-encoded variable blend over the AVXMODEF2P iterator.  All of the
;; operands are explicit: operands 1 and 3 are SSE registers, operand 2 a
;; register or memory.  length_immediate is 1 although operand 3 is a
;; register (NOTE(review): presumably because the fourth register is
;; encoded in an immediate byte -- confirm against the ISA reference).
9378 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9379 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9381 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9382 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9383 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9386 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9387 [(set_attr "type" "ssemov")
9388 (set_attr "prefix_extra" "1")
9389 (set_attr "length_immediate" "1")
9390 (set_attr "prefix" "vex")
9391 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX immediate blend over the SSEMODEF2P iterator, expressed as a
;; vec_merge of operand 2 (register or memory) and operand 1 (tied to the
;; destination) under the constant mask in operand 3.
9393 (define_insn "sse4_1_blend<ssemodesuffix>"
9394 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9395 (vec_merge:SSEMODEF2P
9396 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9397 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9398 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9400 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9401 [(set_attr "type" "ssemov")
9402 (set_attr "prefix_data16" "1")
9403 (set_attr "prefix_extra" "1")
9404 (set_attr "length_immediate" "1")
9405 (set_attr "mode" "<MODE>")])
;; Non-VEX variable blend over the SSEMODEF2P iterator.  Operand 3 uses
;; the "Yz" constraint while operands 0-2 use *_not_xmm0 predicates
;; (NOTE(review): presumably because the instruction takes its mask
;; implicitly in xmm0, which must then not overlap the other operands --
;; confirm against the constraint and predicate definitions).
9407 (define_insn "sse4_1_blendv<ssemodesuffix>"
9408 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9410 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9411 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9412 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9415 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9416 [(set_attr "type" "ssemov")
9417 (set_attr "prefix_data16" "1")
9418 (set_attr "prefix_extra" "1")
9419 (set_attr "mode" "<MODE>")])
;; VEX-encoded vdp over the AVXMODEF2P iterator.  Operands 1 and 2 are
;; commutative ("%x"); operand 3 is an immediate accepted by
;; const_0_to_255_operand.
9421 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9422 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9424 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9425 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9426 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9429 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9430 [(set_attr "type" "ssemul")
9431 (set_attr "prefix" "vex")
9432 (set_attr "prefix_extra" "1")
9433 (set_attr "length_immediate" "1")
9434 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX dp over the SSEMODEF2P iterator.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0"); operand 3 is an
;; immediate accepted by const_0_to_255_operand.
9436 (define_insn "sse4_1_dp<ssemodesuffix>"
9437 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9439 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9440 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9441 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9444 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9445 [(set_attr "type" "ssemul")
9446 (set_attr "prefix_data16" "1")
9447 (set_attr "prefix_extra" "1")
9448 (set_attr "length_immediate" "1")
9449 (set_attr "mode" "<MODE>")])
;; movntdqa load: operand 1 must be a V2DI memory operand and the result
;; goes to an SSE register.  Modelled as an unspec; a single pattern
;; covers both encodings via the "%v" template prefix and prefix
;; "maybe_vex".
9451 (define_insn "sse4_1_movntdqa"
9452 [(set (match_operand:V2DI 0 "register_operand" "=x")
9453 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9456 "%vmovntdqa\t{%1, %0|%0, %1}"
9457 [(set_attr "type" "ssemov")
9458 (set_attr "prefix_extra" "1")
9459 (set_attr "prefix" "maybe_vex")
9460 (set_attr "mode" "TI")])
;; VEX-encoded vmpsadbw on V16QI, modelled as an unspec of two vector
;; operands plus an immediate accepted by const_0_to_255_operand.
;; Unnamed pattern (leading "*").
9462 (define_insn "*avx_mpsadbw"
9463 [(set (match_operand:V16QI 0 "register_operand" "=x")
9464 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9465 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9466 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9469 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9470 [(set_attr "type" "sselog1")
9471 (set_attr "prefix" "vex")
9472 (set_attr "prefix_extra" "1")
9473 (set_attr "length_immediate" "1")
9474 (set_attr "mode" "TI")])
;; Non-VEX mpsadbw on V16QI, modelled as an unspec.  Operand 1 is tied to
;; the destination; operand 3 is an immediate accepted by
;; const_0_to_255_operand.
9476 (define_insn "sse4_1_mpsadbw"
9477 [(set (match_operand:V16QI 0 "register_operand" "=x")
9478 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9479 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9480 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9483 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9484 [(set_attr "type" "sselog1")
9485 (set_attr "prefix_extra" "1")
9486 (set_attr "length_immediate" "1")
9487 (set_attr "mode" "TI")])
;; VEX-encoded vpackusdw: V8HI destination built from two V4SI sources
;; (operand 1 an SSE register, operand 2 register or memory).  Unnamed
;; pattern (leading "*").
9489 (define_insn "*avx_packusdw"
9490 [(set (match_operand:V8HI 0 "register_operand" "=x")
9493 (match_operand:V4SI 1 "register_operand" "x"))
9495 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9497 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9498 [(set_attr "type" "sselog")
9499 (set_attr "prefix_extra" "1")
9500 (set_attr "prefix" "vex")
9501 (set_attr "mode" "TI")])
;; Non-VEX packusdw: V8HI destination built from two V4SI sources.
;; Operand 1 is tied to the destination; operand 2 may be an SSE register
;; or memory.
9503 (define_insn "sse4_1_packusdw"
9504 [(set (match_operand:V8HI 0 "register_operand" "=x")
9507 (match_operand:V4SI 1 "register_operand" "0"))
9509 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9511 "packusdw\t{%2, %0|%0, %2}"
9512 [(set_attr "type" "sselog")
9513 (set_attr "prefix_extra" "1")
9514 (set_attr "mode" "TI")])
;; VEX-encoded vpblendvb on V16QI, modelled as an unspec.  All operands
;; are explicit: operands 1 and 3 are SSE registers, operand 2 a register
;; or memory.  length_immediate is 1 although operand 3 is a register
;; (NOTE(review): presumably the fourth register is encoded in an
;; immediate byte -- confirm against the ISA reference).
9516 (define_insn "*avx_pblendvb"
9517 [(set (match_operand:V16QI 0 "register_operand" "=x")
9518 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9519 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9520 (match_operand:V16QI 3 "register_operand" "x")]
9523 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9524 [(set_attr "type" "ssemov")
9525 (set_attr "prefix_extra" "1")
9526 (set_attr "length_immediate" "1")
9527 (set_attr "prefix" "vex")
9528 (set_attr "mode" "TI")])
;; Non-VEX pblendvb on V16QI, modelled as an unspec.  Operand 3 uses the
;; "Yz" constraint while operands 0-2 use *_not_xmm0 predicates
;; (NOTE(review): presumably because the mask is taken implicitly from
;; xmm0 -- confirm against the constraint and predicate definitions).
9530 (define_insn "sse4_1_pblendvb"
9531 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9532 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9533 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9534 (match_operand:V16QI 3 "register_operand" "Yz")]
9537 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9538 [(set_attr "type" "ssemov")
9539 (set_attr "prefix_extra" "1")
9540 (set_attr "mode" "TI")])
;; VEX-encoded vpblendw on V8HI.  In the RTL, operand 2 (register or
;; memory) is listed before operand 1, followed by the immediate mask in
;; operand 3 (accepted by const_0_to_255_operand).  Unnamed pattern.
9542 (define_insn "*avx_pblendw"
9543 [(set (match_operand:V8HI 0 "register_operand" "=x")
9545 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9546 (match_operand:V8HI 1 "register_operand" "x")
9547 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9549 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9550 [(set_attr "type" "ssemov")
9551 (set_attr "prefix" "vex")
9552 (set_attr "prefix_extra" "1")
9553 (set_attr "length_immediate" "1")
9554 (set_attr "mode" "TI")])
;; Non-VEX pblendw on V8HI.  In the RTL, operand 2 (register or memory) is
;; listed before operand 1 (tied to the destination), followed by the
;; immediate mask in operand 3.
9556 (define_insn "sse4_1_pblendw"
9557 [(set (match_operand:V8HI 0 "register_operand" "=x")
9559 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9560 (match_operand:V8HI 1 "register_operand" "0")
9561 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9563 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9564 [(set_attr "type" "ssemov")
9565 (set_attr "prefix_extra" "1")
9566 (set_attr "length_immediate" "1")
9567 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single source
;; that may be an SSE register or memory.  One pattern covers both
;; encodings via the "%v" template prefix and prefix "maybe_vex".
9569 (define_insn "sse4_1_phminposuw"
9570 [(set (match_operand:V8HI 0 "register_operand" "=x")
9571 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9572 UNSPEC_PHMINPOSUW))]
9574 "%vphminposuw\t{%1, %0|%0, %1}"
9575 [(set_attr "type" "sselog1")
9576 (set_attr "prefix_extra" "1")
9577 (set_attr "prefix" "maybe_vex")
9578 (set_attr "mode" "TI")])
;; pmovsxbw, named form: V8HI destination, source is a full V16QI SSE
;; register from which elements are selected starting at index 0.
;; One pattern covers both encodings ("%v" / "maybe_vex").
9580 (define_insn "sse4_1_extendv8qiv8hi2"
9581 [(set (match_operand:V8HI 0 "register_operand" "=x")
9584 (match_operand:V16QI 1 "register_operand" "x")
9585 (parallel [(const_int 0)
9594 "%vpmovsxbw\t{%1, %0|%0, %1}"
9595 [(set_attr "type" "ssemov")
9596 (set_attr "prefix_extra" "1")
9597 (set_attr "prefix" "maybe_vex")
9598 (set_attr "mode" "TI")])
;; pmovsxbw, unnamed variant: the source is a V8QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI, which lets the pattern accept
;; a memory source.
9600 (define_insn "*sse4_1_extendv8qiv8hi2"
9601 [(set (match_operand:V8HI 0 "register_operand" "=x")
9604 (vec_duplicate:V16QI
9605 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9606 (parallel [(const_int 0)
9615 "%vpmovsxbw\t{%1, %0|%0, %1}"
9616 [(set_attr "type" "ssemov")
9617 (set_attr "prefix_extra" "1")
9618 (set_attr "prefix" "maybe_vex")
9619 (set_attr "mode" "TI")])
;; pmovsxbd, named form: V4SI destination, source is a full V16QI SSE
;; register from which elements are selected starting at index 0.
;; One pattern covers both encodings ("%v" / "maybe_vex").
9621 (define_insn "sse4_1_extendv4qiv4si2"
9622 [(set (match_operand:V4SI 0 "register_operand" "=x")
9625 (match_operand:V16QI 1 "register_operand" "x")
9626 (parallel [(const_int 0)
9631 "%vpmovsxbd\t{%1, %0|%0, %1}"
9632 [(set_attr "type" "ssemov")
9633 (set_attr "prefix_extra" "1")
9634 (set_attr "prefix" "maybe_vex")
9635 (set_attr "mode" "TI")])
;; pmovsxbd, unnamed variant: the source is a V4QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI, which lets the pattern accept
;; a memory source.
9637 (define_insn "*sse4_1_extendv4qiv4si2"
9638 [(set (match_operand:V4SI 0 "register_operand" "=x")
9641 (vec_duplicate:V16QI
9642 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9643 (parallel [(const_int 0)
9648 "%vpmovsxbd\t{%1, %0|%0, %1}"
9649 [(set_attr "type" "ssemov")
9650 (set_attr "prefix_extra" "1")
9651 (set_attr "prefix" "maybe_vex")
9652 (set_attr "mode" "TI")])
;; Register form of pmovsxbq: operand 1 is a full V16QI register, the
;; parallel selects source elements starting at index 0, and the result
;; is a V2DI register.
9654 (define_insn "sse4_1_extendv2qiv2di2"
9655 [(set (match_operand:V2DI 0 "register_operand" "=x")
9658 (match_operand:V16QI 1 "register_operand" "x")
9659 (parallel [(const_int 0)
9662 "%vpmovsxbq\t{%1, %0|%0, %1}"
9663 [(set_attr "type" "ssemov")
9664 (set_attr "prefix_extra" "1")
9665 (set_attr "prefix" "maybe_vex")
9666 (set_attr "mode" "TI")])
;; Narrow-source form of pmovsxbq: operand 1 is a V2QI register or memory
;; operand wrapped in vec_duplicate:V16QI; the result is a V2DI register.
9668 (define_insn "*sse4_1_extendv2qiv2di2"
9669 [(set (match_operand:V2DI 0 "register_operand" "=x")
9672 (vec_duplicate:V16QI
9673 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9674 (parallel [(const_int 0)
9677 "%vpmovsxbq\t{%1, %0|%0, %1}"
9678 [(set_attr "type" "ssemov")
9679 (set_attr "prefix_extra" "1")
9680 (set_attr "prefix" "maybe_vex")
9681 (set_attr "mode" "TI")])
;; Register form of pmovsxwd: operand 1 is a full V8HI register, the
;; parallel selects source elements starting at index 0, and the result
;; is a V4SI register.
9683 (define_insn "sse4_1_extendv4hiv4si2"
9684 [(set (match_operand:V4SI 0 "register_operand" "=x")
9687 (match_operand:V8HI 1 "register_operand" "x")
9688 (parallel [(const_int 0)
9693 "%vpmovsxwd\t{%1, %0|%0, %1}"
9694 [(set_attr "type" "ssemov")
9695 (set_attr "prefix_extra" "1")
9696 (set_attr "prefix" "maybe_vex")
9697 (set_attr "mode" "TI")])
;; Narrow-source form of pmovsxwd.  The instruction converts four 16-bit
;; elements (64 bits of source) into a V4SI result, so operand 1 must be
;; V4HI -- the same source mode *sse4_1_zero_extendv4hiv4si2 uses for
;; pmovzxwd.  A V2HI operand would describe only 32 bits of the data the
;; instruction actually reads.
9699 (define_insn "*sse4_1_extendv4hiv4si2"
9700 [(set (match_operand:V4SI 0 "register_operand" "=x")
9704 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9705 (parallel [(const_int 0)
9710 "%vpmovsxwd\t{%1, %0|%0, %1}"
9711 [(set_attr "type" "ssemov")
9712 (set_attr "prefix_extra" "1")
9713 (set_attr "prefix" "maybe_vex")
9714 (set_attr "mode" "TI")])
;; Register form of pmovsxwq: operand 1 is a full V8HI register, the
;; parallel selects source elements starting at index 0, and the result
;; is a V2DI register.
9716 (define_insn "sse4_1_extendv2hiv2di2"
9717 [(set (match_operand:V2DI 0 "register_operand" "=x")
9720 (match_operand:V8HI 1 "register_operand" "x")
9721 (parallel [(const_int 0)
9724 "%vpmovsxwq\t{%1, %0|%0, %1}"
9725 [(set_attr "type" "ssemov")
9726 (set_attr "prefix_extra" "1")
9727 (set_attr "prefix" "maybe_vex")
9728 (set_attr "mode" "TI")])
;; Narrow-source form of pmovsxwq.  The instruction converts two 16-bit
;; elements (32 bits of source) into a V2DI result, so operand 1 must be
;; V2HI -- the same source mode *sse4_1_zero_extendv2hiv2di2 uses for
;; pmovzxwq.  A V8HI operand here would duplicate the register form and
;; claim a 128-bit memory read the instruction does not perform.
9730 (define_insn "*sse4_1_extendv2hiv2di2"
9731 [(set (match_operand:V2DI 0 "register_operand" "=x")
9735 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9736 (parallel [(const_int 0)
9739 "%vpmovsxwq\t{%1, %0|%0, %1}"
9740 [(set_attr "type" "ssemov")
9741 (set_attr "prefix_extra" "1")
9742 (set_attr "prefix" "maybe_vex")
9743 (set_attr "mode" "TI")])
;; Register form of pmovsxdq: operand 1 is a full V4SI register, the
;; parallel selects source elements starting at index 0, and the result
;; is a V2DI register.
9745 (define_insn "sse4_1_extendv2siv2di2"
9746 [(set (match_operand:V2DI 0 "register_operand" "=x")
9749 (match_operand:V4SI 1 "register_operand" "x")
9750 (parallel [(const_int 0)
9753 "%vpmovsxdq\t{%1, %0|%0, %1}"
9754 [(set_attr "type" "ssemov")
9755 (set_attr "prefix_extra" "1")
9756 (set_attr "prefix" "maybe_vex")
9757 (set_attr "mode" "TI")])
;; Narrow-source form of pmovsxdq: operand 1 is a V2SI register or memory
;; operand; the result is a V2DI register.
9759 (define_insn "*sse4_1_extendv2siv2di2"
9760 [(set (match_operand:V2DI 0 "register_operand" "=x")
9764 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9765 (parallel [(const_int 0)
9768 "%vpmovsxdq\t{%1, %0|%0, %1}"
9769 [(set_attr "type" "ssemov")
9770 (set_attr "prefix_extra" "1")
9771 (set_attr "prefix" "maybe_vex")
9772 (set_attr "mode" "TI")])
;; Register form of pmovzxbw: operand 1 is a full V16QI register, the
;; parallel selects source elements starting at index 0, and the result
;; is a V8HI register.
9774 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9775 [(set (match_operand:V8HI 0 "register_operand" "=x")
9778 (match_operand:V16QI 1 "register_operand" "x")
9779 (parallel [(const_int 0)
9788 "%vpmovzxbw\t{%1, %0|%0, %1}"
9789 [(set_attr "type" "ssemov")
9790 (set_attr "prefix_extra" "1")
9791 (set_attr "prefix" "maybe_vex")
9792 (set_attr "mode" "TI")])
;; Narrow-source form of pmovzxbw: operand 1 is a V8QI register or memory
;; operand wrapped in vec_duplicate:V16QI; the result is a V8HI register.
9794 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9795 [(set (match_operand:V8HI 0 "register_operand" "=x")
9798 (vec_duplicate:V16QI
9799 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9800 (parallel [(const_int 0)
9809 "%vpmovzxbw\t{%1, %0|%0, %1}"
9810 [(set_attr "type" "ssemov")
9811 (set_attr "prefix_extra" "1")
9812 (set_attr "prefix" "maybe_vex")
9813 (set_attr "mode" "TI")])
;; Register form of pmovzxbd: operand 1 is a full V16QI register, the
;; parallel selects source elements starting at index 0, and the result
;; is a V4SI register.
9815 (define_insn "sse4_1_zero_extendv4qiv4si2"
9816 [(set (match_operand:V4SI 0 "register_operand" "=x")
9819 (match_operand:V16QI 1 "register_operand" "x")
9820 (parallel [(const_int 0)
9825 "%vpmovzxbd\t{%1, %0|%0, %1}"
9826 [(set_attr "type" "ssemov")
9827 (set_attr "prefix_extra" "1")
9828 (set_attr "prefix" "maybe_vex")
9829 (set_attr "mode" "TI")])
;; Narrow-source form of pmovzxbd: operand 1 is a V4QI register or memory
;; operand wrapped in vec_duplicate:V16QI; the result is a V4SI register.
9831 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9832 [(set (match_operand:V4SI 0 "register_operand" "=x")
9835 (vec_duplicate:V16QI
9836 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9837 (parallel [(const_int 0)
9842 "%vpmovzxbd\t{%1, %0|%0, %1}"
9843 [(set_attr "type" "ssemov")
9844 (set_attr "prefix_extra" "1")
9845 (set_attr "prefix" "maybe_vex")
9846 (set_attr "mode" "TI")])
;; Register form of pmovzxbq: operand 1 is a full V16QI register, the
;; parallel selects source elements starting at index 0, and the result
;; is a V2DI register.
9848 (define_insn "sse4_1_zero_extendv2qiv2di2"
9849 [(set (match_operand:V2DI 0 "register_operand" "=x")
9852 (match_operand:V16QI 1 "register_operand" "x")
9853 (parallel [(const_int 0)
9856 "%vpmovzxbq\t{%1, %0|%0, %1}"
9857 [(set_attr "type" "ssemov")
9858 (set_attr "prefix_extra" "1")
9859 (set_attr "prefix" "maybe_vex")
9860 (set_attr "mode" "TI")])
;; Narrow-source form of pmovzxbq: operand 1 is a V2QI register or memory
;; operand wrapped in vec_duplicate:V16QI; the result is a V2DI register.
9862 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9863 [(set (match_operand:V2DI 0 "register_operand" "=x")
9866 (vec_duplicate:V16QI
9867 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9868 (parallel [(const_int 0)
9871 "%vpmovzxbq\t{%1, %0|%0, %1}"
9872 [(set_attr "type" "ssemov")
9873 (set_attr "prefix_extra" "1")
9874 (set_attr "prefix" "maybe_vex")
9875 (set_attr "mode" "TI")])
;; Register form of pmovzxwd: operand 1 is a full V8HI register, the
;; parallel selects source elements starting at index 0, and the result
;; is a V4SI register.
9877 (define_insn "sse4_1_zero_extendv4hiv4si2"
9878 [(set (match_operand:V4SI 0 "register_operand" "=x")
9881 (match_operand:V8HI 1 "register_operand" "x")
9882 (parallel [(const_int 0)
9887 "%vpmovzxwd\t{%1, %0|%0, %1}"
9888 [(set_attr "type" "ssemov")
9889 (set_attr "prefix_extra" "1")
9890 (set_attr "prefix" "maybe_vex")
9891 (set_attr "mode" "TI")])
;; Narrow-source form of pmovzxwd: operand 1 is a V4HI register or memory
;; operand (four 16-bit elements); the result is a V4SI register.
9893 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9894 [(set (match_operand:V4SI 0 "register_operand" "=x")
9898 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9899 (parallel [(const_int 0)
9904 "%vpmovzxwd\t{%1, %0|%0, %1}"
9905 [(set_attr "type" "ssemov")
9906 (set_attr "prefix_extra" "1")
9907 (set_attr "prefix" "maybe_vex")
9908 (set_attr "mode" "TI")])
;; Register form of pmovzxwq: operand 1 is a full V8HI register, the
;; parallel selects source elements starting at index 0, and the result
;; is a V2DI register.
9910 (define_insn "sse4_1_zero_extendv2hiv2di2"
9911 [(set (match_operand:V2DI 0 "register_operand" "=x")
9914 (match_operand:V8HI 1 "register_operand" "x")
9915 (parallel [(const_int 0)
9918 "%vpmovzxwq\t{%1, %0|%0, %1}"
9919 [(set_attr "type" "ssemov")
9920 (set_attr "prefix_extra" "1")
9921 (set_attr "prefix" "maybe_vex")
9922 (set_attr "mode" "TI")])
;; Narrow-source form of pmovzxwq: operand 1 is a V2HI register or memory
;; operand (two 16-bit elements); the result is a V2DI register.
9924 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9925 [(set (match_operand:V2DI 0 "register_operand" "=x")
9929 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9930 (parallel [(const_int 0)
9933 "%vpmovzxwq\t{%1, %0|%0, %1}"
9934 [(set_attr "type" "ssemov")
9935 (set_attr "prefix_extra" "1")
9936 (set_attr "prefix" "maybe_vex")
9937 (set_attr "mode" "TI")])
;; Register form of pmovzxdq: operand 1 is a full V4SI register, the
;; parallel selects source elements starting at index 0, and the result
;; is a V2DI register.
9939 (define_insn "sse4_1_zero_extendv2siv2di2"
9940 [(set (match_operand:V2DI 0 "register_operand" "=x")
9943 (match_operand:V4SI 1 "register_operand" "x")
9944 (parallel [(const_int 0)
9947 "%vpmovzxdq\t{%1, %0|%0, %1}"
9948 [(set_attr "type" "ssemov")
9949 (set_attr "prefix_extra" "1")
9950 (set_attr "prefix" "maybe_vex")
9951 (set_attr "mode" "TI")])
;; Narrow-source form of pmovzxdq: operand 1 is a V2SI register or memory
;; operand; the result is a V2DI register.
9953 (define_insn "*sse4_1_zero_extendv2siv2di2"
9954 [(set (match_operand:V2DI 0 "register_operand" "=x")
9958 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9959 (parallel [(const_int 0)
9962 "%vpmovzxdq\t{%1, %0|%0, %1}"
9963 [(set_attr "type" "ssemov")
9964 (set_attr "prefix_extra" "1")
9965 (set_attr "prefix" "maybe_vex")
9966 (set_attr "mode" "TI")])
9968 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9969 ;; setting FLAGS_REG, but they are not really compare instructions.
;; AVX vtest for every mode in AVXMODEF2P.  The only effect described is
;; setting FLAGS_REG (mode CC) from an unspec of operand 0 (register) and
;; operand 1 (register or memory); no vector result is written.
9970 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9971 [(set (reg:CC FLAGS_REG)
9972 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9973 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9976 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9977 [(set_attr "type" "ssecomi")
9978 (set_attr "prefix_extra" "1")
9979 (set_attr "prefix" "vex")
9980 (set_attr "mode" "<MODE>")])
9982 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9983 ;; but it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (mode CC) from an unspec of two V4DI
;; operands, the second of which may be in memory.  Always VEX-encoded
;; ("prefix" is "vex") and the insn mode is OI.
9984 (define_insn "avx_ptest256"
9985 [(set (reg:CC FLAGS_REG)
9986 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9987 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9990 "vptest\t{%1, %0|%0, %1}"
9991 [(set_attr "type" "ssecomi")
9992 (set_attr "prefix_extra" "1")
9993 (set_attr "prefix" "vex")
9994 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (mode CC) from an unspec of two V2DI
;; operands, the second of which may be in memory.  The "prefix"
;; attribute is "maybe_vex" and the insn mode is TI.
9996 (define_insn "sse4_1_ptest"
9997 [(set (reg:CC FLAGS_REG)
9998 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9999 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10002 "%vptest\t{%1, %0|%0, %1}"
10003 [(set_attr "type" "ssecomi")
10004 (set_attr "prefix_extra" "1")
10005 (set_attr "prefix" "maybe_vex")
10006 (set_attr "mode" "TI")])
;; 256-bit packed vround for each mode in AVX256MODEF2P.  Operand 1 is the
;; source (register or memory) and operand 2 is an immediate restricted
;; to 0..15 by const_0_to_15_operand; it is printed first in the AT&T
;; alternative and last in the Intel alternative.
10008 (define_insn "avx_round<ssemodesuffix>256"
10009 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
10010 (unspec:AVX256MODEF2P
10011 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
10012 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10015 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10016 [(set_attr "type" "ssecvt")
10017 (set_attr "prefix_extra" "1")
10018 (set_attr "length_immediate" "1")
10019 (set_attr "prefix" "vex")
10020 (set_attr "mode" "<MODE>")])
;; 128-bit packed round for each mode in SSEMODEF2P.  Operand 1 is the
;; source (register or memory); operand 2 is an immediate restricted to
;; 0..15.  Unlike the 256-bit form, this one also sets "prefix_data16"
;; and uses "maybe_vex".
10022 (define_insn "sse4_1_round<ssemodesuffix>"
10023 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10025 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
10026 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10029 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10030 [(set_attr "type" "ssecvt")
10031 (set_attr "prefix_data16" "1")
10032 (set_attr "prefix_extra" "1")
10033 (set_attr "length_immediate" "1")
10034 (set_attr "prefix" "maybe_vex")
10035 (set_attr "mode" "<MODE>")])
;; AVX scalar round, three-operand form.  The result is a vec_merge of the
;; rounded operand 2 (register, immediate in operand 3) with operand 1,
;; which is a separate register input rather than being tied to the
;; destination.
10037 (define_insn "*avx_round<ssescalarmodesuffix>"
10038 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10039 (vec_merge:SSEMODEF2P
10041 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10042 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10044 (match_operand:SSEMODEF2P 1 "register_operand" "x")
10047 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10048 [(set_attr "type" "ssecvt")
10049 (set_attr "prefix_extra" "1")
10050 (set_attr "length_immediate" "1")
10051 (set_attr "prefix" "vex")
10052 (set_attr "mode" "<MODE>")])
;; SSE4.1 scalar round, two-operand form.  Same vec_merge shape as the AVX
;; pattern, but operand 1 carries the "0" constraint, tying it to the
;; destination register, so the template prints only operands 0, 2 and 3.
10054 (define_insn "sse4_1_round<ssescalarmodesuffix>"
10055 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10056 (vec_merge:SSEMODEF2P
10058 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10059 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10061 (match_operand:SSEMODEF2P 1 "register_operand" "0")
10064 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10065 [(set_attr "type" "ssecvt")
10066 (set_attr "prefix_data16" "1")
10067 (set_attr "prefix_extra" "1")
10068 (set_attr "length_immediate" "1")
10069 (set_attr "mode" "<MODE>")])
10071 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10073 ;; Intel SSE4.2 string/text processing instructions
10075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern producing three results at once: an SI index
;; (operand 0, constraint "c"), a V16QI mask (operand 1, constraint "Yz")
;; and FLAGS_REG.  Inputs are two V16QI vectors (operands 2 and 4, both
;; barred from xmm0 by their predicates), their SI lengths (operands 3
;; and 5, constraints "a" and "d") and an 8-bit immediate (operand 6).
;; The split runs while pseudos can still be created.  It checks for
;; REG_UNUSED notes on the current insn to learn which results are live,
;; then emits pcmpestri for the index, pcmpestrm for the mask, and the
;; cconly form when the flags are used but neither other result is.
10077 (define_insn_and_split "sse4_2_pcmpestr"
10078 [(set (match_operand:SI 0 "register_operand" "=c,c")
10080 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10081 (match_operand:SI 3 "register_operand" "a,a")
10082 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10083 (match_operand:SI 5 "register_operand" "d,d")
10084 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10086 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10094 (set (reg:CC FLAGS_REG)
10103 && can_create_pseudo_p ()"
10108 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10109 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10110 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10113 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10114 operands[3], operands[4],
10115 operands[5], operands[6]));
10117 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10118 operands[3], operands[4],
10119 operands[5], operands[6]));
10120 if (flags && !(ecx || xmm0))
10121 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10122 operands[2], operands[3],
10123 operands[4], operands[5],
10127 [(set_attr "type" "sselog")
10128 (set_attr "prefix_data16" "1")
10129 (set_attr "prefix_extra" "1")
10130 (set_attr "length_immediate" "1")
10131 (set_attr "memory" "none,load")
10132 (set_attr "mode" "TI")])
;; pcmpestri: index-producing form.  Operand 0 is the SI result
;; (constraint "c"); operands 1 and 3 are the V16QI inputs (operand 3 may
;; be memory in the second alternative); operands 2 and 4 are the SI
;; lengths (constraints "a" and "d"); operand 5 is the 8-bit immediate.
;; FLAGS_REG is set as well.  Only the vectors and the immediate appear in
;; the template.
10134 (define_insn "sse4_2_pcmpestri"
10135 [(set (match_operand:SI 0 "register_operand" "=c,c")
10137 [(match_operand:V16QI 1 "register_operand" "x,x")
10138 (match_operand:SI 2 "register_operand" "a,a")
10139 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10140 (match_operand:SI 4 "register_operand" "d,d")
10141 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10143 (set (reg:CC FLAGS_REG)
10152 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10153 [(set_attr "type" "sselog")
10154 (set_attr "prefix_data16" "1")
10155 (set_attr "prefix_extra" "1")
10156 (set_attr "prefix" "maybe_vex")
10157 (set_attr "length_immediate" "1")
10158 (set_attr "memory" "none,load")
10159 (set_attr "mode" "TI")])
;; pcmpestrm: mask-producing form.  Operand 0 is the V16QI result
;; (constraint "Yz"); the inputs mirror pcmpestri: V16QI operands 1 and 3
;; (operand 3 may be memory), SI lengths in operands 2 and 4 (constraints
;; "a" and "d") and the 8-bit immediate in operand 5.  FLAGS_REG is set
;; as well.
10161 (define_insn "sse4_2_pcmpestrm"
10162 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10164 [(match_operand:V16QI 1 "register_operand" "x,x")
10165 (match_operand:SI 2 "register_operand" "a,a")
10166 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10167 (match_operand:SI 4 "register_operand" "d,d")
10168 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10170 (set (reg:CC FLAGS_REG)
10179 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10180 [(set_attr "type" "sselog")
10181 (set_attr "prefix_data16" "1")
10182 (set_attr "prefix_extra" "1")
10183 (set_attr "length_immediate" "1")
10184 (set_attr "prefix" "maybe_vex")
10185 (set_attr "memory" "none,load")
10186 (set_attr "mode" "TI")])
;; Flags-only pcmpestr.  Only FLAGS_REG is set; the V16QI and SI results
;; are scratch clobbers (operands 0 and 1).  There are four alternatives:
;; the first two clobber the "Yz" vector scratch and emit pcmpestrm, the
;; last two clobber the "c" integer scratch and emit pcmpestri; within
;; each pair operand 4 is a register or a memory load.
10188 (define_insn "sse4_2_pcmpestr_cconly"
10189 [(set (reg:CC FLAGS_REG)
10191 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10192 (match_operand:SI 3 "register_operand" "a,a,a,a")
10193 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10194 (match_operand:SI 5 "register_operand" "d,d,d,d")
10195 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10197 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10198 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10201 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10202 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10203 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10204 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10205 [(set_attr "type" "sselog")
10206 (set_attr "prefix_data16" "1")
10207 (set_attr "prefix_extra" "1")
10208 (set_attr "length_immediate" "1")
10209 (set_attr "memory" "none,load,none,load")
10210 (set_attr "prefix" "maybe_vex")
10211 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern (no explicit length operands).  It produces
;; an SI index (operand 0, constraint "c"), a V16QI mask (operand 1,
;; constraint "Yz") and FLAGS_REG from two V16QI inputs (operands 2 and 3,
;; both barred from xmm0 by their predicates) and an 8-bit immediate
;; (operand 4).  The split runs while pseudos can still be created and
;; uses REG_UNUSED notes on the current insn to decide which of
;; pcmpistri, pcmpistrm and the cconly form to emit; the cconly form is
;; used when the flags are live but neither other result is.
10213 (define_insn_and_split "sse4_2_pcmpistr"
10214 [(set (match_operand:SI 0 "register_operand" "=c,c")
10216 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10217 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10218 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10220 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10226 (set (reg:CC FLAGS_REG)
10233 && can_create_pseudo_p ()"
10238 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10239 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10240 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10243 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10244 operands[3], operands[4]));
10246 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10247 operands[3], operands[4]));
10248 if (flags && !(ecx || xmm0))
10249 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10250 operands[2], operands[3],
10254 [(set_attr "type" "sselog")
10255 (set_attr "prefix_data16" "1")
10256 (set_attr "prefix_extra" "1")
10257 (set_attr "length_immediate" "1")
10258 (set_attr "memory" "none,load")
10259 (set_attr "mode" "TI")])
;; pcmpistri: index-producing form.  Operand 0 is the SI result
;; (constraint "c"); operands 1 and 2 are the V16QI inputs (operand 2 may
;; be memory in the second alternative); operand 3 is the 8-bit
;; immediate.  FLAGS_REG is set as well.
10261 (define_insn "sse4_2_pcmpistri"
10262 [(set (match_operand:SI 0 "register_operand" "=c,c")
10264 [(match_operand:V16QI 1 "register_operand" "x,x")
10265 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10266 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10268 (set (reg:CC FLAGS_REG)
10275 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10276 [(set_attr "type" "sselog")
10277 (set_attr "prefix_data16" "1")
10278 (set_attr "prefix_extra" "1")
10279 (set_attr "length_immediate" "1")
10280 (set_attr "prefix" "maybe_vex")
10281 (set_attr "memory" "none,load")
10282 (set_attr "mode" "TI")])
;; pcmpistrm: mask-producing form.  Operand 0 is the V16QI result
;; (constraint "Yz"); operands 1 and 2 are the V16QI inputs (operand 2 may
;; be memory in the second alternative); operand 3 is the 8-bit
;; immediate.  FLAGS_REG is set as well.
10284 (define_insn "sse4_2_pcmpistrm"
10285 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10287 [(match_operand:V16QI 1 "register_operand" "x,x")
10288 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10289 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10291 (set (reg:CC FLAGS_REG)
10298 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10299 [(set_attr "type" "sselog")
10300 (set_attr "prefix_data16" "1")
10301 (set_attr "prefix_extra" "1")
10302 (set_attr "length_immediate" "1")
10303 (set_attr "prefix" "maybe_vex")
10304 (set_attr "memory" "none,load")
10305 (set_attr "mode" "TI")])
;; Flags-only pcmpistr.  Only FLAGS_REG is set; the V16QI and SI results
;; are scratch clobbers (operands 0 and 1).  There are four alternatives:
;; the first two clobber the "Yz" vector scratch and emit pcmpistrm, the
;; last two clobber the "c" integer scratch and emit pcmpistri; within
;; each pair operand 3 is a register or a memory load.
10307 (define_insn "sse4_2_pcmpistr_cconly"
10308 [(set (reg:CC FLAGS_REG)
10310 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10311 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10312 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10314 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10315 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10318 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10319 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10320 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10321 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10322 [(set_attr "type" "sselog")
10323 (set_attr "prefix_data16" "1")
10324 (set_attr "prefix_extra" "1")
10325 (set_attr "length_immediate" "1")
10326 (set_attr "memory" "none,load,none,load")
10327 (set_attr "prefix" "maybe_vex")
10328 (set_attr "mode" "TI")])
10330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10332 ;; XOP instructions
10334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10336 ;; XOP parallel integer multiply/add instructions.
10337 ;; Note the XOP multiply/add instructions
10338 ;; a[i] = b[i] * c[i] + d[i];
10339 ;; do not allow the value being added to be a memory operation.
;; vpmacsww on V8HI.  Operands 1 and 2 are the multiplicands ("%" marks
;; operand 1 as commutative with operand 2, and only operand 2 may be
;; memory); operand 3 is the value added and its "x" constraint restricts
;; it to a register.
10340 (define_insn "xop_pmacsww"
10341 [(set (match_operand:V8HI 0 "register_operand" "=x")
10344 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10345 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10346 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10348 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10349 [(set_attr "type" "ssemuladd")
10350 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a mult:V8HI of operands 1 and 2 combined with
;; operand 3.  Only operand 2 may be memory; operand 3 is register-only
;; through its "x" constraint.
10352 (define_insn "xop_pmacssww"
10353 [(set (match_operand:V8HI 0 "register_operand" "=x")
10355 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10356 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10357 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10359 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10360 [(set_attr "type" "ssemuladd")
10361 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI.  Operands 1 and 2 are the multiplicands (commutative,
;; only operand 2 may be memory); operand 3 is the register-only value
;; added.
10363 (define_insn "xop_pmacsdd"
10364 [(set (match_operand:V4SI 0 "register_operand" "=x")
10367 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10368 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10369 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10371 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10372 [(set_attr "type" "ssemuladd")
10373 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a mult:V4SI of operands 1 and 2 combined with
;; operand 3.  Only operand 2 may be memory; operand 3 is register-only.
10375 (define_insn "xop_pmacssdd"
10376 [(set (match_operand:V4SI 0 "register_operand" "=x")
10378 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10379 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10380 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10382 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10383 [(set_attr "type" "ssemuladd")
10384 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI multiplicands (operands 1 and 2), V2DI accumulator
;; input (operand 3, register-only) and V2DI result.  Both element
;; selectors start at index 1, i.e. this is the form that uses the
;; odd-numbered source elements.
10386 (define_insn "xop_pmacssdql"
10387 [(set (match_operand:V2DI 0 "register_operand" "=x")
10392 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10393 (parallel [(const_int 1)
10396 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10397 (parallel [(const_int 1)
10399 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10401 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10402 [(set_attr "type" "ssemuladd")
10403 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI multiplicands (operands 1 and 2), V2DI accumulator
;; input (operand 3, register-only) and V2DI result.  Both element
;; selectors start at index 0, i.e. this is the form that uses the
;; even-numbered source elements.
10405 (define_insn "xop_pmacssdqh"
10406 [(set (match_operand:V2DI 0 "register_operand" "=x")
10411 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10412 (parallel [(const_int 0)
10416 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10417 (parallel [(const_int 0)
10419 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10421 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10422 [(set_attr "type" "ssemuladd")
10423 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI multiplicands (operands 1 and 2), V2DI accumulator
;; input (operand 3, register-only) and V2DI result.  Both element
;; selectors start at index 1.
10425 (define_insn "xop_pmacsdql"
10426 [(set (match_operand:V2DI 0 "register_operand" "=x")
10431 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10432 (parallel [(const_int 1)
10436 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10437 (parallel [(const_int 1)
10439 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10441 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10442 [(set_attr "type" "ssemuladd")
10443 (set_attr "mode" "TI")])
10445 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10446 ;; fake it with a multiply/add. In general, we expect the define_split to
10447 ;; occur before register allocation, so we have to handle the corner case where
10448 ;; the target is the same as operands 1/2
;; Widening multiply of source elements 1 and 3 of two V4SI operands into
;; a V2DI result.  The destination is earlyclobber ("=&x"), so it cannot
;; share a register with either input.  The split condition requires
;; reload_completed, and the preparation code sets operands[3] to the
;; V2DI zero vector for use by the replacement pattern.
10449 (define_insn_and_split "xop_mulv2div2di3_low"
10450 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10454 (match_operand:V4SI 1 "register_operand" "%x")
10455 (parallel [(const_int 1)
10459 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10460 (parallel [(const_int 1)
10461 (const_int 3)])))))]
10464 "&& reload_completed"
10465 [(set (match_dup 0)
10473 (parallel [(const_int 1)
10478 (parallel [(const_int 1)
10482 operands[3] = CONST0_RTX (V2DImode);
10484 [(set_attr "type" "ssemul")
10485 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI multiplicands (operands 1 and 2), V2DI accumulator
;; input (operand 3, register-only) and V2DI result.  Both element
;; selectors start at index 0.
10487 (define_insn "xop_pmacsdqh"
10488 [(set (match_operand:V2DI 0 "register_operand" "=x")
10493 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10494 (parallel [(const_int 0)
10498 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10499 (parallel [(const_int 0)
10501 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10503 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10504 [(set_attr "type" "ssemuladd")
10505 (set_attr "mode" "TI")])
10507 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10508 ;; fake it with a multiply/add. In general, we expect the define_split to
10509 ;; occur before register allocation, so we have to handle the corner case where
10510 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of source elements 0 and 2 of two V4SI operands into
;; a V2DI result.  The destination is earlyclobber ("=&x"), so it cannot
;; share a register with either input.  The split condition requires
;; reload_completed, and the preparation code sets operands[3] to the
;; V2DI zero vector for use by the replacement pattern.
10511 (define_insn_and_split "xop_mulv2div2di3_high"
10512 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10516 (match_operand:V4SI 1 "register_operand" "%x")
10517 (parallel [(const_int 0)
10521 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10522 (parallel [(const_int 0)
10523 (const_int 2)])))))]
10526 "&& reload_completed"
10527 [(set (match_dup 0)
10535 (parallel [(const_int 0)
10540 (parallel [(const_int 0)
10544 operands[3] = CONST0_RTX (V2DImode);
10546 [(set_attr "type" "ssemul")
10547 (set_attr "mode" "TI")])
10549 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI multiplicands (operands 1 and 2), V4SI accumulator
;; input (operand 3, register-only) and V4SI result.  Both element
;; selectors start at index 1.
10550 (define_insn "xop_pmacsswd"
10551 [(set (match_operand:V4SI 0 "register_operand" "=x")
10556 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10557 (parallel [(const_int 1)
10563 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10564 (parallel [(const_int 1)
10568 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10570 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10571 [(set_attr "type" "ssemuladd")
10572 (set_attr "mode" "TI")])
;; vpmacswd: V8HI multiplicands (operands 1 and 2), V4SI accumulator
;; input (operand 3, register-only) and V4SI result.  Both element
;; selectors start at index 1.
10574 (define_insn "xop_pmacswd"
10575 [(set (match_operand:V4SI 0 "register_operand" "=x")
10580 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10581 (parallel [(const_int 1)
10587 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10588 (parallel [(const_int 1)
10592 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10594 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10595 [(set_attr "type" "ssemuladd")
10596 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI multiplicands (operands 1 and 2), V4SI accumulator
;; input (operand 3, register-only) and V4SI result.  The pattern has
;; element selectors starting at index 0 and further selectors starting
;; at index 1 and ending at 7, so both the even and the odd source
;; elements contribute to each result element.
10598 (define_insn "xop_pmadcsswd"
10599 [(set (match_operand:V4SI 0 "register_operand" "=x")
10605 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10606 (parallel [(const_int 0)
10612 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10613 (parallel [(const_int 0)
10621 (parallel [(const_int 1)
10628 (parallel [(const_int 1)
10631 (const_int 7)])))))
10632 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10634 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10635 [(set_attr "type" "ssemuladd")
10636 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI multiplicands (operands 1 and 2), V4SI accumulator
;; input (operand 3, register-only) and V4SI result.  As in
;; xop_pmadcsswd, selectors starting at index 0 and at index 1 (ending
;; at 7) are both present.
10638 (define_insn "xop_pmadcswd"
10639 [(set (match_operand:V4SI 0 "register_operand" "=x")
10645 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10646 (parallel [(const_int 0)
10652 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10653 (parallel [(const_int 0)
10661 (parallel [(const_int 1)
10668 (parallel [(const_int 1)
10671 (const_int 7)])))))
10672 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10674 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10675 [(set_attr "type" "ssemuladd")
10676 (set_attr "mode" "TI")])
10678 ;; XOP parallel XMM conditional moves
;; vpcmov for every 128-bit mode in SSEMODE, expressed as if_then_else
;; with operand 3 as the selector and operands 1 and 2 as the two arms.
;; The two alternatives allow a memory operand in either operand 2
;; ("x", "x", "xm") or operand 3 ("m", "x", "x"), never both.
10679 (define_insn "xop_pcmov_<mode>"
10680 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10681 (if_then_else:SSEMODE
10682 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10683 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10684 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10686 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10687 [(set_attr "type" "sse4arg")])
;; 256-bit vpcmov for every mode in AVX256MODE.  Same if_then_else shape
;; and constraint alternatives as the 128-bit pattern: operand 3 selects
;; between operands 1 and 2, and at most one of operands 2 and 3 is in
;; memory.
10689 (define_insn "xop_pcmov_<mode>256"
10690 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10691 (if_then_else:AVX256MODE
10692 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10693 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10694 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10696 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10697 [(set_attr "type" "sse4arg")])
10699 ;; XOP horizontal add/subtract instructions
;; vphaddbw: horizontal add from a V16QI source (register or memory) into
;; a V8HI register.  One selector starts at element 0 and the other
;; starts at element 1 and ends at 15, so neighbouring elements are
;; paired.
10700 (define_insn "xop_phaddbw"
10701 [(set (match_operand:V8HI 0 "register_operand" "=x")
10705 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10706 (parallel [(const_int 0)
10717 (parallel [(const_int 1)
10724 (const_int 15)])))))]
10726 "vphaddbw\t{%1, %0|%0, %1}"
10727 [(set_attr "type" "sseiadd1")])
;; vphaddbd: horizontal add from a V16QI source (register or memory) into
;; a V4SI register.  Four selectors, starting at elements 0, 1, 2 and 3,
;; feed each result element.
10729 (define_insn "xop_phaddbd"
10730 [(set (match_operand:V4SI 0 "register_operand" "=x")
10735 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10736 (parallel [(const_int 0)
10743 (parallel [(const_int 1)
10746 (const_int 13)]))))
10751 (parallel [(const_int 2)
10758 (parallel [(const_int 3)
10761 (const_int 15)]))))))]
10763 "vphaddbd\t{%1, %0|%0, %1}"
10764 [(set_attr "type" "sseiadd1")])
;; vphaddbq: horizontal add from a V16QI source (register or memory) into
;; a V2DI register.  Eight selectors are present, starting at elements
;; 0-3 and 8-11.
10766 (define_insn "xop_phaddbq"
10767 [(set (match_operand:V2DI 0 "register_operand" "=x")
10773 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10774 (parallel [(const_int 0)
10779 (parallel [(const_int 1)
10785 (parallel [(const_int 2)
10790 (parallel [(const_int 3)
10791 (const_int 7)])))))
10797 (parallel [(const_int 8)
10802 (parallel [(const_int 9)
10803 (const_int 13)]))))
10808 (parallel [(const_int 10)
10813 (parallel [(const_int 11)
10814 (const_int 15)])))))))]
10816 "vphaddbq\t{%1, %0|%0, %1}"
10817 [(set_attr "type" "sseiadd1")])
;; vphaddwd: horizontal add from a V8HI source (register or memory) into
;; a V4SI register.  One selector starts at element 0 and the other
;; starts at element 1 and ends at 7.
10819 (define_insn "xop_phaddwd"
10820 [(set (match_operand:V4SI 0 "register_operand" "=x")
10824 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10825 (parallel [(const_int 0)
10832 (parallel [(const_int 1)
10835 (const_int 7)])))))]
10837 "vphaddwd\t{%1, %0|%0, %1}"
10838 [(set_attr "type" "sseiadd1")])
;; vphaddwq: horizontal add from a V8HI source (register or memory) into
;; a V2DI register.  Four selectors, starting at elements 0, 1, 2 and 3,
;; feed each result element.
10840 (define_insn "xop_phaddwq"
10841 [(set (match_operand:V2DI 0 "register_operand" "=x")
10846 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10847 (parallel [(const_int 0)
10852 (parallel [(const_int 1)
10858 (parallel [(const_int 2)
10863 (parallel [(const_int 3)
10864 (const_int 7)]))))))]
10866 "vphaddwq\t{%1, %0|%0, %1}"
10867 [(set_attr "type" "sseiadd1")])
;; vphadddq: horizontal add from a V4SI source (register or memory) into
;; a V2DI register.  One selector starts at element 0 and the other
;; starts at element 1 and ends at 3.
10869 (define_insn "xop_phadddq"
10870 [(set (match_operand:V2DI 0 "register_operand" "=x")
10874 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10875 (parallel [(const_int 0)
10880 (parallel [(const_int 1)
10881 (const_int 3)])))))]
10883 "vphadddq\t{%1, %0|%0, %1}"
10884 [(set_attr "type" "sseiadd1")])
;; vphaddubw: the "u" counterpart of xop_phaddbw, with the same operand
;; modes (V16QI source, V8HI result) and the same selector start indices.
10886 (define_insn "xop_phaddubw"
10887 [(set (match_operand:V8HI 0 "register_operand" "=x")
10891 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10892 (parallel [(const_int 0)
10903 (parallel [(const_int 1)
10910 (const_int 15)])))))]
10912 "vphaddubw\t{%1, %0|%0, %1}"
10913 [(set_attr "type" "sseiadd1")])
;; vphaddubd: the "u" counterpart of xop_phaddbd, with the same operand
;; modes (V16QI source, V4SI result) and the same selector start indices.
10915 (define_insn "xop_phaddubd"
10916 [(set (match_operand:V4SI 0 "register_operand" "=x")
10921 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10922 (parallel [(const_int 0)
10929 (parallel [(const_int 1)
10932 (const_int 13)]))))
10937 (parallel [(const_int 2)
10944 (parallel [(const_int 3)
10947 (const_int 15)]))))))]
10949 "vphaddubd\t{%1, %0|%0, %1}"
10950 [(set_attr "type" "sseiadd1")])
;; vphaddubq: the "u" counterpart of xop_phaddbq, with the same operand
;; modes (V16QI source, V2DI result) and the same selector start indices.
10952 (define_insn "xop_phaddubq"
10953 [(set (match_operand:V2DI 0 "register_operand" "=x")
10959 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10960 (parallel [(const_int 0)
10965 (parallel [(const_int 1)
10971 (parallel [(const_int 2)
10976 (parallel [(const_int 3)
10977 (const_int 7)])))))
10983 (parallel [(const_int 8)
10988 (parallel [(const_int 9)
10989 (const_int 13)]))))
10994 (parallel [(const_int 10)
10999 (parallel [(const_int 11)
11000 (const_int 15)])))))))]
11002 "vphaddubq\t{%1, %0|%0, %1}"
11003 [(set_attr "type" "sseiadd1")])
;; vphadduwd: the "u" counterpart of xop_phaddwd, with the same operand
;; modes (V8HI source, V4SI result) and the same selector start indices.
11005 (define_insn "xop_phadduwd"
11006 [(set (match_operand:V4SI 0 "register_operand" "=x")
11010 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11011 (parallel [(const_int 0)
11018 (parallel [(const_int 1)
11021 (const_int 7)])))))]
11023 "vphadduwd\t{%1, %0|%0, %1}"
11024 [(set_attr "type" "sseiadd1")])
;; vphadduwq: the "u" counterpart of xop_phaddwq, with the same operand
;; modes (V8HI source, V2DI result) and the same selector start indices.
11026 (define_insn "xop_phadduwq"
11027 [(set (match_operand:V2DI 0 "register_operand" "=x")
11032 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11033 (parallel [(const_int 0)
11038 (parallel [(const_int 1)
11044 (parallel [(const_int 2)
11049 (parallel [(const_int 3)
11050 (const_int 7)]))))))]
11052 "vphadduwq\t{%1, %0|%0, %1}"
11053 [(set_attr "type" "sseiadd1")])
;; vphaddudq: the "u" counterpart of xop_phadddq, with the same operand
;; modes (V4SI source, V2DI result) and the same selector start indices.
11055 (define_insn "xop_phaddudq"
11056 [(set (match_operand:V2DI 0 "register_operand" "=x")
11060 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11061 (parallel [(const_int 0)
11066 (parallel [(const_int 1)
11067 (const_int 3)])))))]
11069 "vphaddudq\t{%1, %0|%0, %1}"
11070 [(set_attr "type" "sseiadd1")])
;; vphsubbw: horizontal subtract from a V16QI source (register or memory)
;; into a V8HI register.  One selector starts at element 0 and the other
;; starts at element 1 and ends at 15.
11072 (define_insn "xop_phsubbw"
11073 [(set (match_operand:V8HI 0 "register_operand" "=x")
11077 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11078 (parallel [(const_int 0)
11089 (parallel [(const_int 1)
11096 (const_int 15)])))))]
11098 "vphsubbw\t{%1, %0|%0, %1}"
11099 [(set_attr "type" "sseiadd1")])
;; vphsubwd: horizontal subtract from a V8HI source (register or memory)
;; into a V4SI register.  One selector starts at element 0 and the other
;; starts at element 1 and ends at 7.
11101 (define_insn "xop_phsubwd"
11102 [(set (match_operand:V4SI 0 "register_operand" "=x")
11106 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11107 (parallel [(const_int 0)
11114 (parallel [(const_int 1)
11117 (const_int 7)])))))]
11119 "vphsubwd\t{%1, %0|%0, %1}"
11120 [(set_attr "type" "sseiadd1")])
;; vphsubdq: horizontal subtract from a V4SI source (register or memory)
;; into a V2DI register.  One selector starts at element 0 and the other
;; starts at element 1 and ends at 3.
11122 (define_insn "xop_phsubdq"
11123 [(set (match_operand:V2DI 0 "register_operand" "=x")
11127 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11128 (parallel [(const_int 0)
11133 (parallel [(const_int 1)
11134 (const_int 3)])))))]
11136 "vphsubdq\t{%1, %0|%0, %1}"
11137 [(set_attr "type" "sseiadd1")])
11139 ;; XOP permute instructions
;; vpperm modelled as UNSPEC_XOP_PERMUTE over three V16QI inputs.
;; Operand 1 is always a register; either operand 2 or operand 3 may be a
;; memory operand, and the insn condition rejects the case where both
;; are.
11140 (define_insn "xop_pperm"
11141 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11143 [(match_operand:V16QI 1 "register_operand" "x,x")
11144 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11145 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11146 UNSPEC_XOP_PERMUTE))]
11147 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11148 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11149 [(set_attr "type" "sse4arg")
11150 (set_attr "mode" "TI")])
11152 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI inputs (operands 1 and 2) into a V4SI result using vpperm.
;; Operand 3 is the V16QI selector, recorded with a (use ...).  At most one
;; of operands 2 and 3 may be a memory reference.
11153 (define_insn "xop_pperm_pack_v2di_v4si"
11154 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11157 (match_operand:V2DI 1 "register_operand" "x,x"))
11159 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11160 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11161 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11162 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11163 [(set_attr "type" "sse4arg")
11164 (set_attr "mode" "TI")])
;; Packs two V4SI inputs (operands 1 and 2) into a V8HI result using vpperm.
;; Operand 3 is the V16QI selector, recorded with a (use ...).  At most one
;; of operands 2 and 3 may be a memory reference.
11166 (define_insn "xop_pperm_pack_v4si_v8hi"
11167 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11170 (match_operand:V4SI 1 "register_operand" "x,x"))
11172 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11173 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11174 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11175 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11176 [(set_attr "type" "sse4arg")
11177 (set_attr "mode" "TI")])
;; Packs two V8HI inputs (operands 1 and 2) into a V16QI result using vpperm.
;; Operand 3 is the V16QI selector, recorded with a (use ...).  At most one
;; of operands 2 and 3 may be a memory reference.
11179 (define_insn "xop_pperm_pack_v8hi_v16qi"
11180 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11183 (match_operand:V8HI 1 "register_operand" "x,x"))
11185 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11186 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11187 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11188 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11189 [(set_attr "type" "sse4arg")
11190 (set_attr "mode" "TI")])
11192 ;; XOP packed rotate instructions
;; Expander for a vector rotate-left with an SImode count (operand 2).
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, it
;; is converted to <ssescalarmode> if its mode differs, copied into every
;; one of the <ssescalarnum> elements of a PARALLEL, materialised in a fresh
;; vector register with vec_init<mode>, and the rotate is emitted through
;; xop_vrotl<mode>3 with that per-element count vector.
11193 (define_expand "rotl<mode>3"
11194 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11195 (rotate:SSEMODE1248
11196 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11197 (match_operand:SI 2 "general_operand")))]
11200 /* If we were given a scalar, convert it to parallel */
11201 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11203 rtvec vs = rtvec_alloc (<ssescalarnum>);
11204 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11205 rtx reg = gen_reg_rtx (<MODE>mode);
11206 rtx op2 = operands[2];
11209 if (GET_MODE (op2) != <ssescalarmode>mode)
11211 op2 = gen_reg_rtx (<ssescalarmode>mode);
11212 convert_move (op2, operands[2], false);
11215 for (i = 0; i < <ssescalarnum>; i++)
11216 RTVEC_ELT (vs, i) = op2;
11218 emit_insn (gen_vec_init<mode> (reg, par));
11219 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right with an SImode count (operand 2).
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, it
;; is converted to <ssescalarmode> if needed and broadcast into a vector
;; with vec_init<mode>.  That vector is then negated with neg<mode>2 and
;; passed to xop_vrotl<mode>3, whose RTL pairs rotate with rotatert of the
;; negated count.
11224 (define_expand "rotr<mode>3"
11225 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11226 (rotatert:SSEMODE1248
11227 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11228 (match_operand:SI 2 "general_operand")))]
11231 /* If we were given a scalar, convert it to parallel */
11232 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11234 rtvec vs = rtvec_alloc (<ssescalarnum>);
11235 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11236 rtx neg = gen_reg_rtx (<MODE>mode);
11237 rtx reg = gen_reg_rtx (<MODE>mode);
11238 rtx op2 = operands[2];
11241 if (GET_MODE (op2) != <ssescalarmode>mode)
11243 op2 = gen_reg_rtx (<ssescalarmode>mode);
11244 convert_move (op2, operands[2], false);
11247 for (i = 0; i < <ssescalarnum>; i++)
11248 RTVEC_ELT (vs, i) = op2;
11250 emit_insn (gen_vec_init<mode> (reg, par));
11251 emit_insn (gen_neg<mode>2 (neg, reg));
11252 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left by an immediate: emits vprot<ssevecsize> with operand 2 as
;; the count.  The count must satisfy const_0_to_<sserotatemax>_operand;
;; operand 1 may be a register or memory.
11257 (define_insn "xop_rotl<mode>3"
11258 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11259 (rotate:SSEMODE1248
11260 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11261 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11263 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11264 [(set_attr "type" "sseishft")
11265 (set_attr "length_immediate" "1")
11266 (set_attr "mode" "TI")])
;; Rotate-right by an immediate.  The pattern emits vprot<ssevecsize> with a
;; left-rotate count, so the immediate placed in operand 3 is
;; (element width in bits) - operand 2.  The element width is taken from
;; GET_MODE_BITSIZE (<ssescalarmode>mode).  <ssescalarnum> * 8 must not be
;; used here: <ssescalarnum> is the number of elements, so that product only
;; equals the element width for V4SI and yields 16 instead of 64 for V2DI.
11268 (define_insn "xop_rotr<mode>3"
11269 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11270 (rotatert:SSEMODE1248
11271 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11272 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11275 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11276 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11278 [(set_attr "type" "sseishft")
11279 (set_attr "length_immediate" "1")
11280 (set_attr "mode" "TI")])
;; Rotate-right with a per-element count vector (operand 2): the counts are
;; negated into a fresh register with neg<mode>2 and the result is handed to
;; xop_vrotl<mode>3.
11282 (define_expand "vrotr<mode>3"
11283 [(match_operand:SSEMODE1248 0 "register_operand" "")
11284 (match_operand:SSEMODE1248 1 "register_operand" "")
11285 (match_operand:SSEMODE1248 2 "register_operand" "")]
11288 rtx reg = gen_reg_rtx (<MODE>mode);
11289 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11290 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-left with a per-element count vector: forwards all three operands
;; unchanged to xop_vrotl<mode>3.
11294 (define_expand "vrotl<mode>3"
11295 [(match_operand:SSEMODE1248 0 "register_operand" "")
11296 (match_operand:SSEMODE1248 1 "register_operand" "")
11297 (match_operand:SSEMODE1248 2 "register_operand" "")]
11300 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate: emits vprot<ssevecsize> with operand 2 as a vector of
;; per-element counts.  The RTL is an if_then_else on operand 2 whose arms
;; are a rotate of operand 1 and a rotatert by the negated operand 2.
;; Either operand 1 or operand 2 may be in memory, but not both.
11304 (define_insn "xop_vrotl<mode>3"
11305 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11306 (if_then_else:SSEMODE1248
11308 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11310 (rotate:SSEMODE1248
11311 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11313 (rotatert:SSEMODE1248
11315 (neg:SSEMODE1248 (match_dup 2)))))]
11316 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11317 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11318 [(set_attr "type" "sseishft")
11319 (set_attr "prefix_data16" "0")
11320 (set_attr "prefix_extra" "2")
11321 (set_attr "mode" "TI")])
11323 ;; XOP packed shift instructions.
11324 ;; FIXME: add V2DI back in
;; Logical shift right by a per-element count vector (SSEMODE124 only):
;; negates the counts with neg<mode>2 and emits xop_lshl<mode>3 with the
;; negated vector.
11325 (define_expand "vlshr<mode>3"
11326 [(match_operand:SSEMODE124 0 "register_operand" "")
11327 (match_operand:SSEMODE124 1 "register_operand" "")
11328 (match_operand:SSEMODE124 2 "register_operand" "")]
11331 rtx neg = gen_reg_rtx (<MODE>mode);
11332 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11333 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Arithmetic shift right by a per-element count vector (SSEMODE124 only):
;; negates the counts with neg<mode>2 and emits xop_ashl<mode>3 with the
;; negated vector.
11337 (define_expand "vashr<mode>3"
11338 [(match_operand:SSEMODE124 0 "register_operand" "")
11339 (match_operand:SSEMODE124 1 "register_operand" "")
11340 (match_operand:SSEMODE124 2 "register_operand" "")]
11343 rtx neg = gen_reg_rtx (<MODE>mode);
11344 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11345 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Shift left by a per-element count vector (SSEMODE124 only): forwards all
;; three operands unchanged to xop_ashl<mode>3.
11349 (define_expand "vashl<mode>3"
11350 [(match_operand:SSEMODE124 0 "register_operand" "")
11351 (match_operand:SSEMODE124 1 "register_operand" "")
11352 (match_operand:SSEMODE124 2 "register_operand" "")]
11355 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable arithmetic shift: emits vpsha<ssevecsize> with operand 2 as a
;; vector of per-element counts.  The RTL is an if_then_else on operand 2
;; whose arms are an ashift of operand 1 and an ashiftrt by the negated
;; operand 2.  Either operand 1 or operand 2 may be in memory, but not both.
11359 (define_insn "xop_ashl<mode>3"
11360 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11361 (if_then_else:SSEMODE1248
11363 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11365 (ashift:SSEMODE1248
11366 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11368 (ashiftrt:SSEMODE1248
11370 (neg:SSEMODE1248 (match_dup 2)))))]
11371 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11372 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11373 [(set_attr "type" "sseishft")
11374 (set_attr "prefix_data16" "0")
11375 (set_attr "prefix_extra" "2")
11376 (set_attr "mode" "TI")])
;; Variable logical shift: emits vpshl<ssevecsize> with operand 2 as a
;; vector of per-element counts.  The RTL is an if_then_else on operand 2
;; whose arms are an ashift of operand 1 and an lshiftrt by the negated
;; operand 2.  Either operand 1 or operand 2 may be in memory, but not both.
11378 (define_insn "xop_lshl<mode>3"
11379 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11380 (if_then_else:SSEMODE1248
11382 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11384 (ashift:SSEMODE1248
11385 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11387 (lshiftrt:SSEMODE1248
11389 (neg:SSEMODE1248 (match_dup 2)))))]
11390 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11391 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11392 [(set_attr "type" "sseishft")
11393 (set_attr "prefix_data16" "0")
11394 (set_attr "prefix_extra" "2")
11395 (set_attr "mode" "TI")])
11397 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count (operand 2, SImode, register or
;; constant).  The count is placed in all 16 slots of a PARALLEL,
;; materialised with vec_initv16qi, and the shift is emitted via
;; xop_ashlv16qi3.
11398 (define_expand "ashlv16qi3"
11399 [(match_operand:V16QI 0 "register_operand" "")
11400 (match_operand:V16QI 1 "register_operand" "")
11401 (match_operand:SI 2 "nonmemory_operand" "")]
11404 rtvec vs = rtvec_alloc (16);
11405 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11406 rtx reg = gen_reg_rtx (V16QImode);
11408 for (i = 0; i < 16; i++)
11409 RTVEC_ELT (vs, i) = operands[2];
11411 emit_insn (gen_vec_initv16qi (reg, par));
11412 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar count (operand 2, SImode, register or
;; constant).  The count is placed in all 16 slots of a PARALLEL,
;; materialised with vec_initv16qi, and the shift is emitted via
;; xop_lshlv16qi3.
11416 (define_expand "lshlv16qi3"
11417 [(match_operand:V16QI 0 "register_operand" "")
11418 (match_operand:V16QI 1 "register_operand" "")
11419 (match_operand:SI 2 "nonmemory_operand" "")]
11422 rtvec vs = rtvec_alloc (16);
11423 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11424 rtx reg = gen_reg_rtx (V16QImode);
11426 for (i = 0; i < 16; i++)
11427 RTVEC_ELT (vs, i) = operands[2];
11429 emit_insn (gen_vec_initv16qi (reg, par));
11430 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count, implemented through
;; xop_ashlv16qi3 with a negated count.  A constant count is negated at
;; expand time (GEN_INT of the negated value) before being broadcast; a
;; non-constant count is broadcast first and the resulting vector is then
;; negated with negv16qi2.
11434 (define_expand "ashrv16qi3"
11435 [(match_operand:V16QI 0 "register_operand" "")
11436 (match_operand:V16QI 1 "register_operand" "")
11437 (match_operand:SI 2 "nonmemory_operand" "")]
11440 rtvec vs = rtvec_alloc (16);
11441 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11442 rtx reg = gen_reg_rtx (V16QImode);
11444 rtx ele = ((CONST_INT_P (operands[2]))
11445 ? GEN_INT (- INTVAL (operands[2]))
11448 for (i = 0; i < 16; i++)
11449 RTVEC_ELT (vs, i) = ele;
11451 emit_insn (gen_vec_initv16qi (reg, par));
11453 if (!CONST_INT_P (operands[2]))
11455 rtx neg = gen_reg_rtx (V16QImode);
11456 emit_insn (gen_negv16qi2 (neg, reg));
11457 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11460 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar count, implemented through
;; xop_ashlv2di3 with a negated count.  The negated count `ele' is obtained
;; in one of three ways: a constant is negated at expand time; a value
;; whose mode is not DImode is first converted into a DImode register and
;; then negated with negdi2; otherwise operand 2 is negated with negdi2
;; directly.  `ele' fills both vector elements before vec_initv2di.
11465 (define_expand "ashrv2di3"
11466 [(match_operand:V2DI 0 "register_operand" "")
11467 (match_operand:V2DI 1 "register_operand" "")
11468 (match_operand:DI 2 "nonmemory_operand" "")]
11471 rtvec vs = rtvec_alloc (2);
11472 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11473 rtx reg = gen_reg_rtx (V2DImode);
11476 if (CONST_INT_P (operands[2]))
11477 ele = GEN_INT (- INTVAL (operands[2]));
11478 else if (GET_MODE (operands[2]) != DImode)
11480 rtx move = gen_reg_rtx (DImode);
11481 ele = gen_reg_rtx (DImode);
11482 convert_move (move, operands[2], false);
11483 emit_insn (gen_negdi2 (ele, move));
11487 ele = gen_reg_rtx (DImode);
11488 emit_insn (gen_negdi2 (ele, operands[2]));
11491 RTVEC_ELT (vs, 0) = ele;
11492 RTVEC_ELT (vs, 1) = ele;
11493 emit_insn (gen_vec_initv2di (reg, par));
11494 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11498 ;; XOP FRCZ support
;; Emits vfrcz<ssemodesuffix> for the SSEMODEF2P modes: operand 1 is a
;; register or memory source, operand 0 the register result.
11500 (define_insn "xop_frcz<mode>2"
11501 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11503 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11506 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11507 [(set_attr "type" "ssecvt1")
11508 (set_attr "mode" "<MODE>")])
;; Emits vfrcz<ssescalarmodesuffix> on operand 2.  The RTL is a vec_merge
;; that combines the result with operand 1, which is tied to the output
;; register by the "0" constraint.
11511 (define_insn "xop_vmfrcz<mode>2"
11512 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11513 (vec_merge:SSEMODEF2P
11515 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11517 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11520 "vfrcz<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
11521 [(set_attr "type" "ssecvt1")
11522 (set_attr "mode" "<MODE>")])
;; Emits vfrcz<ssemodesuffix> for the FMA4MODEF4 modes: operand 1 is a
;; register or memory source, operand 0 the register result.
11524 (define_insn "xop_frcz<mode>2256"
11525 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11527 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11530 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11531 [(set_attr "type" "ssecvt1")
11532 (set_attr "mode" "<MODE>")])
;; Vector compare producing a mask: operator 1 must satisfy
;; ix86_comparison_int_operator and is printed through %Y1 to form the
;; vpcom<cond><ssevecsize> mnemonic.  Operand 2 is a register; operand 3 may
;; be a register or memory.
11534 (define_insn "xop_maskcmp<mode>3"
11535 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11536 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11537 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11538 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11540 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11541 [(set_attr "type" "sse4arg")
11542 (set_attr "prefix_data16" "0")
11543 (set_attr "prefix_rep" "0")
11544 (set_attr "prefix_extra" "2")
11545 (set_attr "length_immediate" "1")
11546 (set_attr "mode" "TI")])
;; Counterpart of xop_maskcmp<mode>3 for operators accepted by
;; ix86_comparison_uns_operator; the mnemonic gains a "u" before the
;; element-size suffix (vpcom<cond>u<ssevecsize>).
11548 (define_insn "xop_maskcmp_uns<mode>3"
11549 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11550 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11551 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11552 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11554 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11555 [(set_attr "type" "ssecmp")
11556 (set_attr "prefix_data16" "0")
11557 (set_attr "prefix_rep" "0")
11558 (set_attr "prefix_extra" "2")
11559 (set_attr "length_immediate" "1")
11560 (set_attr "mode" "TI")])
11562 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11563 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11564 ;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in an UNSPEC_XOP_UNSIGNED_CMP unspec rather than appearing as a
;; bare comparison operator.
11565 (define_insn "xop_maskcmp_uns2<mode>3"
11566 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11567 (unspec:SSEMODE1248
11568 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11569 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11570 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11571 UNSPEC_XOP_UNSIGNED_CMP))]
11573 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11574 [(set_attr "type" "ssecmp")
11575 (set_attr "prefix_data16" "0")
11576 (set_attr "prefix_extra" "2")
11577 (set_attr "length_immediate" "1")
11578 (set_attr "mode" "TI")])
11580 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11581 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE pattern.  Constant operand 3 selects the mnemonic:
;; nonzero emits vpcomtrue<ssevecsize>, zero emits vpcomfalse<ssevecsize>.
;; Operands 1 and 2 are passed through to the instruction.
11582 (define_insn "xop_pcom_tf<mode>3"
11583 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11584 (unspec:SSEMODE1248
11585 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11586 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11587 (match_operand:SI 3 "const_int_operand" "n")]
11588 UNSPEC_XOP_TRUEFALSE))]
11591 return ((INTVAL (operands[3]) != 0)
11592 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11593 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11595 [(set_attr "type" "ssecmp")
11596 (set_attr "prefix_data16" "0")
11597 (set_attr "prefix_extra" "2")
11598 (set_attr "length_immediate" "1")
11599 (set_attr "mode" "TI")])
;; Emits vpermil2<ssemodesuffix> with two data operands (1 and 2), a
;; selector vector of mode <avxpermvecmode> (operand 3, register or memory)
;; and an immediate in the range 0..3 (operand 4).
11601 (define_insn "xop_vpermil2<mode>3"
11602 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11604 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11605 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11606 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11607 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11610 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11611 [(set_attr "type" "sse4arg")
11612 (set_attr "length_immediate" "1")
11613 (set_attr "mode" "<MODE>")])
11615 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded three-operand form: emits vaesenc with separate destination
;; and first source registers.  Enabled when both TARGET_AES and TARGET_AVX
;; hold.
11616 (define_insn "*avx_aesenc"
11617 [(set (match_operand:V2DI 0 "register_operand" "=x")
11618 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11619 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11621 "TARGET_AES && TARGET_AVX"
11622 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11623 [(set_attr "type" "sselog1")
11624 (set_attr "prefix_extra" "1")
11625 (set_attr "prefix" "vex")
11626 (set_attr "mode" "TI")])
;; Two-operand form: emits aesenc; operand 1 is tied to the destination
;; register by the "0" constraint.
11628 (define_insn "aesenc"
11629 [(set (match_operand:V2DI 0 "register_operand" "=x")
11630 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11631 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11634 "aesenc\t{%2, %0|%0, %2}"
11635 [(set_attr "type" "sselog1")
11636 (set_attr "prefix_extra" "1")
11637 (set_attr "mode" "TI")])
;; VEX-encoded three-operand form of UNSPEC_AESENCLAST: emits vaesenclast.
;; Enabled when both TARGET_AES and TARGET_AVX hold.
11639 (define_insn "*avx_aesenclast"
11640 [(set (match_operand:V2DI 0 "register_operand" "=x")
11641 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11642 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11643 UNSPEC_AESENCLAST))]
11644 "TARGET_AES && TARGET_AVX"
11645 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11646 [(set_attr "type" "sselog1")
11647 (set_attr "prefix_extra" "1")
11648 (set_attr "prefix" "vex")
11649 (set_attr "mode" "TI")])
;; Two-operand form of UNSPEC_AESENCLAST: emits aesenclast; operand 1 is
;; tied to the destination register by the "0" constraint.
11651 (define_insn "aesenclast"
11652 [(set (match_operand:V2DI 0 "register_operand" "=x")
11653 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11654 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11655 UNSPEC_AESENCLAST))]
11657 "aesenclast\t{%2, %0|%0, %2}"
11658 [(set_attr "type" "sselog1")
11659 (set_attr "prefix_extra" "1")
11660 (set_attr "mode" "TI")])
;; VEX-encoded three-operand form: emits vaesdec with separate destination
;; and first source registers.  Enabled when both TARGET_AES and TARGET_AVX
;; hold.
11662 (define_insn "*avx_aesdec"
11663 [(set (match_operand:V2DI 0 "register_operand" "=x")
11664 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11665 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11667 "TARGET_AES && TARGET_AVX"
11668 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11669 [(set_attr "type" "sselog1")
11670 (set_attr "prefix_extra" "1")
11671 (set_attr "prefix" "vex")
11672 (set_attr "mode" "TI")])
;; Two-operand form: emits aesdec; operand 1 is tied to the destination
;; register by the "0" constraint.
11674 (define_insn "aesdec"
11675 [(set (match_operand:V2DI 0 "register_operand" "=x")
11676 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11677 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11680 "aesdec\t{%2, %0|%0, %2}"
11681 [(set_attr "type" "sselog1")
11682 (set_attr "prefix_extra" "1")
11683 (set_attr "mode" "TI")])
;; VEX-encoded three-operand form of UNSPEC_AESDECLAST: emits vaesdeclast.
;; Enabled when both TARGET_AES and TARGET_AVX hold.
11685 (define_insn "*avx_aesdeclast"
11686 [(set (match_operand:V2DI 0 "register_operand" "=x")
11687 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11688 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11689 UNSPEC_AESDECLAST))]
11690 "TARGET_AES && TARGET_AVX"
11691 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11692 [(set_attr "type" "sselog1")
11693 (set_attr "prefix_extra" "1")
11694 (set_attr "prefix" "vex")
11695 (set_attr "mode" "TI")])
;; Two-operand form of UNSPEC_AESDECLAST: emits aesdeclast; operand 1 is
;; tied to the destination register by the "0" constraint.
11697 (define_insn "aesdeclast"
11698 [(set (match_operand:V2DI 0 "register_operand" "=x")
11699 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11700 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11701 UNSPEC_AESDECLAST))]
11703 "aesdeclast\t{%2, %0|%0, %2}"
11704 [(set_attr "type" "sselog1")
11705 (set_attr "prefix_extra" "1")
11706 (set_attr "mode" "TI")])
;; Single-source AES pattern: emits aesimc, with the %v modifier and the
;; "maybe_vex" prefix attribute covering the VEX-encoded spelling.  Operand
;; 1 may be a register or memory.
11708 (define_insn "aesimc"
11709 [(set (match_operand:V2DI 0 "register_operand" "=x")
11710 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11713 "%vaesimc\t{%1, %0|%0, %1}"
11714 [(set_attr "type" "sselog1")
11715 (set_attr "prefix_extra" "1")
11716 (set_attr "prefix" "maybe_vex")
11717 (set_attr "mode" "TI")])
;; UNSPEC_AESKEYGENASSIST: emits aeskeygenassist (optionally VEX-encoded via
;; %v / "maybe_vex") with a register-or-memory source and an 8-bit
;; immediate in the range 0..255 (operand 2).
11719 (define_insn "aeskeygenassist"
11720 [(set (match_operand:V2DI 0 "register_operand" "=x")
11721 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11722 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11723 UNSPEC_AESKEYGENASSIST))]
11725 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11726 [(set_attr "type" "sselog1")
11727 (set_attr "prefix_extra" "1")
11728 (set_attr "length_immediate" "1")
11729 (set_attr "prefix" "maybe_vex")
11730 (set_attr "mode" "TI")])
;; VEX-encoded three-operand form: emits vpclmulqdq with two V2DI sources
;; and an 8-bit immediate (operand 3).  Enabled when both TARGET_PCLMUL and
;; TARGET_AVX hold.
11732 (define_insn "*vpclmulqdq"
11733 [(set (match_operand:V2DI 0 "register_operand" "=x")
11734 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11735 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11736 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11738 "TARGET_PCLMUL && TARGET_AVX"
11739 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11740 [(set_attr "type" "sselog1")
11741 (set_attr "prefix_extra" "1")
11742 (set_attr "length_immediate" "1")
11743 (set_attr "prefix" "vex")
11744 (set_attr "mode" "TI")])
;; Two-operand form: emits pclmulqdq; operand 1 is tied to the destination
;; register by the "0" constraint and operand 3 is an 8-bit immediate.
11746 (define_insn "pclmulqdq"
11747 [(set (match_operand:V2DI 0 "register_operand" "=x")
11748 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11749 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11750 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11753 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11754 [(set_attr "type" "sselog1")
11755 (set_attr "prefix_extra" "1")
11756 (set_attr "length_immediate" "1")
11757 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall by hand.  Element 0 is an
;; unspec_volatile marker; elements 1..nregs are SETs of each SSE register
;; (viewed as V8SImode) to the V8SImode zero constant.  nregs is 16 when
;; TARGET_64BIT and 8 otherwise.
11759 (define_expand "avx_vzeroall"
11760 [(match_par_dup 0 [(const_int 0)])]
11763 int nregs = TARGET_64BIT ? 16 : 8;
11766 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11768 XVECEXP (operands[0], 0, 0)
11769 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11772 for (regno = 0; regno < nregs; regno++)
11773 XVECEXP (operands[0], 0, regno + 1)
11774 = gen_rtx_SET (VOIDmode,
11775 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11776 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; mark it as VEX-encoded, with no ModRM byte and no memory access.
11779 (define_insn "*avx_vzeroall"
11780 [(match_parallel 0 "vzeroall_operation"
11781 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11784 [(set_attr "type" "sse")
11785 (set_attr "modrm" "0")
11786 (set_attr "memory" "none")
11787 (set_attr "prefix" "vex")
11788 (set_attr "mode" "OI")])
11790 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Builds the PARALLEL for vzeroupper by hand.  Element 0 is the
;; UNSPECV_VZEROUPPER unspec_volatile; elements 1..nregs are CLOBBERs of
;; each SSE register viewed as V8SImode.  nregs is 16 when TARGET_64BIT and
;; 8 otherwise.
11791 (define_expand "avx_vzeroupper"
11792 [(match_par_dup 0 [(const_int 0)])]
11795 int nregs = TARGET_64BIT ? 16 : 8;
11798 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11800 XVECEXP (operands[0], 0, 0)
11801 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11802 UNSPECV_VZEROUPPER);
11804 for (regno = 0; regno < nregs; regno++)
11805 XVECEXP (operands[0], 0, regno + 1)
11806 = gen_rtx_CLOBBER (VOIDmode,
11807 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; Matches a PARALLEL accepted by the vzeroupper_operation predicate whose
;; first element is the UNSPECV_VZEROUPPER unspec_volatile.  The attributes
;; mark it as VEX-encoded, with no ModRM byte and no memory access.
11810 (define_insn "*avx_vzeroupper"
11811 [(match_parallel 0 "vzeroupper_operation"
11812 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11815 [(set_attr "type" "sse")
11816 (set_attr "modrm" "0")
11817 (set_attr "memory" "none")
11818 (set_attr "prefix" "vex")
11819 (set_attr "mode" "OI")])
;; Broadcast of a scalar into a 256-bit vector.  The memory alternative
;; emits vbroadcast<ssescalarmodesuffix> directly.  When the scalar is in a
;; register, the pattern is split after reload into two steps: a
;; vec_duplicate into the <avxhalfvecmode> view of the destination hard
;; register (operand 2), then a vec_concat of that half with itself.
11821 (define_insn_and_split "vec_dup<mode>"
11822 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11823 (vec_duplicate:AVX256MODE24P
11824 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11827 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11829 "&& reload_completed && REG_P (operands[1])"
11830 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11831 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11833 operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));
11835 [(set_attr "type" "ssemov")
11836 (set_attr "prefix_extra" "1")
11837 (set_attr "prefix" "vex")
11838 (set_attr "mode" "V8SF")])
;; Duplicates a 128-bit half (operand 1) into both halves of a 256-bit
;; register.  Three alternatives, in constraint order:
;;   m  - source in memory: vbroadcastf128
;;   0  - source already in the destination register: vinsertf128 with
;;        immediate 1
;;   ?x - source in another register: vperm2f128 with immediate 0
11840 (define_insn "avx_vbroadcastf128_<mode>"
11841 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11842 (vec_concat:AVX256MODE
11843 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11847 vbroadcastf128\t{%1, %0|%0, %1}
11848 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11849 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11850 [(set_attr "type" "ssemov,sselog1,sselog1")
11851 (set_attr "prefix_extra" "1")
11852 (set_attr "length_immediate" "0,1,1")
11853 (set_attr "prefix" "vex")
11854 (set_attr "mode" "V4SF,V8SF,V8SF")])
11856 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11857 ;; If it so happens that the input is in memory, use vbroadcast.
11858 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast expressed as a selection whose parallel satisfies
;; avx_vbroadcast_operand; operand 3 (the first selector) is the element
;; index `elt'.  The output code switches on which_alternative.  One path
;; re-addresses the memory operand to the SFmode element at byte offset
;; elt * 4 and emits vbroadcastss.  The other emits vpermilps with immediate
;; elt * 0x55, which repeats the 2-bit index in all four fields.
11859 (define_insn "*avx_vperm_broadcast_v4sf"
11860 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11862 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11863 (match_parallel 2 "avx_vbroadcast_operand"
11864 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11867 int elt = INTVAL (operands[3]);
11868 switch (which_alternative)
11872 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11873 return "vbroadcastss\t{%1, %0|%0, %1}";
11875 operands[2] = GEN_INT (elt * 0x55);
11876 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11878 gcc_unreachable ();
11881 [(set_attr "type" "ssemov,ssemov,sselog1")
11882 (set_attr "prefix_extra" "1")
11883 (set_attr "length_immediate" "0,0,1")
11884 (set_attr "prefix" "vex")
11885 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast expressed as a vec_select whose parallel satisfies
;; avx_vbroadcast_operand; split after reload.  The preparation code has two
;; paths:
;;  * An in-register shuffle: vpermil replicates the wanted element within
;;    each 128-bit lane (mask 15 or 0 for V4DF depending on elt & 1,
;;    otherwise (elt & 3) * 0x55), then vperm2f128 copies the lane holding
;;    the element (elt / (<ssescalarnum> / 2)) into both lanes.
;;  * A memory path: operand 1 is re-addressed to the selected scalar
;;    element so the vec_duplicate replacement pattern applies.
;; NOTE(review): the test choosing between the two paths is not visible
;; here; presumably it is whether operand 1 is a register -- confirm.
11887 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11888 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11889 (vec_select:AVX256MODEF2P
11890 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11891 (match_parallel 2 "avx_vbroadcast_operand"
11892 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11895 "&& reload_completed"
11896 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11898 rtx op0 = operands[0], op1 = operands[1];
11899 int elt = INTVAL (operands[3]);
11905 /* Shuffle element we care about into all elements of the 128-bit lane.
11906 The other lane gets shuffled too, but we don't care. */
11907 if (<MODE>mode == V4DFmode)
11908 mask = (elt & 1 ? 15 : 0);
11910 mask = (elt & 3) * 0x55;
11911 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11913 /* Shuffle the lane we care about into both lanes of the dest. */
11914 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11915 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11919 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11920 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; Expander for the AVXMODEFDP modes: turns the immediate (operand 2) into
;; an explicit vec_select index list.  Each mask bit selects one element:
;; bits 0 and 1 give indices 0..1 for result elements 0 and 1.  For V4DF,
;; bits 2 and 3 give indices 2..3 (hence the + 2) for result elements 2 and
;; 3.  The indices are wrapped in a PARALLEL of <ssescalarnum> entries.
11923 (define_expand "avx_vpermil<mode>"
11924 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11925 (vec_select:AVXMODEFDP
11926 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11927 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11930 int mask = INTVAL (operands[2]);
11931 rtx perm[<ssescalarnum>];
11933 perm[0] = GEN_INT (mask & 1);
11934 perm[1] = GEN_INT ((mask >> 1) & 1);
11935 if (<MODE>mode == V4DFmode)
11937 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11938 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11942 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the AVXMODEFSP modes: turns the immediate (operand 2) into
;; an explicit vec_select index list.  The mask holds four 2-bit fields that
;; give indices 0..3 for result elements 0..3.  For V8SF the same four
;; fields are reused for elements 4..7 with 4 added to each index.  The
;; indices are wrapped in a PARALLEL of <ssescalarnum> entries.
11945 (define_expand "avx_vpermil<mode>"
11946 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11947 (vec_select:AVXMODEFSP
11948 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11949 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11952 int mask = INTVAL (operands[2]);
11953 rtx perm[<ssescalarnum>];
11955 perm[0] = GEN_INT (mask & 3);
11956 perm[1] = GEN_INT ((mask >> 2) & 3);
11957 perm[2] = GEN_INT ((mask >> 4) & 3);
11958 perm[3] = GEN_INT ((mask >> 6) & 3);
11959 if (<MODE>mode == V8SFmode)
11961 perm[4] = GEN_INT ((mask & 3) + 4);
11962 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11963 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11964 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11968 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a vec_select whose index list satisfies
;; avx_vpermilp_<mode>_operand and emits vpermil<ssemodesuffix> with an
;; immediate.  The immediate is recomputed from the index list as
;; avx_vpermilp_parallel (operands[2], <MODE>mode) - 1 and stored back into
;; operand 2 before printing.
11971 (define_insn "*avx_vpermilp<mode>"
11972 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11973 (vec_select:AVXMODEF2P
11974 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11975 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11976 [(match_operand 3 "const_int_operand" "")])))]
11979 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11980 operands[2] = GEN_INT (mask);
11981 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11983 [(set_attr "type" "sselog")
11984 (set_attr "prefix_extra" "1")
11985 (set_attr "length_immediate" "1")
11986 (set_attr "prefix" "vex")
11987 (set_attr "mode" "<MODE>")])
;; Variable-selector form: emits vpermil<ssemodesuffix> with operand 1 as
;; the data register and operand 2 (mode <avxpermvecmode>, register or
;; memory) as the selector vector.
11989 (define_insn "avx_vpermilvar<mode>3"
11990 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11992 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11993 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11996 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11997 [(set_attr "type" "sselog")
11998 (set_attr "prefix_extra" "1")
11999 (set_attr "prefix" "vex")
12000 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  The default RTL is the UNSPEC_VPERMIL2F128
;; form.  When neither bit 3 nor bit 7 of the immediate is set
;; ((mask & 0x88) == 0), the operation is instead built as a vec_select from
;; the vec_concat of operands 1 and 2.  In that case the low half of the
;; result takes nelt2 consecutive elements starting at (mask & 3) * nelt2,
;; and the high half takes nelt2 starting at ((mask >> 4) & 3) * nelt2.
12002 (define_expand "avx_vperm2f128<mode>3"
12003 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12004 (unspec:AVX256MODE2P
12005 [(match_operand:AVX256MODE2P 1 "register_operand" "")
12006 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12007 (match_operand:SI 3 "const_0_to_255_operand" "")]
12008 UNSPEC_VPERMIL2F128))]
12011 int mask = INTVAL (operands[3]);
12012 if ((mask & 0x88) == 0)
12014 rtx perm[<ssescalarnum>], t1, t2;
12015 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12017 base = (mask & 3) * nelt2;
12018 for (i = 0; i < nelt2; ++i)
12019 perm[i] = GEN_INT (base + i);
12021 base = ((mask >> 4) & 3) * nelt2;
12022 for (i = 0; i < nelt2; ++i)
12023 perm[i + nelt2] = GEN_INT (base + i);
12025 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
12026 operands[1], operands[2]);
12027 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12028 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12029 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12035 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12036 ;; means that in order to represent this properly in rtl we'd have to
12037 ;; nest *another* vec_concat with a zero operand and do the select from
12038 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form of vperm2f128: accepts any 8-bit immediate (operand 3) and
;; prints it unchanged.
12039 (define_insn "*avx_vperm2f128<mode>_full"
12040 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12041 (unspec:AVX256MODE2P
12042 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12043 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12044 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12045 UNSPEC_VPERMIL2F128))]
12047 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12048 [(set_attr "type" "sselog")
12049 (set_attr "prefix_extra" "1")
12050 (set_attr "length_immediate" "1")
12051 (set_attr "prefix" "vex")
12052 (set_attr "mode" "V8SF")])
;; vec_select form of vperm2f128: selects from the vec_concat of operands 1
;; and 2 using an index list that satisfies avx_vperm2f128_<mode>_operand.
;; The immediate is recomputed from the index list as
;; avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1 and stored back
;; into operand 3 before printing.
12054 (define_insn "*avx_vperm2f128<mode>_nozero"
12055 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12056 (vec_select:AVX256MODE2P
12057 (vec_concat:<ssedoublesizemode>
12058 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12059 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12060 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
12061 [(match_operand 4 "const_int_operand" "")])))]
12064 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12065 operands[3] = GEN_INT (mask);
12066 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12068 [(set_attr "type" "sselog")
12069 (set_attr "prefix_extra" "1")
12070 (set_attr "length_immediate" "1")
12071 (set_attr "prefix" "vex")
12072 (set_attr "mode" "V8SF")])
;; Expander that inserts a 128-bit half (operand 2) into a 256-bit vector
;; (operand 1).  It switches on the 0/1 immediate in operand 3 and emits
;; either vec_set_lo_<mode> or vec_set_hi_<mode>; any other value is
;; unreachable.
12074 (define_expand "avx_vinsertf128<mode>"
12075 [(match_operand:AVX256MODE 0 "register_operand" "")
12076 (match_operand:AVX256MODE 1 "register_operand" "")
12077 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12078 (match_operand:SI 3 "const_0_to_1_operand" "")]
12081 switch (INTVAL (operands[3]))
12084 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12088 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12092 gcc_unreachable ();
;; Four-element 256-bit modes: replaces the low half.  The result is the
;; concatenation of operand 2 (new low half) with elements 2 and 3 of
;; operand 1 (kept high half); emitted as vinsertf128 with immediate 0.
12097 (define_insn "vec_set_lo_<mode>"
12098 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12099 (vec_concat:AVX256MODE4P
12100 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12101 (vec_select:<avxhalfvecmode>
12102 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12103 (parallel [(const_int 2) (const_int 3)]))))]
12105 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12106 [(set_attr "type" "sselog")
12107 (set_attr "prefix_extra" "1")
12108 (set_attr "length_immediate" "1")
12109 (set_attr "prefix" "vex")
12110 (set_attr "mode" "V8SF")])
;; Four-element 256-bit modes: replaces the high half.  The result is the
;; concatenation of elements 0 and 1 of operand 1 (kept low half) with
;; operand 2 (new high half); emitted as vinsertf128 with immediate 1.
12112 (define_insn "vec_set_hi_<mode>"
12113 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12114 (vec_concat:AVX256MODE4P
12115 (vec_select:<avxhalfvecmode>
12116 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12117 (parallel [(const_int 0) (const_int 1)]))
12118 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12120 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12121 [(set_attr "type" "sselog")
12122 (set_attr "prefix_extra" "1")
12123 (set_attr "length_immediate" "1")
12124 (set_attr "prefix" "vex")
12125 (set_attr "mode" "V8SF")])
;; vec_set_lo_<mode> for the AVX256MODE8P modes (iterator defined elsewhere;
;; presumably the 8-element 256-bit modes -- confirm).
;; Operand 0 is set to the vec_concat of half-width operand 2 (register or
;; memory) followed by elements 4..7 selected from operand 1.
;; Printed as vinsertf128 with immediate 0.
12127 (define_insn "vec_set_lo_<mode>"
12128 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12129 (vec_concat:AVX256MODE8P
12130 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12131 (vec_select:<avxhalfvecmode>
12132 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12133 (parallel [(const_int 4) (const_int 5)
12134 (const_int 6) (const_int 7)]))))]
12136 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
;; VEX-encoded, one immediate byte; the "mode" attribute is V8SF for every
;; mode this pattern is instantiated with.
12137 [(set_attr "type" "sselog")
12138 (set_attr "prefix_extra" "1")
12139 (set_attr "length_immediate" "1")
12140 (set_attr "prefix" "vex")
12141 (set_attr "mode" "V8SF")])
;; vec_set_hi_<mode> for the AVX256MODE8P modes (iterator defined elsewhere;
;; presumably the 8-element 256-bit modes -- confirm).
;; Operand 0 is set to the vec_concat of elements 0..3 selected from
;; operand 1 followed by half-width operand 2 (register or memory).
;; Printed as vinsertf128 with immediate 1.
12143 (define_insn "vec_set_hi_<mode>"
12144 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12145 (vec_concat:AVX256MODE8P
12146 (vec_select:<avxhalfvecmode>
12147 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12148 (parallel [(const_int 0) (const_int 1)
12149 (const_int 2) (const_int 3)]))
12150 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12152 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
;; VEX-encoded, one immediate byte; the "mode" attribute is V8SF for every
;; mode this pattern is instantiated with.
12153 [(set_attr "type" "sselog")
12154 (set_attr "prefix_extra" "1")
12155 (set_attr "length_immediate" "1")
12156 (set_attr "prefix" "vex")
12157 (set_attr "mode" "V8SF")])
;; vec_set_lo_v16hi: V16HI-specific counterpart of vec_set_lo_<mode>.
;; Operand 0 (V16HI register) is built from V8HI operand 2 (register or
;; memory) together with elements 8..15 of V16HI operand 1.
;; NOTE(review): presumably combined with vec_concat/vec_select exactly like
;; the generic vec_set_lo_<mode> patterns -- confirm.
;; Printed as vinsertf128 with immediate 0.
12159 (define_insn "vec_set_lo_v16hi"
12160 [(set (match_operand:V16HI 0 "register_operand" "=x")
12162 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12164 (match_operand:V16HI 1 "register_operand" "x")
12165 (parallel [(const_int 8) (const_int 9)
12166 (const_int 10) (const_int 11)
12167 (const_int 12) (const_int 13)
12168 (const_int 14) (const_int 15)]))))]
12170 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
;; VEX-encoded, one immediate byte; the "mode" attribute is V8SF even
;; though the operands are integer vectors.
12171 [(set_attr "type" "sselog")
12172 (set_attr "prefix_extra" "1")
12173 (set_attr "length_immediate" "1")
12174 (set_attr "prefix" "vex")
12175 (set_attr "mode" "V8SF")])
;; vec_set_hi_v16hi: V16HI-specific counterpart of vec_set_hi_<mode>.
;; Operand 0 (V16HI register) is built from elements 0..7 of V16HI
;; operand 1 together with V8HI operand 2 (register or memory).
;; NOTE(review): presumably combined with vec_concat/vec_select exactly like
;; the generic vec_set_hi_<mode> patterns -- confirm.
;; Printed as vinsertf128 with immediate 1.
12177 (define_insn "vec_set_hi_v16hi"
12178 [(set (match_operand:V16HI 0 "register_operand" "=x")
12181 (match_operand:V16HI 1 "register_operand" "x")
12182 (parallel [(const_int 0) (const_int 1)
12183 (const_int 2) (const_int 3)
12184 (const_int 4) (const_int 5)
12185 (const_int 6) (const_int 7)]))
12186 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12188 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
;; VEX-encoded, one immediate byte; the "mode" attribute is V8SF even
;; though the operands are integer vectors.
12189 [(set_attr "type" "sselog")
12190 (set_attr "prefix_extra" "1")
12191 (set_attr "length_immediate" "1")
12192 (set_attr "prefix" "vex")
12193 (set_attr "mode" "V8SF")])
;; vec_set_lo_v32qi: V32QI-specific counterpart of vec_set_lo_<mode>.
;; Operand 0 (V32QI register) is built from V16QI operand 2 (register or
;; memory) together with elements 16..31 of V32QI operand 1.
;; NOTE(review): presumably combined with vec_concat/vec_select exactly like
;; the generic vec_set_lo_<mode> patterns -- confirm.
;; Printed as vinsertf128 with immediate 0.
12195 (define_insn "vec_set_lo_v32qi"
12196 [(set (match_operand:V32QI 0 "register_operand" "=x")
12198 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12200 (match_operand:V32QI 1 "register_operand" "x")
12201 (parallel [(const_int 16) (const_int 17)
12202 (const_int 18) (const_int 19)
12203 (const_int 20) (const_int 21)
12204 (const_int 22) (const_int 23)
12205 (const_int 24) (const_int 25)
12206 (const_int 26) (const_int 27)
12207 (const_int 28) (const_int 29)
12208 (const_int 30) (const_int 31)]))))]
12210 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
;; VEX-encoded, one immediate byte; the "mode" attribute is V8SF even
;; though the operands are integer vectors.
12211 [(set_attr "type" "sselog")
12212 (set_attr "prefix_extra" "1")
12213 (set_attr "length_immediate" "1")
12214 (set_attr "prefix" "vex")
12215 (set_attr "mode" "V8SF")])
;; vec_set_hi_v32qi: V32QI-specific counterpart of vec_set_hi_<mode>.
;; Operand 0 (V32QI register) is built from elements 0..15 of V32QI
;; operand 1 together with V16QI operand 2 (register or memory).
;; NOTE(review): presumably combined with vec_concat/vec_select exactly like
;; the generic vec_set_hi_<mode> patterns -- confirm.
;; Printed as vinsertf128 with immediate 1.
12217 (define_insn "vec_set_hi_v32qi"
12218 [(set (match_operand:V32QI 0 "register_operand" "=x")
12221 (match_operand:V32QI 1 "register_operand" "x")
12222 (parallel [(const_int 0) (const_int 1)
12223 (const_int 2) (const_int 3)
12224 (const_int 4) (const_int 5)
12225 (const_int 6) (const_int 7)
12226 (const_int 8) (const_int 9)
12227 (const_int 10) (const_int 11)
12228 (const_int 12) (const_int 13)
12229 (const_int 14) (const_int 15)]))
12230 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12232 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
;; VEX-encoded, one immediate byte; the "mode" attribute is V8SF even
;; though the operands are integer vectors.
12233 [(set_attr "type" "sselog")
12234 (set_attr "prefix_extra" "1")
12235 (set_attr "length_immediate" "1")
12236 (set_attr "prefix" "vex")
12237 (set_attr "mode" "V8SF")])
;; avx_maskload: load into register operand 0 from memory operand 1, with
;; register operand 2 as the second input; all three share the same
;; AVXMODEF2P mode.  Printed as vmaskmov<ssemodesuffix> with operand order
;; "mem, op2, dest" in AT&T syntax.
;; NOTE(review): operand 2 is presumably the per-element mask -- confirm
;; against the unspec that wraps these operands.
12239 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
12240 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12242 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12243 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12247 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
;; VEX-encoded; unlike the vinsertf128 patterns there is no
;; length_immediate attribute and "mode" follows the iterator's <MODE>.
12248 [(set_attr "type" "sselog1")
12249 (set_attr "prefix_extra" "1")
12250 (set_attr "prefix" "vex")
12251 (set_attr "mode" "<MODE>")])
;; avx_maskstore: store to memory operand 0 a value computed by an
;; UNSPEC_MASKSTORE over register operands 1 and 2; all three share the
;; same AVXMODEF2P mode.  Printed as vmaskmov<ssemodesuffix> with operand
;; order "op2, op1, mem" in AT&T syntax.
;; NOTE(review): presumably operand 1 is the mask and operand 2 the data
;; being stored -- confirm against the instruction's operand encoding.
12253 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
12254 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12256 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12257 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12259 UNSPEC_MASKSTORE))]
12261 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
;; VEX-encoded; no length_immediate attribute and "mode" follows the
;; iterator's <MODE>.
12262 [(set_attr "type" "sselog1")
12263 (set_attr "prefix_extra" "1")
12264 (set_attr "prefix" "vex")
12265 (set_attr "mode" "<MODE>")])
;; Insn-and-split that places a half-width (<avxhalfvecmode>) operand 1 into
;; a 256-bit AVX256MODE2P operand 0 through an unspec.  Two alternatives:
;; register destination from register-or-memory source ("=x" / "xm"), or
;; memory destination from register source ("m" / "x").
;; Once reload has completed the insn is split: operand 1 is re-expressed in
;; the full <MODE>mode -- either as a fresh REG with the same register
;; number (gen_rtx_REG) or through gen_lowpart -- and a plain move into
;; operand 0 is emitted.
;; NOTE(review): presumably gen_rtx_REG is used when operand 1 is a register
;; and gen_lowpart otherwise -- confirm against the guarding condition.
12267 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12268 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12269 (unspec:AVX256MODE2P
12270 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12274 "&& reload_completed"
12277 rtx op1 = operands[1];
12279 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12281 op1 = gen_lowpart (<MODE>mode, op1);
12282 emit_move_insn (operands[0], op1);
;; vec_init<mode> expander for every AVX256MODE.
;;   operand 0: destination register in the 256-bit vector mode
;;   operand 1: initializer; no mode, predicate or constraint is imposed here
;; All work is delegated to ix86_expand_vector_init, called with false as
;; its first argument followed by operands 0 and 1.
12286 (define_expand "vec_init<mode>"
12287 [(match_operand:AVX256MODE 0 "register_operand" "")
12288 (match_operand 1 "" "")]
12291 ix86_expand_vector_init (false, operands[0], operands[1]);
;; *vec_concat<mode>_avx: build a 256-bit AVX256MODE register (operand 0)
;; by concatenating two half-width values: register operand 1 followed by
;; operand 2.  Operand 2 is a vector_move_operand with two alternatives:
;; register-or-memory ("xm") or constraint "C".
;; The output code switches on which_alternative.  One arm prints
;; vinsertf128 with immediate 1, inserting operand 2 into the %t1 view of
;; operand 1; the other arm prints a vmovaps / vmovapd / vmovdqa of
;; operand 1 into the %x0 view of operand 0, chosen by get_attr_mode.
;; NOTE(review): the per-alternative attributes below (sselog with an
;; immediate first, ssemov second) indicate that alternative 0 is the
;; vinsertf128 and alternative 1 the plain move -- confirm against the case
;; labels.
12295 (define_insn "*vec_concat<mode>_avx"
12296 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12297 (vec_concat:AVX256MODE
12298 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12299 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12302 switch (which_alternative)
12305 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12307 switch (get_attr_mode (insn))
12310 return "vmovaps\t{%1, %x0|%x0, %1}";
12312 return "vmovapd\t{%1, %x0|%x0, %1}";
12314 return "vmovdqa\t{%1, %x0|%x0, %1}";
12317 gcc_unreachable ();
;; Attributes are given per alternative where they differ: only the first
;; alternative carries prefix_extra and a one-byte immediate.
12320 [(set_attr "type" "sselog,ssemov")
12321 (set_attr "prefix_extra" "1,*")
12322 (set_attr "length_immediate" "1,*")
12323 (set_attr "prefix" "vex")
12324 (set_attr "mode" "<avxvecmode>")])