1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Iterators over subsets of the vector modes.  In the SSEMODE<digits>
;; names each digit corresponds to one listed integer vector mode
;; (1 ~ V16QI, 2 ~ V8HI, 4 ~ V4SI, 8 ~ V2DI) -- presumably the element
;; size in bytes; TODO confirm.  The *F2P / *F4 iterators list
;; floating-point modes; the AVX256MODE<n>P iterators mix integer and
;; floating-point 256-bit modes with the same element count.
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
53 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
55 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
56 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
57 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
58 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
59 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
60 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
61 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
62 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
64 ;; Int-float size matches
65 (define_mode_iterator SSEMODE4S [V4SF V4SI])
66 (define_mode_iterator SSEMODE2D [V2DF V2DI])
68 ;; Modes handled by integer vcond pattern
69 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
70 (V2DI "TARGET_SSE4_2")])
72 ;; Mapping from float mode to required SSE level
73 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
75 ;; Mapping from integer vector mode to mnemonic suffix
76 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
78 ;; Mapping of the fma4 suffix
79 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
80 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
81 (V4SF "ss") (V2DF "sd")])
83 ;; Mapping of scalar and 128-bit float modes to their ss/sd/ps/pd mnemonic suffix
84 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
85 (V4SF "ps") (V2DF "pd")])
87 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
89 ;; Mapping of the max integer size for xop rotate immediate constraint
90 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
92 ;; Mapping of vector modes back to the scalar modes
93 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
94 (V16QI "QI") (V8HI "HI")
95 (V4SI "SI") (V2DI "DI")])
97 ;; Mapping of vector modes to a vector mode of double size
98 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
99 (V4SF "V8SF") (V4SI "V8SI")])
101 ;; Number of scalar elements in each vector type
102 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
103 (V16QI "16") (V8HI "8")
104 (V4SI "4") (V2DI "2")])
107 (define_mode_attr avxvecmode
108 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
109 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
110 (V8SF "V8SF") (V4DF "V4DF")])
111 (define_mode_attr avxvecpsmode
112 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
113 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
114 (define_mode_attr avxhalfvecmode
115 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
116 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
117 (define_mode_attr avxscalarmode
118 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
119 (V8SF "SF") (V4DF "DF")])
120 (define_mode_attr avxcvtvecmode
121 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
122 (define_mode_attr avxpermvecmode
123 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
124 (define_mode_attr avxmodesuffixf2c
125 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
;; Maps a vector mode to the "pd" / "ps" / "si" suffix string.
;; NOTE(review): as visible here the attribute list is never closed with
;; "])" and, unlike avxmodesuffixf2c, it has no V4DF entry -- confirm
;; against the complete file before relying on this attribute.
126 (define_mode_attr avxmodesuffixp
127 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
129 (define_mode_attr avxmodesuffix
130 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
131 (V8SI "256") (V8SF "256") (V4DF "256")])
133 ;; Mapping of immediate bits for blend instructions
134 (define_mode_attr blendbits
135 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
137 ;; Mapping of immediate bits for vpermil instructions
138 (define_mode_attr vpermilbits
139 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
141 ;; Mapping of immediate bits for pinsr instructions
142 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
144 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
146 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
150 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
152 (define_expand "mov<mode>"
153 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
154 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
157 ix86_expand_vector_move (<MODE>mode, operands);
;; Plain move for every mode in AVXMODE.  The three constraint alternatives
;; are: constant ("C") into a register, register-or-memory into a register,
;; and register into memory; the visible part of the condition requires that
;; at least one of the two operands is a register.
;; The output code switches on which_alternative: one arm asks
;; standard_sse_constant_opcode for the template, the other switches on
;; get_attr_mode (insn) to pick vmovaps, vmovapd or vmovdqa.
;; The "mode" attribute comes from the avxvecmode mode attribute.
161 (define_insn "*avx_mov<mode>_internal"
162 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
163 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
165 && (register_operand (operands[0], <MODE>mode)
166 || register_operand (operands[1], <MODE>mode))"
168 switch (which_alternative)
171 return standard_sse_constant_opcode (insn, operands[1]);
174 switch (get_attr_mode (insn))
178 return "vmovaps\t{%1, %0|%0, %1}";
181 return "vmovapd\t{%1, %0|%0, %1}";
183 return "vmovdqa\t{%1, %0|%0, %1}";
189 [(set_attr "type" "sselog1,ssemov,ssemov")
190 (set_attr "prefix" "vex")
191 (set_attr "mode" "<avxvecmode>")])
193 ;; All of these patterns are enabled for SSE1 as well as SSE2.
194 ;; This is essential for maintaining stable calling conventions.
196 (define_expand "mov<mode>"
197 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
198 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
201 ix86_expand_vector_move (<MODE>mode, operands);
;; Plain move for every mode in SSEMODE, with the same three alternatives
;; as the AVX variant (constant -> reg, reg/mem -> reg, reg -> mem) and the
;; same visible requirement that at least one operand is a register.
;; The output code picks movaps, movapd or movdqa by switching on
;; get_attr_mode (insn).
;; The trailing cond computes an attribute value (presumably "mode"; the
;; line naming the attribute is not visible here -- TODO confirm):
;;   - "V4SF" when optimizing the function for size, when TARGET_SSE2 is 0,
;;     or for alternative 2 (the store) in combination with a
;;     TARGET_SSE_TYPELESS_STORES test;
;;   - otherwise "V4SF" for V4SFmode and "V2DF" for V2DFmode;
;;   - otherwise "TI".
205 (define_insn "*mov<mode>_internal"
206 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
207 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
209 && (register_operand (operands[0], <MODE>mode)
210 || register_operand (operands[1], <MODE>mode))"
212 switch (which_alternative)
215 return standard_sse_constant_opcode (insn, operands[1]);
218 switch (get_attr_mode (insn))
221 return "movaps\t{%1, %0|%0, %1}";
223 return "movapd\t{%1, %0|%0, %1}";
225 return "movdqa\t{%1, %0|%0, %1}";
231 [(set_attr "type" "sselog1,ssemov,ssemov")
233 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
234 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
235 (and (eq_attr "alternative" "2")
236 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
238 (const_string "V4SF")
239 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
240 (const_string "V4SF")
241 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
242 (const_string "V2DF")
244 (const_string "TI")))])
246 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
247 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
248 ;; from memory, we'd prefer to load the memory directly into the %xmm
249 ;; register. To facilitate this happy circumstance, this pattern won't
250 ;; split until after register allocation. If the 64-bit value didn't
251 ;; come from memory, this is the best we can do. This is much better
252 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;;
;; Split body (runs once reload_completed holds):
;;  - operand 1 in registers: sse2_loadld places the SImode subreg at byte
;;    offset 0 into operand 0 and the subreg at byte offset 4 into the
;;    scratch operand 2, then sse2_punpckldq combines operand 0 and the
;;    scratch into operand 0;
;;  - operand 1 in memory: vec_concatv2di builds the V2DI lowpart view of
;;    operand 0 from the DImode memory value and const0_rtx.
;; Only enabled for !TARGET_64BIT with SSE2 and TARGET_INTER_UNIT_MOVES.
255 (define_insn_and_split "movdi_to_sse"
257 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
258 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
259 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
260 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
262 "&& reload_completed"
265 if (register_operand (operands[1], DImode))
267 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
268 Assemble the 64-bit DImode value in an xmm register. */
269 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
270 gen_rtx_SUBREG (SImode, operands[1], 0)));
271 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
272 gen_rtx_SUBREG (SImode, operands[1], 4)));
273 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
275 else if (memory_operand (operands[1], DImode))
276 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
282 [(set (match_operand:V4SF 0 "register_operand" "")
283 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
284 "TARGET_SSE && reload_completed"
287 (vec_duplicate:V4SF (match_dup 1))
291 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
292 operands[2] = CONST0_RTX (V4SFmode);
296 [(set (match_operand:V2DF 0 "register_operand" "")
297 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
298 "TARGET_SSE2 && reload_completed"
299 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
301 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
302 operands[2] = CONST0_RTX (DFmode);
305 (define_expand "push<mode>1"
306 [(match_operand:AVX256MODE 0 "register_operand" "")]
309 ix86_expand_push (<MODE>mode, operands[0]);
313 (define_expand "push<mode>1"
314 [(match_operand:SSEMODE 0 "register_operand" "")]
317 ix86_expand_push (<MODE>mode, operands[0]);
321 (define_expand "movmisalign<mode>"
322 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
323 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
326 ix86_expand_vector_move_misalign (<MODE>mode, operands);
330 (define_expand "movmisalign<mode>"
331 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
332 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
335 ix86_expand_vector_move_misalign (<MODE>mode, operands);
339 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
340 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
342 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
344 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
345 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
346 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
347 [(set_attr "type" "ssemov")
348 (set_attr "movu" "1")
349 (set_attr "prefix" "vex")
350 (set_attr "mode" "<MODE>")])
352 (define_insn "sse2_movq128"
353 [(set (match_operand:V2DI 0 "register_operand" "=x")
356 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
357 (parallel [(const_int 0)]))
360 "%vmovq\t{%1, %0|%0, %1}"
361 [(set_attr "type" "ssemov")
362 (set_attr "prefix" "maybe_vex")
363 (set_attr "mode" "TI")])
365 (define_insn "<sse>_movup<ssemodesuffixf2c>"
366 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
368 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
370 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
371 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
372 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
373 [(set_attr "type" "ssemov")
374 (set_attr "movu" "1")
375 (set_attr "mode" "<MODE>")])
377 (define_insn "avx_movdqu<avxmodesuffix>"
378 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
380 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
382 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
383 "vmovdqu\t{%1, %0|%0, %1}"
384 [(set_attr "type" "ssemov")
385 (set_attr "movu" "1")
386 (set_attr "prefix" "vex")
387 (set_attr "mode" "<avxvecmode>")])
389 (define_insn "sse2_movdqu"
390 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
391 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
393 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
394 "movdqu\t{%1, %0|%0, %1}"
395 [(set_attr "type" "ssemov")
396 (set_attr "movu" "1")
397 (set_attr "prefix_data16" "1")
398 (set_attr "mode" "TI")])
400 (define_insn "avx_movnt<mode>"
401 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
403 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
405 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
406 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
407 [(set_attr "type" "ssemov")
408 (set_attr "prefix" "vex")
409 (set_attr "mode" "<MODE>")])
411 (define_insn "<sse>_movnt<mode>"
412 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
414 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
416 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
417 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
418 [(set_attr "type" "ssemov")
419 (set_attr "mode" "<MODE>")])
;; AVX store of an integer vector register (modes in AVXMODEDI) to memory
;; using vmovntdq.  Classified as "ssemov", matching the floating-point
;; avx_movnt<mode> pattern and sse2_movntv2di, which describe the same kind
;; of register-to-memory store; "ssecvt" put it in the conversion class.
421 (define_insn "avx_movnt<mode>"
422 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
424 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
427 "vmovntdq\t{%1, %0|%0, %1}"
428 [(set_attr "type" "ssemov")
429 (set_attr "prefix" "vex")
430 (set_attr "mode" "<avxvecmode>")])
432 (define_insn "sse2_movntv2di"
433 [(set (match_operand:V2DI 0 "memory_operand" "=m")
434 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
437 "movntdq\t{%1, %0|%0, %1}"
438 [(set_attr "type" "ssemov")
439 (set_attr "prefix_data16" "1")
440 (set_attr "mode" "TI")])
;; movnti: store of an SImode general register ("r") to memory.
;; The "mode" attribute is "SI" to match the operand mode; the insn moves a
;; 32-bit integer, not a V2DF vector.  prefix_data16 stays explicitly "0".
442 (define_insn "sse2_movntsi"
443 [(set (match_operand:SI 0 "memory_operand" "=m")
444 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
447 "movnti\t{%1, %0|%0, %1}"
448 [(set_attr "type" "ssemov")
449 (set_attr "prefix_data16" "0")
450 (set_attr "mode" "SI")])
;; AVX vlddqu: load from memory into a vector register for the modes in
;; AVXMODEQI.  Classified as "ssemov" like sse3_lddqu, which
;; describes the same memory-to-register load; "ssecvt" put it in the
;; conversion class.
452 (define_insn "avx_lddqu<avxmodesuffix>"
453 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
455 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
458 "vlddqu\t{%1, %0|%0, %1}"
459 [(set_attr "type" "ssemov")
460 (set_attr "movu" "1")
461 (set_attr "prefix" "vex")
462 (set_attr "mode" "<avxvecmode>")])
464 (define_insn "sse3_lddqu"
465 [(set (match_operand:V16QI 0 "register_operand" "=x")
466 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
469 "lddqu\t{%1, %0|%0, %1}"
470 [(set_attr "type" "ssemov")
471 (set_attr "movu" "1")
472 (set_attr "prefix_data16" "0")
473 (set_attr "prefix_rep" "1")
474 (set_attr "mode" "TI")])
476 ; Expand patterns for non-temporal stores. At the moment, only those
477 ; that directly map to insns are defined; it would be possible to
478 ; define patterns for other modes that would expand to several insns.
480 (define_expand "storent<mode>"
481 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
483 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
485 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
488 (define_expand "storent<mode>"
489 [(set (match_operand:MODEF 0 "memory_operand" "")
491 [(match_operand:MODEF 1 "register_operand" "")]
496 (define_expand "storentv2di"
497 [(set (match_operand:V2DI 0 "memory_operand" "")
498 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
503 (define_expand "storentsi"
504 [(set (match_operand:SI 0 "memory_operand" "")
505 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
510 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
512 ;; Parallel floating point arithmetic
514 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
516 (define_expand "<code><mode>2"
517 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
519 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
520 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
521 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
523 (define_expand "<plusminus_insn><mode>3"
524 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
525 (plusminus:AVX256MODEF2P
526 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
527 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
528 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
529 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
531 (define_insn "*avx_<plusminus_insn><mode>3"
532 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
533 (plusminus:AVXMODEF2P
534 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
535 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
536 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
537 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
538 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
539 [(set_attr "type" "sseadd")
540 (set_attr "prefix" "vex")
541 (set_attr "mode" "<avxvecmode>")])
543 (define_expand "<plusminus_insn><mode>3"
544 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
545 (plusminus:SSEMODEF2P
546 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
547 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
548 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
549 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
551 (define_insn "*<plusminus_insn><mode>3"
552 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
553 (plusminus:SSEMODEF2P
554 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
555 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
556 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
557 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
558 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
559 [(set_attr "type" "sseadd")
560 (set_attr "mode" "<MODE>")])
562 (define_insn "*avx_vm<plusminus_insn><mode>3"
563 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
564 (vec_merge:SSEMODEF2P
565 (plusminus:SSEMODEF2P
566 (match_operand:SSEMODEF2P 1 "register_operand" "x")
567 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
570 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
571 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
572 [(set_attr "type" "sseadd")
573 (set_attr "prefix" "vex")
574 (set_attr "mode" "<ssescalarmode>")])
576 (define_insn "<sse>_vm<plusminus_insn><mode>3"
577 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
578 (vec_merge:SSEMODEF2P
579 (plusminus:SSEMODEF2P
580 (match_operand:SSEMODEF2P 1 "register_operand" "0")
581 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
584 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
585 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
586 [(set_attr "type" "sseadd")
587 (set_attr "mode" "<ssescalarmode>")])
589 (define_expand "mul<mode>3"
590 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
592 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
593 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
594 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
595 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
597 (define_insn "*avx_mul<mode>3"
598 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
600 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
601 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
602 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
603 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
604 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
605 [(set_attr "type" "ssemul")
606 (set_attr "prefix" "vex")
607 (set_attr "mode" "<avxvecmode>")])
609 (define_expand "mul<mode>3"
610 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
612 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
613 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
614 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
615 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
617 (define_insn "*mul<mode>3"
618 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
620 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
621 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
622 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
623 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
624 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
625 [(set_attr "type" "ssemul")
626 (set_attr "mode" "<MODE>")])
;; AVX three-operand scalar multiply (vmulss / vmulsd) expressed as a
;; vec_merge over the 128-bit float modes in SSEMODEF2P.
;; The condition uses AVX128_VEC_FLOAT_MODE_P, the same predicate as the
;; sibling *avx_vm<plusminus_insn>, *avx_vmdiv and *avx_vm<code> patterns;
;; the iterator only contains 128-bit modes, so the 128-bit predicate is
;; the precise one.
628 (define_insn "*avx_vmmul<mode>3"
629 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
630 (vec_merge:SSEMODEF2P
632 (match_operand:SSEMODEF2P 1 "register_operand" "x")
633 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
636 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
637 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
638 [(set_attr "type" "ssemul")
639 (set_attr "prefix" "vex")
640 (set_attr "mode" "<ssescalarmode>")])
642 (define_insn "<sse>_vmmul<mode>3"
643 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
644 (vec_merge:SSEMODEF2P
646 (match_operand:SSEMODEF2P 1 "register_operand" "0")
647 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
650 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
651 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
652 [(set_attr "type" "ssemul")
653 (set_attr "mode" "<ssescalarmode>")])
655 (define_expand "divv8sf3"
656 [(set (match_operand:V8SF 0 "register_operand" "")
657 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
658 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
661 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
663 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
664 && flag_finite_math_only && !flag_trapping_math
665 && flag_unsafe_math_optimizations)
667 ix86_emit_swdivsf (operands[0], operands[1],
668 operands[2], V8SFmode);
673 (define_expand "divv4df3"
674 [(set (match_operand:V4DF 0 "register_operand" "")
675 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
676 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
678 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
680 (define_insn "avx_div<mode>3"
681 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
683 (match_operand:AVXMODEF2P 1 "register_operand" "x")
684 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
685 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
686 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
687 [(set_attr "type" "ssediv")
688 (set_attr "prefix" "vex")
689 (set_attr "mode" "<MODE>")])
691 (define_expand "divv4sf3"
692 [(set (match_operand:V4SF 0 "register_operand" "")
693 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
694 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
697 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
698 && flag_finite_math_only && !flag_trapping_math
699 && flag_unsafe_math_optimizations)
701 ix86_emit_swdivsf (operands[0], operands[1],
702 operands[2], V4SFmode);
707 (define_expand "divv2df3"
708 [(set (match_operand:V2DF 0 "register_operand" "")
709 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
710 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
714 (define_insn "*avx_div<mode>3"
715 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
717 (match_operand:SSEMODEF2P 1 "register_operand" "x")
718 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
719 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
720 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
721 [(set_attr "type" "ssediv")
722 (set_attr "prefix" "vex")
723 (set_attr "mode" "<MODE>")])
725 (define_insn "<sse>_div<mode>3"
726 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
728 (match_operand:SSEMODEF2P 1 "register_operand" "0")
729 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
730 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
731 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
732 [(set_attr "type" "ssediv")
733 (set_attr "mode" "<MODE>")])
735 (define_insn "*avx_vmdiv<mode>3"
736 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
737 (vec_merge:SSEMODEF2P
739 (match_operand:SSEMODEF2P 1 "register_operand" "x")
740 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
743 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
744 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
745 [(set_attr "type" "ssediv")
746 (set_attr "prefix" "vex")
747 (set_attr "mode" "<ssescalarmode>")])
749 (define_insn "<sse>_vmdiv<mode>3"
750 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
751 (vec_merge:SSEMODEF2P
753 (match_operand:SSEMODEF2P 1 "register_operand" "0")
754 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
757 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
758 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
759 [(set_attr "type" "ssediv")
760 (set_attr "mode" "<ssescalarmode>")])
762 (define_insn "avx_rcpv8sf2"
763 [(set (match_operand:V8SF 0 "register_operand" "=x")
765 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
767 "vrcpps\t{%1, %0|%0, %1}"
768 [(set_attr "type" "sse")
769 (set_attr "prefix" "vex")
770 (set_attr "mode" "V8SF")])
772 (define_insn "sse_rcpv4sf2"
773 [(set (match_operand:V4SF 0 "register_operand" "=x")
775 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
777 "%vrcpps\t{%1, %0|%0, %1}"
778 [(set_attr "type" "sse")
779 (set_attr "atom_sse_attr" "rcp")
780 (set_attr "prefix" "maybe_vex")
781 (set_attr "mode" "V4SF")])
783 (define_insn "*avx_vmrcpv4sf2"
784 [(set (match_operand:V4SF 0 "register_operand" "=x")
786 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
788 (match_operand:V4SF 2 "register_operand" "x")
791 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
792 [(set_attr "type" "sse")
793 (set_attr "prefix" "vex")
794 (set_attr "mode" "SF")])
796 (define_insn "sse_vmrcpv4sf2"
797 [(set (match_operand:V4SF 0 "register_operand" "=x")
799 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
801 (match_operand:V4SF 2 "register_operand" "0")
804 "rcpss\t{%1, %0|%0, %1}"
805 [(set_attr "type" "sse")
806 (set_attr "atom_sse_attr" "rcp")
807 (set_attr "mode" "SF")])
809 (define_expand "sqrtv8sf2"
810 [(set (match_operand:V8SF 0 "register_operand" "")
811 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
814 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
815 && flag_finite_math_only && !flag_trapping_math
816 && flag_unsafe_math_optimizations)
818 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
823 (define_insn "avx_sqrtv8sf2"
824 [(set (match_operand:V8SF 0 "register_operand" "=x")
825 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
827 "vsqrtps\t{%1, %0|%0, %1}"
828 [(set_attr "type" "sse")
829 (set_attr "prefix" "vex")
830 (set_attr "mode" "V8SF")])
832 (define_expand "sqrtv4sf2"
833 [(set (match_operand:V4SF 0 "register_operand" "")
834 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
837 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
838 && flag_finite_math_only && !flag_trapping_math
839 && flag_unsafe_math_optimizations)
841 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
846 (define_insn "sse_sqrtv4sf2"
847 [(set (match_operand:V4SF 0 "register_operand" "=x")
848 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
850 "%vsqrtps\t{%1, %0|%0, %1}"
851 [(set_attr "type" "sse")
852 (set_attr "atom_sse_attr" "sqrt")
853 (set_attr "prefix" "maybe_vex")
854 (set_attr "mode" "V4SF")])
856 (define_insn "sqrtv4df2"
857 [(set (match_operand:V4DF 0 "register_operand" "=x")
858 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
860 "vsqrtpd\t{%1, %0|%0, %1}"
861 [(set_attr "type" "sse")
862 (set_attr "prefix" "vex")
863 (set_attr "mode" "V4DF")])
865 (define_insn "sqrtv2df2"
866 [(set (match_operand:V2DF 0 "register_operand" "=x")
867 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
869 "%vsqrtpd\t{%1, %0|%0, %1}"
870 [(set_attr "type" "sse")
871 (set_attr "prefix" "maybe_vex")
872 (set_attr "mode" "V2DF")])
;; AVX scalar square root (vsqrtss / vsqrtsd) expressed as a vec_merge over
;; the 128-bit float modes in SSEMODEF2P; operand 1 is the source (register
;; or memory) and operand 2 is the register printed as the middle operand.
;; The condition uses AVX128_VEC_FLOAT_MODE_P, matching the other AVX
;; scalar-merge patterns on this iterator (plus/minus, div, min/max).
874 (define_insn "*avx_vmsqrt<mode>2"
875 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
876 (vec_merge:SSEMODEF2P
878 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
879 (match_operand:SSEMODEF2P 2 "register_operand" "x")
881 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
882 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
883 [(set_attr "type" "sse")
884 (set_attr "prefix" "vex")
885 (set_attr "mode" "<ssescalarmode>")])
887 (define_insn "<sse>_vmsqrt<mode>2"
888 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
889 (vec_merge:SSEMODEF2P
891 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
892 (match_operand:SSEMODEF2P 2 "register_operand" "0")
894 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
895 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
896 [(set_attr "type" "sse")
897 (set_attr "atom_sse_attr" "sqrt")
898 (set_attr "mode" "<ssescalarmode>")])
900 (define_expand "rsqrtv8sf2"
901 [(set (match_operand:V8SF 0 "register_operand" "")
903 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
904 "TARGET_AVX && TARGET_SSE_MATH"
906 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
910 (define_insn "avx_rsqrtv8sf2"
911 [(set (match_operand:V8SF 0 "register_operand" "=x")
913 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
915 "vrsqrtps\t{%1, %0|%0, %1}"
916 [(set_attr "type" "sse")
917 (set_attr "prefix" "vex")
918 (set_attr "mode" "V8SF")])
920 (define_expand "rsqrtv4sf2"
921 [(set (match_operand:V4SF 0 "register_operand" "")
923 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
926 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
930 (define_insn "sse_rsqrtv4sf2"
931 [(set (match_operand:V4SF 0 "register_operand" "=x")
933 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
935 "%vrsqrtps\t{%1, %0|%0, %1}"
936 [(set_attr "type" "sse")
937 (set_attr "prefix" "maybe_vex")
938 (set_attr "mode" "V4SF")])
940 (define_insn "*avx_vmrsqrtv4sf2"
941 [(set (match_operand:V4SF 0 "register_operand" "=x")
943 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
945 (match_operand:V4SF 2 "register_operand" "x")
948 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
949 [(set_attr "type" "sse")
950 (set_attr "prefix" "vex")
951 (set_attr "mode" "SF")])
953 (define_insn "sse_vmrsqrtv4sf2"
954 [(set (match_operand:V4SF 0 "register_operand" "=x")
956 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
958 (match_operand:V4SF 2 "register_operand" "0")
961 "rsqrtss\t{%1, %0|%0, %1}"
962 [(set_attr "type" "sse")
963 (set_attr "mode" "SF")])
965 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
966 ;; isn't really correct, as those rtl operators aren't defined when
967 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
969 (define_expand "<code><mode>3"
970 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
971 (smaxmin:AVX256MODEF2P
972 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
973 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
974 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
976 if (!flag_finite_math_only)
977 operands[1] = force_reg (<MODE>mode, operands[1]);
978 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
981 (define_expand "<code><mode>3"
982 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
984 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
985 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
986 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
988 if (!flag_finite_math_only)
989 operands[1] = force_reg (<MODE>mode, operands[1]);
990 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max insn that matches only when flag_finite_math_only is
;; set.  The "%" on operand 1 marks the two inputs as commutative, and the
;; operands must also satisfy ix86_binary_operator_ok.  Emits the
;; three-operand VEX form.
993 (define_insn "*avx_<code><mode>3_finite"
994 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
996 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
997 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
998 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
999 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1000 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1001 [(set_attr "type" "sseadd")
1002 (set_attr "prefix" "vex")
1003 (set_attr "mode" "<MODE>")])
;; Non-VEX packed min/max insn that matches only when
;; flag_finite_math_only is set.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); the two-operand form is emitted.
1005 (define_insn "*<code><mode>3_finite"
1006 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1008 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1009 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1010 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1011 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1012 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1013 [(set_attr "type" "sseadd")
1014 (set_attr "mode" "<MODE>")])
;; AVX packed min/max insn without the finite-math requirement.  The inputs
;; are deliberately NOT commutative here: operand 1 must be a register and
;; carries no "%" modifier, as in the non-VEX "*<code><mode>3" pattern,
;; because swapping the operands changes the result for NaN and -0.0
;; inputs.  Emits the three-operand VEX form.
1016 (define_insn "*avx_<code><mode>3"
1017 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1019 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1020 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1021 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1022 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1023 [(set_attr "type" "sseadd")
1024 (set_attr "prefix" "vex")
1025 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX packed min/max insn without the finite-math requirement.
;; Operand 1 is a register tied to the destination and has no "%"
;; (commutative) marker; the two-operand form is emitted.
1027 (define_insn "*<code><mode>3"
1028 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1030 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1031 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1032 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1033 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1034 [(set_attr "type" "sseadd")
1035 (set_attr "mode" "<MODE>")])
;; AVX scalar min/max in vec_merge form; emits the three-operand
;; v<maxminfprefix>s<suffix> instruction.  Operand 1 must be a register,
;; operand 2 may be a register or memory.  The "type" attribute is
;; "sseadd", the same as the non-VEX <sse>_vm<code><mode>3 pattern and the
;; packed min/max patterns.
1037 (define_insn "*avx_vm<code><mode>3"
1038 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1039 (vec_merge:SSEMODEF2P
1041 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1042 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1045 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1046 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1047 [(set_attr "type" "sseadd")
1048 (set_attr "prefix" "vex")
1049 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX scalar min/max in vec_merge form.  Operand 1 is a register tied
;; to the destination; the two-operand <maxminfprefix>s<suffix>
;; instruction is emitted with operand 2 (register or memory) as source.
1051 (define_insn "<sse>_vm<code><mode>3"
1052 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1053 (vec_merge:SSEMODEF2P
1055 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1056 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1059 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1060 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1061 [(set_attr "type" "sseadd")
1062 (set_attr "mode" "<ssescalarmode>")])
1064 ;; These versions of the min/max patterns implement exactly the operations
1065 ;; min = (op1 < op2 ? op1 : op2)
1066 ;; max = (!(op1 < op2) ? op1 : op2)
1067 ;; Their operands are not commutative, and thus they may be used in the
1068 ;; presence of -0.0 and NaN.
;; AVX IEEE-style minimum: emits three-operand vminp<suffix>.  Operand 1
;; is restricted to a register and the inputs carry no commutative marker,
;; so operand order is preserved.
1070 (define_insn "*avx_ieee_smin<mode>3"
1071 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1073 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1074 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1076 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1077 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1078 [(set_attr "type" "sseadd")
1079 (set_attr "prefix" "vex")
1080 (set_attr "mode" "<avxvecmode>")])
;; AVX IEEE-style maximum: emits three-operand vmaxp<suffix>.  Operand 1
;; is restricted to a register and the inputs carry no commutative marker,
;; so operand order is preserved.
1082 (define_insn "*avx_ieee_smax<mode>3"
1083 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1085 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1086 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1088 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1089 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1090 [(set_attr "type" "sseadd")
1091 (set_attr "prefix" "vex")
1092 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX IEEE-style minimum: emits two-operand minp<suffix>.  Operand 1
;; is a register tied to the destination; operand 2 may be memory.
1094 (define_insn "*ieee_smin<mode>3"
1095 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1097 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1098 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1100 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1101 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1102 [(set_attr "type" "sseadd")
1103 (set_attr "mode" "<MODE>")])
;; Non-VEX IEEE-style maximum: emits two-operand maxp<suffix>.  Operand 1
;; is a register tied to the destination; operand 2 may be memory.
1105 (define_insn "*ieee_smax<mode>3"
1106 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1108 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1109 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1111 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1112 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1113 [(set_attr "type" "sseadd")
1114 (set_attr "mode" "<MODE>")])
;; V8SF add/subtract; emits three-operand "vaddsubps".  The minus arm of
;; the RTL reuses operands 1 and 2 through match_dup.
1116 (define_insn "avx_addsubv8sf3"
1117 [(set (match_operand:V8SF 0 "register_operand" "=x")
1120 (match_operand:V8SF 1 "register_operand" "x")
1121 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1122 (minus:V8SF (match_dup 1) (match_dup 2))
1125 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1126 [(set_attr "type" "sseadd")
1127 (set_attr "prefix" "vex")
1128 (set_attr "mode" "V8SF")])
;; V4DF add/subtract; emits three-operand "vaddsubpd".  The minus arm of
;; the RTL reuses operands 1 and 2 through match_dup.
1130 (define_insn "avx_addsubv4df3"
1131 [(set (match_operand:V4DF 0 "register_operand" "=x")
1134 (match_operand:V4DF 1 "register_operand" "x")
1135 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1136 (minus:V4DF (match_dup 1) (match_dup 2))
1139 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1140 [(set_attr "type" "sseadd")
1141 (set_attr "prefix" "vex")
1142 (set_attr "mode" "V4DF")])
;; VEX-encoded V4SF add/subtract; emits three-operand "vaddsubps" with a
;; destination register separate from source operand 1.
1144 (define_insn "*avx_addsubv4sf3"
1145 [(set (match_operand:V4SF 0 "register_operand" "=x")
1148 (match_operand:V4SF 1 "register_operand" "x")
1149 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1150 (minus:V4SF (match_dup 1) (match_dup 2))
1153 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1154 [(set_attr "type" "sseadd")
1155 (set_attr "prefix" "vex")
1156 (set_attr "mode" "V4SF")])
;; Non-VEX V4SF add/subtract; emits two-operand "addsubps" with operand 1
;; tied to the destination.  The prefix_rep attribute is set to "1".
1158 (define_insn "sse3_addsubv4sf3"
1159 [(set (match_operand:V4SF 0 "register_operand" "=x")
1162 (match_operand:V4SF 1 "register_operand" "0")
1163 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1164 (minus:V4SF (match_dup 1) (match_dup 2))
1167 "addsubps\t{%2, %0|%0, %2}"
1168 [(set_attr "type" "sseadd")
1169 (set_attr "prefix_rep" "1")
1170 (set_attr "mode" "V4SF")])
;; VEX-encoded V2DF add/subtract; emits three-operand "vaddsubpd" with a
;; destination register separate from source operand 1.
1172 (define_insn "*avx_addsubv2df3"
1173 [(set (match_operand:V2DF 0 "register_operand" "=x")
1176 (match_operand:V2DF 1 "register_operand" "x")
1177 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1178 (minus:V2DF (match_dup 1) (match_dup 2))
1181 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1182 [(set_attr "type" "sseadd")
1183 (set_attr "prefix" "vex")
1184 (set_attr "mode" "V2DF")])
;; Non-VEX V2DF add/subtract; emits two-operand "addsubpd" with operand 1
;; tied to the destination.  Marked atom_unit "complex".
1186 (define_insn "sse3_addsubv2df3"
1187 [(set (match_operand:V2DF 0 "register_operand" "=x")
1190 (match_operand:V2DF 1 "register_operand" "0")
1191 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1192 (minus:V2DF (match_dup 1) (match_dup 2))
1195 "addsubpd\t{%2, %0|%0, %2}"
1196 [(set_attr "type" "sseadd")
1197 (set_attr "atom_unit" "complex")
1198 (set_attr "mode" "V2DF")])
;; V4DF horizontal add/subtract; emits vh<plusminus_mnemonic>pd.  The RTL
;; picks elements 0-3 of operand 1 and elements 0-3 of operand 2 one at a
;; time with vec_select.  Operand 2 may be a register or memory.
1200 (define_insn "avx_h<plusminus_insn>v4df3"
1201 [(set (match_operand:V4DF 0 "register_operand" "=x")
1206 (match_operand:V4DF 1 "register_operand" "x")
1207 (parallel [(const_int 0)]))
1208 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1210 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1211 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1215 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1216 (parallel [(const_int 0)]))
1217 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1219 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1220 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1222 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1223 [(set_attr "type" "sseadd")
1224 (set_attr "prefix" "vex")
1225 (set_attr "mode" "V4DF")])
;; V8SF horizontal add/subtract; emits vh<plusminus_mnemonic>ps.  The RTL
;; lists elements 0-3 of operand 1, then 0-3 of operand 2, then 4-7 of
;; operand 1, then 4-7 of operand 2, each picked with vec_select.
1227 (define_insn "avx_h<plusminus_insn>v8sf3"
1228 [(set (match_operand:V8SF 0 "register_operand" "=x")
1234 (match_operand:V8SF 1 "register_operand" "x")
1235 (parallel [(const_int 0)]))
1236 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1238 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1239 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1243 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1244 (parallel [(const_int 0)]))
1245 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1247 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1248 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1252 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1253 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1255 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1256 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1259 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1260 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1262 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1263 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1265 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1266 [(set_attr "type" "sseadd")
1267 (set_attr "prefix" "vex")
1268 (set_attr "mode" "V8SF")])
;; VEX-encoded V4SF horizontal add/subtract; emits the three-operand
;; vh<plusminus_mnemonic>ps.  The RTL picks elements 0-3 of operand 1 and
;; then elements 0-3 of operand 2 with vec_select.
1270 (define_insn "*avx_h<plusminus_insn>v4sf3"
1271 [(set (match_operand:V4SF 0 "register_operand" "=x")
1276 (match_operand:V4SF 1 "register_operand" "x")
1277 (parallel [(const_int 0)]))
1278 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1280 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1281 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1285 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1286 (parallel [(const_int 0)]))
1287 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1289 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1290 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1292 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1293 [(set_attr "type" "sseadd")
1294 (set_attr "prefix" "vex")
1295 (set_attr "mode" "V4SF")])
;; Non-VEX V4SF horizontal add/subtract; emits the two-operand
;; h<plusminus_mnemonic>ps with operand 1 tied to the destination.
;; Attributes: atom_unit "complex" and prefix_rep "1".
1297 (define_insn "sse3_h<plusminus_insn>v4sf3"
1298 [(set (match_operand:V4SF 0 "register_operand" "=x")
1303 (match_operand:V4SF 1 "register_operand" "0")
1304 (parallel [(const_int 0)]))
1305 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1307 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1308 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1312 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1313 (parallel [(const_int 0)]))
1314 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1316 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1317 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1319 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1320 [(set_attr "type" "sseadd")
1321 (set_attr "atom_unit" "complex")
1322 (set_attr "prefix_rep" "1")
1323 (set_attr "mode" "V4SF")])
;; VEX-encoded V2DF horizontal add/subtract; emits the three-operand
;; vh<plusminus_mnemonic>pd.  The RTL picks elements 0 and 1 of operand 1
;; and elements 0 and 1 of operand 2 with vec_select.
1325 (define_insn "*avx_h<plusminus_insn>v2df3"
1326 [(set (match_operand:V2DF 0 "register_operand" "=x")
1330 (match_operand:V2DF 1 "register_operand" "x")
1331 (parallel [(const_int 0)]))
1332 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1335 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1336 (parallel [(const_int 0)]))
1337 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1339 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1340 [(set_attr "type" "sseadd")
1341 (set_attr "prefix" "vex")
1342 (set_attr "mode" "V2DF")])
;; Non-VEX V2DF horizontal add/subtract; emits the two-operand
;; h<plusminus_mnemonic>pd with operand 1 tied to the destination.
1344 (define_insn "sse3_h<plusminus_insn>v2df3"
1345 [(set (match_operand:V2DF 0 "register_operand" "=x")
1349 (match_operand:V2DF 1 "register_operand" "0")
1350 (parallel [(const_int 0)]))
1351 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1354 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1355 (parallel [(const_int 0)]))
1356 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1358 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1359 [(set_attr "type" "sseadd")
1360 (set_attr "mode" "V2DF")])
;; Sum reduction of V4SF operand 1 into operand 0.  Two expansions exist:
;; two sse3 haddv4sf3 insns chained through a fresh temporary register, or
;; a call to ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): presumably the hadd path is chosen when SSE3 is
;; available -- confirm.
1362 (define_expand "reduc_splus_v4sf"
1363 [(match_operand:V4SF 0 "register_operand" "")
1364 (match_operand:V4SF 1 "register_operand" "")]
1369 rtx tmp = gen_reg_rtx (V4SFmode);
1370 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1371 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1374 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of V2DF operand 1 into operand 0, expanded as a single
;; sse3 haddv2df3 insn that uses operand 1 for both sources.
1378 (define_expand "reduc_splus_v2df"
1379 [(match_operand:V2DF 0 "register_operand" "")
1380 (match_operand:V2DF 1 "register_operand" "")]
1383 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of V4SF operand 1 into operand 0, delegated to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining insn.
1387 (define_expand "reduc_smax_v4sf"
1388 [(match_operand:V4SF 0 "register_operand" "")
1389 (match_operand:V4SF 1 "register_operand" "")]
1392 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of V4SF operand 1 into operand 0, delegated to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining insn.
1396 (define_expand "reduc_smin_v4sf"
1397 [(match_operand:V4SF 0 "register_operand" "")
1398 (match_operand:V4SF 1 "register_operand" "")]
1401 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1405 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1407 ;; Parallel floating point comparisons
1409 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare with an immediate predicate.  Operand
;; 3 is a constant accepted by const_0_to_31_operand and is printed as the
;; immediate of vcmpp<suffix>; length_immediate is therefore "1".
1411 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1412 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1414 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1415 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1416 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1419 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1420 [(set_attr "type" "ssecmp")
1421 (set_attr "length_immediate" "1")
1422 (set_attr "prefix" "vex")
1423 (set_attr "mode" "<MODE>")])
;; AVX scalar floating-point compare in vec_merge form, with an immediate
;; predicate in operand 3 (const_0_to_31_operand).  Operand 0 is the
;; output and carries the "=x" constraint so that it is allocated to an
;; SSE register and treated as written, as in the packed avx_cmpp pattern.
1425 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1426 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1427 (vec_merge:SSEMODEF2P
1429 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1430 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1431 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1436 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1437 [(set_attr "type" "ssecmp")
1438 (set_attr "length_immediate" "1")
1439 (set_attr "prefix" "vex")
1440 (set_attr "mode" "<ssescalarmode>")])
1442 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1443 ;; may generate 256bit vector compare instructions.
;; AVX mask compare.  The comparison code comes from operator 3
;; (avx_comparison_float_operator) and is printed into the mnemonic via
;; %D3.  Operand 1 must be a register; operand 2 may be register or memory.
1444 (define_insn "*avx_maskcmp<mode>3"
1445 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1446 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1447 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1448 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1449 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1450 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1451 [(set_attr "type" "ssecmp")
1452 (set_attr "prefix" "vex")
1453 (set_attr "length_immediate" "1")
1454 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX mask compare over the SSEMODEF4 modes (SF, DF, V4SF, V2DF).
;; Operand 1 is tied to the destination; the comparison code from operator
;; 3 (sse_comparison_operator) is printed into the mnemonic via %D3.
1456 (define_insn "<sse>_maskcmp<mode>3"
1457 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1458 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1459 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1460 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1462 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1463 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1464 [(set_attr "type" "ssecmp")
1465 (set_attr "length_immediate" "1")
1466 (set_attr "mode" "<MODE>")])
;; Non-VEX scalar mask compare in vec_merge form.  Operand 1 is tied to
;; the destination; the comparison code from operator 3 is printed via
;; %D3 into the cmp..s<suffix> mnemonic.
1468 (define_insn "<sse>_vmmaskcmp<mode>3"
1469 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1470 (vec_merge:SSEMODEF2P
1471 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1472 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1473 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1476 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1477 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1478 [(set_attr "type" "ssecmp")
1479 (set_attr "length_immediate" "1")
1480 (set_attr "mode" "<ssescalarmode>")])
;; Scalar comis<suffix> compare that sets FLAGS_REG in CCFP mode.  Both
;; inputs are element 0 of their vectors; operand 1 may be memory.  The
;; "%v" in the template goes with the "maybe_vex" prefix attribute, and
;; prefix_data16 is computed from whether the insn mode is DF.
1482 (define_insn "<sse>_comi"
1483 [(set (reg:CCFP FLAGS_REG)
1486 (match_operand:<ssevecmode> 0 "register_operand" "x")
1487 (parallel [(const_int 0)]))
1489 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1490 (parallel [(const_int 0)]))))]
1491 "SSE_FLOAT_MODE_P (<MODE>mode)"
1492 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1493 [(set_attr "type" "ssecomi")
1494 (set_attr "prefix" "maybe_vex")
1495 (set_attr "prefix_rep" "0")
1496 (set (attr "prefix_data16")
1497 (if_then_else (eq_attr "mode" "DF")
1499 (const_string "0")))
1500 (set_attr "mode" "<MODE>")])
;; Scalar ucomis<suffix> compare that sets FLAGS_REG in CCFPU mode.  Both
;; inputs are element 0 of their vectors; operand 1 may be memory.  The
;; "%v" in the template goes with the "maybe_vex" prefix attribute, and
;; prefix_data16 is computed from whether the insn mode is DF.
1502 (define_insn "<sse>_ucomi"
1503 [(set (reg:CCFPU FLAGS_REG)
1506 (match_operand:<ssevecmode> 0 "register_operand" "x")
1507 (parallel [(const_int 0)]))
1509 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1510 (parallel [(const_int 0)]))))]
1511 "SSE_FLOAT_MODE_P (<MODE>mode)"
1512 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1513 [(set_attr "type" "ssecomi")
1514 (set_attr "prefix" "maybe_vex")
1515 (set_attr "prefix_rep" "0")
1516 (set (attr "prefix_data16")
1517 (if_then_else (eq_attr "mode" "DF")
1519 (const_string "0")))
1520 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for the SSE float vector modes.
;; Operator 3 compares operands 4 and 5 and selects between operands 1 and
;; 2.  Expansion is delegated to ix86_expand_fp_vcond; its boolean result
;; is captured in "ok".
1522 (define_expand "vcond<mode>"
1523 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1524 (if_then_else:SSEMODEF2P
1525 (match_operator 3 ""
1526 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1527 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1528 (match_operand:SSEMODEF2P 1 "general_operand" "")
1529 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1530 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1532 bool ok = ix86_expand_fp_vcond (operands);
1537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1539 ;; Parallel floating point logical operations
1541 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed and-not on float vector modes; emits three-operand
;; vandnp<suffix>.  Operand 1 must be a register; operand 2 may be a
;; register or memory.
1543 (define_insn "avx_andnot<mode>3"
1544 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1547 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1548 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1549 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1550 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1551 [(set_attr "type" "sselog")
1552 (set_attr "prefix" "vex")
1553 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX packed and-not on float vector modes; emits two-operand
;; andnp<suffix> with operand 1 tied to the destination.
1555 (define_insn "<sse>_andnot<mode>3"
1556 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1559 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1560 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1561 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1562 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1563 [(set_attr "type" "sselog")
1564 (set_attr "mode" "<MODE>")])
;; Expander for the plogic codes on the AVX256MODEF2P vector modes.  Its
;; only preparation step is ix86_fixup_binary_operands_no_copy.
1566 (define_expand "<code><mode>3"
1567 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1568 (plogic:AVX256MODEF2P
1569 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1570 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1571 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1572 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed logical insn on float vector modes; emits three-operand
;; v<plogicprefix>p<suffix>.  Inputs are commutative ("%" on operand 1)
;; and must satisfy ix86_binary_operator_ok.
1574 (define_insn "*avx_<code><mode>3"
1575 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1577 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1578 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1579 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1580 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1581 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1582 [(set_attr "type" "sselog")
1583 (set_attr "prefix" "vex")
1584 (set_attr "mode" "<avxvecmode>")])
;; Expander for packed logical operations on the SSEMODEF2P vector modes.
;; Its only preparation step is ix86_fixup_binary_operands_no_copy.
1586 (define_expand "<code><mode>3"
1587 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1589 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1590 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1591 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1592 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-VEX packed logical insn on float vector modes; emits two-operand
;; <plogicprefix>p<suffix>.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operands must pass ix86_binary_operator_ok.
1594 (define_insn "*<code><mode>3"
1595 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1597 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1598 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1599 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1600 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1601 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1602 [(set_attr "type" "sselog")
1603 (set_attr "mode" "<MODE>")])
;; copysign expander for the SSE float vector modes.  Operand 3 is a mask
;; built by ix86_build_signbit_mask.  One step combines (not mask) with
;; operand 1, another ANDs the mask with operand 2, and the two
;; temporaries (operands 4 and 5, fresh pseudo registers) are IORed into
;; operand 0.
1605 (define_expand "copysign<mode>3"
1608 (not:SSEMODEF2P (match_dup 3))
1609 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1611 (and:SSEMODEF2P (match_dup 3)
1612 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1613 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1614 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1615 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1617 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1619 operands[4] = gen_reg_rtx (<MODE>mode);
1620 operands[5] = gen_reg_rtx (<MODE>mode);
1623 ;; Also define scalar versions. These are used for abs, neg, and
1624 ;; conditional move. Using subregs into vector modes causes register
1625 ;; allocation lossage. These patterns do not allow memory operands
1626 ;; because the native instructions read the full 128-bits.
;; AVX and-not on scalar MODEF values held in SSE registers.  All operands
;; are registers (no memory alternative); the packed vandnp<suffix>
;; instruction is emitted and the insn mode is the full <ssevecmode>.
1628 (define_insn "*avx_andnot<mode>3"
1629 [(set (match_operand:MODEF 0 "register_operand" "=x")
1632 (match_operand:MODEF 1 "register_operand" "x"))
1633 (match_operand:MODEF 2 "register_operand" "x")))]
1634 "AVX_FLOAT_MODE_P (<MODE>mode)"
1635 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1636 [(set_attr "type" "sselog")
1637 (set_attr "prefix" "vex")
1638 (set_attr "mode" "<ssevecmode>")])
;; Non-VEX and-not on scalar MODEF values held in SSE registers.  Operand
;; 1 is tied to the destination and operand 2 must be a register; the
;; packed andnp<suffix> instruction is emitted.
1640 (define_insn "*andnot<mode>3"
1641 [(set (match_operand:MODEF 0 "register_operand" "=x")
1644 (match_operand:MODEF 1 "register_operand" "0"))
1645 (match_operand:MODEF 2 "register_operand" "x")))]
1646 "SSE_FLOAT_MODE_P (<MODE>mode)"
1647 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1648 [(set_attr "type" "sselog")
1649 (set_attr "mode" "<ssevecmode>")])
;; AVX logical operation on scalar MODEF values held in SSE registers.
;; All operands are registers; the packed v<plogicprefix>p<suffix>
;; instruction is emitted and the insn mode is the full <ssevecmode>.
1651 (define_insn "*avx_<code><mode>3"
1652 [(set (match_operand:MODEF 0 "register_operand" "=x")
1654 (match_operand:MODEF 1 "register_operand" "x")
1655 (match_operand:MODEF 2 "register_operand" "x")))]
1656 "AVX_FLOAT_MODE_P (<MODE>mode)"
1657 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1658 [(set_attr "type" "sselog")
1659 (set_attr "prefix" "vex")
1660 (set_attr "mode" "<ssevecmode>")])
;; Non-VEX logical operation on scalar MODEF values held in SSE registers.
;; Operand 1 is tied to the destination and operand 2 must be a register;
;; the packed <plogicprefix>p<suffix> instruction is emitted.
1662 (define_insn "*<code><mode>3"
1663 [(set (match_operand:MODEF 0 "register_operand" "=x")
1665 (match_operand:MODEF 1 "register_operand" "0")
1666 (match_operand:MODEF 2 "register_operand" "x")))]
1667 "SSE_FLOAT_MODE_P (<MODE>mode)"
1668 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1669 [(set_attr "type" "sselog")
1670 (set_attr "mode" "<ssevecmode>")])
1672 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1674 ;; FMA4 floating point multiply/accumulate instructions. This includes the
1675 ;; scalar versions of the instructions as well as the vector versions.
1677 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1679 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1680 ;; combine to generate a multiply/add with two memory references. We then
1681 ;; split this insn, into loading up the destination register with one of the
1682 ;; memory operations. If we don't manage to split the insn, reload will
1683 ;; generate the appropriate moves. The reason this is needed, is that combine
1684 ;; has already folded one of the memory references into both the multiply and
1685 ;; add insns, and it can't generate a new pseudo. I.e.:
1686 ;; (set (reg1) (mem (addr1)))
1687 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1688 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; 256-bit FMA4 multiply-add over the FMA4MODEF4 modes (V8SF, V4DF).  Each
;; of the three constraint alternatives lets a different one of operands
;; 1-3 be in memory; ix86_fma4_valid_op_p does the final operand check.
1690 (define_insn "fma4_fmadd<mode>4256"
1691 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1694 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1695 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))
1696 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1698 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1699 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1700 [(set_attr "type" "ssemuladd")
1701 (set_attr "mode" "<MODE>")])
1703 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split applies when the operands fail ix86_fma4_valid_op_p with
;; argument 1 but pass it with argument 2, and the destination register
;; is not mentioned in any input.  It calls
;; ix86_expand_fma4_multiple_memory and then emits fma4_fmadd<mode>4256.
1705 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1708 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1709 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1710 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1712 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1713 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1714 && !reg_mentioned_p (operands[0], operands[1])
1715 && !reg_mentioned_p (operands[0], operands[2])
1716 && !reg_mentioned_p (operands[0], operands[3])"
1719 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1720 emit_insn (gen_fma4_fmadd<mode>4256 (operands[0], operands[1],
1721 operands[2], operands[3]));
1725 ;; Floating multiply and subtract
1726 ;; Allow two memory operands the same as fmadd
;; 256-bit FMA4 multiply-subtract over the FMA4MODEF4 modes; emits
;; vfmsub<suffix>.  Each constraint alternative lets a different one of
;; operands 1-3 be in memory.
1727 (define_insn "fma4_fmsub<mode>4256"
1728 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1731 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1732 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))
1733 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1735 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1736 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1737 [(set_attr "type" "ssemuladd")
1738 (set_attr "mode" "<MODE>")])
1740 ;; Split fmsub with two memory operands into a load and the fmsub.
;; The split applies when the operands fail ix86_fma4_valid_op_p with
;; argument 1 but pass it with argument 2, and the destination register
;; is not mentioned in any input.  It calls
;; ix86_expand_fma4_multiple_memory and then emits fma4_fmsub<mode>4256.
1742 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1745 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1746 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1747 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1749 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1750 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1751 && !reg_mentioned_p (operands[0], operands[1])
1752 && !reg_mentioned_p (operands[0], operands[2])
1753 && !reg_mentioned_p (operands[0], operands[3])"
1756 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1757 emit_insn (gen_fma4_fmsub<mode>4256 (operands[0], operands[1],
1758 operands[2], operands[3]));
1762 ;; Floating point negative multiply and add
1763 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1764 ;; Note operands are out of order to simplify call to ix86_fma4_valid_op_p
1765 ;; Allow two memory operands to help in optimizing.
;; Operand 3 appears first in the RTL, ahead of operands 1 and 2; the
;; output template still prints the operands as %3, %2, %1, %0.
1766 (define_insn "fma4_fnmadd<mode>4256"
1767 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1769 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")
1771 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1772 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))))]
1774 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1775 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1776 [(set_attr "type" "ssemuladd")
1777 (set_attr "mode" "<MODE>")])
1779 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; The split applies when the operands fail ix86_fma4_valid_op_p with
;; argument 1 but pass it with argument 2, and the destination register
;; is not mentioned in any input.  It calls
;; ix86_expand_fma4_multiple_memory and then emits fma4_fnmadd<mode>4256.
1781 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1783 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")
1785 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1786 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))))]
1788 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1789 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1790 && !reg_mentioned_p (operands[0], operands[1])
1791 && !reg_mentioned_p (operands[0], operands[2])
1792 && !reg_mentioned_p (operands[0], operands[3])"
1795 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1796 emit_insn (gen_fma4_fnmadd<mode>4256 (operands[0], operands[1],
1797 operands[2], operands[3]));
1801 ;; Floating point negative multiply and subtract
1802 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1803 ;; Allow 2 memory operands to help with optimization
;; 256-bit FMA4 negative multiply-subtract; emits vfnmsub<suffix>.  Only
;; two constraint alternatives exist: operand 1 is always a register, and
;; either operand 3 or operand 2 may be in memory.  The last argument to
;; ix86_fma4_valid_op_p is false here.
1804 (define_insn "fma4_fnmsub<mode>4256"
1805 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1809 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x"))
1810 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
1811 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1813 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
1814 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1815 [(set_attr "type" "ssemuladd")
1816 (set_attr "mode" "<MODE>")])
1818 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; The split applies when the operands fail ix86_fma4_valid_op_p with
;; argument 1 but pass it with argument 2, and the destination register
;; is not mentioned in any input.  It calls
;; ix86_expand_fma4_multiple_memory and then emits fma4_fnmsub<mode>4256.
1820 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1824 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" ""))
1825 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1826 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1828 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)
1829 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)
1830 && !reg_mentioned_p (operands[0], operands[1])
1831 && !reg_mentioned_p (operands[0], operands[2])
1832 && !reg_mentioned_p (operands[0], operands[3])"
1835 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1836 emit_insn (gen_fma4_fnmsub<mode>4256 (operands[0], operands[1],
1837 operands[2], operands[3]));
1841 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA4 multiply-add over the SSEMODEF4 modes (SF, DF, V4SF, V2DF); emits
;; vfmadd<suffix>.  Each of the three constraint alternatives lets a
;; different one of operands 1-3 be in memory.
1842 (define_insn "fma4_fmadd<mode>4"
1843 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1846 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1847 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))
1848 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1850 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1851 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1852 [(set_attr "type" "ssemuladd")
1853 (set_attr "mode" "<MODE>")])
1855 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split applies when the operands fail ix86_fma4_valid_op_p with
;; argument 1 but pass it with argument 2, and the destination register
;; is not mentioned in any input.  It calls
;; ix86_expand_fma4_multiple_memory and then emits fma4_fmadd<mode>4.
1857 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1860 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1861 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1862 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1864 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1865 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1866 && !reg_mentioned_p (operands[0], operands[1])
1867 && !reg_mentioned_p (operands[0], operands[2])
1868 && !reg_mentioned_p (operands[0], operands[3])"
1871 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1872 emit_insn (gen_fma4_fmadd<mode>4 (operands[0], operands[1],
1873 operands[2], operands[3]));
1877 ;; For the scalar operations, use operand1 for the upper words that aren't
1878 ;; modified, so restrict the forms that are generated.
1879 ;; Scalar version of fmadd
;; vec_merge form of FMA4 multiply-add; emits vfmadd<ssemodesuffixf2s>.
;; Only two alternatives exist: operand 1 is always a register, and either
;; operand 3 or operand 2 may be in memory.  ix86_fma4_valid_op_p is
;; called with argument 1 here, where the non-vec_merge insn passes 2.
1880 (define_insn "fma4_vmfmadd<mode>4"
1881 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1882 (vec_merge:SSEMODEF2P
1885 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
1886 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1887 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1891 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
1892 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1893 [(set_attr "type" "ssemuladd")
1894 (set_attr "mode" "<MODE>")])
1896 ;; Floating multiply and subtract
1897 ;; Allow two memory operands the same as fmadd
;; FMA4 multiply-subtract over the SSEMODEF4 modes (SF, DF, V4SF, V2DF);
;; emits vfmsub<suffix>.  Each of the three constraint alternatives lets a
;; different one of operands 1-3 be in memory.
1898 (define_insn "fma4_fmsub<mode>4"
1899 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1902 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1903 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))
1904 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1906 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1907 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1908 [(set_attr "type" "ssemuladd")
1909 (set_attr "mode" "<MODE>")])
1911 ;; Split fmsub with two memory operands into a load and the fmsub.
;; The split applies when the operands fail ix86_fma4_valid_op_p with
;; argument 1 but pass it with argument 2, and the destination register
;; is not mentioned in any input.  It calls
;; ix86_expand_fma4_multiple_memory and then emits fma4_fmsub<mode>4.
1913 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1916 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1917 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1918 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1920 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1921 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1922 && !reg_mentioned_p (operands[0], operands[1])
1923 && !reg_mentioned_p (operands[0], operands[2])
1924 && !reg_mentioned_p (operands[0], operands[3])"
1927 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1928 emit_insn (gen_fma4_fmsub<mode>4 (operands[0], operands[1],
1929 operands[2], operands[3]));
1933 ;; For the scalar operations, use operand1 for the upper words that aren't
1934 ;; modified, so restrict the forms that are generated.
1935 ;; Scalar version of fmsub
;; fma4_vmfmsub<mode>4: vec_merge pattern over SSEMODEF2P with the same
;; constraint layout as fma4_vmfmadd<mode>4: operand 1 register-only,
;; operand 3 (alt 0) or operand 2 (alt 1) optionally memory.
;; Emits vfmsub<ssemodesuffixf2s>.
;; NOTE(review): the last ix86_fma4_valid_op_p argument is false here but
;; true in fma4_vmfmadd<mode>4 and fma4_vmfnmadd<mode>4 -- confirm intended.
1936 (define_insn "fma4_vmfmsub<mode>4"
1937 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1938 (vec_merge:SSEMODEF2P
1941 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
1942 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1943 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1947 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
1948 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1949 [(set_attr "type" "ssemuladd")
1950 (set_attr "mode" "<MODE>")])
1952 ;; Floating point negative multiply and add
1953 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1954 ;; Note operands are out of order to simplify call to ix86_fma4_valid_op_p
1955 ;; Allow two memory operands to help in optimizing.
;; fma4_fnmadd<mode>4: operand 3 is written first in the RTL, followed by
;; operands 1 and 2, but the operand numbers (and so the %1/%2/%3 positions
;; in the output template) are the same as in the other FMA4 patterns.
;; Three alternatives; "xm" sits on operand 3, 2 and 1 in turn.
;; Emits vfnmadd<ssemodesuffixf4>.
1956 (define_insn "fma4_fnmadd<mode>4"
1957 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1959 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")
1961 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1962 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))))]
1964 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1965 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1966 [(set_attr "type" "ssemuladd")
1967 (set_attr "mode" "<MODE>")])
1969 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Pattern, condition and preparation code of the fnmadd split.  Operand 3
;; precedes operands 1 and 2 in the RTL, as in fma4_fnmadd<mode>4.  The
;; split applies when ix86_fma4_valid_op_p fails with fifth argument 1 but
;; succeeds with 2, and operand 0 is not mentioned in operands 1, 2 or 3.
;; The preparation code calls ix86_expand_fma4_multiple_memory, then emits
;; fma4_fnmadd<mode>4 on operands[0..3].
1971 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1973 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1975 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1976 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1978 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1979 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1980 && !reg_mentioned_p (operands[0], operands[1])
1981 && !reg_mentioned_p (operands[0], operands[2])
1982 && !reg_mentioned_p (operands[0], operands[3])"
1985 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1986 emit_insn (gen_fma4_fnmadd<mode>4 (operands[0], operands[1],
1987 operands[2], operands[3]));
1991 ;; For the scalar operations, use operand1 for the upper words that aren't
1992 ;; modified, so restrict the forms that are generated.
1993 ;; Scalar version of fnmadd
;; fma4_vmfnmadd<mode>4: vec_merge pattern over SSEMODEF2P.  Operand 3 is
;; listed first in the RTL and may be memory in alternative 0; operand 2
;; may be memory in alternative 1; operand 1 is register-only.
;; Emits vfnmadd<ssemodesuffixf2s>.
1994 (define_insn "fma4_vmfnmadd<mode>4"
1995 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1996 (vec_merge:SSEMODEF2P
1998 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2000 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2001 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2005 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2006 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2007 [(set_attr "type" "ssemuladd")
2008 (set_attr "mode" "<MODE>")])
2010 ;; Floating point negative multiply and subtract
2011 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
2012 ;; Allow 2 memory operands to help with optimization
;; fma4_fnmsub<mode>4: only two alternatives, unlike fma4_fmsub<mode>4 and
;; fma4_fnmadd<mode>4 -- operand 1 is register-only, operand 3 (alt 0) or
;; operand 2 (alt 1) may be memory.  All source predicates are
;; nonimmediate_operand; the condition ends with
;; ix86_fma4_valid_op_p (..., 2, false).  Emits vfnmsub<ssemodesuffixf4>.
2013 (define_insn "fma4_fnmsub<mode>4"
2014 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
2018 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x"))
2019 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
2020 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
2022 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
2023 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2024 [(set_attr "type" "ssemuladd")
2025 (set_attr "mode" "<MODE>")])
2027 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Pattern, condition and preparation code of the fnmsub split.  It applies
;; when ix86_fma4_valid_op_p fails with final arguments (1, false) but
;; succeeds with (2, false), and operand 0 is not mentioned in operands 1,
;; 2 or 3.  The preparation code calls ix86_expand_fma4_multiple_memory and
;; then emits fma4_fnmsub<mode>4 on operands[0..3].
2029 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
2033 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
2034 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
2035 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
2037 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)
2038 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)
2039 && !reg_mentioned_p (operands[0], operands[1])
2040 && !reg_mentioned_p (operands[0], operands[2])
2041 && !reg_mentioned_p (operands[0], operands[3])"
2044 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
2045 emit_insn (gen_fma4_fnmsub<mode>4 (operands[0], operands[1],
2046 operands[2], operands[3]));
2050 ;; For the scalar operations, use operand1 for the upper words that aren't
2051 ;; modified, so restrict the forms that are generated.
2052 ;; Scalar version of fnmsub
;; fma4_vmfnmsub<mode>4: vec_merge pattern over SSEMODEF2P; operand 1 is
;; register-only, operand 3 (alt 0) or operand 2 (alt 1) may be memory.
;; Emits vfnmsub<ssemodesuffixf2s>.
;; NOTE(review): the fifth ix86_fma4_valid_op_p argument is 2 here, whereas
;; fma4_vmfmadd/vmfmsub/vmfnmadd<mode>4 pass 1 -- confirm intended.
2053 (define_insn "fma4_vmfnmsub<mode>4"
2054 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2055 (vec_merge:SSEMODEF2P
2059 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x"))
2060 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2061 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2065 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
2066 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2067 [(set_attr "type" "ssemuladd")
2068 (set_attr "mode" "<MODE>")])
2070 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; fma4i_fmadd<mode>4256: intrinsic variant over FMA4MODEF4 (V8SF, V4DF).
;; The source expression is tagged with UNSPEC_FMA4_INTRINSIC and the
;; condition needs only TARGET_FMA4 plus ix86_fma4_valid_op_p.  Operand 1
;; is register-only; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
;; Emits vfmadd<fma4modesuffixf4>.
2072 (define_insn "fma4i_fmadd<mode>4256"
2073 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2077 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2078 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2079 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2080 UNSPEC_FMA4_INTRINSIC))]
2081 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2082 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2083 [(set_attr "type" "ssemuladd")
2084 (set_attr "mode" "<MODE>")])
;; fma4i_fmsub<mode>4256: intrinsic variant over FMA4MODEF4 (V8SF, V4DF),
;; tagged with UNSPEC_FMA4_INTRINSIC and gated on TARGET_FMA4 plus
;; ix86_fma4_valid_op_p.  Operand 1 is register-only; operand 3 (alt 0) or
;; operand 2 (alt 1) may be memory.  Emits vfmsub<fma4modesuffixf4>.
2086 (define_insn "fma4i_fmsub<mode>4256"
2087 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2091 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2092 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2093 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2094 UNSPEC_FMA4_INTRINSIC))]
2095 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2096 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2097 [(set_attr "type" "ssemuladd")
2098 (set_attr "mode" "<MODE>")])
;; fma4i_fnmadd<mode>4256: intrinsic variant over FMA4MODEF4 (V8SF, V4DF),
;; tagged with UNSPEC_FMA4_INTRINSIC.  Operand 3 is written first in the
;; RTL and may be memory in alternative 0; operand 2 may be memory in
;; alternative 1; operand 1 is register-only.
;; Emits vfnmadd<fma4modesuffixf4>.
2100 (define_insn "fma4i_fnmadd<mode>4256"
2101 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2104 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
2106 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2107 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm")))]
2108 UNSPEC_FMA4_INTRINSIC))]
2109 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2110 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2111 [(set_attr "type" "ssemuladd")
2112 (set_attr "mode" "<MODE>")])
;; fma4i_fnmsub<mode>4256: intrinsic variant over FMA4MODEF4 (V8SF, V4DF),
;; tagged with UNSPEC_FMA4_INTRINSIC.  Operand 1 is register-only; operand
;; 3 (alt 0) or operand 2 (alt 1) may be memory.  The last
;; ix86_fma4_valid_op_p argument is false here, whereas the other three
;; *4256 intrinsic patterns pass true.  Emits vfnmsub<fma4modesuffixf4>.
2114 (define_insn "fma4i_fnmsub<mode>4256"
2115 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2120 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x"))
2121 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2122 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2123 UNSPEC_FMA4_INTRINSIC))]
2124 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2125 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2126 [(set_attr "type" "ssemuladd")
2127 (set_attr "mode" "<MODE>")])
2128 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; fma4i_fmadd<mode>4: intrinsic variant over SSEMODEF2P, tagged with
;; UNSPEC_FMA4_INTRINSIC and gated on TARGET_FMA4 plus
;; ix86_fma4_valid_op_p.  Operand 1 is register-only; operand 3 (alt 0) or
;; operand 2 (alt 1) may be memory.  Emits vfmadd<ssemodesuffixf4>.
2130 (define_insn "fma4i_fmadd<mode>4"
2131 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2135 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2136 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2137 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2138 UNSPEC_FMA4_INTRINSIC))]
2139 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2140 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2141 [(set_attr "type" "ssemuladd")
2142 (set_attr "mode" "<MODE>")])
;; fma4i_fmsub<mode>4: intrinsic variant over SSEMODEF2P, tagged with
;; UNSPEC_FMA4_INTRINSIC.  Operand 1 is register-only; operand 3 (alt 0) or
;; operand 2 (alt 1) may be memory.  Emits vfmsub<ssemodesuffixf4>.
2144 (define_insn "fma4i_fmsub<mode>4"
2145 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2149 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2150 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2151 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2152 UNSPEC_FMA4_INTRINSIC))]
2153 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2154 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2155 [(set_attr "type" "ssemuladd")
2156 (set_attr "mode" "<MODE>")])
;; fma4i_fnmadd<mode>4: intrinsic variant over SSEMODEF2P, tagged with
;; UNSPEC_FMA4_INTRINSIC.  Operand 3 comes first in the RTL and may be
;; memory in alternative 0; operand 2 may be memory in alternative 1;
;; operand 1 is register-only.  Emits vfnmadd<ssemodesuffixf4>.
2158 (define_insn "fma4i_fnmadd<mode>4"
2159 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2162 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2164 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2165 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))]
2166 UNSPEC_FMA4_INTRINSIC))]
2167 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2168 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2169 [(set_attr "type" "ssemuladd")
2170 (set_attr "mode" "<MODE>")])
;; fma4i_fnmsub<mode>4: intrinsic variant over SSEMODEF2P, tagged with
;; UNSPEC_FMA4_INTRINSIC.  Operand 1 is register-only; operand 3 (alt 0) or
;; operand 2 (alt 1) may be memory.  The last ix86_fma4_valid_op_p argument
;; is false, unlike the fmadd/fmsub/fnmadd intrinsic siblings.
;; Emits vfnmsub<ssemodesuffixf4>.
2172 (define_insn "fma4i_fnmsub<mode>4"
2173 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2178 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x"))
2179 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2180 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2181 UNSPEC_FMA4_INTRINSIC))]
2182 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2183 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2184 [(set_attr "type" "ssemuladd")
2185 (set_attr "mode" "<MODE>")])
2187 ;; For the scalar operations, use operand1 for the upper words that aren't
2188 ;; modified, so restrict the forms that are accepted.
;; fma4i_vmfmadd<mode>4: intrinsic vec_merge variant over SSEMODEF2P,
;; tagged with UNSPEC_FMA4_INTRINSIC.  Operand 1 uses the register_operand
;; predicate; operand 3 (alt 0) or operand 2 (alt 1) may be memory.  The
;; "mode" attribute is <ssescalarmode> rather than <MODE>.
;; Emits vfmadd<ssemodesuffixf2s>.
2189 (define_insn "fma4i_vmfmadd<mode>4"
2190 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2192 [(vec_merge:SSEMODEF2P
2195 (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
2196 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2197 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2200 UNSPEC_FMA4_INTRINSIC))]
2201 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2202 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2203 [(set_attr "type" "ssemuladd")
2204 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfmsub<mode>4: intrinsic vec_merge variant over SSEMODEF2P,
;; tagged with UNSPEC_FMA4_INTRINSIC.  Operand 1 uses register_operand;
;; operand 3 (alt 0) or operand 2 (alt 1) may be memory.  The "mode"
;; attribute is <ssescalarmode>.  Emits vfmsub<ssemodesuffixf2s>.
2206 (define_insn "fma4i_vmfmsub<mode>4"
2207 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2209 [(vec_merge:SSEMODEF2P
2212 (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
2213 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2214 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2217 UNSPEC_FMA4_INTRINSIC))]
2218 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2219 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2220 [(set_attr "type" "ssemuladd")
2221 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfnmadd<mode>4: intrinsic vec_merge variant over SSEMODEF2P,
;; tagged with UNSPEC_FMA4_INTRINSIC.  Operand 3 comes first in the RTL and
;; may be memory in alternative 0; operand 2 may be memory in alternative
;; 1.  The "mode" attribute is <ssescalarmode>.
;; Emits vfnmadd<ssemodesuffixf2s>.
;; NOTE(review): operand 1 uses nonimmediate_operand and the last validator
;; argument is true here, while fma4i_vmfmadd/vmfmsub/vmfnmsub<mode>4 use
;; register_operand and false -- confirm the difference is intended.
2223 (define_insn "fma4i_vmfnmadd<mode>4"
2224 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2226 [(vec_merge:SSEMODEF2P
2228 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2230 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2231 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2234 UNSPEC_FMA4_INTRINSIC))]
2235 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2236 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2237 [(set_attr "type" "ssemuladd")
2238 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfnmsub<mode>4: intrinsic vec_merge variant over SSEMODEF2P,
;; tagged with UNSPEC_FMA4_INTRINSIC.  Operand 1 uses register_operand;
;; operand 3 (alt 0) or operand 2 (alt 1) may be memory.  The "mode"
;; attribute is <ssescalarmode>.  Emits vfnmsub<ssemodesuffixf2s>.
2240 (define_insn "fma4i_vmfnmsub<mode>4"
2241 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2243 [(vec_merge:SSEMODEF2P
2247 (match_operand:SSEMODEF2P 1 "register_operand" "x,x"))
2248 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2249 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2252 UNSPEC_FMA4_INTRINSIC))]
2253 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2254 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2255 [(set_attr "type" "ssemuladd")
2256 (set_attr "mode" "<ssescalarmode>")])
2258 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2260 ;; FMA4 Parallel floating point multiply addsub and subadd operations
2262 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; fma4_fmaddsubv8sf4: V8SF pattern that emits vfmaddsubps.  Operand 1 is
;; register-only; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
;; The condition ends with ix86_fma4_valid_op_p (..., 2, true).
2264 (define_insn "fma4_fmaddsubv8sf4"
2265 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2269 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2270 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2271 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2279 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2280 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2281 [(set_attr "type" "ssemuladd")
2282 (set_attr "mode" "V8SF")])
;; fma4_fmaddsubv4df4: V4DF pattern that emits vfmaddsubpd.  Operand 1 is
;; register-only; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
;; The condition ends with ix86_fma4_valid_op_p (..., 2, true).
2284 (define_insn "fma4_fmaddsubv4df4"
2285 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2289 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2290 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2291 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2299 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2300 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2301 [(set_attr "type" "ssemuladd")
2302 (set_attr "mode" "V4DF")])
;; fma4_fmaddsubv4sf4: V4SF pattern that emits vfmaddsubps.  Operand 1 is
;; register-only; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
;; The condition ends with ix86_fma4_valid_op_p (..., 2, true).
2304 (define_insn "fma4_fmaddsubv4sf4"
2305 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2309 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2310 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2311 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2319 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2320 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2321 [(set_attr "type" "ssemuladd")
2322 (set_attr "mode" "V4SF")])
;; fma4_fmaddsubv2df4: V2DF pattern that emits vfmaddsubpd.  Operand 1 is
;; register-only; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
;; The condition ends with ix86_fma4_valid_op_p (..., 2, true).
2324 (define_insn "fma4_fmaddsubv2df4"
2325 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2329 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2330 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2331 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2339 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2340 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2341 [(set_attr "type" "ssemuladd")
2342 (set_attr "mode" "V2DF")])
;; fma4_fmsubaddv8sf4: V8SF pattern that emits vfmsubaddps.  Operand 1 is
;; register-only; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
;; The condition ends with ix86_fma4_valid_op_p (..., 2, true).
2344 (define_insn "fma4_fmsubaddv8sf4"
2345 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2349 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2350 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2351 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2359 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2360 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2361 [(set_attr "type" "ssemuladd")
2362 (set_attr "mode" "V8SF")])
;; fma4_fmsubaddv4df4: V4DF pattern that emits vfmsubaddpd.  Operand 1 is
;; register-only; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
;; The condition ends with ix86_fma4_valid_op_p (..., 2, true).
2364 (define_insn "fma4_fmsubaddv4df4"
2365 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2369 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2370 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2371 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2379 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2380 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2381 [(set_attr "type" "ssemuladd")
2382 (set_attr "mode" "V4DF")])
;; fma4_fmsubaddv4sf4: V4SF pattern that emits vfmsubaddps.  Operand 1 is
;; register-only; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
;; The condition ends with ix86_fma4_valid_op_p (..., 2, true).
2384 (define_insn "fma4_fmsubaddv4sf4"
2385 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2389 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2390 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2391 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2399 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2400 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2401 [(set_attr "type" "ssemuladd")
2402 (set_attr "mode" "V4SF")])
;; fma4_fmsubaddv2df4: V2DF pattern that emits vfmsubaddpd.  Operand 1 is
;; register-only; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
;; The condition ends with ix86_fma4_valid_op_p (..., 2, true).
2404 (define_insn "fma4_fmsubaddv2df4"
2405 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2409 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2410 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2411 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2419 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2420 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2421 [(set_attr "type" "ssemuladd")
2422 (set_attr "mode" "V2DF")])
2424 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; fma4i_fmaddsubv8sf4: intrinsic V8SF variant, tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps.  Operand 1 is register-only;
;; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
2426 (define_insn "fma4i_fmaddsubv8sf4"
2427 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2432 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2433 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2434 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2441 UNSPEC_FMA4_INTRINSIC))]
2443 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2444 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2445 [(set_attr "type" "ssemuladd")
2446 (set_attr "mode" "V8SF")])
;; fma4i_fmaddsubv4df4: intrinsic V4DF variant, tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd.  Operand 1 is register-only;
;; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
2448 (define_insn "fma4i_fmaddsubv4df4"
2449 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2454 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2455 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2456 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2463 UNSPEC_FMA4_INTRINSIC))]
2465 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2466 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2467 [(set_attr "type" "ssemuladd")
2468 (set_attr "mode" "V4DF")])
;; fma4i_fmaddsubv4sf4: intrinsic V4SF variant, tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps.  Operand 1 is register-only;
;; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
2470 (define_insn "fma4i_fmaddsubv4sf4"
2471 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2476 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2477 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2478 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2485 UNSPEC_FMA4_INTRINSIC))]
2487 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2488 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2489 [(set_attr "type" "ssemuladd")
2490 (set_attr "mode" "V4SF")])
;; fma4i_fmaddsubv2df4: intrinsic V2DF variant, tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd.  Operand 1 is register-only;
;; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
2492 (define_insn "fma4i_fmaddsubv2df4"
2493 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2498 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2499 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2500 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2507 UNSPEC_FMA4_INTRINSIC))]
2509 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2510 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2511 [(set_attr "type" "ssemuladd")
2512 (set_attr "mode" "V2DF")])
;; fma4i_fmsubaddv8sf4: intrinsic V8SF variant, tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps.  Operand 1 is register-only;
;; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
2514 (define_insn "fma4i_fmsubaddv8sf4"
2515 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2520 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2521 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2522 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2529 UNSPEC_FMA4_INTRINSIC))]
2531 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2532 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2533 [(set_attr "type" "ssemuladd")
2534 (set_attr "mode" "V8SF")])
;; fma4i_fmsubaddv4df4: intrinsic V4DF variant, tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd.  Operand 1 is register-only;
;; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
2536 (define_insn "fma4i_fmsubaddv4df4"
2537 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2542 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2543 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2544 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2551 UNSPEC_FMA4_INTRINSIC))]
2553 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2554 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2555 [(set_attr "type" "ssemuladd")
2556 (set_attr "mode" "V4DF")])
;; fma4i_fmsubaddv4sf4: intrinsic V4SF variant, tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps.  Operand 1 is register-only;
;; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
2558 (define_insn "fma4i_fmsubaddv4sf4"
2559 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2564 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2565 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2566 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2573 UNSPEC_FMA4_INTRINSIC))]
2575 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2576 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2577 [(set_attr "type" "ssemuladd")
2578 (set_attr "mode" "V4SF")])
;; fma4i_fmsubaddv2df4: intrinsic V2DF variant, tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd.  Operand 1 is register-only;
;; operand 3 (alt 0) or operand 2 (alt 1) may be memory.
2580 (define_insn "fma4i_fmsubaddv2df4"
2581 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2586 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2587 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2588 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2595 UNSPEC_FMA4_INTRINSIC))]
2597 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2598 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2599 [(set_attr "type" "ssemuladd")
2600 (set_attr "mode" "V2DF")])
2602 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2604 ;; Parallel single-precision floating point conversion operations
2606 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse_cvtpi2ps: applies float to the V2SI operand 2 (MMX register "y" or
;; memory), yielding V2SF.  Operand 1 is a V4SF register tied to the
;; destination by the "0" constraint, so the two-operand cvtpi2ps template
;; names only %2 and %0.
2608 (define_insn "sse_cvtpi2ps"
2609 [(set (match_operand:V4SF 0 "register_operand" "=x")
2612 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2613 (match_operand:V4SF 1 "register_operand" "0")
2616 "cvtpi2ps\t{%2, %0|%0, %2}"
2617 [(set_attr "type" "ssecvt")
2618 (set_attr "mode" "V4SF")])
;; sse_cvtps2pi: V2SI result in an MMX register ("y") taken from elements 0
;; and 1 of a V4SI unspec applied to the V4SF operand 1 (SSE register or
;; memory).  Emits cvtps2pi; marked unit "mmx", mode "DI".
2620 (define_insn "sse_cvtps2pi"
2621 [(set (match_operand:V2SI 0 "register_operand" "=y")
2623 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2625 (parallel [(const_int 0) (const_int 1)])))]
2627 "cvtps2pi\t{%1, %0|%0, %1}"
2628 [(set_attr "type" "ssecvt")
2629 (set_attr "unit" "mmx")
2630 (set_attr "mode" "DI")])
;; sse_cvttps2pi: V2SI result in an MMX register ("y") taken from elements
;; 0 and 1 of fix:V4SI applied to the V4SF operand 1 (SSE register or
;; memory).  Emits cvttps2pi; unit "mmx", prefix_rep explicitly "0".
;; NOTE(review): the "mode" attribute is SF here but DI in sse_cvtps2pi --
;; confirm which is intended.
2632 (define_insn "sse_cvttps2pi"
2633 [(set (match_operand:V2SI 0 "register_operand" "=y")
2635 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2636 (parallel [(const_int 0) (const_int 1)])))]
2638 "cvttps2pi\t{%1, %0|%0, %1}"
2639 [(set_attr "type" "ssecvt")
2640 (set_attr "unit" "mmx")
2641 (set_attr "prefix_rep" "0")
2642 (set_attr "mode" "SF")])
;; *avx_cvtsi2ss: VEX three-operand form.  float:SF is applied to the SI
;; operand 2 (general register or memory); operand 1 is a separate V4SF
;; SSE register ("x"), not tied to the destination, and appears as %1 in
;; the vcvtsi2ss template.
2644 (define_insn "*avx_cvtsi2ss"
2645 [(set (match_operand:V4SF 0 "register_operand" "=x")
2648 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2649 (match_operand:V4SF 1 "register_operand" "x")
2652 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2653 [(set_attr "type" "sseicvt")
2654 (set_attr "prefix" "vex")
2655 (set_attr "mode" "SF")])
;; sse_cvtsi2ss: two-operand form; operand 1 is tied to the destination
;; ("0").  The SI source is split into a register alternative and a memory
;; alternative so the athlon_decode / amdfam10_decode attributes can differ
;; per alternative ("vector" for register, "double" for memory).
2657 (define_insn "sse_cvtsi2ss"
2658 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2661 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2662 (match_operand:V4SF 1 "register_operand" "0,0")
2665 "cvtsi2ss\t{%2, %0|%0, %2}"
2666 [(set_attr "type" "sseicvt")
2667 (set_attr "athlon_decode" "vector,double")
2668 (set_attr "amdfam10_decode" "vector,double")
2669 (set_attr "mode" "SF")])
;; *avx_cvtsi2ssq: DImode-source counterpart of *avx_cvtsi2ss, enabled only
;; for TARGET_AVX && TARGET_64BIT.  Operand 1 is an untied V4SF SSE
;; register; emits the three-operand vcvtsi2ssq with length_vex "4".
2671 (define_insn "*avx_cvtsi2ssq"
2672 [(set (match_operand:V4SF 0 "register_operand" "=x")
2675 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2676 (match_operand:V4SF 1 "register_operand" "x")
2678 "TARGET_AVX && TARGET_64BIT"
2679 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2680 [(set_attr "type" "sseicvt")
2681 (set_attr "length_vex" "4")
2682 (set_attr "prefix" "vex")
2683 (set_attr "mode" "SF")])
;; sse_cvtsi2ssq: DImode-source counterpart of sse_cvtsi2ss, enabled only
;; for TARGET_SSE && TARGET_64BIT.  Operand 1 is tied to the destination;
;; prefix_rex is forced to "1".  Decode attributes differ per alternative.
;; NOTE(review): operand 2 uses "r,rm", so the second alternative also
;; accepts a register, whereas sse_cvtsi2ss uses "r,m" -- confirm the
;; overlap is intended given the per-alternative decode attributes.
2685 (define_insn "sse_cvtsi2ssq"
2686 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2689 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2690 (match_operand:V4SF 1 "register_operand" "0,0")
2692 "TARGET_SSE && TARGET_64BIT"
2693 "cvtsi2ssq\t{%2, %0|%0, %2}"
2694 [(set_attr "type" "sseicvt")
2695 (set_attr "prefix_rex" "1")
2696 (set_attr "athlon_decode" "vector,double")
2697 (set_attr "amdfam10_decode" "vector,double")
2698 (set_attr "mode" "SF")])
;; sse_cvtss2si: SI result in a general register from element 0 of the
;; V4SF operand 1 (SSE register or memory), wrapped in UNSPEC_FIX_NOTRUNC.
;; The template starts with %v and the prefix attribute is "maybe_vex".
;; Unlike sse_cvtss2si_2, no amdfam10_decode attribute is set here.
2700 (define_insn "sse_cvtss2si"
2701 [(set (match_operand:SI 0 "register_operand" "=r,r")
2704 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2705 (parallel [(const_int 0)]))]
2706 UNSPEC_FIX_NOTRUNC))]
2708 "%vcvtss2si\t{%1, %0|%0, %1}"
2709 [(set_attr "type" "sseicvt")
2710 (set_attr "athlon_decode" "double,vector")
2711 (set_attr "prefix_rep" "1")
2712 (set_attr "prefix" "maybe_vex")
2713 (set_attr "mode" "SI")])
;; sse_cvtss2si_2: same cvtss2si output as sse_cvtss2si, but the source is
;; a plain SF operand (SSE register or memory) passed directly to
;; UNSPEC_FIX_NOTRUNC rather than an element selected from a V4SF.
2715 (define_insn "sse_cvtss2si_2"
2716 [(set (match_operand:SI 0 "register_operand" "=r,r")
2717 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2718 UNSPEC_FIX_NOTRUNC))]
2720 "%vcvtss2si\t{%1, %0|%0, %1}"
2721 [(set_attr "type" "sseicvt")
2722 (set_attr "athlon_decode" "double,vector")
2723 (set_attr "amdfam10_decode" "double,double")
2724 (set_attr "prefix_rep" "1")
2725 (set_attr "prefix" "maybe_vex")
2726 (set_attr "mode" "SI")])
;; sse_cvtss2siq: DI-result counterpart of sse_cvtss2si, enabled only for
;; TARGET_SSE && TARGET_64BIT.  Source is element 0 of the V4SF operand 1
;; under UNSPEC_FIX_NOTRUNC; emits cvtss2siq (with the %v prefix escape).
2728 (define_insn "sse_cvtss2siq"
2729 [(set (match_operand:DI 0 "register_operand" "=r,r")
2732 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2733 (parallel [(const_int 0)]))]
2734 UNSPEC_FIX_NOTRUNC))]
2735 "TARGET_SSE && TARGET_64BIT"
2736 "%vcvtss2siq\t{%1, %0|%0, %1}"
2737 [(set_attr "type" "sseicvt")
2738 (set_attr "athlon_decode" "double,vector")
2739 (set_attr "prefix_rep" "1")
2740 (set_attr "prefix" "maybe_vex")
2741 (set_attr "mode" "DI")])
;; sse_cvtss2siq_2: DI-result pattern taking a plain SF operand (SSE
;; register or memory) under UNSPEC_FIX_NOTRUNC; enabled only for
;; TARGET_SSE && TARGET_64BIT.  Emits cvtss2siq (with the %v prefix escape).
2743 (define_insn "sse_cvtss2siq_2"
2744 [(set (match_operand:DI 0 "register_operand" "=r,r")
2745 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2746 UNSPEC_FIX_NOTRUNC))]
2747 "TARGET_SSE && TARGET_64BIT"
2748 "%vcvtss2siq\t{%1, %0|%0, %1}"
2749 [(set_attr "type" "sseicvt")
2750 (set_attr "athlon_decode" "double,vector")
2751 (set_attr "amdfam10_decode" "double,double")
2752 (set_attr "prefix_rep" "1")
2753 (set_attr "prefix" "maybe_vex")
2754 (set_attr "mode" "DI")])
;; sse_cvttss2si: SI result in a general register from element 0 of the
;; V4SF operand 1 (SSE register or memory).  No UNSPEC_FIX_NOTRUNC appears
;; in this pattern, in contrast to sse_cvtss2si.  Emits cvttss2si (with the
;; %v prefix escape).
2756 (define_insn "sse_cvttss2si"
2757 [(set (match_operand:SI 0 "register_operand" "=r,r")
2760 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2761 (parallel [(const_int 0)]))))]
2763 "%vcvttss2si\t{%1, %0|%0, %1}"
2764 [(set_attr "type" "sseicvt")
2765 (set_attr "athlon_decode" "double,vector")
2766 (set_attr "amdfam10_decode" "double,double")
2767 (set_attr "prefix_rep" "1")
2768 (set_attr "prefix" "maybe_vex")
2769 (set_attr "mode" "SI")])
;; sse_cvttss2siq: DI-result counterpart of sse_cvttss2si, enabled only for
;; TARGET_SSE && TARGET_64BIT.  Source is element 0 of the V4SF operand 1;
;; emits cvttss2siq (with the %v prefix escape).
2771 (define_insn "sse_cvttss2siq"
2772 [(set (match_operand:DI 0 "register_operand" "=r,r")
2775 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2776 (parallel [(const_int 0)]))))]
2777 "TARGET_SSE && TARGET_64BIT"
2778 "%vcvttss2siq\t{%1, %0|%0, %1}"
2779 [(set_attr "type" "sseicvt")
2780 (set_attr "athlon_decode" "double,vector")
2781 (set_attr "amdfam10_decode" "double,double")
2782 (set_attr "prefix_rep" "1")
2783 (set_attr "prefix" "maybe_vex")
2784 (set_attr "mode" "DI")])
;; avx_cvtdq2ps<avxmodesuffix>: applies float to operand 1, whose mode is
;; the <avxcvtvecmode> of the iterated AVXMODEDCVTDQ2PS result mode.  The
;; source may be an SSE register or memory.  Emits vcvtdq2ps with the VEX
;; prefix.
2786 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2787 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2788 (float:AVXMODEDCVTDQ2PS
2789 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2791 "vcvtdq2ps\t{%1, %0|%0, %1}"
2792 [(set_attr "type" "ssecvt")
2793 (set_attr "prefix" "vex")
2794 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvtdq2ps: float conversion of a V4SI operand (SSE register or
;; memory) to a V4SF register; emits cvtdq2ps.
2796 (define_insn "sse2_cvtdq2ps"
2797 [(set (match_operand:V4SF 0 "register_operand" "=x")
2798 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2800 "cvtdq2ps\t{%1, %0|%0, %1}"
2801 [(set_attr "type" "ssecvt")
2802 (set_attr "mode" "V4SF")])
;; sse2_cvtudq2ps: expander built from four steps: a float:V4SF conversion
;; of operand 1, an lt comparison of operand 5 against operand 3, an AND of
;; operand 6 with operand 4, and a final add of operands 5 and 7 into
;; operand 0.  The preparation code sets operands[3] to a zero V4SF
;; register, operands[4] to a register holding a vector constant built from
;; 2**32 (real_ldexp of dconst1 by 32), and operands[5..7] to fresh V4SF
;; pseudo registers.
;; NOTE(review): presumably operands 5, 6 and 7 receive the conversion, the
;; comparison mask and the masked 2**32 correction respectively, so lanes
;; that converted as negative get 2**32 added back -- confirm.
2804 (define_expand "sse2_cvtudq2ps"
2806 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2808 (lt:V4SF (match_dup 5) (match_dup 3)))
2810 (and:V4SF (match_dup 6) (match_dup 4)))
2811 (set (match_operand:V4SF 0 "register_operand" "")
2812 (plus:V4SF (match_dup 5) (match_dup 7)))]
2815 REAL_VALUE_TYPE TWO32r;
2819 real_ldexp (&TWO32r, &dconst1, 32);
2820 x = const_double_from_real_value (TWO32r, SFmode);
2822 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2823 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2825 for (i = 5; i < 8; i++)
2826 operands[i] = gen_reg_rtx (V4SFmode);
;; avx_cvtps2dq<avxmodesuffix>: UNSPEC_FIX_NOTRUNC applied to operand 1,
;; whose mode is the <avxcvtvecmode> of the iterated AVXMODEDCVTPS2DQ result
;; mode.  The source may be an SSE register or memory.  Emits vcvtps2dq with
;; the VEX prefix.
2829 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2830 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2831 (unspec:AVXMODEDCVTPS2DQ
2832 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2833 UNSPEC_FIX_NOTRUNC))]
2835 "vcvtps2dq\t{%1, %0|%0, %1}"
2836 [(set_attr "type" "ssecvt")
2837 (set_attr "prefix" "vex")
2838 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvtps2dq: UNSPEC_FIX_NOTRUNC applied to a V4SF operand (SSE
;; register or memory), producing a V4SI register.  Emits cvtps2dq;
;; prefix_data16 is set to "1" and the mode attribute is TI.
2840 (define_insn "sse2_cvtps2dq"
2841 [(set (match_operand:V4SI 0 "register_operand" "=x")
2842 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2843 UNSPEC_FIX_NOTRUNC))]
2845 "cvtps2dq\t{%1, %0|%0, %1}"
2846 [(set_attr "type" "ssecvt")
2847 (set_attr "prefix_data16" "1")
2848 (set_attr "mode" "TI")])
;; avx_cvttps2dq<avxmodesuffix>: fix applied to operand 1, whose mode is
;; the <avxcvtvecmode> of the iterated AVXMODEDCVTPS2DQ result mode.  The
;; source may be an SSE register or memory.  Emits vcvttps2dq with the VEX
;; prefix.
2850 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2851 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2852 (fix:AVXMODEDCVTPS2DQ
2853 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2855 "vcvttps2dq\t{%1, %0|%0, %1}"
2856 [(set_attr "type" "ssecvt")
2857 (set_attr "prefix" "vex")
2858 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvttps2dq: fix:V4SI of a V4SF operand (SSE register or memory).
;; Emits cvttps2dq; prefix_rep is "1" and prefix_data16 is explicitly "0",
;; the opposite prefix choice from sse2_cvtps2dq.
2860 (define_insn "sse2_cvttps2dq"
2861 [(set (match_operand:V4SI 0 "register_operand" "=x")
2862 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2864 "cvttps2dq\t{%1, %0|%0, %1}"
2865 [(set_attr "type" "ssecvt")
2866 (set_attr "prefix_rep" "1")
2867 (set_attr "prefix_data16" "0")
2868 (set_attr "mode" "TI")])
2870 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2872 ;; Parallel double-precision floating point conversion operations
2874 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: float:V2DF of a V2SI operand.  Alternative 0 takes the
;; source from an MMX register ("y") and sets unit "mmx" and prefix_data16
;; "1"; alternative 1 takes it from memory and leaves both attributes at
;; their defaults ("*").  Emits cvtpi2pd.
2876 (define_insn "sse2_cvtpi2pd"
2877 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2878 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2880 "cvtpi2pd\t{%1, %0|%0, %1}"
2881 [(set_attr "type" "ssecvt")
2882 (set_attr "unit" "mmx,*")
2883 (set_attr "prefix_data16" "1,*")
2884 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: UNSPEC_FIX_NOTRUNC applied to a V2DF operand (SSE
;; register or memory), producing V2SI in an MMX register ("y").  Emits
;; cvtpd2pi; unit "mmx", prefix_data16 "1", mode "DI".
2886 (define_insn "sse2_cvtpd2pi"
2887 [(set (match_operand:V2SI 0 "register_operand" "=y")
2888 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2889 UNSPEC_FIX_NOTRUNC))]
2891 "cvtpd2pi\t{%1, %0|%0, %1}"
2892 [(set_attr "type" "ssecvt")
2893 (set_attr "unit" "mmx")
2894 (set_attr "prefix_data16" "1")
2895 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: fix:V2SI of a V2DF operand (SSE register or memory),
;; result in an MMX register ("y").  Emits cvttpd2pi; unit "mmx",
;; prefix_data16 "1".
;; NOTE(review): the "mode" attribute is TI here but DI in sse2_cvtpd2pi --
;; confirm which is intended.
2897 (define_insn "sse2_cvttpd2pi"
2898 [(set (match_operand:V2SI 0 "register_operand" "=y")
2899 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2901 "cvttpd2pi\t{%1, %0|%0, %1}"
2902 [(set_attr "type" "ssecvt")
2903 (set_attr "unit" "mmx")
2904 (set_attr "prefix_data16" "1")
2905 (set_attr "mode" "TI")])
;; *avx_cvtsi2sd: VEX three-operand form.  float:DF is applied to the SI
;; operand 2 (general register or memory); operand 1 is a separate V2DF
;; SSE register ("x"), not tied to the destination, and appears as %1 in
;; the vcvtsi2sd template.
2907 (define_insn "*avx_cvtsi2sd"
2908 [(set (match_operand:V2DF 0 "register_operand" "=x")
2911 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2912 (match_operand:V2DF 1 "register_operand" "x")
2915 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2916 [(set_attr "type" "sseicvt")
2917 (set_attr "prefix" "vex")
2918 (set_attr "mode" "DF")])
;; sse2_cvtsi2sd: two-operand form; operand 1 is tied to the destination
;; ("0").  The SI source has a register alternative and a memory
;; alternative; athlon_decode is "double"/"direct" and amdfam10_decode is
;; "vector"/"double" for them respectively.  Emits cvtsi2sd.
2920 (define_insn "sse2_cvtsi2sd"
2921 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2924 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2925 (match_operand:V2DF 1 "register_operand" "0,0")
2928 "cvtsi2sd\t{%2, %0|%0, %2}"
2929 [(set_attr "type" "sseicvt")
2930 (set_attr "mode" "DF")
2931 (set_attr "athlon_decode" "double,direct")
2932 (set_attr "amdfam10_decode" "vector,double")])
;; *avx_cvtsi2sdq: DImode-source counterpart of *avx_cvtsi2sd, enabled only
;; for TARGET_AVX && TARGET_64BIT.  Operand 1 is an untied V2DF SSE
;; register; emits the three-operand vcvtsi2sdq with length_vex "4".
2934 (define_insn "*avx_cvtsi2sdq"
2935 [(set (match_operand:V2DF 0 "register_operand" "=x")
2938 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2939 (match_operand:V2DF 1 "register_operand" "x")
2941 "TARGET_AVX && TARGET_64BIT"
2942 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2943 [(set_attr "type" "sseicvt")
2944 (set_attr "length_vex" "4")
2945 (set_attr "prefix" "vex")
2946 (set_attr "mode" "DF")])
;; sse2_cvtsi2sdq: DImode-source counterpart of sse2_cvtsi2sd, enabled only
;; for TARGET_SSE2 && TARGET_64BIT.  Operand 1 is tied to the destination;
;; prefix_rex is forced to "1"; the source has separate register and memory
;; alternatives with per-alternative decode attributes.  Emits cvtsi2sdq.
2948 (define_insn "sse2_cvtsi2sdq"
2949 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2952 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2953 (match_operand:V2DF 1 "register_operand" "0,0")
2955 "TARGET_SSE2 && TARGET_64BIT"
2956 "cvtsi2sdq\t{%2, %0|%0, %2}"
2957 [(set_attr "type" "sseicvt")
2958 (set_attr "prefix_rex" "1")
2959 (set_attr "mode" "DF")
2960 (set_attr "athlon_decode" "double,direct")
2961 (set_attr "amdfam10_decode" "vector,double")])
;; sse2_cvtsd2si: SI result in a general register from element 0 of the
;; V2DF operand 1 (SSE register or memory), wrapped in UNSPEC_FIX_NOTRUNC.
;; The template starts with %v and the prefix attribute is "maybe_vex".
;; Unlike sse2_cvtsd2si_2, no amdfam10_decode attribute is set here.
2963 (define_insn "sse2_cvtsd2si"
2964 [(set (match_operand:SI 0 "register_operand" "=r,r")
2967 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2968 (parallel [(const_int 0)]))]
2969 UNSPEC_FIX_NOTRUNC))]
2971 "%vcvtsd2si\t{%1, %0|%0, %1}"
2972 [(set_attr "type" "sseicvt")
2973 (set_attr "athlon_decode" "double,vector")
2974 (set_attr "prefix_rep" "1")
2975 (set_attr "prefix" "maybe_vex")
2976 (set_attr "mode" "SI")])
;; sse2_cvtsd2si_2: same cvtsd2si output as sse2_cvtsd2si, but the source
;; is a plain DF operand (SSE register or memory) passed directly to
;; UNSPEC_FIX_NOTRUNC rather than an element selected from a V2DF.
2978 (define_insn "sse2_cvtsd2si_2"
2979 [(set (match_operand:SI 0 "register_operand" "=r,r")
2980 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2981 UNSPEC_FIX_NOTRUNC))]
2983 "%vcvtsd2si\t{%1, %0|%0, %1}"
2984 [(set_attr "type" "sseicvt")
2985 (set_attr "athlon_decode" "double,vector")
2986 (set_attr "amdfam10_decode" "double,double")
2987 (set_attr "prefix_rep" "1")
2988 (set_attr "prefix" "maybe_vex")
2989 (set_attr "mode" "SI")])
;; 64-bit counterpart of the cvtsd2si pattern: DImode destination, element 0 of
;; V2DF operand 1 as source, wrapped in UNSPEC_FIX_NOTRUNC.  Emits cvtsd2siq.
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
2991 (define_insn "sse2_cvtsd2siq"
2992 [(set (match_operand:DI 0 "register_operand" "=r,r")
2995 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2996 (parallel [(const_int 0)]))]
2997 UNSPEC_FIX_NOTRUNC))]
2998 "TARGET_SSE2 && TARGET_64BIT"
2999 "%vcvtsd2siq\t{%1, %0|%0, %1}"
3000 [(set_attr "type" "sseicvt")
3001 (set_attr "athlon_decode" "double,vector")
3002 (set_attr "prefix_rep" "1")
3003 (set_attr "prefix" "maybe_vex")
3004 (set_attr "mode" "DI")])
;; 64-bit cvtsd2siq pattern taking a plain DF scalar source (operand 1) rather
;; than a V2DF element.  Uses UNSPEC_FIX_NOTRUNC; requires
;; TARGET_SSE2 && TARGET_64BIT.
3006 (define_insn "sse2_cvtsd2siq_2"
3007 [(set (match_operand:DI 0 "register_operand" "=r,r")
3008 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
3009 UNSPEC_FIX_NOTRUNC))]
3010 "TARGET_SSE2 && TARGET_64BIT"
3011 "%vcvtsd2siq\t{%1, %0|%0, %1}"
3012 [(set_attr "type" "sseicvt")
3013 (set_attr "athlon_decode" "double,vector")
3014 (set_attr "amdfam10_decode" "double,double")
3015 (set_attr "prefix_rep" "1")
3016 (set_attr "prefix" "maybe_vex")
3017 (set_attr "mode" "DI")])
;; Emits cvttsd2si: element 0 of V2DF operand 1 (register or memory) is
;; converted into an SImode general register.
3019 (define_insn "sse2_cvttsd2si"
3020 [(set (match_operand:SI 0 "register_operand" "=r,r")
3023 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
3024 (parallel [(const_int 0)]))))]
3026 "%vcvttsd2si\t{%1, %0|%0, %1}"
3027 [(set_attr "type" "sseicvt")
3028 (set_attr "prefix_rep" "1")
3029 (set_attr "prefix" "maybe_vex")
3030 (set_attr "mode" "SI")
3031 (set_attr "athlon_decode" "double,vector")
3032 (set_attr "amdfam10_decode" "double,double")])
;; Emits cvttsd2siq: element 0 of V2DF operand 1 is converted into a DImode
;; general register.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
3034 (define_insn "sse2_cvttsd2siq"
3035 [(set (match_operand:DI 0 "register_operand" "=r,r")
3038 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
3039 (parallel [(const_int 0)]))))]
3040 "TARGET_SSE2 && TARGET_64BIT"
3041 "%vcvttsd2siq\t{%1, %0|%0, %1}"
3042 [(set_attr "type" "sseicvt")
3043 (set_attr "prefix_rep" "1")
3044 (set_attr "prefix" "maybe_vex")
3045 (set_attr "mode" "DI")
3046 (set_attr "athlon_decode" "double,vector")
3047 (set_attr "amdfam10_decode" "double,double")])
;; 256-bit packed int -> double: float:V4DF of a V4SI operand (register or
;; memory), emitted as vcvtdq2pd with a VEX prefix.
3049 (define_insn "avx_cvtdq2pd256"
3050 [(set (match_operand:V4DF 0 "register_operand" "=x")
3051 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
3053 "vcvtdq2pd\t{%1, %0|%0, %1}"
3054 [(set_attr "type" "ssecvt")
3055 (set_attr "prefix" "vex")
3056 (set_attr "mode" "V4DF")])
;; 128-bit packed int -> double: elements 0 and 1 of V4SI operand 1 produce the
;; V2DF destination.  Emits cvtdq2pd (prefix attribute "maybe_vex").
3058 (define_insn "sse2_cvtdq2pd"
3059 [(set (match_operand:V2DF 0 "register_operand" "=x")
3062 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3063 (parallel [(const_int 0) (const_int 1)]))))]
3065 "%vcvtdq2pd\t{%1, %0|%0, %1}"
3066 [(set_attr "type" "ssecvt")
3067 (set_attr "prefix" "maybe_vex")
3068 (set_attr "mode" "V2DF")])
;; 256-bit packed double -> int: V4DF operand 1 converted to V4SI through
;; UNSPEC_FIX_NOTRUNC.  Emits vcvtpd2dq with the {y} suffix in AT&T syntax.
3070 (define_insn "avx_cvtpd2dq256"
3071 [(set (match_operand:V4SI 0 "register_operand" "=x")
3072 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3073 UNSPEC_FIX_NOTRUNC))]
3075 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
3076 [(set_attr "type" "ssecvt")
3077 (set_attr "prefix" "vex")
3078 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: V4SI destination built from a V2SI unspec of V2DF
;; operand 1.  The preparation statement sets operands[2] to the V2SI zero
;; constant.
3080 (define_expand "sse2_cvtpd2dq"
3081 [(set (match_operand:V4SI 0 "register_operand" "")
3083 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
3087 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvtpd2dq expander.  Operand 2 must be a V2SI zero
;; constant (const0_operand).  The C output template picks vcvtpd2dq{x} when
;; TARGET_AVX is set and plain cvtpd2dq otherwise.
3089 (define_insn "*sse2_cvtpd2dq"
3090 [(set (match_operand:V4SI 0 "register_operand" "=x")
3092 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3094 (match_operand:V2SI 2 "const0_operand" "")))]
3096 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
3097 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
3098 [(set_attr "type" "ssecvt")
3099 (set_attr "prefix_rep" "1")
3100 (set_attr "prefix_data16" "0")
3101 (set_attr "prefix" "maybe_vex")
3102 (set_attr "mode" "TI")
3103 (set_attr "amdfam10_decode" "double")])
;; 256-bit packed double -> int using the RTL fix operator: fix:V4SI of V4DF
;; operand 1.  Emits vcvttpd2dq with the {y} suffix in AT&T syntax.
3105 (define_insn "avx_cvttpd2dq256"
3106 [(set (match_operand:V4SI 0 "register_operand" "=x")
3107 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3109 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
3110 [(set_attr "type" "ssecvt")
3111 (set_attr "prefix" "vex")
3112 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: V4SI destination built from fix:V2SI of V2DF
;; operand 1.  The preparation statement sets operands[2] to the V2SI zero
;; constant.
3114 (define_expand "sse2_cvttpd2dq"
3115 [(set (match_operand:V4SI 0 "register_operand" "")
3117 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
3120 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvttpd2dq expander.  Operand 2 must be a V2SI zero
;; constant.  The C output template picks vcvttpd2dq{x} when TARGET_AVX is set
;; and plain cvttpd2dq otherwise.
3122 (define_insn "*sse2_cvttpd2dq"
3123 [(set (match_operand:V4SI 0 "register_operand" "=x")
3125 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3126 (match_operand:V2SI 2 "const0_operand" "")))]
3128 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
3129 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
3130 [(set_attr "type" "ssecvt")
3131 (set_attr "prefix" "maybe_vex")
3132 (set_attr "mode" "TI")
3133 (set_attr "amdfam10_decode" "double")])
;; VEX three-operand scalar double -> single conversion (vcvtsd2ss).
;; V2DF operand 2 (register or memory) goes through float_truncate:V2SF;
;; V4SF operand 1 is a separate source register.
3135 (define_insn "*avx_cvtsd2ss"
3136 [(set (match_operand:V4SF 0 "register_operand" "=x")
3139 (float_truncate:V2SF
3140 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
3141 (match_operand:V4SF 1 "register_operand" "x")
3144 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
3145 [(set_attr "type" "ssecvt")
3146 (set_attr "prefix" "vex")
3147 (set_attr "mode" "SF")])
;; Two-operand scalar double -> single conversion (cvtsd2ss).  V2DF operand 2
;; goes through float_truncate:V2SF; V4SF operand 1 is tied to the destination.
3149 (define_insn "sse2_cvtsd2ss"
3150 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3153 (float_truncate:V2SF
3154 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
3155 (match_operand:V4SF 1 "register_operand" "0,0")
3158 "cvtsd2ss\t{%2, %0|%0, %2}"
3159 [(set_attr "type" "ssecvt")
3160 (set_attr "athlon_decode" "vector,double")
3161 (set_attr "amdfam10_decode" "vector,double")
3162 (set_attr "mode" "SF")])
;; VEX three-operand scalar single -> double conversion (vcvtss2sd).
;; Elements 0 and 1 of V4SF operand 2 are selected in the pattern; V2DF
;; operand 1 is a separate source register.
3164 (define_insn "*avx_cvtss2sd"
3165 [(set (match_operand:V2DF 0 "register_operand" "=x")
3169 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3170 (parallel [(const_int 0) (const_int 1)])))
3171 (match_operand:V2DF 1 "register_operand" "x")
3174 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3175 [(set_attr "type" "ssecvt")
3176 (set_attr "prefix" "vex")
3177 (set_attr "mode" "DF")])
;; Two-operand scalar single -> double conversion (cvtss2sd).  Elements 0 and 1
;; of V4SF operand 2 are selected; V2DF operand 1 is tied to the destination.
3179 (define_insn "sse2_cvtss2sd"
3180 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3184 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3185 (parallel [(const_int 0) (const_int 1)])))
3186 (match_operand:V2DF 1 "register_operand" "0,0")
3189 "cvtss2sd\t{%2, %0|%0, %2}"
3190 [(set_attr "type" "ssecvt")
3191 (set_attr "amdfam10_decode" "vector,double")
3192 (set_attr "mode" "DF")])
;; 256-bit packed double -> single: float_truncate:V4SF of V4DF operand 1.
;; Emits vcvtpd2ps with the {y} suffix in AT&T syntax.
3194 (define_insn "avx_cvtpd2ps256"
3195 [(set (match_operand:V4SF 0 "register_operand" "=x")
3196 (float_truncate:V4SF
3197 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3199 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3200 [(set_attr "type" "ssecvt")
3201 (set_attr "prefix" "vex")
3202 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: V4SF destination built from float_truncate:V2SF of
;; V2DF operand 1.  The preparation statement sets operands[2] to the V2SF zero
;; constant.
3204 (define_expand "sse2_cvtpd2ps"
3205 [(set (match_operand:V4SF 0 "register_operand" "")
3207 (float_truncate:V2SF
3208 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3211 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched by the sse2_cvtpd2ps expander.  Operand 2 must be a V2SF zero
;; constant.  The C output template picks vcvtpd2ps{x} when TARGET_AVX is set
;; and plain cvtpd2ps otherwise.
3213 (define_insn "*sse2_cvtpd2ps"
3214 [(set (match_operand:V4SF 0 "register_operand" "=x")
3216 (float_truncate:V2SF
3217 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3218 (match_operand:V2SF 2 "const0_operand" "")))]
3220 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3221 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3222 [(set_attr "type" "ssecvt")
3223 (set_attr "prefix_data16" "1")
3224 (set_attr "prefix" "maybe_vex")
3225 (set_attr "mode" "V4SF")
3226 (set_attr "amdfam10_decode" "double")])
;; 256-bit packed single -> double: V4SF operand 1 (register or memory) produces
;; a V4DF result.  Emits vcvtps2pd with a VEX prefix.
3228 (define_insn "avx_cvtps2pd256"
3229 [(set (match_operand:V4DF 0 "register_operand" "=x")
3231 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3233 "vcvtps2pd\t{%1, %0|%0, %1}"
3234 [(set_attr "type" "ssecvt")
3235 (set_attr "prefix" "vex")
3236 (set_attr "mode" "V4DF")])
;; 128-bit packed single -> double: elements 0 and 1 of V4SF operand 1 produce
;; the V2DF destination.  Emits cvtps2pd; prefix_data16 is forced to "0" here.
3238 (define_insn "sse2_cvtps2pd"
3239 [(set (match_operand:V2DF 0 "register_operand" "=x")
3242 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3243 (parallel [(const_int 0) (const_int 1)]))))]
3245 "%vcvtps2pd\t{%1, %0|%0, %1}"
3246 [(set_attr "type" "ssecvt")
3247 (set_attr "prefix" "maybe_vex")
3248 (set_attr "mode" "V2DF")
3249 (set_attr "prefix_data16" "0")
3250 (set_attr "amdfam10_decode" "direct")])
;; Expander producing V2DF operand 0 from V4SF operand 1.  A fresh V4SF pseudo
;; is allocated as operands[2]; the final set converts elements 0 and 1 of an
;; intermediate value.
;; NOTE(review): presumably operands[2] receives the high half of operand 1
;; moved into the low lanes first -- confirm against the full pattern.
3252 (define_expand "vec_unpacks_hi_v4sf"
3257 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3258 (parallel [(const_int 6)
3262 (set (match_operand:V2DF 0 "register_operand" "")
3266 (parallel [(const_int 0) (const_int 1)]))))]
3269 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing V2DF operand 0 from elements 0 and 1 of V4SF operand 1.
3272 (define_expand "vec_unpacks_lo_v4sf"
3273 [(set (match_operand:V2DF 0 "register_operand" "")
3276 (match_operand:V4SF 1 "nonimmediate_operand" "")
3277 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF expander: runs gen_vec_unpacks_hi_v8hi on operand 1 into a
;; V4SI temporary, then converts that temporary with gen_sse2_cvtdq2ps into
;; operand 0.
3280 (define_expand "vec_unpacks_float_hi_v8hi"
3281 [(match_operand:V4SF 0 "register_operand" "")
3282 (match_operand:V8HI 1 "register_operand" "")]
3285 rtx tmp = gen_reg_rtx (V4SImode);
3287 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3288 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: runs gen_vec_unpacks_lo_v8hi on operand 1 into a
;; V4SI temporary, then converts that temporary with gen_sse2_cvtdq2ps into
;; operand 0.
3292 (define_expand "vec_unpacks_float_lo_v8hi"
3293 [(match_operand:V4SF 0 "register_operand" "")
3294 (match_operand:V8HI 1 "register_operand" "")]
3297 rtx tmp = gen_reg_rtx (V4SImode);
3299 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3300 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: runs gen_vec_unpacku_hi_v8hi on operand 1 into a
;; V4SI temporary, then converts that temporary with gen_sse2_cvtdq2ps into
;; operand 0.
3304 (define_expand "vec_unpacku_float_hi_v8hi"
3305 [(match_operand:V4SF 0 "register_operand" "")
3306 (match_operand:V8HI 1 "register_operand" "")]
3309 rtx tmp = gen_reg_rtx (V4SImode);
3311 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3312 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: runs gen_vec_unpacku_lo_v8hi on operand 1 into a
;; V4SI temporary, then converts that temporary with gen_sse2_cvtdq2ps into
;; operand 0.
3316 (define_expand "vec_unpacku_float_lo_v8hi"
3317 [(match_operand:V4SF 0 "register_operand" "")
3318 (match_operand:V8HI 1 "register_operand" "")]
3321 rtx tmp = gen_reg_rtx (V4SImode);
3323 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3324 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing V2DF operand 0 from V4SI operand 1.  A fresh V4SI pseudo
;; is allocated as operands[2]; the final set converts elements 0 and 1 of an
;; intermediate value.
;; NOTE(review): presumably operands[2] holds a shuffle of operand 1 that
;; brings elements 2.. into the low lanes -- confirm against the full pattern.
3328 (define_expand "vec_unpacks_float_hi_v4si"
3331 (match_operand:V4SI 1 "nonimmediate_operand" "")
3332 (parallel [(const_int 2)
3336 (set (match_operand:V2DF 0 "register_operand" "")
3340 (parallel [(const_int 0) (const_int 1)]))))]
3342 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing V2DF operand 0 from elements 0 and 1 of V4SI operand 1.
3344 (define_expand "vec_unpacks_float_lo_v4si"
3345 [(set (match_operand:V2DF 0 "register_operand" "")
3348 (match_operand:V4SI 1 "nonimmediate_operand" "")
3349 (parallel [(const_int 0) (const_int 1)]))))]
;; Unsigned V4SI (high half) -> V2DF expander, built as a multi-set sequence.
;; The visible steps compare operand 6 against zero (operand 3) with lt:V2DF,
;; AND operand 7 with operand 4, and finally add operands 6 and 8 into
;; operand 0.  The preparation code builds operand 3 as a V2DF zero vector in
;; a register and operand 4 as a V2DF constant made from 2^32
;; (real_ldexp of dconst1 by 32); operand 5 is a fresh V4SI pseudo and
;; operands 6..8 are fresh V2DF pseudos.
;; NOTE(review): presumably operand 7 is the "negative" mask and operand 8 the
;; masked 2^32 correction -- the set destinations are not visible here.
3352 (define_expand "vec_unpacku_float_hi_v4si"
3355 (match_operand:V4SI 1 "nonimmediate_operand" "")
3356 (parallel [(const_int 2)
3364 (parallel [(const_int 0) (const_int 1)]))))
3366 (lt:V2DF (match_dup 6) (match_dup 3)))
3368 (and:V2DF (match_dup 7) (match_dup 4)))
3369 (set (match_operand:V2DF 0 "register_operand" "")
3370 (plus:V2DF (match_dup 6) (match_dup 8)))]
3373 REAL_VALUE_TYPE TWO32r;
3377 real_ldexp (&TWO32r, &dconst1, 32);
3378 x = const_double_from_real_value (TWO32r, DFmode);
3380 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3381 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3383 operands[5] = gen_reg_rtx (V4SImode);
3385 for (i = 6; i < 9; i++)
3386 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V4SI (low half) -> V2DF expander, built as a multi-set sequence.
;; The visible steps take elements 0 and 1 of operand 1, compare operand 5
;; against zero (operand 3) with lt:V2DF, AND operand 6 with operand 4, and
;; finally add operands 5 and 7 into operand 0.  The preparation code builds
;; operand 3 as a V2DF zero vector in a register and operand 4 as a V2DF
;; constant made from 2^32 (real_ldexp of dconst1 by 32); operands 5..7 are
;; fresh V2DF pseudos.
;; NOTE(review): presumably operand 6 is the "negative" mask and operand 7 the
;; masked 2^32 correction -- the set destinations are not visible here.
3389 (define_expand "vec_unpacku_float_lo_v4si"
3393 (match_operand:V4SI 1 "nonimmediate_operand" "")
3394 (parallel [(const_int 0) (const_int 1)]))))
3396 (lt:V2DF (match_dup 5) (match_dup 3)))
3398 (and:V2DF (match_dup 6) (match_dup 4)))
3399 (set (match_operand:V2DF 0 "register_operand" "")
3400 (plus:V2DF (match_dup 5) (match_dup 7)))]
3403 REAL_VALUE_TYPE TWO32r;
3407 real_ldexp (&TWO32r, &dconst1, 32);
3408 x = const_double_from_real_value (TWO32r, DFmode);
3410 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3411 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3413 for (i = 5; i < 8; i++)
3414 operands[i] = gen_reg_rtx (V2DFmode);
;; Packs two V2DF operands into one V4SF result: each input is converted with
;; gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the two
;; temporaries are then combined into operand 0 with gen_sse_movlhps.
3417 (define_expand "vec_pack_trunc_v2df"
3418 [(match_operand:V4SF 0 "register_operand" "")
3419 (match_operand:V2DF 1 "nonimmediate_operand" "")
3420 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3425 r1 = gen_reg_rtx (V4SFmode);
3426 r2 = gen_reg_rtx (V4SFmode);
3428 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3429 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3430 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF operands into one V4SI result: each input is converted with
;; gen_sse2_cvttpd2dq into its own V4SI temporary (r1, r2); the temporaries and
;; operand 0 are then viewed as V2DI via gen_lowpart and combined with
;; gen_sse2_punpcklqdq.
3434 (define_expand "vec_pack_sfix_trunc_v2df"
3435 [(match_operand:V4SI 0 "register_operand" "")
3436 (match_operand:V2DF 1 "nonimmediate_operand" "")
3437 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3442 r1 = gen_reg_rtx (V4SImode);
3443 r2 = gen_reg_rtx (V4SImode);
3445 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3446 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3447 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3448 gen_lowpart (V2DImode, r1),
3449 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF operands into one V4SI result: each input is converted with
;; gen_sse2_cvtpd2dq into its own V4SI temporary (r1, r2); the temporaries and
;; operand 0 are then viewed as V2DI via gen_lowpart and combined with
;; gen_sse2_punpcklqdq.
3453 (define_expand "vec_pack_sfix_v2df"
3454 [(match_operand:V4SI 0 "register_operand" "")
3455 (match_operand:V2DF 1 "nonimmediate_operand" "")
3456 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3461 r1 = gen_reg_rtx (V4SImode);
3462 r2 = gen_reg_rtx (V4SImode);
3464 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3465 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3466 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3467 gen_lowpart (V2DImode, r1),
3468 gen_lowpart (V2DImode, r2)));
3472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3474 ;; Parallel single-precision floating point element swizzling
3476 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for the movhlps patterns.  All three V4SF operands may be
;; register or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands) before matching.
3478 (define_expand "sse_movhlps_exp"
3479 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3482 (match_operand:V4SF 1 "nonimmediate_operand" "")
3483 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3484 (parallel [(const_int 6)
3489 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps pattern with three alternatives:
;;   0: register/register -> vmovhlps
;;   1: operand 2 in offsettable memory -> vmovlps loading from %H2
;;   2: memory destination (operand 1 tied to it) -> vmovhps store
;; Rejected when operands 1 and 2 are both memory.
3491 (define_insn "*avx_movhlps"
3492 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3495 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3496 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3497 (parallel [(const_int 6)
3501 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3503 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3504 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3505 vmovhps\t{%2, %0|%0, %2}"
3506 [(set_attr "type" "ssemov")
3507 (set_attr "prefix" "vex")
3508 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE movhlps pattern; operand 1 is tied to the destination in every
;; alternative:
;;   0: register source -> movhlps
;;   1: operand 2 in offsettable memory -> movlps loading from %H2
;;   2: memory destination -> movhps store
;; Rejected when operands 1 and 2 are both memory.
3510 (define_insn "sse_movhlps"
3511 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3514 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3515 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3516 (parallel [(const_int 6)
3520 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3522 movhlps\t{%2, %0|%0, %2}
3523 movlps\t{%H2, %0|%0, %H2}
3524 movhps\t{%2, %0|%0, %2}"
3525 [(set_attr "type" "ssemov")
3526 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for the movlhps patterns.  All three V4SF operands may be
;; register or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands) before matching.
3528 (define_expand "sse_movlhps_exp"
3529 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3532 (match_operand:V4SF 1 "nonimmediate_operand" "")
3533 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3534 (parallel [(const_int 0)
3539 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps pattern with three alternatives:
;;   0: register/register -> vmovlhps
;;   1: operand 2 in memory -> vmovhps load
;;   2: offsettable memory destination (operand 1 tied) -> vmovlps store to %H0
;; Operands must also satisfy ix86_binary_operator_ok.
3541 (define_insn "*avx_movlhps"
3542 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3545 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3546 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3547 (parallel [(const_int 0)
3551 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3553 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3554 vmovhps\t{%2, %1, %0|%0, %1, %2}
3555 vmovlps\t{%2, %H0|%H0, %2}"
3556 [(set_attr "type" "ssemov")
3557 (set_attr "prefix" "vex")
3558 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE movlhps pattern; operand 1 is tied to the destination in every
;; alternative:
;;   0: register source -> movlhps
;;   1: operand 2 in memory -> movhps load
;;   2: offsettable memory destination -> movlps store to %H0
;; Operands must also satisfy ix86_binary_operator_ok.
3560 (define_insn "sse_movlhps"
3561 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3564 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3565 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3566 (parallel [(const_int 0)
3570 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3572 movlhps\t{%2, %0|%0, %2}
3573 movhps\t{%2, %0|%0, %2}
3574 movlps\t{%2, %H0|%H0, %2}"
3575 [(set_attr "type" "ssemov")
3576 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; 256-bit vunpckhps.  The selection list picks indices 2,10,3,11,6,14,7,15
;; from the two V8SF operands (indices 8..15 refer to operand 2), i.e. the
;; interleave is performed separately for each group of four elements.
3578 (define_insn "avx_unpckhps256"
3579 [(set (match_operand:V8SF 0 "register_operand" "=x")
3582 (match_operand:V8SF 1 "register_operand" "x")
3583 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3584 (parallel [(const_int 2) (const_int 10)
3585 (const_int 3) (const_int 11)
3586 (const_int 6) (const_int 14)
3587 (const_int 7) (const_int 15)])))]
3589 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3590 [(set_attr "type" "sselog")
3591 (set_attr "prefix" "vex")
3592 (set_attr "mode" "V8SF")])
;; 128-bit VEX vunpckhps (three-operand form).  The selection list picks
;; indices 2,6,3,7 from the two V4SF operands.
3594 (define_insn "*avx_unpckhps"
3595 [(set (match_operand:V4SF 0 "register_operand" "=x")
3598 (match_operand:V4SF 1 "register_operand" "x")
3599 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3600 (parallel [(const_int 2) (const_int 6)
3601 (const_int 3) (const_int 7)])))]
3603 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3604 [(set_attr "type" "sselog")
3605 (set_attr "prefix" "vex")
3606 (set_attr "mode" "V4SF")])
;; SSE unpckhps (two-operand form; operand 1 tied to the destination).
;; The selection list picks indices 2,6,3,7 from the two V4SF operands.
3608 (define_insn "sse_unpckhps"
3609 [(set (match_operand:V4SF 0 "register_operand" "=x")
3612 (match_operand:V4SF 1 "register_operand" "0")
3613 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3614 (parallel [(const_int 2) (const_int 6)
3615 (const_int 3) (const_int 7)])))]
3617 "unpckhps\t{%2, %0|%0, %2}"
3618 [(set_attr "type" "sselog")
3619 (set_attr "mode" "V4SF")])
;; 256-bit vunpcklps.  The selection list picks indices 0,8,1,9,4,12,5,13
;; from the two V8SF operands (indices 8..15 refer to operand 2), i.e. the
;; interleave is performed separately for each group of four elements.
3621 (define_insn "avx_unpcklps256"
3622 [(set (match_operand:V8SF 0 "register_operand" "=x")
3625 (match_operand:V8SF 1 "register_operand" "x")
3626 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3627 (parallel [(const_int 0) (const_int 8)
3628 (const_int 1) (const_int 9)
3629 (const_int 4) (const_int 12)
3630 (const_int 5) (const_int 13)])))]
3632 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3633 [(set_attr "type" "sselog")
3634 (set_attr "prefix" "vex")
3635 (set_attr "mode" "V8SF")])
;; 128-bit VEX vunpcklps (three-operand form).  The selection list picks
;; indices 0,4,1,5 from the two V4SF operands.
3637 (define_insn "*avx_unpcklps"
3638 [(set (match_operand:V4SF 0 "register_operand" "=x")
3641 (match_operand:V4SF 1 "register_operand" "x")
3642 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3643 (parallel [(const_int 0) (const_int 4)
3644 (const_int 1) (const_int 5)])))]
3646 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3647 [(set_attr "type" "sselog")
3648 (set_attr "prefix" "vex")
3649 (set_attr "mode" "V4SF")])
;; SSE unpcklps (two-operand form; operand 1 tied to the destination).
;; The selection list picks indices 0,4,1,5 from the two V4SF operands.
3651 (define_insn "sse_unpcklps"
3652 [(set (match_operand:V4SF 0 "register_operand" "=x")
3655 (match_operand:V4SF 1 "register_operand" "0")
3656 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3657 (parallel [(const_int 0) (const_int 4)
3658 (const_int 1) (const_int 5)])))]
3660 "unpcklps\t{%2, %0|%0, %2}"
3661 [(set_attr "type" "sselog")
3662 (set_attr "mode" "V4SF")])
3664 ;; These are modeled with the same vec_concat as the others so that we
3665 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selection list duplicates each odd-indexed element of
;; V8SF operand 1 (indices 1,1,3,3,5,5,7,7).
3666 (define_insn "avx_movshdup256"
3667 [(set (match_operand:V8SF 0 "register_operand" "=x")
3670 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3672 (parallel [(const_int 1) (const_int 1)
3673 (const_int 3) (const_int 3)
3674 (const_int 5) (const_int 5)
3675 (const_int 7) (const_int 7)])))]
3677 "vmovshdup\t{%1, %0|%0, %1}"
3678 [(set_attr "type" "sse")
3679 (set_attr "prefix" "vex")
3680 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF operand 1 (register or memory); the selection list
;; starts at index 1.  Emitted with prefix attribute "maybe_vex".
3682 (define_insn "sse3_movshdup"
3683 [(set (match_operand:V4SF 0 "register_operand" "=x")
3686 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3688 (parallel [(const_int 1)
3693 "%vmovshdup\t{%1, %0|%0, %1}"
3694 [(set_attr "type" "sse")
3695 (set_attr "prefix_rep" "1")
3696 (set_attr "prefix" "maybe_vex")
3697 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: the selection list duplicates each even-indexed element
;; of V8SF operand 1 (indices 0,0,2,2,4,4,6,6).
3699 (define_insn "avx_movsldup256"
3700 [(set (match_operand:V8SF 0 "register_operand" "=x")
3703 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3705 (parallel [(const_int 0) (const_int 0)
3706 (const_int 2) (const_int 2)
3707 (const_int 4) (const_int 4)
3708 (const_int 6) (const_int 6)])))]
3710 "vmovsldup\t{%1, %0|%0, %1}"
3711 [(set_attr "type" "sse")
3712 (set_attr "prefix" "vex")
3713 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF operand 1 (register or memory); the selection list
;; starts at index 0.  Emitted with prefix attribute "maybe_vex".
3715 (define_insn "sse3_movsldup"
3716 [(set (match_operand:V4SF 0 "register_operand" "=x")
3719 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3721 (parallel [(const_int 0)
3726 "%vmovsldup\t{%1, %0|%0, %1}"
3727 [(set_attr "type" "sse")
3728 (set_attr "prefix_rep" "1")
3729 (set_attr "prefix" "maybe_vex")
3730 (set_attr "mode" "V4SF")])
;; Expander taking the 8-bit shufps immediate as operand 3 and turning it into
;; the eight explicit element indices expected by avx_shufps256_1.  Each 2-bit
;; field of the mask is used twice: fields 0 and 1 yield indices offset by 0
;; and by 4, fields 2 and 3 yield indices offset by 8 and by 12.
3732 (define_expand "avx_shufps256"
3733 [(match_operand:V8SF 0 "register_operand" "")
3734 (match_operand:V8SF 1 "register_operand" "")
3735 (match_operand:V8SF 2 "nonimmediate_operand" "")
3736 (match_operand:SI 3 "const_int_operand" "")]
3739 int mask = INTVAL (operands[3]);
3740 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3741 GEN_INT ((mask >> 0) & 3),
3742 GEN_INT ((mask >> 2) & 3),
3743 GEN_INT (((mask >> 4) & 3) + 8),
3744 GEN_INT (((mask >> 6) & 3) + 8),
3745 GEN_INT (((mask >> 0) & 3) + 4),
3746 GEN_INT (((mask >> 2) & 3) + 4),
3747 GEN_INT (((mask >> 4) & 3) + 12),
3748 GEN_INT (((mask >> 6) & 3) + 12)));
3752 ;; Each 2-bit field in the mask selects 2 elements, one in each 128-bit lane.
;; 256-bit vshufps with the selection spelled out as eight index operands
;; (operands 3..10).  The insn condition requires operands 7..10 to equal
;; operands 3..6 plus 4, so only operands 3..6 carry information.  The output
;; code folds operands 3..6 back into a single 8-bit immediate (subtracting the
;; base of 8 from operands 5 and 6), stores it in operands[3] and emits vshufps.
3753 (define_insn "avx_shufps256_1"
3754 [(set (match_operand:V8SF 0 "register_operand" "=x")
3757 (match_operand:V8SF 1 "register_operand" "x")
3758 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3759 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3760 (match_operand 4 "const_0_to_3_operand" "")
3761 (match_operand 5 "const_8_to_11_operand" "")
3762 (match_operand 6 "const_8_to_11_operand" "")
3763 (match_operand 7 "const_4_to_7_operand" "")
3764 (match_operand 8 "const_4_to_7_operand" "")
3765 (match_operand 9 "const_12_to_15_operand" "")
3766 (match_operand 10 "const_12_to_15_operand" "")])))]
3768 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3769 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3770 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3771 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3774 mask = INTVAL (operands[3]);
3775 mask |= INTVAL (operands[4]) << 2;
3776 mask |= (INTVAL (operands[5]) - 8) << 4;
3777 mask |= (INTVAL (operands[6]) - 8) << 6;
3778 operands[3] = GEN_INT (mask);
3780 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3782 [(set_attr "type" "sselog")
3783 (set_attr "length_immediate" "1")
3784 (set_attr "prefix" "vex")
3785 (set_attr "mode" "V8SF")])
;; Expander taking the 8-bit shufps immediate as operand 3 and splitting it
;; into four explicit element indices for gen_sse_shufps_v4sf: fields 0 and 1
;; are passed as-is (0..3), fields 2 and 3 are offset by 4 (4..7).
3787 (define_expand "sse_shufps"
3788 [(match_operand:V4SF 0 "register_operand" "")
3789 (match_operand:V4SF 1 "register_operand" "")
3790 (match_operand:V4SF 2 "nonimmediate_operand" "")
3791 (match_operand:SI 3 "const_int_operand" "")]
3794 int mask = INTVAL (operands[3]);
3795 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3796 GEN_INT ((mask >> 0) & 3),
3797 GEN_INT ((mask >> 2) & 3),
3798 GEN_INT (((mask >> 4) & 3) + 4),
3799 GEN_INT (((mask >> 6) & 3) + 4)));
;; VEX vshufps over the SSEMODE4S iterator: a vec_select from the concatenation
;; of operands 1 and 2, with operands 3,4 in 0..3 and operands 5,6 in 4..7.
;; The output code folds the four indices into one 8-bit immediate (subtracting
;; 4 from operands 5 and 6), stores it in operands[3] and emits the
;; three-operand vshufps.
3803 (define_insn "*avx_shufps_<mode>"
3804 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3805 (vec_select:SSEMODE4S
3806 (vec_concat:<ssedoublesizemode>
3807 (match_operand:SSEMODE4S 1 "register_operand" "x")
3808 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3809 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3810 (match_operand 4 "const_0_to_3_operand" "")
3811 (match_operand 5 "const_4_to_7_operand" "")
3812 (match_operand 6 "const_4_to_7_operand" "")])))]
3816 mask |= INTVAL (operands[3]) << 0;
3817 mask |= INTVAL (operands[4]) << 2;
3818 mask |= (INTVAL (operands[5]) - 4) << 4;
3819 mask |= (INTVAL (operands[6]) - 4) << 6;
3820 operands[3] = GEN_INT (mask);
3822 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3824 [(set_attr "type" "sselog")
3825 (set_attr "length_immediate" "1")
3826 (set_attr "prefix" "vex")
3827 (set_attr "mode" "V4SF")])
;; SSE shufps over the SSEMODE4S iterator (operand 1 tied to the destination):
;; a vec_select from the concatenation of operands 1 and 2, with operands 3,4
;; in 0..3 and operands 5,6 in 4..7.  The output code folds the four indices
;; into one 8-bit immediate (subtracting 4 from operands 5 and 6), stores it in
;; operands[3] and emits the two-operand shufps.
3829 (define_insn "sse_shufps_<mode>"
3830 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3831 (vec_select:SSEMODE4S
3832 (vec_concat:<ssedoublesizemode>
3833 (match_operand:SSEMODE4S 1 "register_operand" "0")
3834 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3835 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3836 (match_operand 4 "const_0_to_3_operand" "")
3837 (match_operand 5 "const_4_to_7_operand" "")
3838 (match_operand 6 "const_4_to_7_operand" "")])))]
3842 mask |= INTVAL (operands[3]) << 0;
3843 mask |= INTVAL (operands[4]) << 2;
3844 mask |= (INTVAL (operands[5]) - 4) << 4;
3845 mask |= (INTVAL (operands[6]) - 4) << 6;
3846 operands[3] = GEN_INT (mask);
3848 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3850 [(set_attr "type" "sselog")
3851 (set_attr "length_immediate" "1")
3852 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives:
;;   0: memory destination -> movhps store
;;   1: register to register -> movhlps
;;   2: offsettable memory source -> movlps load from %H1
3854 (define_insn "sse_storehps"
3855 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3857 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3858 (parallel [(const_int 2) (const_int 3)])))]
3861 %vmovhps\t{%1, %0|%0, %1}
3862 %vmovhlps\t{%1, %d0|%d0, %1}
3863 %vmovlps\t{%H1, %d0|%d0, %H1}"
3864 [(set_attr "type" "ssemov")
3865 (set_attr "prefix" "maybe_vex")
3866 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadhps patterns: elements 0 and 1 of V4SF
;; operand 1 are combined with V2SF operand 2.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands before matching.
3868 (define_expand "sse_loadhps_exp"
3869 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3872 (match_operand:V4SF 1 "nonimmediate_operand" "")
3873 (parallel [(const_int 0) (const_int 1)]))
3874 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3876 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps: elements 0 and 1 of operand 1 combined with V2SF operand 2.
;; Alternatives:
;;   0: operand 2 in memory -> vmovhps load
;;   1: operand 2 in a register -> vmovlhps
;;   2: offsettable memory destination (operand 1 tied) -> vmovlps store to %H0
3878 (define_insn "*avx_loadhps"
3879 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3882 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3883 (parallel [(const_int 0) (const_int 1)]))
3884 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3887 vmovhps\t{%2, %1, %0|%0, %1, %2}
3888 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3889 vmovlps\t{%2, %H0|%H0, %2}"
3890 [(set_attr "type" "ssemov")
3891 (set_attr "prefix" "vex")
3892 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE loadhps (operand 1 tied to the destination in every alternative):
;; elements 0 and 1 of operand 1 combined with V2SF operand 2.
;; Alternatives:
;;   0: operand 2 in memory -> movhps load
;;   1: operand 2 in a register -> movlhps
;;   2: offsettable memory destination -> movlps store to %H0
3894 (define_insn "sse_loadhps"
3895 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3898 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3899 (parallel [(const_int 0) (const_int 1)]))
3900 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3903 movhps\t{%2, %0|%0, %2}
3904 movlhps\t{%2, %0|%0, %2}
3905 movlps\t{%2, %H0|%H0, %2}"
3906 [(set_attr "type" "ssemov")
3907 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX extraction of elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives:
;;   0: memory destination -> vmovlps store
;;   1: register to register -> vmovaps
;;   2: memory source -> three-operand vmovlps with %0 as the merge source
;; NOTE(review): the vmovaps alternative is tagged mode V2DF here, whereas
;; sse_storelps tags its movaps alternative V4SF -- confirm this is intended.
3909 (define_insn "*avx_storelps"
3910 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3912 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3913 (parallel [(const_int 0) (const_int 1)])))]
3916 vmovlps\t{%1, %0|%0, %1}
3917 vmovaps\t{%1, %0|%0, %1}
3918 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3919 [(set_attr "type" "ssemov")
3920 (set_attr "prefix" "vex")
3921 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; SSE extraction of elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives:
;;   0: memory destination -> movlps store
;;   1: register to register -> movaps
;;   2: memory source -> movlps load
3923 (define_insn "sse_storelps"
3924 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3926 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3927 (parallel [(const_int 0) (const_int 1)])))]
3930 movlps\t{%1, %0|%0, %1}
3931 movaps\t{%1, %0|%0, %1}
3932 movlps\t{%1, %0|%0, %1}"
3933 [(set_attr "type" "ssemov")
3934 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadlps patterns: V2SF operand 2 is combined
;; with elements 2 and 3 of V4SF operand 1.  The preparation statement passes
;; the operands through ix86_fixup_binary_operands before matching.
3936 (define_expand "sse_loadlps_exp"
3937 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3939 (match_operand:V2SF 2 "nonimmediate_operand" "")
3941 (match_operand:V4SF 1 "nonimmediate_operand" "")
3942 (parallel [(const_int 2) (const_int 3)]))))]
3944 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps: V2SF operand 2 combined with elements 2 and 3 of V4SF
;; operand 1.  Alternatives:
;;   0: all registers -> vshufps with immediate 0xe4, taking the low pair
;;      from operand 2 and the high pair from operand 1
;;   1: operand 2 in memory -> three-operand vmovlps load
;;   2: memory destination (operand 1 tied) -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: it names three distinct register
;; operands and the pattern's prefix attribute is "vex"; the legacy shufps
;; encoding only accepts two operands plus the immediate.
3946 (define_insn "*avx_loadlps"
3947 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3949 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3951 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3952 (parallel [(const_int 2) (const_int 3)]))))]
3955 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3956 vmovlps\t{%2, %1, %0|%0, %1, %2}
3957 vmovlps\t{%2, %0|%0, %2}"
3958 [(set_attr "type" "sselog,ssemov,ssemov")
3959 (set_attr "length_immediate" "1,*,*")
3960 (set_attr "prefix" "vex")
3961 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE loadlps: V2SF operand 2 combined with elements 2 and 3 of V4SF
;; operand 1.  Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied -> movlps load
;;   2: memory destination, operand 1 tied -> movlps store
3963 (define_insn "sse_loadlps"
3964 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3966 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3968 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3969 (parallel [(const_int 2) (const_int 3)]))))]
3972 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3973 movlps\t{%2, %0|%0, %2}
3974 movlps\t{%2, %0|%0, %2}"
3975 [(set_attr "type" "sselog,ssemov,ssemov")
3976 (set_attr "length_immediate" "1,*,*")
3977 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; VEX three-operand vmovss between registers: operands 1 and 2 are both V4SF
;; registers and neither is tied to the destination.
3979 (define_insn "*avx_movss"
3980 [(set (match_operand:V4SF 0 "register_operand" "=x")
3982 (match_operand:V4SF 2 "register_operand" "x")
3983 (match_operand:V4SF 1 "register_operand" "x")
3986 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3987 [(set_attr "type" "ssemov")
3988 (set_attr "prefix" "vex")
3989 (set_attr "mode" "SF")])
;; Two-operand movss between registers: operand 1 is tied to the destination
;; and operand 2 is the register source.
3991 (define_insn "sse_movss"
3992 [(set (match_operand:V4SF 0 "register_operand" "=x")
3994 (match_operand:V4SF 2 "register_operand" "x")
3995 (match_operand:V4SF 1 "register_operand" "0")
3998 "movss\t{%2, %0|%0, %2}"
3999 [(set_attr "type" "ssemov")
4000 (set_attr "mode" "SF")])
;; AVX broadcast of SF register operand 1 into a V4SF register, implemented as
;; vshufps with immediate 0 using operand 1 for both sources.
4002 (define_insn "*vec_dupv4sf_avx"
4003 [(set (match_operand:V4SF 0 "register_operand" "=x")
4005 (match_operand:SF 1 "register_operand" "x")))]
4007 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
4008 [(set_attr "type" "sselog1")
4009 (set_attr "length_immediate" "1")
4010 (set_attr "prefix" "vex")
4011 (set_attr "mode" "V4SF")])
;; SSE broadcast of SF operand 1 into a V4SF register.  Operand 1 is tied to
;; the destination, so the in-place shufps with immediate 0 uses %0 for both
;; sources.
4013 (define_insn "*vec_dupv4sf"
4014 [(set (match_operand:V4SF 0 "register_operand" "=x")
4016 (match_operand:SF 1 "register_operand" "0")))]
4018 "shufps\t{$0, %0, %0|%0, %0, 0}"
4019 [(set_attr "type" "sselog1")
4020 (set_attr "length_immediate" "1")
4021 (set_attr "mode" "V4SF")])
;; AVX-era V2SF construction from two SF operands.  Alternatives:
;;   0: both in SSE registers -> vunpcklps
;;   1: operand 2 in memory -> vinsertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 constrained by "C" -> vmovss load
;;   3: MMX destination (*y) -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 "C" -> movd
;; The prefix attribute is "orig" for alternatives 3 and 4 and "vex" for the
;; rest.
4023 (define_insn "*vec_concatv2sf_avx"
4024 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
4026 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
4027 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
4030 vunpcklps\t{%2, %1, %0|%0, %1, %2}
4031 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
4032 vmovss\t{%1, %0|%0, %1}
4033 punpckldq\t{%2, %0|%0, %2}
4034 movd\t{%1, %0|%0, %1}"
4035 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4036 (set_attr "length_immediate" "*,1,*,*,*")
4037 (set_attr "prefix_extra" "*,1,*,*,*")
4038 (set (attr "prefix")
4039 (if_then_else (eq_attr "alternative" "3,4")
4040 (const_string "orig")
4041 (const_string "vex")))
4042 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
4044 ;; Although insertps takes register source, we prefer
4045 ;; unpcklps with register source since it is shorter.
;; V2SF construction from two SF operands, two-operand forms with insertps
;; available.  Alternatives:
;;   0: operand 1 tied, operand 2 in an SSE register -> unpcklps
;;   1: operand 1 tied, operand 2 in memory -> insertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 "C" -> movss load
;;   3: MMX destination (*y), operand 1 tied -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 "C" -> movd
4046 (define_insn "*vec_concatv2sf_sse4_1"
4047 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
4049 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
4050 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
4053 unpcklps\t{%2, %0|%0, %2}
4054 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
4055 movss\t{%1, %0|%0, %1}
4056 punpckldq\t{%2, %0|%0, %2}
4057 movd\t{%1, %0|%0, %1}"
4058 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4059 (set_attr "prefix_data16" "*,1,*,*,*")
4060 (set_attr "prefix_extra" "*,1,*,*,*")
4061 (set_attr "length_immediate" "*,1,*,*,*")
4062 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
4064 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4065 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4066 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SF construction from two SF operands without insertps.  Operand 2 is
;; restricted to a register or zero (reg_or_0_operand).  Alternatives:
;;   0: operand 1 tied, operand 2 in an SSE register -> unpcklps
;;   1: operand 1 in memory, operand 2 "C" -> movss load
;;   2: MMX destination (*y), operand 1 tied -> punpckldq
;;   3: MMX destination, operand 1 in memory, operand 2 "C" -> movd
4067 (define_insn "*vec_concatv2sf_sse"
4068 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
4070 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
4071 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
4074 unpcklps\t{%2, %0|%0, %2}
4075 movss\t{%1, %0|%0, %1}
4076 punpckldq\t{%2, %0|%0, %2}
4077 movd\t{%1, %0|%0, %1}"
4078 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4079 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX V4SF construction from two V2SF halves: vmovlhps when operand 2 is in a
;; register, vmovhps when it is in memory.  Operand 1 is always a register.
4081 (define_insn "*vec_concatv4sf_avx"
4082 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4084 (match_operand:V2SF 1 "register_operand" " x,x")
4085 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4088 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4089 vmovhps\t{%2, %1, %0|%0, %1, %2}"
4090 [(set_attr "type" "ssemov")
4091 (set_attr "prefix" "vex")
4092 (set_attr "mode" "V4SF,V2SF")])
4094 (define_insn "*vec_concatv4sf_sse"
4095 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4097 (match_operand:V2SF 1 "register_operand" " 0,0")
4098 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4101 movlhps\t{%2, %0|%0, %2}
4102 movhps\t{%2, %0|%0, %2}"
4103 [(set_attr "type" "ssemov")
4104 (set_attr "mode" "V4SF,V2SF")])
4106 (define_expand "vec_init<mode>"
4107 [(match_operand:SSEMODE 0 "register_operand" "")
4108 (match_operand 1 "" "")]
4111 ix86_expand_vector_init (false, operands[0], operands[1]);
4115 (define_insn "*vec_setv4sf_0_avx"
4116 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
4119 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
4120 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
4124 vmovss\t{%2, %1, %0|%0, %1, %2}
4125 vmovss\t{%2, %0|%0, %2}
4126 vmovd\t{%2, %0|%0, %2}
4128 [(set_attr "type" "ssemov")
4129 (set_attr "prefix" "vex")
4130 (set_attr "mode" "SF")])
4132 (define_insn "vec_setv4sf_0"
4133 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
4136 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
4137 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
4141 movss\t{%2, %0|%0, %2}
4142 movss\t{%2, %0|%0, %2}
4143 movd\t{%2, %0|%0, %2}
4145 [(set_attr "type" "ssemov")
4146 (set_attr "mode" "SF")])
4148 ;; A subset is vec_setv4sf.
;; AVX form of a single-element insert into a V4SF register.
;; Operand 1 is the vector being updated, operand 2 the SF value (register
;; or memory), and operand 3 a one-hot element mask (1, 2, 4 or 8, per
;; const_pow2_1_to_8_operand).  The output code turns the mask into an
;; element index with exact_log2 and shifts it left by 4 to form the
;; vinsertps immediate, then prints the three-operand VEX instruction.
4149 (define_insn "*vec_setv4sf_avx"
4150 [(set (match_operand:V4SF 0 "register_operand" "=x")
4153 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4154 (match_operand:V4SF 1 "register_operand" "x")
4155 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4158 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4159 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4161 [(set_attr "type" "sselog")
4162 (set_attr "prefix_extra" "1")
4163 (set_attr "length_immediate" "1")
4164 (set_attr "prefix" "vex")
4165 (set_attr "mode" "V4SF")])
;; SSE4.1 (two-operand) form of the single-element V4SF insert.
;; Operand 1 is tied to the destination ("0"); operand 2 is the SF value and
;; operand 3 a one-hot element mask.  As in the AVX variant, the mask is
;; converted to an element index with exact_log2 and shifted left by 4 to
;; build the insertps immediate.
4167 (define_insn "*vec_setv4sf_sse4_1"
4168 [(set (match_operand:V4SF 0 "register_operand" "=x")
4171 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4172 (match_operand:V4SF 1 "register_operand" "0")
4173 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4176 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4177 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4179 [(set_attr "type" "sselog")
4180 (set_attr "prefix_data16" "1")
4181 (set_attr "prefix_extra" "1")
4182 (set_attr "length_immediate" "1")
4183 (set_attr "mode" "V4SF")])
4185 (define_insn "*avx_insertps"
4186 [(set (match_operand:V4SF 0 "register_operand" "=x")
4187 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4188 (match_operand:V4SF 1 "register_operand" "x")
4189 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4192 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4193 [(set_attr "type" "sselog")
4194 (set_attr "prefix" "vex")
4195 (set_attr "prefix_extra" "1")
4196 (set_attr "length_immediate" "1")
4197 (set_attr "mode" "V4SF")])
4199 (define_insn "sse4_1_insertps"
4200 [(set (match_operand:V4SF 0 "register_operand" "=x")
4201 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4202 (match_operand:V4SF 1 "register_operand" "0")
4203 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4206 "insertps\t{%3, %2, %0|%0, %2, %3}";
4207 [(set_attr "type" "sselog")
4208 (set_attr "prefix_data16" "1")
4209 (set_attr "prefix_extra" "1")
4210 (set_attr "length_immediate" "1")
4211 (set_attr "mode" "V4SF")])
4214 [(set (match_operand:V4SF 0 "memory_operand" "")
4217 (match_operand:SF 1 "nonmemory_operand" ""))
4220 "TARGET_SSE && reload_completed"
4223 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
4227 (define_expand "vec_set<mode>"
4228 [(match_operand:SSEMODE 0 "register_operand" "")
4229 (match_operand:<ssescalarmode> 1 "register_operand" "")
4230 (match_operand 2 "const_int_operand" "")]
4233 ix86_expand_vector_set (false, operands[0], operands[1],
4234 INTVAL (operands[2]));
4238 (define_insn_and_split "*vec_extractv4sf_0"
4239 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4241 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4242 (parallel [(const_int 0)])))]
4243 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4245 "&& reload_completed"
4248 rtx op1 = operands[1];
4250 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4252 op1 = gen_lowpart (SFmode, op1);
4253 emit_move_insn (operands[0], op1);
;; Expander for extracting one 128-bit half of a 256-bit AVX vector.
;; Operand 0 receives the half-width result, operand 1 is the 256-bit source
;; register and operand 2 is a constant 0 or 1.  The body switches on
;; operand 2 and emits either vec_extract_lo_<mode> or vec_extract_hi_<mode>
;; (presumably 0 selects the low half and 1 the high half -- confirm against
;; the case labels).
4257 (define_expand "avx_vextractf128<mode>"
4258 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4259 (match_operand:AVX256MODE 1 "register_operand" "")
4260 (match_operand:SI 2 "const_0_to_1_operand" "")]
4263 switch (INTVAL (operands[2]))
4266 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4269 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Extract the low half (elements 0 and 1) of a 256-bit vector whose mode is
;; in AVX256MODE4P.  The result goes to a register or straight to memory
;; (second alternative, marked as a store).  The immediate 0 selects the
;; low 128 bits.
4277 (define_insn "vec_extract_lo_<mode>"
4278 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4279 (vec_select:<avxhalfvecmode>
4280 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4281 (parallel [(const_int 0) (const_int 1)])))]
4283 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4284 [(set_attr "type" "sselog")
4285 (set_attr "prefix_extra" "1")
4286 (set_attr "length_immediate" "1")
4287 (set_attr "memory" "none,store")
4288 (set_attr "prefix" "vex")
4289 (set_attr "mode" "V8SF")])
;; Extract the high half (elements 2 and 3) of a 256-bit vector whose mode
;; is in AVX256MODE4P, into a register or to memory.  The immediate 1
;; selects the upper 128 bits.
4291 (define_insn "vec_extract_hi_<mode>"
4292 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4293 (vec_select:<avxhalfvecmode>
4294 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4295 (parallel [(const_int 2) (const_int 3)])))]
4297 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4298 [(set_attr "type" "sselog")
4299 (set_attr "prefix_extra" "1")
4300 (set_attr "length_immediate" "1")
4301 (set_attr "memory" "none,store")
4302 (set_attr "prefix" "vex")
4303 (set_attr "mode" "V8SF")])
;; Extract the low half (elements 0..3) of a 256-bit vector whose mode is in
;; AVX256MODE8P, into a register or to memory (second alternative).
;; The low half is the lower 128 bits, so the vextractf128 immediate must
;; be 0; an immediate of 1 returns elements 4..7, which is the job of
;; vec_extract_hi_<mode>.
4305 (define_insn "vec_extract_lo_<mode>"
4306 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4307 (vec_select:<avxhalfvecmode>
4308 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4309 (parallel [(const_int 0) (const_int 1)
4310 (const_int 2) (const_int 3)])))]
4312 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4313 [(set_attr "type" "sselog")
4314 (set_attr "prefix_extra" "1")
4315 (set_attr "length_immediate" "1")
4316 (set_attr "memory" "none,store")
4317 (set_attr "prefix" "vex")
4318 (set_attr "mode" "V8SF")])
;; Extract the high half (elements 4..7) of a 256-bit vector whose mode is
;; in AVX256MODE8P, into a register or to memory.  The immediate 1 selects
;; the upper 128 bits.
4320 (define_insn "vec_extract_hi_<mode>"
4321 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4322 (vec_select:<avxhalfvecmode>
4323 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4324 (parallel [(const_int 4) (const_int 5)
4325 (const_int 6) (const_int 7)])))]
4327 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4328 [(set_attr "type" "sselog")
4329 (set_attr "prefix_extra" "1")
4330 (set_attr "length_immediate" "1")
4331 (set_attr "memory" "none,store")
4332 (set_attr "prefix" "vex")
4333 (set_attr "mode" "V8SF")])
;; Extract the low half (elements 0..7) of a V16HI register as a V8HI, into
;; a register or to memory (second alternative).
;; The low half is the lower 128 bits, so the vextractf128 immediate must
;; be 0; an immediate of 1 returns elements 8..15, which is the job of
;; vec_extract_hi_v16hi.
4335 (define_insn "vec_extract_lo_v16hi"
4336 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4338 (match_operand:V16HI 1 "register_operand" "x,x")
4339 (parallel [(const_int 0) (const_int 1)
4340 (const_int 2) (const_int 3)
4341 (const_int 4) (const_int 5)
4342 (const_int 6) (const_int 7)])))]
4344 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4345 [(set_attr "type" "sselog")
4346 (set_attr "prefix_extra" "1")
4347 (set_attr "length_immediate" "1")
4348 (set_attr "memory" "none,store")
4349 (set_attr "prefix" "vex")
4350 (set_attr "mode" "V8SF")])
;; Extract the high half (elements 8..15) of a V16HI register as a V8HI,
;; into a register or to memory.  The immediate 1 selects the upper 128 bits.
4352 (define_insn "vec_extract_hi_v16hi"
4353 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4355 (match_operand:V16HI 1 "register_operand" "x,x")
4356 (parallel [(const_int 8) (const_int 9)
4357 (const_int 10) (const_int 11)
4358 (const_int 12) (const_int 13)
4359 (const_int 14) (const_int 15)])))]
4361 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4362 [(set_attr "type" "sselog")
4363 (set_attr "prefix_extra" "1")
4364 (set_attr "length_immediate" "1")
4365 (set_attr "memory" "none,store")
4366 (set_attr "prefix" "vex")
4367 (set_attr "mode" "V8SF")])
;; Extract the low half (elements 0..15) of a V32QI register as a V16QI,
;; into a register or to memory (second alternative).
;; The low half is the lower 128 bits, so the vextractf128 immediate must
;; be 0; an immediate of 1 returns elements 16..31, which is the job of
;; vec_extract_hi_v32qi.
4369 (define_insn "vec_extract_lo_v32qi"
4370 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4372 (match_operand:V32QI 1 "register_operand" "x,x")
4373 (parallel [(const_int 0) (const_int 1)
4374 (const_int 2) (const_int 3)
4375 (const_int 4) (const_int 5)
4376 (const_int 6) (const_int 7)
4377 (const_int 8) (const_int 9)
4378 (const_int 10) (const_int 11)
4379 (const_int 12) (const_int 13)
4380 (const_int 14) (const_int 15)])))]
4382 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4383 [(set_attr "type" "sselog")
4384 (set_attr "prefix_extra" "1")
4385 (set_attr "length_immediate" "1")
4386 (set_attr "memory" "none,store")
4387 (set_attr "prefix" "vex")
4388 (set_attr "mode" "V8SF")])
;; Extract the high half (elements 16..31) of a V32QI register as a V16QI,
;; into a register or to memory.  The immediate 1 selects the upper 128 bits.
4390 (define_insn "vec_extract_hi_v32qi"
4391 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4393 (match_operand:V32QI 1 "register_operand" "x,x")
4394 (parallel [(const_int 16) (const_int 17)
4395 (const_int 18) (const_int 19)
4396 (const_int 20) (const_int 21)
4397 (const_int 22) (const_int 23)
4398 (const_int 24) (const_int 25)
4399 (const_int 26) (const_int 27)
4400 (const_int 28) (const_int 29)
4401 (const_int 30) (const_int 31)])))]
4403 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4404 [(set_attr "type" "sselog")
4405 (set_attr "prefix_extra" "1")
4406 (set_attr "length_immediate" "1")
4407 (set_attr "memory" "none,store")
4408 (set_attr "prefix" "vex")
4409 (set_attr "mode" "V8SF")])
4411 (define_insn "*sse4_1_extractps"
4412 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4414 (match_operand:V4SF 1 "register_operand" "x")
4415 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4417 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4418 [(set_attr "type" "sselog")
4419 (set_attr "prefix_data16" "1")
4420 (set_attr "prefix_extra" "1")
4421 (set_attr "length_immediate" "1")
4422 (set_attr "prefix" "maybe_vex")
4423 (set_attr "mode" "V4SF")])
;; Extract element N (operand 2, 0..3) of a V4SF that lives in offsettable
;; memory.  The split rewrites the extraction as a plain SF move from the
;; source address plus N*4 bytes, so no vector instruction is needed.
4425 (define_insn_and_split "*vec_extract_v4sf_mem"
4426 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4428 (match_operand:V4SF 1 "memory_operand" "o")
4429 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4435 int i = INTVAL (operands[2]);
4437 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
4441 (define_expand "vec_extract<mode>"
4442 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4443 (match_operand:SSEMODE 1 "register_operand" "")
4444 (match_operand 2 "const_int_operand" "")]
4447 ix86_expand_vector_extract (false, operands[0], operands[1],
4448 INTVAL (operands[2]));
4452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4454 ;; Parallel double-precision floating point element swizzling
4456 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4458 (define_insn "avx_unpckhpd256"
4459 [(set (match_operand:V4DF 0 "register_operand" "=x")
4462 (match_operand:V4DF 1 "register_operand" "x")
4463 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4464 (parallel [(const_int 1) (const_int 5)
4465 (const_int 3) (const_int 7)])))]
4467 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4468 [(set_attr "type" "sselog")
4469 (set_attr "prefix" "vex")
4470 (set_attr "mode" "V4DF")])
4472 (define_expand "sse2_unpckhpd_exp"
4473 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4476 (match_operand:V2DF 1 "nonimmediate_operand" "")
4477 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4478 (parallel [(const_int 1)
4481 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4483 (define_insn "*avx_unpckhpd"
4484 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4487 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
4488 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
4489 (parallel [(const_int 1)
4491 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4493 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4494 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4495 vmovhpd\t{%1, %0|%0, %1}"
4496 [(set_attr "type" "sselog,ssemov,ssemov")
4497 (set_attr "prefix" "vex")
4498 (set_attr "mode" "V2DF,V1DF,V1DF")])
4500 (define_insn "sse2_unpckhpd"
4501 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4504 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4505 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4506 (parallel [(const_int 1)
4508 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4510 unpckhpd\t{%2, %0|%0, %2}
4511 movlpd\t{%H1, %0|%0, %H1}
4512 movhpd\t{%1, %0|%0, %1}"
4513 [(set_attr "type" "sselog,ssemov,ssemov")
4514 (set_attr "prefix_data16" "*,1,1")
4515 (set_attr "mode" "V2DF,V1DF,V1DF")])
4517 (define_insn "avx_movddup256"
4518 [(set (match_operand:V4DF 0 "register_operand" "=x")
4521 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
4523 (parallel [(const_int 0) (const_int 2)
4524 (const_int 4) (const_int 6)])))]
4526 "vmovddup\t{%1, %0|%0, %1}"
4527 [(set_attr "type" "sselog1")
4528 (set_attr "prefix" "vex")
4529 (set_attr "mode" "V4DF")])
4531 (define_insn "*avx_movddup"
4532 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4535 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4537 (parallel [(const_int 0)
4539 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4541 vmovddup\t{%1, %0|%0, %1}
4543 [(set_attr "type" "sselog1,ssemov")
4544 (set_attr "prefix" "vex")
4545 (set_attr "mode" "V2DF")])
4547 (define_insn "*sse3_movddup"
4548 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4551 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4553 (parallel [(const_int 0)
4555 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4557 movddup\t{%1, %0|%0, %1}
4559 [(set_attr "type" "sselog1,ssemov")
4560 (set_attr "mode" "V2DF")])
;; After reload, a movddup-style duplicate whose destination is memory is
;; split into two scalar DF stores: the low DF part of the source register
;; is written at byte offsets 0 and 8 of the destination.
4563 [(set (match_operand:V2DF 0 "memory_operand" "")
4566 (match_operand:V2DF 1 "register_operand" "")
4568 (parallel [(const_int 0)
4570 "TARGET_SSE3 && reload_completed"
4573 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4574 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4575 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4579 (define_insn "avx_unpcklpd256"
4580 [(set (match_operand:V4DF 0 "register_operand" "=x")
4583 (match_operand:V4DF 1 "register_operand" "x")
4584 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4585 (parallel [(const_int 0) (const_int 4)
4586 (const_int 2) (const_int 6)])))]
4588 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4589 [(set_attr "type" "sselog")
4590 (set_attr "prefix" "vex")
4591 (set_attr "mode" "V4DF")])
4593 (define_expand "sse2_unpcklpd_exp"
4594 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4597 (match_operand:V2DF 1 "nonimmediate_operand" "")
4598 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4599 (parallel [(const_int 0)
4602 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4604 (define_insn "*avx_unpcklpd"
4605 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4608 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4609 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4610 (parallel [(const_int 0)
4612 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4614 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4615 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4616 vmovlpd\t{%2, %H0|%H0, %2}"
4617 [(set_attr "type" "sselog,ssemov,ssemov")
4618 (set_attr "prefix" "vex")
4619 (set_attr "mode" "V2DF,V1DF,V1DF")])
4621 (define_insn "sse2_unpcklpd"
4622 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4625 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4626 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4627 (parallel [(const_int 0)
4629 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4631 unpcklpd\t{%2, %0|%0, %2}
4632 movhpd\t{%2, %0|%0, %2}
4633 movlpd\t{%2, %H0|%H0, %2}"
4634 [(set_attr "type" "sselog,ssemov,ssemov")
4635 (set_attr "prefix_data16" "*,1,1")
4636 (set_attr "mode" "V2DF,V1DF,V1DF")])
4638 (define_expand "avx_shufpd256"
4639 [(match_operand:V4DF 0 "register_operand" "")
4640 (match_operand:V4DF 1 "register_operand" "")
4641 (match_operand:V4DF 2 "nonimmediate_operand" "")
4642 (match_operand:SI 3 "const_int_operand" "")]
4645 int mask = INTVAL (operands[3]);
4646 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4648 GEN_INT (mask & 2 ? 5 : 4),
4649 GEN_INT (mask & 4 ? 3 : 2),
4650 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd expressed as a vec_select with four constant selectors.
;; Operands 3..6 are the selected element indices, constrained to the
;; ranges 0-1, 4-5, 2-3 and 6-7 respectively.  The output code rebuilds the
;; 4-bit instruction immediate from them: bit 0 = op3, bit 1 = op4 - 4,
;; bit 2 = op5 - 2, bit 3 = op6 - 6, and stores it back into operand 3 for
;; printing.
4654 (define_insn "avx_shufpd256_1"
4655 [(set (match_operand:V4DF 0 "register_operand" "=x")
4658 (match_operand:V4DF 1 "register_operand" "x")
4659 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4660 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4661 (match_operand 4 "const_4_to_5_operand" "")
4662 (match_operand 5 "const_2_to_3_operand" "")
4663 (match_operand 6 "const_6_to_7_operand" "")])))]
4667 mask = INTVAL (operands[3]);
4668 mask |= (INTVAL (operands[4]) - 4) << 1;
4669 mask |= (INTVAL (operands[5]) - 2) << 2;
4670 mask |= (INTVAL (operands[6]) - 6) << 3;
4671 operands[3] = GEN_INT (mask);
4673 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4675 [(set_attr "type" "sselog")
4676 (set_attr "length_immediate" "1")
4677 (set_attr "prefix" "vex")
4678 (set_attr "mode" "V4DF")])
4680 (define_expand "sse2_shufpd"
4681 [(match_operand:V2DF 0 "register_operand" "")
4682 (match_operand:V2DF 1 "register_operand" "")
4683 (match_operand:V2DF 2 "nonimmediate_operand" "")
4684 (match_operand:SI 3 "const_int_operand" "")]
4687 int mask = INTVAL (operands[3]);
4688 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4690 GEN_INT (mask & 2 ? 3 : 2)));
4694 (define_expand "vec_extract_even<mode>"
4695 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4696 (vec_select:SSEMODE4S
4697 (vec_concat:<ssedoublesizemode>
4698 (match_operand:SSEMODE4S 1 "register_operand" "")
4699 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4700 (parallel [(const_int 0)
4706 (define_expand "vec_extract_odd<mode>"
4707 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4708 (vec_select:SSEMODE4S
4709 (vec_concat:<ssedoublesizemode>
4710 (match_operand:SSEMODE4S 1 "register_operand" "")
4711 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4712 (parallel [(const_int 1)
4718 (define_expand "vec_extract_even<mode>"
4719 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4720 (vec_select:SSEMODE2D
4721 (vec_concat:<ssedoublesizemode>
4722 (match_operand:SSEMODE2D 1 "register_operand" "")
4723 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4724 (parallel [(const_int 0)
4728 (define_expand "vec_extract_odd<mode>"
4729 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4730 (vec_select:SSEMODE2D
4731 (vec_concat:<ssedoublesizemode>
4732 (match_operand:SSEMODE2D 1 "register_operand" "")
4733 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4734 (parallel [(const_int 1)
4738 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
4739 (define_insn "*avx_punpckhqdq"
4740 [(set (match_operand:V2DI 0 "register_operand" "=x")
4743 (match_operand:V2DI 1 "register_operand" "x")
4744 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4745 (parallel [(const_int 1)
4748 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4749 [(set_attr "type" "sselog")
4750 (set_attr "prefix" "vex")
4751 (set_attr "mode" "TI")])
4753 (define_insn "sse2_punpckhqdq"
4754 [(set (match_operand:V2DI 0 "register_operand" "=x")
4757 (match_operand:V2DI 1 "register_operand" "0")
4758 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4759 (parallel [(const_int 1)
4762 "punpckhqdq\t{%2, %0|%0, %2}"
4763 [(set_attr "type" "sselog")
4764 (set_attr "prefix_data16" "1")
4765 (set_attr "mode" "TI")])
4767 (define_insn "*avx_punpcklqdq"
4768 [(set (match_operand:V2DI 0 "register_operand" "=x")
4771 (match_operand:V2DI 1 "register_operand" "x")
4772 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4773 (parallel [(const_int 0)
4776 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4777 [(set_attr "type" "sselog")
4778 (set_attr "prefix" "vex")
4779 (set_attr "mode" "TI")])
4781 (define_insn "sse2_punpcklqdq"
4782 [(set (match_operand:V2DI 0 "register_operand" "=x")
4785 (match_operand:V2DI 1 "register_operand" "0")
4786 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4787 (parallel [(const_int 0)
4790 "punpcklqdq\t{%2, %0|%0, %2}"
4791 [(set_attr "type" "sselog")
4792 (set_attr "prefix_data16" "1")
4793 (set_attr "mode" "TI")])
;; AVX (three-operand) two-element shuffle for the SSEMODE2D modes.
;; Operand 3 selects element 0 or 1 of operand 1; operand 4 selects element
;; 2 or 3 of the concatenation, i.e. element 0 or 1 of operand 2.  The
;; output code packs them into the 2-bit immediate: bit 0 = op3,
;; bit 1 = op4 - 2.
4795 (define_insn "*avx_shufpd_<mode>"
4796 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4797 (vec_select:SSEMODE2D
4798 (vec_concat:<ssedoublesizemode>
4799 (match_operand:SSEMODE2D 1 "register_operand" "x")
4800 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4801 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4802 (match_operand 4 "const_2_to_3_operand" "")])))]
4806 mask = INTVAL (operands[3]);
4807 mask |= (INTVAL (operands[4]) - 2) << 1;
4808 operands[3] = GEN_INT (mask);
4810 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4812 [(set_attr "type" "sselog")
4813 (set_attr "length_immediate" "1")
4814 (set_attr "prefix" "vex")
4815 (set_attr "mode" "V2DF")])
;; SSE2 (two-operand) two-element shuffle for the SSEMODE2D modes; operand 1
;; is tied to the destination.  Operand 3 selects element 0 or 1 of
;; operand 1 and operand 4 selects element 2 or 3 of the concatenation
;; (element 0 or 1 of operand 2).  The immediate is rebuilt as
;; bit 0 = op3, bit 1 = op4 - 2.
4817 (define_insn "sse2_shufpd_<mode>"
4818 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4819 (vec_select:SSEMODE2D
4820 (vec_concat:<ssedoublesizemode>
4821 (match_operand:SSEMODE2D 1 "register_operand" "0")
4822 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4823 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4824 (match_operand 4 "const_2_to_3_operand" "")])))]
4828 mask = INTVAL (operands[3]);
4829 mask |= (INTVAL (operands[4]) - 2) << 1;
4830 operands[3] = GEN_INT (mask);
4832 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4834 [(set_attr "type" "sselog")
4835 (set_attr "length_immediate" "1")
4836 (set_attr "mode" "V2DF")])
4838 ;; Avoid combining registers from different units in a single alternative,
4839 ;; see comment above inline_secondary_memory_needed function in i386.c
4840 (define_insn "*avx_storehpd"
4841 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4843 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4844 (parallel [(const_int 1)])))]
4845 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4847 vmovhpd\t{%1, %0|%0, %1}
4848 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4852 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4853 (set_attr "prefix" "vex")
4854 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4856 (define_insn "sse2_storehpd"
4857 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4859 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4860 (parallel [(const_int 1)])))]
4861 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4863 movhpd\t{%1, %0|%0, %1}
4868 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4869 (set_attr "prefix_data16" "1,*,*,*,*")
4870 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload, extracting element 1 of a V2DF held in memory becomes a
;; plain DF load: the source address is advanced by 8 bytes so it points at
;; the high element.
4873 [(set (match_operand:DF 0 "register_operand" "")
4875 (match_operand:V2DF 1 "memory_operand" "")
4876 (parallel [(const_int 1)])))]
4877 "TARGET_SSE2 && reload_completed"
4878 [(set (match_dup 0) (match_dup 1))]
4880 operands[1] = adjust_address (operands[1], DFmode, 8);
4883 ;; Avoid combining registers from different units in a single alternative,
4884 ;; see comment above inline_secondary_memory_needed function in i386.c
4885 (define_insn "sse2_storelpd"
4886 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4888 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4889 (parallel [(const_int 0)])))]
4890 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4892 %vmovlpd\t{%1, %0|%0, %1}
4897 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4898 (set_attr "prefix_data16" "1,*,*,*,*")
4899 (set_attr "prefix" "maybe_vex")
4900 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4903 [(set (match_operand:DF 0 "register_operand" "")
4905 (match_operand:V2DF 1 "nonimmediate_operand" "")
4906 (parallel [(const_int 0)])))]
4907 "TARGET_SSE2 && reload_completed"
4910 rtx op1 = operands[1];
4912 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4914 op1 = gen_lowpart (DFmode, op1);
4915 emit_move_insn (operands[0], op1);
4919 (define_expand "sse2_loadhpd_exp"
4920 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4923 (match_operand:V2DF 1 "nonimmediate_operand" "")
4924 (parallel [(const_int 0)]))
4925 (match_operand:DF 2 "nonimmediate_operand" "")))]
4927 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4929 ;; Avoid combining registers from different units in a single alternative,
4930 ;; see comment above inline_secondary_memory_needed function in i386.c
4931 (define_insn "*avx_loadhpd"
4932 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4935 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4936 (parallel [(const_int 0)]))
4937 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4938 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4940 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4941 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4945 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4946 (set_attr "prefix" "vex")
4947 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Build a V2DF whose low element is element 0 of operand 1 and whose high
;; element is the DF operand 2.  The first three alternatives produce the
;; result in an SSE register; the last three have a memory destination tied
;; to operand 1.
;; NOTE(review): the third alternative ties operand 2 to the destination and
;; prints "shufpd $1, %1, %0".  By the SHUFPD definition an immediate of 1
;; yields { dest[1], src[0] }, i.e. { undefined, op1[0] } here, rather than
;; the required { op1[0], op2 }.  This alternative looks incorrect and may
;; need to be removed -- confirm before relying on it.
4949 (define_insn "sse2_loadhpd"
4950 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4953 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4954 (parallel [(const_int 0)]))
4955 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4956 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4958 movhpd\t{%2, %0|%0, %2}
4959 unpcklpd\t{%2, %0|%0, %2}
4960 shufpd\t{$1, %1, %0|%0, %1, 1}
4964 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4965 (set_attr "prefix_data16" "1,*,*,*,*,*")
4966 (set_attr "length_immediate" "*,*,1,*,*,*")
4967 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; After reload, replacing the high element of a V2DF in memory (the low
;; element is kept via match_dup 0) becomes a plain DF store at byte
;; offset 8 of the destination.
4970 [(set (match_operand:V2DF 0 "memory_operand" "")
4972 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4973 (match_operand:DF 1 "register_operand" "")))]
4974 "TARGET_SSE2 && reload_completed"
4975 [(set (match_dup 0) (match_dup 1))]
4977 operands[0] = adjust_address (operands[0], DFmode, 8);
4980 (define_expand "sse2_loadlpd_exp"
4981 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4983 (match_operand:DF 2 "nonimmediate_operand" "")
4985 (match_operand:V2DF 1 "nonimmediate_operand" "")
4986 (parallel [(const_int 1)]))))]
4988 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4990 ;; Avoid combining registers from different units in a single alternative,
4991 ;; see comment above inline_secondary_memory_needed function in i386.c
4992 (define_insn "*avx_loadlpd"
4993 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4995 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4997 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4998 (parallel [(const_int 1)]))))]
4999 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5001 vmovsd\t{%2, %0|%0, %2}
5002 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5003 vmovsd\t{%2, %1, %0|%0, %1, %2}
5004 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5008 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
5009 (set_attr "prefix" "vex")
5010 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; Build a V2DF whose low element is the DF operand 2 and whose high element
;; is element 1 of operand 1.  The first five alternatives produce the
;; result in an SSE register; the last three have a memory destination tied
;; to operand 1.
;; In the fourth alternative operand 2 is tied to the destination and
;; operand 1 is a separate SSE register, so the high element must be taken
;; from operand 1: "shufpd $2" keeps dest[0] and copies src[1], hence the
;; source operand printed is %1.  (Printing %2 would name the destination
;; itself and leave the high element unchanged.)
5012 (define_insn "sse2_loadlpd"
5013 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
5015 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
5017 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
5018 (parallel [(const_int 1)]))))]
5019 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5021 movsd\t{%2, %0|%0, %2}
5022 movlpd\t{%2, %0|%0, %2}
5023 movsd\t{%2, %0|%0, %2}
5024 shufpd\t{$2, %1, %0|%0, %1, 2}
5025 movhpd\t{%H1, %0|%0, %H1}
5029 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
5030 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
5031 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
5032 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; After reload, replacing the low element of a V2DF in memory (the high
;; element is kept via match_dup 0) becomes a plain DF store.  The new value
;; is element 0 of the vec_concat, so it is stored at byte offset 0 of the
;; destination; offset 8 would overwrite the high element that the pattern
;; says is preserved.
5035 [(set (match_operand:V2DF 0 "memory_operand" "")
5037 (match_operand:DF 1 "register_operand" "")
5038 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5039 "TARGET_SSE2 && reload_completed"
5040 [(set (match_dup 0) (match_dup 1))]
5042 operands[0] = adjust_address (operands[0], DFmode, 0);
5045 ;; Not sure these two are ever used, but it doesn't hurt to have
5047 (define_insn "*vec_extractv2df_1_sse"
5048 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5050 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
5051 (parallel [(const_int 1)])))]
5052 "!TARGET_SSE2 && TARGET_SSE
5053 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5055 movhps\t{%1, %0|%0, %1}
5056 movhlps\t{%1, %0|%0, %1}
5057 movlps\t{%H1, %0|%0, %H1}"
5058 [(set_attr "type" "ssemov")
5059 (set_attr "mode" "V2SF,V4SF,V2SF")])
5061 (define_insn "*vec_extractv2df_0_sse"
5062 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5064 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5065 (parallel [(const_int 0)])))]
5066 "!TARGET_SSE2 && TARGET_SSE
5067 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5069 movlps\t{%1, %0|%0, %1}
5070 movaps\t{%1, %0|%0, %1}
5071 movlps\t{%1, %0|%0, %1}"
5072 [(set_attr "type" "ssemov")
5073 (set_attr "mode" "V2SF,V4SF,V2SF")])
5075 (define_insn "*avx_movsd"
5076 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5078 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5079 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5083 vmovsd\t{%2, %1, %0|%0, %1, %2}
5084 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5085 vmovlpd\t{%2, %0|%0, %2}
5086 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5087 vmovhps\t{%1, %H0|%H0, %1}"
5088 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5089 (set_attr "prefix" "vex")
5090 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; Merge two V2DF values: the low element comes from operand 2 and the high
;; element from operand 1.  Alternatives cover register, memory-source and
;; memory-destination forms.
;; In the fourth alternative operand 2 is tied to the destination and
;; operand 1 is a separate SSE register, so the high element must be taken
;; from operand 1: "shufpd $2" keeps dest[0] and copies src[1], hence the
;; source operand printed is %1.  (Printing %2 would name the destination
;; itself and make the instruction a no-op.)
5092 (define_insn "sse2_movsd"
5093 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5095 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5096 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5100 movsd\t{%2, %0|%0, %2}
5101 movlpd\t{%2, %0|%0, %2}
5102 movlpd\t{%2, %0|%0, %2}
5103 shufpd\t{$2, %1, %0|%0, %1, 2}
5104 movhps\t{%H1, %0|%0, %H1}
5105 movhps\t{%1, %H0|%H0, %1}"
5106 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5107 (set_attr "prefix_data16" "*,1,1,*,*,*")
5108 (set_attr "length_immediate" "*,*,*,1,*,*")
5109 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
5111 (define_insn "*vec_dupv2df_sse3"
5112 [(set (match_operand:V2DF 0 "register_operand" "=x")
5114 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5116 "%vmovddup\t{%1, %0|%0, %1}"
5117 [(set_attr "type" "sselog1")
5118 (set_attr "prefix" "maybe_vex")
5119 (set_attr "mode" "DF")])
5121 (define_insn "vec_dupv2df"
5122 [(set (match_operand:V2DF 0 "register_operand" "=x")
5124 (match_operand:DF 1 "register_operand" "0")))]
5127 [(set_attr "type" "sselog1")
5128 (set_attr "mode" "V2DF")])
5130 (define_insn "*vec_concatv2df_sse3"
5131 [(set (match_operand:V2DF 0 "register_operand" "=x")
5133 (match_operand:DF 1 "nonimmediate_operand" "xm")
5136 "%vmovddup\t{%1, %0|%0, %1}"
5137 [(set_attr "type" "sselog1")
5138 (set_attr "prefix" "maybe_vex")
5139 (set_attr "mode" "DF")])
5141 (define_insn "*vec_concatv2df_avx"
5142 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5144 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5145 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5148 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5149 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5150 vmovsd\t{%1, %0|%0, %1}"
5151 [(set_attr "type" "ssemov")
5152 (set_attr "prefix" "vex")
5153 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX V2DF concat of two DF operands, two-operand (destructive) forms.
;; Alternatives 0-2 use the "Y2" register class and emit unpcklpd, movhpd
;; or movsd; alternatives 3-4 use plain "x" registers and fall back to the
;; single-precision moves movlhps / movhps (mode attrs V4SF / V2SF).
;; Only the movhpd alternative sets prefix_data16.
;; NOTE(review): the concat operator line and the condition are not visible
;; in this excerpt.
5155 (define_insn "*vec_concatv2df"
5156 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5158 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5159 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5162 unpcklpd\t{%2, %0|%0, %2}
5163 movhpd\t{%2, %0|%0, %2}
5164 movsd\t{%1, %0|%0, %1}
5165 movlhps\t{%2, %0|%0, %2}
5166 movhps\t{%2, %0|%0, %2}"
5167 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5168 (set_attr "prefix_data16" "*,1,*,*,*")
5169 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5171 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5173 ;; Parallel integral arithmetic
5175 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the 16-byte integer vector modes (SSEMODEI).
;; The preparation statement loads an all-zero vector into a register as
;; operands[2].
;; NOTE(review): the RTL operator line is not visible here; presumably the
;; pattern subtracts operand 1 from operands[2] -- confirm.
5177 (define_expand "neg<mode>2"
5178 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5181 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5183 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for vector integer add/subtract on the SSEMODEI modes.  The
;; only preparation is ix86_fixup_binary_operands_no_copy, which is handed
;; the operation code and mode so the operands can be legitimized.
5185 (define_expand "<plusminus_insn><mode>3"
5186 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5188 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5189 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5191 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX form of vector integer add/subtract (SSEMODEI): three-operand
;; vp<op><size> with a separate destination; operand 2 may be memory.
;; Matches only when TARGET_AVX holds and ix86_binary_operator_ok accepts
;; the operand combination.
5193 (define_insn "*avx_<plusminus_insn><mode>3"
5194 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5196 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5197 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5198 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5199 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5200 [(set_attr "type" "sseiadd")
5201 (set_attr "prefix" "vex")
5202 (set_attr "mode" "TI")])
;; SSE2 form of vector integer add/subtract (SSEMODEI): two-operand
;; p<op><size>, so operand 1 is tied to the destination (constraint "0").
;; Operand 2 may be a register or memory.
5204 (define_insn "*<plusminus_insn><mode>3"
5205 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5207 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5208 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5209 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5210 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5211 [(set_attr "type" "sseiadd")
5212 (set_attr "prefix_data16" "1")
5213 (set_attr "mode" "TI")])
;; Expander for the saturating add/subtract codes (sat_plusminus iterator)
;; on the byte and word vector modes (SSEMODE12 = V16QI, V8HI).  Operands
;; are legitimized with ix86_fixup_binary_operands_no_copy.
5215 (define_expand "sse2_<plusminus_insn><mode>3"
5216 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5217 (sat_plusminus:SSEMODE12
5218 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5219 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5221 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX form of saturating add/subtract on V16QI/V8HI (sat_plusminus over
;; SSEMODE12): three-operand VEX encoding, operand 2 may be memory.
;; Shares its unnamed ("*") pattern name with the non-saturating AVX insn;
;; the two are told apart by the RTL code and mode iterators.
5223 (define_insn "*avx_<plusminus_insn><mode>3"
5224 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5225 (sat_plusminus:SSEMODE12
5226 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5227 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5228 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5229 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5230 [(set_attr "type" "sseiadd")
5231 (set_attr "prefix" "vex")
5232 (set_attr "mode" "TI")])
;; SSE2 form of saturating add/subtract on V16QI/V8HI: two-operand
;; encoding with operand 1 tied to the destination (constraint "0").
5234 (define_insn "*sse2_<plusminus_insn><mode>3"
5235 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5236 (sat_plusminus:SSEMODE12
5237 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5238 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5239 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5240 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5241 [(set_attr "type" "sseiadd")
5242 (set_attr "prefix_data16" "1")
5243 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as a split into word multiplies.
;; Outline of the visible split code:
;;   1. allocate twelve V16QI temporaries t[0..11];
;;   2. punpckhbw/punpcklbw each input with itself so every 16-bit word
;;      carries one source byte in its low half;
;;   3. two V8HI multiplies (gen_mulv8hi3) on the high and low halves;
;;   4. six punpck{h,l}bw steps plus a final punpcklbw gather the low
;;      product bytes back into operands[0].
;; The split is only available while new pseudos can still be created.
;; NOTE(review): the first part of the condition and the declarations of
;; "t" and "i" are not visible in this excerpt.
5245 (define_insn_and_split "mulv16qi3"
5246 [(set (match_operand:V16QI 0 "register_operand" "")
5247 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5248 (match_operand:V16QI 2 "register_operand" "")))]
5250 && can_create_pseudo_p ()"
5258 for (i = 0; i < 12; ++i)
5259 t[i] = gen_reg_rtx (V16QImode);
5261 /* Unpack data such that we've got a source byte in each low byte of
5262 each word. We don't care what goes into the high byte of each word.
5263 Rather than trying to get zero in there, most convenient is to let
5264 it be a copy of the low byte. */
5265 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
5266 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
5267 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
5268 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
5270 /* Multiply words. The end-of-line annotations here give a picture of what
5271 the output of that instruction looks like. Dot means don't care; the
5272 letters are the bytes of the result with A being the most significant. */
5273 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5274 gen_lowpart (V8HImode, t[0]),
5275 gen_lowpart (V8HImode, t[1])));
5276 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5277 gen_lowpart (V8HImode, t[2]),
5278 gen_lowpart (V8HImode, t[3])));
5280 /* Extract the relevant bytes and merge them back together. */
5281 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
5282 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
5283 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
5284 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
5285 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
5286 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
5288 emit_insn (gen_sse2_punpcklbw (operands[0], t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Either input may be in memory; the preparation
;; statement legitimizes the operands for a MULT in V8HImode.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5292 (define_expand "mulv8hi3"
5293 [(set (match_operand:V8HI 0 "register_operand" "")
5294 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5295 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5297 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI multiply: three-operand vpmullw.  The "%"
;; constraint modifier marks operands 1 and 2 as commutative.
5299 (define_insn "*avx_mulv8hi3"
5300 [(set (match_operand:V8HI 0 "register_operand" "=x")
5301 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5302 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5303 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5304 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5305 [(set_attr "type" "sseimul")
5306 (set_attr "prefix" "vex")
5307 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI multiply: two-operand pmullw with operand 1 tied
;; to the destination; operands 1 and 2 are commutative ("%").
5309 (define_insn "*mulv8hi3"
5310 [(set (match_operand:V8HI 0 "register_operand" "=x")
5311 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5312 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5313 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5314 "pmullw\t{%2, %0|%0, %2}"
5315 [(set_attr "type" "sseimul")
5316 (set_attr "prefix_data16" "1")
5317 (set_attr "mode" "TI")])
;; Expander for the V8HI signed high-part multiply (matched by the pmulhw
;; insns that follow).  Operands are legitimized for a MULT in V8HImode.
;; NOTE(review): the extend/shift/truncate RTL wrapping the operands and
;; the enabling condition are not visible in this excerpt.
5319 (define_expand "smulv8hi3_highpart"
5320 [(set (match_operand:V8HI 0 "register_operand" "")
5325 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5327 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5330 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI signed high-part multiply: three-operand vpmulhw.
;; The name follows its siblings "*smulv8hi3_highpart" and
;; "*avx_umulv8hi3_highpart" (the previous spelling dropped the "smul"
;; stem).  The leading "*" marks the name as internal, so the rename does
;; not affect any gen_* function.
5332 (define_insn "*avx_smulv8hi3_highpart"
5333 [(set (match_operand:V8HI 0 "register_operand" "=x")
5338 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5340 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5342 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5343 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5344 [(set_attr "type" "sseimul")
5345 (set_attr "prefix" "vex")
5346 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI signed high-part multiply: two-operand pmulhw
;; with operand 1 tied to the destination.
5348 (define_insn "*smulv8hi3_highpart"
5349 [(set (match_operand:V8HI 0 "register_operand" "=x")
5354 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5356 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5358 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5359 "pmulhw\t{%2, %0|%0, %2}"
5360 [(set_attr "type" "sseimul")
5361 (set_attr "prefix_data16" "1")
5362 (set_attr "mode" "TI")])
;; Expander for the V8HI unsigned high-part multiply (matched by the
;; pmulhuw insns that follow).  Operands are legitimized for a MULT in
;; V8HImode.
;; NOTE(review): the extend/shift/truncate RTL wrapping the operands and
;; the enabling condition are not visible in this excerpt.
5364 (define_expand "umulv8hi3_highpart"
5365 [(set (match_operand:V8HI 0 "register_operand" "")
5370 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5372 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5375 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI unsigned high-part multiply: three-operand
;; vpmulhuw with commutative inputs ("%").
5377 (define_insn "*avx_umulv8hi3_highpart"
5378 [(set (match_operand:V8HI 0 "register_operand" "=x")
5383 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5385 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5387 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5388 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5389 [(set_attr "type" "sseimul")
5390 (set_attr "prefix" "vex")
5391 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI unsigned high-part multiply: two-operand pmulhuw
;; with operand 1 tied to the destination.
5393 (define_insn "*umulv8hi3_highpart"
5394 [(set (match_operand:V8HI 0 "register_operand" "=x")
5399 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5401 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5403 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5404 "pmulhuw\t{%2, %0|%0, %2}"
5405 [(set_attr "type" "sseimul")
5406 (set_attr "prefix_data16" "1")
5407 (set_attr "mode" "TI")])
;; Expander for the widening V4SI -> V2DI multiply that uses elements 0
;; and 2 of each input (see the two vec_select index lists).  The "u" in
;; the name and the pmuludq insns below indicate the unsigned variant.
;; NOTE(review): the mult/extend operator lines and the condition are not
;; visible in this excerpt.
5409 (define_expand "sse2_umulv2siv2di3"
5410 [(set (match_operand:V2DI 0 "register_operand" "")
5414 (match_operand:V4SI 1 "nonimmediate_operand" "")
5415 (parallel [(const_int 0) (const_int 2)])))
5418 (match_operand:V4SI 2 "nonimmediate_operand" "")
5419 (parallel [(const_int 0) (const_int 2)])))))]
5421 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the elements-0-and-2 widening multiply: three-operand
;; vpmuludq producing a V2DI result from two V4SI inputs.
5423 (define_insn "*avx_umulv2siv2di3"
5424 [(set (match_operand:V2DI 0 "register_operand" "=x")
5428 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5429 (parallel [(const_int 0) (const_int 2)])))
5432 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5433 (parallel [(const_int 0) (const_int 2)])))))]
5434 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5435 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5436 [(set_attr "type" "sseimul")
5437 (set_attr "prefix" "vex")
5438 (set_attr "mode" "TI")])
;; SSE2 form of the elements-0-and-2 widening multiply: two-operand
;; pmuludq with operand 1 tied to the destination.
5440 (define_insn "*sse2_umulv2siv2di3"
5441 [(set (match_operand:V2DI 0 "register_operand" "=x")
5445 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5446 (parallel [(const_int 0) (const_int 2)])))
5449 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5450 (parallel [(const_int 0) (const_int 2)])))))]
5451 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5452 "pmuludq\t{%2, %0|%0, %2}"
5453 [(set_attr "type" "sseimul")
5454 (set_attr "prefix_data16" "1")
5455 (set_attr "mode" "TI")])
;; Expander for the SSE4.1 counterpart of the elements-0-and-2 widening
;; V4SI -> V2DI multiply (matched by the pmuldq insns below).
;; NOTE(review): the mult/extend operator lines and the condition are not
;; visible in this excerpt.
5457 (define_expand "sse4_1_mulv2siv2di3"
5458 [(set (match_operand:V2DI 0 "register_operand" "")
5462 (match_operand:V4SI 1 "nonimmediate_operand" "")
5463 (parallel [(const_int 0) (const_int 2)])))
5466 (match_operand:V4SI 2 "nonimmediate_operand" "")
5467 (parallel [(const_int 0) (const_int 2)])))))]
5469 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the pmuldq widening multiply: three-operand vpmuldq on
;; elements 0 and 2 of each V4SI input.  prefix_extra is set to 1 for
;; this encoding.
5471 (define_insn "*avx_mulv2siv2di3"
5472 [(set (match_operand:V2DI 0 "register_operand" "=x")
5476 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5477 (parallel [(const_int 0) (const_int 2)])))
5480 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5481 (parallel [(const_int 0) (const_int 2)])))))]
5482 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5483 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5484 [(set_attr "type" "sseimul")
5485 (set_attr "prefix_extra" "1")
5486 (set_attr "prefix" "vex")
5487 (set_attr "mode" "TI")])
;; SSE4.1 form of the widening multiply: two-operand pmuldq with
;; operand 1 tied to the destination; requires TARGET_SSE4_1.
5489 (define_insn "*sse4_1_mulv2siv2di3"
5490 [(set (match_operand:V2DI 0 "register_operand" "=x")
5494 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5495 (parallel [(const_int 0) (const_int 2)])))
5498 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5499 (parallel [(const_int 0) (const_int 2)])))))]
5500 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5501 "pmuldq\t{%2, %0|%0, %2}"
5502 [(set_attr "type" "sseimul")
5503 (set_attr "prefix_extra" "1")
5504 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V8HI inputs, V4SI result.  The pattern selects
;; one V4HI group starting at element 0 and another starting at element 1
;; (ending at 7) from each input; operands 1 and 2 are referenced a second
;; time through match_dup.
;; NOTE(review): the plus/mult/extend operator lines, the full index lists
;; and the condition are not visible in this excerpt.
5506 (define_expand "sse2_pmaddwd"
5507 [(set (match_operand:V4SI 0 "register_operand" "")
5512 (match_operand:V8HI 1 "nonimmediate_operand" "")
5513 (parallel [(const_int 0)
5519 (match_operand:V8HI 2 "nonimmediate_operand" "")
5520 (parallel [(const_int 0)
5526 (vec_select:V4HI (match_dup 1)
5527 (parallel [(const_int 1)
5532 (vec_select:V4HI (match_dup 2)
5533 (parallel [(const_int 1)
5536 (const_int 7)]))))))]
5538 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of pmaddwd: three-operand vpmaddwd, V8HI inputs and V4SI
;; result, same element selection as the sse2_pmaddwd expander.
5540 (define_insn "*avx_pmaddwd"
5541 [(set (match_operand:V4SI 0 "register_operand" "=x")
5546 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5547 (parallel [(const_int 0)
5553 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5554 (parallel [(const_int 0)
5560 (vec_select:V4HI (match_dup 1)
5561 (parallel [(const_int 1)
5566 (vec_select:V4HI (match_dup 2)
5567 (parallel [(const_int 1)
5570 (const_int 7)]))))))]
5571 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5572 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5573 [(set_attr "type" "sseiadd")
5574 (set_attr "prefix" "vex")
5575 (set_attr "mode" "TI")])
;; SSE2 form of pmaddwd: two-operand encoding with operand 1 tied to the
;; destination.  Unlike the AVX variant it also sets atom_unit to "simul".
5577 (define_insn "*sse2_pmaddwd"
5578 [(set (match_operand:V4SI 0 "register_operand" "=x")
5583 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5584 (parallel [(const_int 0)
5590 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5591 (parallel [(const_int 0)
5597 (vec_select:V4HI (match_dup 1)
5598 (parallel [(const_int 1)
5603 (vec_select:V4HI (match_dup 2)
5604 (parallel [(const_int 1)
5607 (const_int 7)]))))))]
5608 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5609 "pmaddwd\t{%2, %0|%0, %2}"
5610 [(set_attr "type" "sseiadd")
5611 (set_attr "atom_unit" "simul")
5612 (set_attr "prefix_data16" "1")
5613 (set_attr "mode" "TI")])
;; V4SI multiply expander.  Both inputs must already be registers.  The
;; operand fixup is applied only when SSE4.1 or XOP is available; without
;; them the multiply is left for the "*sse2_mulv4si3" split further down.
;; NOTE(review): the enabling condition and the braces around the
;; preparation code are not visible in this excerpt.
5615 (define_expand "mulv4si3"
5616 [(set (match_operand:V4SI 0 "register_operand" "")
5617 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5618 (match_operand:V4SI 2 "register_operand" "")))]
5621 if (TARGET_SSE4_1 || TARGET_XOP)
5622 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX form of the V4SI multiply: three-operand vpmulld, commutative
;; inputs, operand 2 may be memory.
5625 (define_insn "*avx_mulv4si3"
5626 [(set (match_operand:V4SI 0 "register_operand" "=x")
5627 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5628 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5629 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5630 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5631 [(set_attr "type" "sseimul")
5632 (set_attr "prefix_extra" "1")
5633 (set_attr "prefix" "vex")
5634 (set_attr "mode" "TI")])
;; SSE4.1 form of the V4SI multiply: two-operand pmulld with operand 1
;; tied to the destination.
5636 (define_insn "*sse4_1_mulv4si3"
5637 [(set (match_operand:V4SI 0 "register_operand" "=x")
5638 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5639 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5640 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5641 "pmulld\t{%2, %0|%0, %2}"
5642 [(set_attr "type" "sseimul")
5643 (set_attr "prefix_extra" "1")
5644 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2-only targets (neither SSE4.1 nor XOP), split
;; while pseudos can still be created.  Visible sequence:
;;   1. pmuludq on the inputs           -> products of elements 0 and 2 (t1)
;;   2. whole-register right shift (gen_sse2_lshrti3) of both inputs so
;;      elements 1 and 3 land in slots 0 and 2 (t2, t3)
;;   3. pmuludq again                   -> products of elements 1 and 3 (t4)
;;   4. pshufd on t1 and t4 to pack the wanted low words (t5, t6)
;;   5. punpckldq interleaves t5/t6 into the result.
;; NOTE(review): the assignments of op0/op1/op2 and several call argument
;; lines (including the shift amounts) are not visible in this excerpt.
5646 (define_insn_and_split "*sse2_mulv4si3"
5647 [(set (match_operand:V4SI 0 "register_operand" "")
5648 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5649 (match_operand:V4SI 2 "register_operand" "")))]
5650 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_XOP
5651 && can_create_pseudo_p ()"
5656 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5662 t1 = gen_reg_rtx (V4SImode);
5663 t2 = gen_reg_rtx (V4SImode);
5664 t3 = gen_reg_rtx (V4SImode);
5665 t4 = gen_reg_rtx (V4SImode);
5666 t5 = gen_reg_rtx (V4SImode);
5667 t6 = gen_reg_rtx (V4SImode);
5668 thirtytwo = GEN_INT (32);
5670 /* Multiply elements 2 and 0. */
5671 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5674 /* Shift both input vectors down one element, so that elements 3
5675 and 1 are now in the slots for elements 2 and 0. For K8, at
5676 least, this is faster than using a shuffle. */
5677 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5678 gen_lowpart (TImode, op1),
5680 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5681 gen_lowpart (TImode, op2),
5683 /* Multiply elements 3 and 1. */
5684 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5687 /* Move the results in element 2 down to element 1; we don't care
5688 what goes in elements 2 and 3. */
5689 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5690 const0_rtx, const0_rtx));
5691 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5692 const0_rtx, const0_rtx));
5694 /* Merge the parts back together. */
5695 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, split into 32-bit partial products while pseudos can
;; still be created.  Two instruction sequences are visible:
;;   * an XOP sequence: pshufd, pmacsdd, phadddq, a left shift by 32 and
;;     a final pmacsdql that adds the low-part product;
;;   * an SSE2 sequence: low*low via pmuludq, each high half (obtained by
;;     a logical right shift by 32) times the other input's low half,
;;     both cross products shifted left by 32, then the three parts added.
;; NOTE(review): the branch that chooses between the two sequences, the
;; op0/op1/op2 assignments of the SSE2 path and the first part of the
;; condition are not visible in this excerpt.
5699 (define_insn_and_split "mulv2di3"
5700 [(set (match_operand:V2DI 0 "register_operand" "")
5701 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5702 (match_operand:V2DI 2 "register_operand" "")))]
5704 && can_create_pseudo_p ()"
5709 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5714 /* op1: A,B,C,D, op2: E,F,G,H */
5716 op1 = gen_lowpart (V4SImode, operands[1]);
5717 op2 = gen_lowpart (V4SImode, operands[2]);
5718 t1 = gen_reg_rtx (V4SImode);
5719 t2 = gen_reg_rtx (V4SImode);
5720 t3 = gen_reg_rtx (V4SImode);
5721 t4 = gen_reg_rtx (V2DImode);
5722 t5 = gen_reg_rtx (V2DImode);
5725 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5732 emit_move_insn (t2, CONST0_RTX (V4SImode));
5734 /* t3: (B*E),(A*F),(D*G),(C*H) */
5735 emit_insn (gen_xop_pmacsdd (t3, t1, op2, t2));
5737 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5738 emit_insn (gen_xop_phadddq (t4, t3));
5740 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5741 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5743 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5744 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t5));
5751 t1 = gen_reg_rtx (V2DImode);
5752 t2 = gen_reg_rtx (V2DImode);
5753 t3 = gen_reg_rtx (V2DImode);
5754 t4 = gen_reg_rtx (V2DImode);
5755 t5 = gen_reg_rtx (V2DImode);
5756 t6 = gen_reg_rtx (V2DImode);
5757 thirtytwo = GEN_INT (32);
5759 /* Multiply low parts. */
5760 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5761 gen_lowpart (V4SImode, op2)));
5763 /* Shift input vectors right 32 bits so we can multiply high parts. */
5764 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5765 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5767 /* Multiply high parts by low parts. */
5768 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5769 gen_lowpart (V4SImode, t3)));
5770 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5771 gen_lowpart (V4SImode, t2)));
5773 /* Shift them back. */
5774 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5775 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5777 /* Add the three parts together. */
5778 emit_insn (gen_addv2di3 (t6, t1, t4));
5779 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, "hi" half: V8HI x V8HI -> V4SI.  Computes the
;; low 16 bits (mulv8hi3) and the signed high 16 bits (smulv8hi3_highpart)
;; of every product, then interleaves the high halves of the two result
;; vectors into the destination, viewed as V8HI.
;; NOTE(review): the condition and the op1/op2 assignments are not
;; visible in this excerpt.
5783 (define_expand "vec_widen_smult_hi_v8hi"
5784 [(match_operand:V4SI 0 "register_operand" "")
5785 (match_operand:V8HI 1 "register_operand" "")
5786 (match_operand:V8HI 2 "register_operand" "")]
5789 rtx op1, op2, t1, t2, dest;
5793 t1 = gen_reg_rtx (V8HImode);
5794 t2 = gen_reg_rtx (V8HImode);
5795 dest = gen_lowpart (V8HImode, operands[0]);
5797 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5798 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5799 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, "lo" half: V8HI x V8HI -> V4SI.  Same scheme
;; as the "hi" expander (low product words + signed high product words),
;; but the final step interleaves the low halves of the two vectors.
;; NOTE(review): the condition and the op1/op2 assignments are not
;; visible in this excerpt.
5803 (define_expand "vec_widen_smult_lo_v8hi"
5804 [(match_operand:V4SI 0 "register_operand" "")
5805 (match_operand:V8HI 1 "register_operand" "")
5806 (match_operand:V8HI 2 "register_operand" "")]
5809 rtx op1, op2, t1, t2, dest;
5813 t1 = gen_reg_rtx (V8HImode);
5814 t2 = gen_reg_rtx (V8HImode);
5815 dest = gen_lowpart (V8HImode, operands[0]);
5817 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5818 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5819 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, "hi" half: V8HI x V8HI -> V4SI.  Low
;; product words come from mulv8hi3, high product words from
;; umulv8hi3_highpart; the high halves of both are interleaved into the
;; destination, viewed as V8HI.
;; NOTE(review): the condition and the op1/op2 assignments are not
;; visible in this excerpt.
5823 (define_expand "vec_widen_umult_hi_v8hi"
5824 [(match_operand:V4SI 0 "register_operand" "")
5825 (match_operand:V8HI 1 "register_operand" "")
5826 (match_operand:V8HI 2 "register_operand" "")]
5829 rtx op1, op2, t1, t2, dest;
5833 t1 = gen_reg_rtx (V8HImode);
5834 t2 = gen_reg_rtx (V8HImode);
5835 dest = gen_lowpart (V8HImode, operands[0]);
5837 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5838 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5839 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, "lo" half: V8HI x V8HI -> V4SI.  Same as
;; the unsigned "hi" expander except that the low halves of the low-word
;; and high-word product vectors are interleaved.
;; NOTE(review): the condition and the op1/op2 assignments are not
;; visible in this excerpt.
5843 (define_expand "vec_widen_umult_lo_v8hi"
5844 [(match_operand:V4SI 0 "register_operand" "")
5845 (match_operand:V8HI 1 "register_operand" "")
5846 (match_operand:V8HI 2 "register_operand" "")]
5849 rtx op1, op2, t1, t2, dest;
5853 t1 = gen_reg_rtx (V8HImode);
5854 t2 = gen_reg_rtx (V8HImode);
5855 dest = gen_lowpart (V8HImode, operands[0]);
5857 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5858 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5859 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, "hi" half: V4SI x V4SI -> V2DI.  Both inputs
;; are first rearranged with pshufd into fresh temporaries, then the XOP
;; generator gen_xop_mulv2div2di3_high produces the result.
;; NOTE(review): the condition, the declaration of t1/t2 and the pshufd
;; selector arguments are not visible in this excerpt.
5863 (define_expand "vec_widen_smult_hi_v4si"
5864 [(match_operand:V2DI 0 "register_operand" "")
5865 (match_operand:V4SI 1 "register_operand" "")
5866 (match_operand:V4SI 2 "register_operand" "")]
5871 t1 = gen_reg_rtx (V4SImode);
5872 t2 = gen_reg_rtx (V4SImode);
5874 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5879 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5884 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply, "lo" half: V4SI x V4SI -> V2DI.  Mirrors the
;; "hi" expander: pshufd both inputs, then gen_xop_mulv2div2di3_low.
;; NOTE(review): the condition, the declaration of t1/t2 and the pshufd
;; selector arguments are not visible in this excerpt.
5888 (define_expand "vec_widen_smult_lo_v4si"
5889 [(match_operand:V2DI 0 "register_operand" "")
5890 (match_operand:V4SI 1 "register_operand" "")
5891 (match_operand:V4SI 2 "register_operand" "")]
5896 t1 = gen_reg_rtx (V4SImode);
5897 t2 = gen_reg_rtx (V4SImode);
5899 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5904 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5909 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Unsigned widening multiply, "hi" half: V4SI x V4SI -> V2DI.  Each input
;; is interleaved with itself (high half) so the wanted elements occupy
;; slots 0 and 2, which is where the pmuludq-based sse2_umulv2siv2di3
;; reads its factors.
;; NOTE(review): the condition and the op1/op2 assignments are not
;; visible in this excerpt.
5913 (define_expand "vec_widen_umult_hi_v4si"
5914 [(match_operand:V2DI 0 "register_operand" "")
5915 (match_operand:V4SI 1 "register_operand" "")
5916 (match_operand:V4SI 2 "register_operand" "")]
5919 rtx op1, op2, t1, t2;
5923 t1 = gen_reg_rtx (V4SImode);
5924 t2 = gen_reg_rtx (V4SImode);
5926 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5927 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5928 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, "lo" half: V4SI x V4SI -> V2DI.  Same as
;; the "hi" expander but each input is self-interleaved on its low half
;; before sse2_umulv2siv2di3 is applied.
;; NOTE(review): the condition and the op1/op2 assignments are not
;; visible in this excerpt.
5932 (define_expand "vec_widen_umult_lo_v4si"
5933 [(match_operand:V2DI 0 "register_operand" "")
5934 (match_operand:V4SI 1 "register_operand" "")
5935 (match_operand:V4SI 2 "register_operand" "")]
5938 rtx op1, op2, t1, t2;
5942 t1 = gen_reg_rtx (V4SImode);
5943 t2 = gen_reg_rtx (V4SImode);
5945 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5946 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5947 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operands 1 and 2 (V8HI) are combined with
;; pmaddwd into a V4SI temporary, which is then added to the V4SI
;; accumulator in operand 3 to give operand 0.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5951 (define_expand "sdot_prodv8hi"
5952 [(match_operand:V4SI 0 "register_operand" "")
5953 (match_operand:V8HI 1 "register_operand" "")
5954 (match_operand:V8HI 2 "register_operand" "")
5955 (match_operand:V4SI 3 "register_operand" "")]
5958 rtx t = gen_reg_rtx (V4SImode);
5959 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5960 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step on V4SI inputs with a V2DI accumulator
;; (operand 3).  Visible sequence:
;;   1. pmuludq of the inputs (elements 0 and 2), plus the accumulator -> t1
;;   2. whole-register right shift of both inputs (gen_sse2_lshrti3) to
;;      bring the other elements into the multiplied slots (t2, t3)
;;   3. pmuludq of the shifted inputs -> t4
;;   4. operand 0 = t1 + t4.
;; NOTE(review): the condition, the declaration of t1..t4 and the shift
;; amounts are not visible in this excerpt.
5964 (define_expand "udot_prodv4si"
5965 [(match_operand:V2DI 0 "register_operand" "")
5966 (match_operand:V4SI 1 "register_operand" "")
5967 (match_operand:V4SI 2 "register_operand" "")
5968 (match_operand:V2DI 3 "register_operand" "")]
5973 t1 = gen_reg_rtx (V2DImode);
5974 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5975 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5977 t2 = gen_reg_rtx (V4SImode);
5978 t3 = gen_reg_rtx (V4SImode);
5979 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5980 gen_lowpart (TImode, operands[1]),
5982 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5983 gen_lowpart (TImode, operands[2]),
5986 t4 = gen_reg_rtx (V2DImode);
5987 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5989 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX arithmetic right shift (vpsra) for the word and dword vector modes
;; (SSEMODE24).  The SI shift count is either an SSE register or an
;; immediate ("xN"); length_immediate depends on whether operand 2 is a
;; const_int.
;; NOTE(review): the RTL shift operator line, the condition and the
;; "then" value of length_immediate are not visible in this excerpt.
5993 (define_insn "*avx_ashr<mode>3"
5994 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5996 (match_operand:SSEMODE24 1 "register_operand" "x")
5997 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5999 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6000 [(set_attr "type" "sseishft")
6001 (set_attr "prefix" "vex")
6002 (set (attr "length_immediate")
6003 (if_then_else (match_operand 2 "const_int_operand" "")
6005 (const_string "0")))
6006 (set_attr "mode" "TI")])
;; Arithmetic right shift (psra) for V8HI/V4SI, two-operand form with
;; operand 1 tied to the destination.  The SI count is an SSE register or
;; an immediate; length_immediate depends on whether it is a const_int.
;; NOTE(review): the RTL shift operator line, the condition and the
;; "then" value of length_immediate are not visible in this excerpt.
6008 (define_insn "ashr<mode>3"
6009 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6011 (match_operand:SSEMODE24 1 "register_operand" "0")
6012 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6014 "psra<ssevecsize>\t{%2, %0|%0, %2}"
6015 [(set_attr "type" "sseishft")
6016 (set_attr "prefix_data16" "1")
6017 (set (attr "length_immediate")
6018 (if_then_else (match_operand 2 "const_int_operand" "")
6020 (const_string "0")))
6021 (set_attr "mode" "TI")])
;; AVX logical right shift (vpsrl) for V8HI/V4SI/V2DI (SSEMODE248),
;; three-operand form.  The SI count is an SSE register or an immediate;
;; length_immediate depends on whether it is a const_int.
;; NOTE(review): the condition and the "then" value of length_immediate
;; are not visible in this excerpt.
6023 (define_insn "*avx_lshr<mode>3"
6024 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6025 (lshiftrt:SSEMODE248
6026 (match_operand:SSEMODE248 1 "register_operand" "x")
6027 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6029 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6030 [(set_attr "type" "sseishft")
6031 (set_attr "prefix" "vex")
6032 (set (attr "length_immediate")
6033 (if_then_else (match_operand 2 "const_int_operand" "")
6035 (const_string "0")))
6036 (set_attr "mode" "TI")])
;; Logical right shift (psrl) for V8HI/V4SI/V2DI, two-operand form with
;; operand 1 tied to the destination.  The SI count is an SSE register or
;; an immediate; length_immediate depends on whether it is a const_int.
;; NOTE(review): the condition and the "then" value of length_immediate
;; are not visible in this excerpt.
6038 (define_insn "lshr<mode>3"
6039 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6040 (lshiftrt:SSEMODE248
6041 (match_operand:SSEMODE248 1 "register_operand" "0")
6042 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6044 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6045 [(set_attr "type" "sseishft")
6046 (set_attr "prefix_data16" "1")
6047 (set (attr "length_immediate")
6048 (if_then_else (match_operand 2 "const_int_operand" "")
6050 (const_string "0")))
6051 (set_attr "mode" "TI")])
;; AVX left shift (vpsll) for V8HI/V4SI/V2DI (SSEMODE248), three-operand
;; form.  The SI count is an SSE register or an immediate;
;; length_immediate depends on whether it is a const_int.
;; NOTE(review): the RTL shift operator line, the condition and the
;; "then" value of length_immediate are not visible in this excerpt.
6053 (define_insn "*avx_ashl<mode>3"
6054 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6056 (match_operand:SSEMODE248 1 "register_operand" "x")
6057 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6059 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6060 [(set_attr "type" "sseishft")
6061 (set_attr "prefix" "vex")
6062 (set (attr "length_immediate")
6063 (if_then_else (match_operand 2 "const_int_operand" "")
6065 (const_string "0")))
6066 (set_attr "mode" "TI")])
;; Left shift (psll) for V8HI/V4SI/V2DI, two-operand form with operand 1
;; tied to the destination.  The SI count is an SSE register or an
;; immediate; length_immediate depends on whether it is a const_int.
;; NOTE(review): the RTL shift operator line, the condition and the
;; "then" value of length_immediate are not visible in this excerpt.
6068 (define_insn "ashl<mode>3"
6069 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6071 (match_operand:SSEMODE248 1 "register_operand" "0")
6072 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6074 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6075 [(set_attr "type" "sseishft")
6076 (set_attr "prefix_data16" "1")
6077 (set (attr "length_immediate")
6078 (if_then_else (match_operand 2 "const_int_operand" "")
6080 (const_string "0")))
6081 (set_attr "mode" "TI")])
;; Whole-vector left shift for the SSEMODEI modes, expressed as a TImode
;; ashift.  The count must satisfy const_0_to_255_mul_8_operand.  The
;; preparation code re-views operands 0 and 1 as TImode with gen_lowpart
;; so they fit the TImode shift in the template.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6083 (define_expand "vec_shl_<mode>"
6084 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6085 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
6086 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6089 operands[0] = gen_lowpart (TImode, operands[0]);
6090 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for the SSEMODEI modes, expressed as
;; a TImode lshiftrt.  The count must satisfy
;; const_0_to_255_mul_8_operand; operands 0 and 1 are re-viewed as TImode
;; with gen_lowpart.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6093 (define_expand "vec_shr_<mode>"
6094 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6095 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
6096 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6099 operands[0] = gen_lowpart (TImode, operands[0]);
6100 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX integer max/min for V16QI/V8HI/V4SI (SSEMODE124): three-operand
;; vp<maxminiprefix><size>.  prefix_extra is computed per instance: the
;; symbol_ref tests whether the mode differs from V8HImode for SMAX/SMIN,
;; or from V16QImode for the other codes.
;; NOTE(review): the RTL operator line and the if_then_else wrapper of the
;; prefix_extra attribute are not fully visible in this excerpt.
6103 (define_insn "*avx_<code><mode>3"
6104 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6106 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6107 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6108 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6109 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6110 [(set_attr "type" "sseiadd")
6111 (set (attr "prefix_extra")
6113 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6116 (const_string "0")))
6117 (set_attr "prefix" "vex")
6118 (set_attr "mode" "TI")])
;; Expander for a V16QI max/min operation taken from a code iterator;
;; operands are legitimized with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line (which names the iterator) and the
;; condition are not visible in this excerpt.
6120 (define_expand "<code>v16qi3"
6121 [(set (match_operand:V16QI 0 "register_operand" "")
6123 (match_operand:V16QI 1 "nonimmediate_operand" "")
6124 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6126 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI max/min insn: two-operand p<maxminiprefix>b with operand 1
;; tied to the destination and commutative inputs ("%").
;; NOTE(review): the RTL operator line is not visible in this excerpt.
6128 (define_insn "*<code>v16qi3"
6129 [(set (match_operand:V16QI 0 "register_operand" "=x")
6131 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6132 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6133 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6134 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
6135 [(set_attr "type" "sseiadd")
6136 (set_attr "prefix_data16" "1")
6137 (set_attr "mode" "TI")])
;; Expander for a V8HI max/min operation taken from a code iterator;
;; operands are legitimized with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line (which names the iterator) and the
;; condition are not visible in this excerpt.
6139 (define_expand "<code>v8hi3"
6140 [(set (match_operand:V8HI 0 "register_operand" "")
6142 (match_operand:V8HI 1 "nonimmediate_operand" "")
6143 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6145 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI max/min insn: two-operand p<maxminiprefix>w with operand 1
;; tied to the destination and commutative inputs ("%").
;; NOTE(review): the RTL operator line is not visible in this excerpt.
6147 (define_insn "*<code>v8hi3"
6148 [(set (match_operand:V8HI 0 "register_operand" "=x")
6150 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6151 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6152 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6153 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
6154 [(set_attr "type" "sseiadd")
6155 (set_attr "prefix_data16" "1")
6156 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two code paths are visible:
;;   * a plain operand fixup (the pattern itself is then emitted), and
;;   * an emulation: op3 = op1 -us op2 (unsigned saturating subtract),
;;     result = op3 + op2.  A fresh temporary is used for op3 when the
;;     destination is the same rtx as op2, so op2 is not overwritten
;;     before the final add.
;; NOTE(review): the test that chooses between the two paths and the
;; enabling condition are not visible in this excerpt.
6158 (define_expand "umaxv8hi3"
6159 [(set (match_operand:V8HI 0 "register_operand" "")
6160 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6161 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6165 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6168 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6169 if (rtx_equal_p (op3, op2))
6170 op3 = gen_reg_rtx (V8HImode);
6171 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6172 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for V16QI/V4SI (SSEMODE14).  One visible path only
;; fixes up the operands; the other builds a vector conditional for
;; ix86_expand_int_vcond: dest = (op1 > op2) ? op1 : op2, using a signed
;; GT comparison.
;; NOTE(review): the test that chooses between the two paths, the
;; declarations of xops/ok and the condition are not visible here.
6177 (define_expand "smax<mode>3"
6178 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6179 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6180 (match_operand:SSEMODE14 2 "register_operand" "")))]
6184 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6190 xops[0] = operands[0];
6191 xops[1] = operands[1];
6192 xops[2] = operands[2];
6193 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6194 xops[4] = operands[1];
6195 xops[5] = operands[2];
6196 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for V16QI/V4SI (SSEMODE14): two-operand
;; p<maxminiprefix><size> with operand 1 tied to the destination.
;; NOTE(review): the RTL operator line (which names the code iterator) is
;; not visible in this excerpt; a second insn with the same "*" name
;; covers SSEMODE24 further down.
6202 (define_insn "*sse4_1_<code><mode>3"
6203 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6205 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6206 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6207 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6208 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
6209 [(set_attr "type" "sseiadd")
6210 (set_attr "prefix_extra" "1")
6211 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  One visible path only fixes up the operands;
;; the other builds a vector conditional for ix86_expand_int_vcond:
;; dest = (op1 >u op2) ? op1 : op2, using an unsigned GTU comparison.
;; NOTE(review): the test that chooses between the two paths, the
;; declarations of xops/ok and the condition are not visible here.
6213 (define_expand "umaxv4si3"
6214 [(set (match_operand:V4SI 0 "register_operand" "")
6215 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6216 (match_operand:V4SI 2 "register_operand" "")))]
6220 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6226 xops[0] = operands[0];
6227 xops[1] = operands[1];
6228 xops[2] = operands[2];
6229 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6230 xops[4] = operands[1];
6231 xops[5] = operands[2];
6232 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for V8HI/V4SI (SSEMODE24): two-operand
;; p<maxminiprefix><size> with operand 1 tied to the destination.
;; NOTE(review): the RTL operator line (which names the code iterator) is
;; not visible in this excerpt; it distinguishes this insn from the
;; identically named SSEMODE14 one above.
6238 (define_insn "*sse4_1_<code><mode>3"
6239 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6241 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6242 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6243 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6244 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
6245 [(set_attr "type" "sseiadd")
6246 (set_attr "prefix_extra" "1")
6247 (set_attr "mode" "TI")])
;; Signed minimum for V16QI/V4SI (SSEMODE14).  One visible path only
;; fixes up the operands; the other reuses the signed GT comparison of
;; smax but swaps the two selected values (xops[1] = op2, xops[2] = op1),
;; i.e. dest = (op1 > op2) ? op2 : op1.
;; NOTE(review): the test that chooses between the two paths, the
;; declarations of xops/ok and the condition are not visible here.
6249 (define_expand "smin<mode>3"
6250 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6251 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6252 (match_operand:SSEMODE14 2 "register_operand" "")))]
6256 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6262 xops[0] = operands[0];
6263 xops[1] = operands[2];
6264 xops[2] = operands[1];
6265 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6266 xops[4] = operands[1];
6267 xops[5] = operands[2];
6268 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for V8HI/V4SI (SSEMODE24).  One visible path only
;; fixes up the operands; the other builds a vector conditional with an
;; unsigned GTU comparison and swapped selected values:
;; dest = (op1 >u op2) ? op2 : op1.
;; NOTE(review): the test that chooses between the two paths, the
;; declarations of xops/ok and the condition are not visible here.
6274 (define_expand "umin<mode>3"
6275 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6276 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6277 (match_operand:SSEMODE24 2 "register_operand" "")))]
6281 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6287 xops[0] = operands[0];
6288 xops[1] = operands[2];
6289 xops[2] = operands[1];
6290 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6291 xops[4] = operands[1];
6292 xops[5] = operands[2];
6293 ok = ix86_expand_int_vcond (xops);
6299 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6301 ;; Parallel integral comparisons
6303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality over the SSEMODE124 modes
;; (V16QI, V8HI, V4SI).  Available for SSE2 targets without XOP.  Its only
;; preparation step is to canonicalize the operands for EQ with
;; ix86_fixup_binary_operands_no_copy; the pattern itself is then emitted.
6305 (define_expand "sse2_eq<mode>3"
6306 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6308 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6309 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6310 "TARGET_SSE2 && !TARGET_XOP "
6311 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX three-operand equality compare over the SSEMODE1248 modes
;; (V16QI, V8HI, V4SI, V2DI).  Operand 1 is commutative ("%x") and need not
;; be tied to the destination.  Emits vpcmpeq<ssevecsize>.
;; The "prefix_extra" attribute depends on whether operand 0 is V2DI; only
;; the "*" alternative is visible in this listing.
6313 (define_insn "*avx_eq<mode>3"
6314 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6316 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6317 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6318 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6319 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6320 [(set_attr "type" "ssecmp")
6321 (set (attr "prefix_extra")
6322 (if_then_else (match_operand:V2DI 0 "" "")
6324 (const_string "*")))
6325 (set_attr "prefix" "vex")
6326 (set_attr "mode" "TI")])
;; SSE2 two-operand equality compare over the SSEMODE124 modes
;; (V16QI, V8HI, V4SI).  Operand 1 is tied to the destination and
;; commutative ("%0").  Requires SSE2 without XOP and operands accepted by
;; ix86_binary_operator_ok.  Emits pcmpeq<ssevecsize>.
6328 (define_insn "*sse2_eq<mode>3"
6329 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6331 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6332 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6333 "TARGET_SSE2 && !TARGET_XOP
6334 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6335 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6336 [(set_attr "type" "ssecmp")
6337 (set_attr "prefix_data16" "1")
6338 (set_attr "mode" "TI")])
;; Expander for V2DI equality.  Its preparation step canonicalizes the
;; operands for EQ with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the enabling condition is on a line not visible here.
6340 (define_expand "sse4_1_eqv2di3"
6341 [(set (match_operand:V2DI 0 "register_operand" "")
6343 (match_operand:V2DI 1 "nonimmediate_operand" "")
6344 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6346 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand V2DI equality compare.  Operand 1 is tied to the
;; destination and commutative ("%0"); operand 2 may be register or memory.
;; Emits pcmpeqq.
6348 (define_insn "*sse4_1_eqv2di3"
6349 [(set (match_operand:V2DI 0 "register_operand" "=x")
6351 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6352 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6353 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6354 "pcmpeqq\t{%2, %0|%0, %2}"
6355 [(set_attr "type" "ssecmp")
6356 (set_attr "prefix_extra" "1")
6357 (set_attr "mode" "TI")])
;; AVX three-operand compare over the SSEMODE1248 modes that emits
;; vpcmpgt<ssevecsize>.  Unlike the equality patterns, operand 1 must be a
;; register and is not marked commutative.
;; The "prefix_extra" attribute depends on whether operand 0 is V2DI; only
;; the "*" alternative is visible in this listing.
;; NOTE(review): the rtx code line and the enabling condition are not
;; visible here.
6359 (define_insn "*avx_gt<mode>3"
6360 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6362 (match_operand:SSEMODE1248 1 "register_operand" "x")
6363 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6365 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6366 [(set_attr "type" "ssecmp")
6367 (set (attr "prefix_extra")
6368 (if_then_else (match_operand:V2DI 0 "" "")
6370 (const_string "*")))
6371 (set_attr "prefix" "vex")
6372 (set_attr "mode" "TI")])
;; SSE2 two-operand compare over the SSEMODE124 modes (V16QI, V8HI, V4SI)
;; that emits pcmpgt<ssevecsize>.  Operand 1 is a register tied to the
;; destination ("0", not commutative); operand 2 may be register or memory.
;; Enabled for SSE2 targets without XOP.
6374 (define_insn "sse2_gt<mode>3"
6375 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6377 (match_operand:SSEMODE124 1 "register_operand" "0")
6378 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6379 "TARGET_SSE2 && !TARGET_XOP"
6380 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6381 [(set_attr "type" "ssecmp")
6382 (set_attr "prefix_data16" "1")
6383 (set_attr "mode" "TI")])
;; Two-operand V2DI compare that emits pcmpgtq.  Operand 1 is a register
;; tied to the destination; operand 2 may be register or memory.
;; NOTE(review): the rtx code line and the enabling condition are not
;; visible here.
6385 (define_insn "sse4_2_gtv2di3"
6386 [(set (match_operand:V2DI 0 "register_operand" "=x")
6388 (match_operand:V2DI 1 "register_operand" "0")
6389 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6391 "pcmpgtq\t{%2, %0|%0, %2}"
6392 [(set_attr "type" "ssecmp")
6393 (set_attr "prefix_extra" "1")
6394 (set_attr "mode" "TI")])
;; Vector conditional-select expander over the SSEMODE124C8 modes.
;; Operand 3 is the comparison operator applied to operands 4 and 5;
;; operands 1 and 2 are the two values to select between.  The whole
;; operand array is handed to ix86_expand_int_vcond, whose boolean result
;; is stored in `ok'.
;; NOTE(review): the enabling condition and the code that consumes `ok'
;; are not visible here.
6396 (define_expand "vcond<mode>"
6397 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6398 (if_then_else:SSEMODE124C8
6399 (match_operator 3 ""
6400 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6401 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6402 (match_operand:SSEMODE124C8 1 "general_operand" "")
6403 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6406 bool ok = ix86_expand_int_vcond (operands);
;; Counterpart of the vcond expander under the "vcondu" name; the visible
;; pattern and preparation code are identical: operand 3 compares operands
;; 4 and 5, operands 1 and 2 are the values selected, and the work is
;; delegated to ix86_expand_int_vcond.
;; NOTE(review): the enabling condition and the code that consumes `ok'
;; are not visible here.
6411 (define_expand "vcondu<mode>"
6412 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6413 (if_then_else:SSEMODE124C8
6414 (match_operator 3 ""
6415 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6416 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6417 (match_operand:SSEMODE124C8 1 "general_operand" "")
6418 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6421 bool ok = ix86_expand_int_vcond (operands);
6426 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6428 ;; Parallel bitwise logical operations
6430 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement expander over the SSEMODEI modes
;; (V16QI, V8HI, V4SI, V2DI), expressed as an XOR of operand 1.
;; The preparation code builds a CONST_VECTOR with GET_MODE_NUNITS elements,
;; each constm1_rtx (all bits set), forces it into a register and stores it
;; as operands[2].
;; NOTE(review): the line of the pattern that references operand 2 and the
;; enabling condition are not visible here.
6432 (define_expand "one_cmpl<mode>2"
6433 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6434 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6438 int i, n = GET_MODE_NUNITS (<MODE>mode);
6439 rtvec v = rtvec_alloc (n);
6441 for (i = 0; i < n; ++i)
6442 RTVEC_ELT (v, i) = constm1_rtx;
6444 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; 256-bit AVX and-not over the AVX256MODEI modes
;; (V32QI, V16HI, V8SI, V4DI).  Operand 1 is complemented (not:) and
;; combined with operand 2; emitted as the three-operand vandnps, with the
;; insn "mode" attribute taken from <avxvecpsmode>.
;; NOTE(review): the outer rtx code line and the enabling condition are not
;; visible here.
6447 (define_insn "*avx_andnot<mode>3"
6448 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6450 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6451 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6453 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6454 [(set_attr "type" "sselog")
6455 (set_attr "prefix" "vex")
6456 (set_attr "mode" "<avxvecpsmode>")])
;; And-not over the SSEMODEI modes for targets that have SSE but not SSE2.
;; Operand 1 is complemented and tied to the destination; emitted as the
;; two-operand andnps with insn mode V4SF.
;; NOTE(review): the outer rtx code line is not visible here.
6458 (define_insn "*sse_andnot<mode>3"
6459 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6461 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6462 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6463 "(TARGET_SSE && !TARGET_SSE2)"
6464 "andnps\t{%2, %0|%0, %2}"
6465 [(set_attr "type" "sselog")
6466 (set_attr "mode" "V4SF")])
;; 128-bit AVX and-not over the SSEMODEI modes.  Operand 1 is complemented
;; and need not be tied to the destination; emitted as the three-operand
;; vpandn.
;; NOTE(review): the outer rtx code line and the enabling condition are not
;; visible here.
6468 (define_insn "*avx_andnot<mode>3"
6469 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6471 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6472 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6474 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6475 [(set_attr "type" "sselog")
6476 (set_attr "prefix" "vex")
6477 (set_attr "mode" "TI")])
;; Two-operand and-not over the SSEMODEI modes, emitted as pandn.
;; Operand 1 is complemented and tied to the destination; operand 2 may be
;; register or memory.
;; NOTE(review): the outer rtx code line and the enabling condition are not
;; visible here.
6479 (define_insn "sse2_andnot<mode>3"
6480 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6482 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6483 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6485 "pandn\t{%2, %0|%0, %2}"
6486 [(set_attr "type" "sselog")
6487 (set_attr "prefix_data16" "1")
6488 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers, emitted as pandn.
;; Operand 1 is complemented and tied to the destination.
;; NOTE(review): the outer rtx code line and the enabling condition are not
;; visible here.
6490 (define_insn "*andnottf3"
6491 [(set (match_operand:TF 0 "register_operand" "=x")
6493 (not:TF (match_operand:TF 1 "register_operand" "0"))
6494 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6496 "pandn\t{%2, %0|%0, %2}"
6497 [(set_attr "type" "sselog")
6498 (set_attr "prefix_data16" "1")
6499 (set_attr "mode" "TI")])
;; Expander for a binary operation, parameterized by a code iterator, over
;; the SSEMODEI modes.  Its preparation step canonicalizes the operands for
;; <CODE> with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code-iterator line and the enabling condition are not
;; visible here.
6501 (define_expand "<code><mode>3"
6502 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6504 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6505 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6507 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 256-bit AVX form of the iterated binary operation over the AVX256MODEI
;; modes.  Operand 1 is commutative ("%x"); the operands must be accepted by
;; ix86_binary_operator_ok.  Emitted as v<plogicprefix>ps with the insn
;; "mode" attribute taken from <avxvecpsmode>.
;; NOTE(review): the code-iterator line and the first line of the condition
;; are not visible here.
6509 (define_insn "*avx_<code><mode>3"
6510 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6512 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6513 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6515 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6516 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
6517 [(set_attr "type" "sselog")
6518 (set_attr "prefix" "vex")
6519 (set_attr "mode" "<avxvecpsmode>")])
;; Form of the iterated binary operation for targets with SSE but not SSE2,
;; over the SSEMODEI modes.  Operand 1 is tied to the destination and
;; commutative ("%0").  Emitted as <plogicprefix>ps with insn mode V4SF.
;; NOTE(review): the code-iterator line is not visible here.
6521 (define_insn "*sse_<code><mode>3"
6522 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6524 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6525 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6526 "(TARGET_SSE && !TARGET_SSE2)
6527 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6528 "<plogicprefix>ps\t{%2, %0|%0, %2}"
6529 [(set_attr "type" "sselog")
6530 (set_attr "mode" "V4SF")])
;; 128-bit AVX form of the iterated binary operation over the SSEMODEI
;; modes.  Operand 1 is commutative ("%x") and not tied to the destination.
;; Emitted as the three-operand vp<plogicprefix>.
;; NOTE(review): the code-iterator line and the first line of the condition
;; are not visible here.
6532 (define_insn "*avx_<code><mode>3"
6533 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6535 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6536 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6538 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6539 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
6540 [(set_attr "type" "sselog")
6541 (set_attr "prefix" "vex")
6542 (set_attr "mode" "TI")])
;; SSE2 form of the iterated binary operation over the SSEMODEI modes.
;; Operand 1 is tied to the destination and commutative ("%0").
;; Emitted as the two-operand p<plogicprefix>.
;; NOTE(review): the code-iterator line is not visible here.
6544 (define_insn "*sse2_<code><mode>3"
6545 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6547 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6548 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6549 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6550 "p<plogicprefix>\t{%2, %0|%0, %2}"
6551 [(set_attr "type" "sselog")
6552 (set_attr "prefix_data16" "1")
6553 (set_attr "mode" "TI")])
;; Expander for the iterated binary operation on TFmode operands.  Its
;; preparation step canonicalizes the operands for <CODE> with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code-iterator line and the enabling condition are not
;; visible here.
6555 (define_expand "<code>tf3"
6556 [(set (match_operand:TF 0 "register_operand" "")
6558 (match_operand:TF 1 "nonimmediate_operand" "")
6559 (match_operand:TF 2 "nonimmediate_operand" "")))]
6561 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 insn for the iterated binary operation on TFmode values in SSE
;; registers.  Operand 1 is tied to the destination and commutative ("%0").
;; Emitted as the two-operand p<plogicprefix>.
;; NOTE(review): the code-iterator line is not visible here.
6563 (define_insn "*<code>tf3"
6564 [(set (match_operand:TF 0 "register_operand" "=x")
6566 (match_operand:TF 1 "nonimmediate_operand" "%0")
6567 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6568 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6569 "p<plogicprefix>\t{%2, %0|%0, %2}"
6570 [(set_attr "type" "sselog")
6571 (set_attr "prefix_data16" "1")
6572 (set_attr "mode" "TI")])
6574 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6576 ;; Parallel integral element swizzling
6578 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6581 ;; op1 = abcdefghijklmnop
6582 ;; op2 = qrstuvwxyz012345
6583 ;; h1 = aqbrcsdteufvgwhx
6584 ;; l1 = iyjzk0l1m2n3o4p5
6585 ;; h2 = aiqybjrzcks0dlt1
6586 ;; l2 = emu2fnv3gow4hpx5
6587 ;; h3 = aeimquy2bfjnrvz3
6588 ;; l3 = cgkosw04dhlptx15
6589 ;; result = bdfhjlnprtvxz135
;; Packs two V8HI inputs (operands 1 and 2) into one V16QI result
;; (operand 0).  Both inputs are reinterpreted as V16QI with gen_lowpart,
;; then seven byte interleaves are emitted: three high/low rounds through
;; the temporaries h1/l1, h2/l2, h3/l3, followed by a final low interleave
;; of l3 and h3 that writes operands[0].  The table above traces the byte
;; order after each step.
;; NOTE(review): the enabling condition is not visible here.
6590 (define_expand "vec_pack_trunc_v8hi"
6591 [(match_operand:V16QI 0 "register_operand" "")
6592 (match_operand:V8HI 1 "register_operand" "")
6593 (match_operand:V8HI 2 "register_operand" "")]
6596 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6598 op1 = gen_lowpart (V16QImode, operands[1]);
6599 op2 = gen_lowpart (V16QImode, operands[2]);
6600 h1 = gen_reg_rtx (V16QImode);
6601 l1 = gen_reg_rtx (V16QImode);
6602 h2 = gen_reg_rtx (V16QImode);
6603 l2 = gen_reg_rtx (V16QImode);
6604 h3 = gen_reg_rtx (V16QImode);
6605 l3 = gen_reg_rtx (V16QImode);
6607 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6608 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6609 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6610 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6611 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6612 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6613 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6624 ;; result = bdfhjlnp
;; Packs two V4SI inputs (operands 1 and 2) into one V8HI result
;; (operand 0).  Both inputs are reinterpreted as V8HI with gen_lowpart,
;; then five halfword interleaves are emitted: two high/low rounds through
;; the temporaries h1/l1 and h2/l2, followed by a final low interleave of
;; l2 and h2 that writes operands[0].
;; NOTE(review): the enabling condition is not visible here.
6625 (define_expand "vec_pack_trunc_v4si"
6626 [(match_operand:V8HI 0 "register_operand" "")
6627 (match_operand:V4SI 1 "register_operand" "")
6628 (match_operand:V4SI 2 "register_operand" "")]
6631 rtx op1, op2, h1, l1, h2, l2;
6633 op1 = gen_lowpart (V8HImode, operands[1]);
6634 op2 = gen_lowpart (V8HImode, operands[2]);
6635 h1 = gen_reg_rtx (V8HImode);
6636 l1 = gen_reg_rtx (V8HImode);
6637 h2 = gen_reg_rtx (V8HImode);
6638 l2 = gen_reg_rtx (V8HImode);
6640 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6641 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6642 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6643 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6644 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Packs two V2DI inputs (operands 1 and 2) into one V4SI result
;; (operand 0).  Both inputs are reinterpreted as V4SI with gen_lowpart,
;; then three doubleword interleaves are emitted: one high/low round
;; through the temporaries h1/l1, followed by a low interleave of l1 and h1
;; that writes operands[0].
;; NOTE(review): the enabling condition is not visible here.
6654 (define_expand "vec_pack_trunc_v2di"
6655 [(match_operand:V4SI 0 "register_operand" "")
6656 (match_operand:V2DI 1 "register_operand" "")
6657 (match_operand:V2DI 2 "register_operand" "")]
6660 rtx op1, op2, h1, l1;
6662 op1 = gen_lowpart (V4SImode, operands[1]);
6663 op2 = gen_lowpart (V4SImode, operands[2]);
6664 h1 = gen_reg_rtx (V4SImode);
6665 l1 = gen_reg_rtx (V4SImode);
6667 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6668 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6669 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander interleaving the high halves of two V16QI operands.
;; The selector alternates elements 8..15 (from operand 1) with elements
;; 24..31 (from operand 2) of the 32-element concatenation.  The
;; preparation code emits gen_sse2_punpckhbw on the three operands.
;; NOTE(review): the enabling condition is not visible here.
6673 (define_expand "vec_interleave_highv16qi"
6674 [(set (match_operand:V16QI 0 "register_operand" "")
6677 (match_operand:V16QI 1 "register_operand" "")
6678 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6679 (parallel [(const_int 8) (const_int 24)
6680 (const_int 9) (const_int 25)
6681 (const_int 10) (const_int 26)
6682 (const_int 11) (const_int 27)
6683 (const_int 12) (const_int 28)
6684 (const_int 13) (const_int 29)
6685 (const_int 14) (const_int 30)
6686 (const_int 15) (const_int 31)])))]
6689 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander interleaving the low halves of two V16QI operands.
;; The selector alternates elements 0..7 (from operand 1) with elements
;; 16..23 (from operand 2) of the 32-element concatenation.  The
;; preparation code emits gen_sse2_punpcklbw on the three operands.
;; NOTE(review): the enabling condition is not visible here.
6693 (define_expand "vec_interleave_lowv16qi"
6694 [(set (match_operand:V16QI 0 "register_operand" "")
6697 (match_operand:V16QI 1 "register_operand" "")
6698 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6699 (parallel [(const_int 0) (const_int 16)
6700 (const_int 1) (const_int 17)
6701 (const_int 2) (const_int 18)
6702 (const_int 3) (const_int 19)
6703 (const_int 4) (const_int 20)
6704 (const_int 5) (const_int 21)
6705 (const_int 6) (const_int 22)
6706 (const_int 7) (const_int 23)])))]
6709 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander interleaving the high halves of two V8HI operands.
;; The selector alternates elements 4..7 (from operand 1) with elements
;; 12..15 (from operand 2) of the 16-element concatenation.  The
;; preparation code emits gen_sse2_punpckhwd on the three operands.
;; Operand 0 uses an empty constraint string like every sibling
;; vec_interleave expander; the stray "=" it used to carry named no
;; register class.
;; NOTE(review): the enabling condition is not visible here.
6713 (define_expand "vec_interleave_highv8hi"
6714 [(set (match_operand:V8HI 0 "register_operand" "")
6717 (match_operand:V8HI 1 "register_operand" "")
6718 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6719 (parallel [(const_int 4) (const_int 12)
6720 (const_int 5) (const_int 13)
6721 (const_int 6) (const_int 14)
6722 (const_int 7) (const_int 15)])))]
6725 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander interleaving the low halves of two V8HI operands.
;; The selector alternates elements 0..3 (from operand 1) with elements
;; 8..11 (from operand 2) of the 16-element concatenation.  The
;; preparation code emits gen_sse2_punpcklwd on the three operands.
;; NOTE(review): the enabling condition is not visible here.
6729 (define_expand "vec_interleave_lowv8hi"
6730 [(set (match_operand:V8HI 0 "register_operand" "")
6733 (match_operand:V8HI 1 "register_operand" "")
6734 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6735 (parallel [(const_int 0) (const_int 8)
6736 (const_int 1) (const_int 9)
6737 (const_int 2) (const_int 10)
6738 (const_int 3) (const_int 11)])))]
6741 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander interleaving the high halves of two V4SI operands.
;; The selector picks elements 2, 6, 3, 7 of the 8-element concatenation.
;; The preparation code emits gen_sse2_punpckhdq on the three operands.
;; NOTE(review): the enabling condition is not visible here.
6745 (define_expand "vec_interleave_highv4si"
6746 [(set (match_operand:V4SI 0 "register_operand" "")
6749 (match_operand:V4SI 1 "register_operand" "")
6750 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6751 (parallel [(const_int 2) (const_int 6)
6752 (const_int 3) (const_int 7)])))]
6755 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander interleaving the low halves of two V4SI operands.
;; The selector picks elements 0, 4, 1, 5 of the 8-element concatenation.
;; The preparation code emits gen_sse2_punpckldq on the three operands.
;; NOTE(review): the enabling condition is not visible here.
6759 (define_expand "vec_interleave_lowv4si"
6760 [(set (match_operand:V4SI 0 "register_operand" "")
6763 (match_operand:V4SI 1 "register_operand" "")
6764 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6765 (parallel [(const_int 0) (const_int 4)
6766 (const_int 1) (const_int 5)])))]
6769 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander interleaving the high elements of two V2DI operands.
;; The visible part of the selector starts with element 1.  The
;; preparation code emits gen_sse2_punpckhqdq on the three operands.
;; NOTE(review): the rest of the selector and the enabling condition are
;; not visible here.
6773 (define_expand "vec_interleave_highv2di"
6774 [(set (match_operand:V2DI 0 "register_operand" "")
6777 (match_operand:V2DI 1 "register_operand" "")
6778 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6779 (parallel [(const_int 1)
6783 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander interleaving the low elements of two V2DI operands.
;; The visible part of the selector starts with element 0.  The
;; preparation code emits gen_sse2_punpcklqdq on the three operands.
;; NOTE(review): the rest of the selector and the enabling condition are
;; not visible here.
6787 (define_expand "vec_interleave_lowv2di"
6788 [(set (match_operand:V2DI 0 "register_operand" "")
6791 (match_operand:V2DI 1 "register_operand" "")
6792 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6793 (parallel [(const_int 0)
6797 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Named expander interleaving the high halves of two V4SF operands.
;; The selector picks elements 2, 6, 3, 7 of the 8-element concatenation.
;; No preparation statements are visible for this expander.
;; NOTE(review): the enabling condition is not visible here.
6801 (define_expand "vec_interleave_highv4sf"
6802 [(set (match_operand:V4SF 0 "register_operand" "")
6805 (match_operand:V4SF 1 "register_operand" "")
6806 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6807 (parallel [(const_int 2) (const_int 6)
6808 (const_int 3) (const_int 7)])))]
;; Named expander interleaving the low halves of two V4SF operands.
;; The selector picks elements 0, 4, 1, 5 of the 8-element concatenation.
;; No preparation statements are visible for this expander.
;; NOTE(review): the enabling condition is not visible here.
6811 (define_expand "vec_interleave_lowv4sf"
6812 [(set (match_operand:V4SF 0 "register_operand" "")
6815 (match_operand:V4SF 1 "register_operand" "")
6816 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6817 (parallel [(const_int 0) (const_int 4)
6818 (const_int 1) (const_int 5)])))]
;; Named expander interleaving the high elements of two V2DF operands.
;; The visible part of the selector starts with element 1.
;; NOTE(review): the rest of the selector and the enabling condition are
;; not visible here.
6821 (define_expand "vec_interleave_highv2df"
6822 [(set (match_operand:V2DF 0 "register_operand" "")
6825 (match_operand:V2DF 1 "register_operand" "")
6826 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6827 (parallel [(const_int 1)
;; Named expander interleaving the low elements of two V2DF operands.
;; The visible part of the selector starts with element 0.
;; NOTE(review): the rest of the selector and the enabling condition are
;; not visible here.
6831 (define_expand "vec_interleave_lowv2df"
6832 [(set (match_operand:V2DF 0 "register_operand" "")
6835 (match_operand:V2DF 1 "register_operand" "")
6836 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6837 (parallel [(const_int 0)
;; AVX insn narrowing two V8HI inputs into one V16QI result, emitted as the
;; three-operand vpacksswb.  Operand 1 must be a register; operand 2 may be
;; register or memory.
;; NOTE(review): the narrowing rtx codes and the enabling condition are on
;; lines not visible here.
6841 (define_insn "*avx_packsswb"
6842 [(set (match_operand:V16QI 0 "register_operand" "=x")
6845 (match_operand:V8HI 1 "register_operand" "x"))
6847 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6849 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6850 [(set_attr "type" "sselog")
6851 (set_attr "prefix" "vex")
6852 (set_attr "mode" "TI")])
;; Two-operand insn narrowing two V8HI inputs into one V16QI result,
;; emitted as packsswb.  Operand 1 is tied to the destination.
;; NOTE(review): the narrowing rtx codes and the enabling condition are on
;; lines not visible here.
6854 (define_insn "sse2_packsswb"
6855 [(set (match_operand:V16QI 0 "register_operand" "=x")
6858 (match_operand:V8HI 1 "register_operand" "0"))
6860 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6862 "packsswb\t{%2, %0|%0, %2}"
6863 [(set_attr "type" "sselog")
6864 (set_attr "prefix_data16" "1")
6865 (set_attr "mode" "TI")])
;; AVX insn narrowing two V4SI inputs into one V8HI result, emitted as the
;; three-operand vpackssdw.
;; NOTE(review): the narrowing rtx codes and the enabling condition are on
;; lines not visible here.
6867 (define_insn "*avx_packssdw"
6868 [(set (match_operand:V8HI 0 "register_operand" "=x")
6871 (match_operand:V4SI 1 "register_operand" "x"))
6873 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6875 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6876 [(set_attr "type" "sselog")
6877 (set_attr "prefix" "vex")
6878 (set_attr "mode" "TI")])
;; Two-operand insn narrowing two V4SI inputs into one V8HI result,
;; emitted as packssdw.  Operand 1 is tied to the destination.
;; NOTE(review): the narrowing rtx codes and the enabling condition are on
;; lines not visible here.
6880 (define_insn "sse2_packssdw"
6881 [(set (match_operand:V8HI 0 "register_operand" "=x")
6884 (match_operand:V4SI 1 "register_operand" "0"))
6886 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6888 "packssdw\t{%2, %0|%0, %2}"
6889 [(set_attr "type" "sselog")
6890 (set_attr "prefix_data16" "1")
6891 (set_attr "mode" "TI")])
;; AVX insn narrowing two V8HI inputs into one V16QI result, emitted as the
;; three-operand vpackuswb.
;; NOTE(review): the narrowing rtx codes and the enabling condition are on
;; lines not visible here.
6893 (define_insn "*avx_packuswb"
6894 [(set (match_operand:V16QI 0 "register_operand" "=x")
6897 (match_operand:V8HI 1 "register_operand" "x"))
6899 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6901 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6902 [(set_attr "type" "sselog")
6903 (set_attr "prefix" "vex")
6904 (set_attr "mode" "TI")])
;; Two-operand insn narrowing two V8HI inputs into one V16QI result,
;; emitted as packuswb.  Operand 1 is tied to the destination.
;; NOTE(review): the narrowing rtx codes and the enabling condition are on
;; lines not visible here.
6906 (define_insn "sse2_packuswb"
6907 [(set (match_operand:V16QI 0 "register_operand" "=x")
6910 (match_operand:V8HI 1 "register_operand" "0"))
6912 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6914 "packuswb\t{%2, %0|%0, %2}"
6915 [(set_attr "type" "sselog")
6916 (set_attr "prefix_data16" "1")
6917 (set_attr "mode" "TI")])
;; AVX high-half byte interleave of two V16QI operands, emitted as the
;; three-operand vpunpckhbw.  The selector alternates elements 8..15 with
;; elements 24..31 of the concatenated operands.
;; NOTE(review): the enabling condition is not visible here.
6919 (define_insn "*avx_punpckhbw"
6920 [(set (match_operand:V16QI 0 "register_operand" "=x")
6923 (match_operand:V16QI 1 "register_operand" "x")
6924 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6925 (parallel [(const_int 8) (const_int 24)
6926 (const_int 9) (const_int 25)
6927 (const_int 10) (const_int 26)
6928 (const_int 11) (const_int 27)
6929 (const_int 12) (const_int 28)
6930 (const_int 13) (const_int 29)
6931 (const_int 14) (const_int 30)
6932 (const_int 15) (const_int 31)])))]
6934 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6935 [(set_attr "type" "sselog")
6936 (set_attr "prefix" "vex")
6937 (set_attr "mode" "TI")])
;; Two-operand high-half byte interleave of two V16QI operands, emitted as
;; punpckhbw.  Operand 1 is tied to the destination.  The selector
;; alternates elements 8..15 with elements 24..31 of the concatenated
;; operands.
;; NOTE(review): the enabling condition is not visible here.
6939 (define_insn "sse2_punpckhbw"
6940 [(set (match_operand:V16QI 0 "register_operand" "=x")
6943 (match_operand:V16QI 1 "register_operand" "0")
6944 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6945 (parallel [(const_int 8) (const_int 24)
6946 (const_int 9) (const_int 25)
6947 (const_int 10) (const_int 26)
6948 (const_int 11) (const_int 27)
6949 (const_int 12) (const_int 28)
6950 (const_int 13) (const_int 29)
6951 (const_int 14) (const_int 30)
6952 (const_int 15) (const_int 31)])))]
6954 "punpckhbw\t{%2, %0|%0, %2}"
6955 [(set_attr "type" "sselog")
6956 (set_attr "prefix_data16" "1")
6957 (set_attr "mode" "TI")])
;; AVX low-half byte interleave of two V16QI operands, emitted as the
;; three-operand vpunpcklbw.  The selector alternates elements 0..7 with
;; elements 16..23 of the concatenated operands.
;; NOTE(review): the enabling condition is not visible here.
6959 (define_insn "*avx_punpcklbw"
6960 [(set (match_operand:V16QI 0 "register_operand" "=x")
6963 (match_operand:V16QI 1 "register_operand" "x")
6964 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6965 (parallel [(const_int 0) (const_int 16)
6966 (const_int 1) (const_int 17)
6967 (const_int 2) (const_int 18)
6968 (const_int 3) (const_int 19)
6969 (const_int 4) (const_int 20)
6970 (const_int 5) (const_int 21)
6971 (const_int 6) (const_int 22)
6972 (const_int 7) (const_int 23)])))]
6974 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6975 [(set_attr "type" "sselog")
6976 (set_attr "prefix" "vex")
6977 (set_attr "mode" "TI")])
;; Two-operand low-half byte interleave of two V16QI operands, emitted as
;; punpcklbw.  Operand 1 is tied to the destination.  The selector
;; alternates elements 0..7 with elements 16..23 of the concatenated
;; operands.
;; NOTE(review): the enabling condition is not visible here.
6979 (define_insn "sse2_punpcklbw"
6980 [(set (match_operand:V16QI 0 "register_operand" "=x")
6983 (match_operand:V16QI 1 "register_operand" "0")
6984 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6985 (parallel [(const_int 0) (const_int 16)
6986 (const_int 1) (const_int 17)
6987 (const_int 2) (const_int 18)
6988 (const_int 3) (const_int 19)
6989 (const_int 4) (const_int 20)
6990 (const_int 5) (const_int 21)
6991 (const_int 6) (const_int 22)
6992 (const_int 7) (const_int 23)])))]
6994 "punpcklbw\t{%2, %0|%0, %2}"
6995 [(set_attr "type" "sselog")
6996 (set_attr "prefix_data16" "1")
6997 (set_attr "mode" "TI")])
;; AVX high-half halfword interleave of two V8HI operands, emitted as the
;; three-operand vpunpckhwd.  The selector alternates elements 4..7 with
;; elements 12..15 of the concatenated operands.
;; NOTE(review): the enabling condition is not visible here.
6999 (define_insn "*avx_punpckhwd"
7000 [(set (match_operand:V8HI 0 "register_operand" "=x")
7003 (match_operand:V8HI 1 "register_operand" "x")
7004 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7005 (parallel [(const_int 4) (const_int 12)
7006 (const_int 5) (const_int 13)
7007 (const_int 6) (const_int 14)
7008 (const_int 7) (const_int 15)])))]
7010 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
7011 [(set_attr "type" "sselog")
7012 (set_attr "prefix" "vex")
7013 (set_attr "mode" "TI")])
;; Two-operand high-half halfword interleave of two V8HI operands, emitted
;; as punpckhwd.  Operand 1 is tied to the destination.  The selector
;; alternates elements 4..7 with elements 12..15 of the concatenated
;; operands.
;; NOTE(review): the enabling condition is not visible here.
7015 (define_insn "sse2_punpckhwd"
7016 [(set (match_operand:V8HI 0 "register_operand" "=x")
7019 (match_operand:V8HI 1 "register_operand" "0")
7020 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7021 (parallel [(const_int 4) (const_int 12)
7022 (const_int 5) (const_int 13)
7023 (const_int 6) (const_int 14)
7024 (const_int 7) (const_int 15)])))]
7026 "punpckhwd\t{%2, %0|%0, %2}"
7027 [(set_attr "type" "sselog")
7028 (set_attr "prefix_data16" "1")
7029 (set_attr "mode" "TI")])
;; AVX low-half halfword interleave of two V8HI operands, emitted as the
;; three-operand vpunpcklwd.  The selector alternates elements 0..3 with
;; elements 8..11 of the concatenated operands.
;; NOTE(review): the enabling condition is not visible here.
7031 (define_insn "*avx_punpcklwd"
7032 [(set (match_operand:V8HI 0 "register_operand" "=x")
7035 (match_operand:V8HI 1 "register_operand" "x")
7036 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7037 (parallel [(const_int 0) (const_int 8)
7038 (const_int 1) (const_int 9)
7039 (const_int 2) (const_int 10)
7040 (const_int 3) (const_int 11)])))]
7042 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
7043 [(set_attr "type" "sselog")
7044 (set_attr "prefix" "vex")
7045 (set_attr "mode" "TI")])
;; Two-operand low-half halfword interleave of two V8HI operands, emitted
;; as punpcklwd.  Operand 1 is tied to the destination.  The selector
;; alternates elements 0..3 with elements 8..11 of the concatenated
;; operands.
;; NOTE(review): the enabling condition is not visible here.
7047 (define_insn "sse2_punpcklwd"
7048 [(set (match_operand:V8HI 0 "register_operand" "=x")
7051 (match_operand:V8HI 1 "register_operand" "0")
7052 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7053 (parallel [(const_int 0) (const_int 8)
7054 (const_int 1) (const_int 9)
7055 (const_int 2) (const_int 10)
7056 (const_int 3) (const_int 11)])))]
7058 "punpcklwd\t{%2, %0|%0, %2}"
7059 [(set_attr "type" "sselog")
7060 (set_attr "prefix_data16" "1")
7061 (set_attr "mode" "TI")])
;; AVX high-half doubleword interleave of two V4SI operands, emitted as the
;; three-operand vpunpckhdq.  The selector picks elements 2, 6, 3, 7 of the
;; concatenated operands.
;; NOTE(review): the enabling condition is not visible here.
7063 (define_insn "*avx_punpckhdq"
7064 [(set (match_operand:V4SI 0 "register_operand" "=x")
7067 (match_operand:V4SI 1 "register_operand" "x")
7068 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7069 (parallel [(const_int 2) (const_int 6)
7070 (const_int 3) (const_int 7)])))]
7072 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
7073 [(set_attr "type" "sselog")
7074 (set_attr "prefix" "vex")
7075 (set_attr "mode" "TI")])
;; Two-operand high-half doubleword interleave of two V4SI operands,
;; emitted as punpckhdq.  Operand 1 is tied to the destination.  The
;; selector picks elements 2, 6, 3, 7 of the concatenated operands.
;; NOTE(review): the enabling condition is not visible here.
7077 (define_insn "sse2_punpckhdq"
7078 [(set (match_operand:V4SI 0 "register_operand" "=x")
7081 (match_operand:V4SI 1 "register_operand" "0")
7082 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7083 (parallel [(const_int 2) (const_int 6)
7084 (const_int 3) (const_int 7)])))]
7086 "punpckhdq\t{%2, %0|%0, %2}"
7087 [(set_attr "type" "sselog")
7088 (set_attr "prefix_data16" "1")
7089 (set_attr "mode" "TI")])
;; AVX low-half doubleword interleave of two V4SI operands, emitted as the
;; three-operand vpunpckldq.  The selector picks elements 0, 4, 1, 5 of the
;; concatenated operands.
;; NOTE(review): the enabling condition is not visible here.
7091 (define_insn "*avx_punpckldq"
7092 [(set (match_operand:V4SI 0 "register_operand" "=x")
7095 (match_operand:V4SI 1 "register_operand" "x")
7096 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7097 (parallel [(const_int 0) (const_int 4)
7098 (const_int 1) (const_int 5)])))]
7100 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
7101 [(set_attr "type" "sselog")
7102 (set_attr "prefix" "vex")
7103 (set_attr "mode" "TI")])
;; Two-operand low-half doubleword interleave of two V4SI operands,
;; emitted as punpckldq.  Operand 1 is tied to the destination.  The
;; selector picks elements 0, 4, 1, 5 of the concatenated operands.
;; NOTE(review): the enabling condition is not visible here.
7105 (define_insn "sse2_punpckldq"
7106 [(set (match_operand:V4SI 0 "register_operand" "=x")
7109 (match_operand:V4SI 1 "register_operand" "0")
7110 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7111 (parallel [(const_int 0) (const_int 4)
7112 (const_int 1) (const_int 5)])))]
7114 "punpckldq\t{%2, %0|%0, %2}"
7115 [(set_attr "type" "sselog")
7116 (set_attr "prefix_data16" "1")
7117 (set_attr "mode" "TI")])
;; AVX element insert over the SSEMODE124 modes (V16QI, V8HI, V4SI).
;; The scalar operand 2 is broadcast (vec_duplicate) and merged into
;; operand 1 under the vec_merge mask in operand 3, which the predicate
;; restricts to a single power of two.  The output code converts that mask
;; to an element index with exact_log2 before printing vpinsr<ssevecsize>.
;; The "prefix_extra" attribute depends on whether operand 0 is V8HI; only
;; the "1" alternative is visible in this listing.
;; NOTE(review): the enabling condition is not visible here.
7119 (define_insn "*avx_pinsr<ssevecsize>"
7120 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
7121 (vec_merge:SSEMODE124
7122 (vec_duplicate:SSEMODE124
7123 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
7124 (match_operand:SSEMODE124 1 "register_operand" "x")
7125 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
7128 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7129 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7131 [(set_attr "type" "sselog")
7132 (set (attr "prefix_extra")
7133 (if_then_else (match_operand:V8HI 0 "register_operand" "")
7135 (const_string "1")))
7136 (set_attr "length_immediate" "1")
7137 (set_attr "prefix" "vex")
7138 (set_attr "mode" "TI")])
;; Byte insert into a V16QI register, emitted as pinsrb.  The QI operand 2
;; is broadcast and merged into operand 1 (tied to the destination) under a
;; power-of-two mask in operand 3 (1..32768); the output code turns the
;; mask into an element index with exact_log2.
;; NOTE(review): the vec_merge line and the enabling condition are not
;; visible here.
7140 (define_insn "*sse4_1_pinsrb"
7141 [(set (match_operand:V16QI 0 "register_operand" "=x")
7143 (vec_duplicate:V16QI
7144 (match_operand:QI 2 "nonimmediate_operand" "rm"))
7145 (match_operand:V16QI 1 "register_operand" "0")
7146 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
7149 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7150 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
7152 [(set_attr "type" "sselog")
7153 (set_attr "prefix_extra" "1")
7154 (set_attr "length_immediate" "1")
7155 (set_attr "mode" "TI")])
;; Halfword insert into a V8HI register, emitted as pinsrw.  Operand 1 is
;; tied to the destination; operand 3 is a power-of-two mask (1..128) that
;; the output code turns into an element index with exact_log2.
;; NOTE(review): the vec_merge/vec_duplicate lines and the enabling
;; condition are not visible here.
7157 (define_insn "*sse2_pinsrw"
7158 [(set (match_operand:V8HI 0 "register_operand" "=x")
7161 (match_operand:HI 2 "nonimmediate_operand" "rm"))
7162 (match_operand:V8HI 1 "register_operand" "0")
7163 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
7166 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7167 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
7169 [(set_attr "type" "sselog")
7170 (set_attr "prefix_data16" "1")
7171 (set_attr "length_immediate" "1")
7172 (set_attr "mode" "TI")])
7174 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert into a V4SI register, emitted as pinsrd.  Operand 1 is
;; tied to the destination; operand 3 is a power-of-two mask (1..8) that
;; the output code turns into an element index with exact_log2.
;; NOTE(review): the vec_merge/vec_duplicate lines and the enabling
;; condition are not visible here.
7175 (define_insn "*sse4_1_pinsrd"
7176 [(set (match_operand:V4SI 0 "register_operand" "=x")
7179 (match_operand:SI 2 "nonimmediate_operand" "rm"))
7180 (match_operand:V4SI 1 "register_operand" "0")
7181 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
7184 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7185 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
7187 [(set_attr "type" "sselog")
7188 (set_attr "prefix_extra" "1")
7189 (set_attr "length_immediate" "1")
7190 (set_attr "mode" "TI")])
;; AVX quadword insert into a V2DI register, emitted as the three-operand
;; vpinsrq.  Only available with AVX on 64-bit targets.  Operand 3 is a
;; power-of-two mask (1..2) that the output code turns into an element
;; index with exact_log2.
;; NOTE(review): the vec_merge/vec_duplicate lines are not visible here.
7192 (define_insn "*avx_pinsrq"
7193 [(set (match_operand:V2DI 0 "register_operand" "=x")
7196 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7197 (match_operand:V2DI 1 "register_operand" "x")
7198 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7199 "TARGET_AVX && TARGET_64BIT"
7201 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7202 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7204 [(set_attr "type" "sselog")
7205 (set_attr "prefix_extra" "1")
7206 (set_attr "length_immediate" "1")
7207 (set_attr "prefix" "vex")
7208 (set_attr "mode" "TI")])
;; SSE4.1 quadword insert into a V2DI register, emitted as pinsrq.  Only
;; available on 64-bit targets.  Operand 1 is tied to the destination;
;; operand 3 is a power-of-two mask (1..2) that the output code turns into
;; an element index with exact_log2.
;; NOTE(review): the vec_merge/vec_duplicate lines are not visible here.
7210 (define_insn "*sse4_1_pinsrq"
7211 [(set (match_operand:V2DI 0 "register_operand" "=x")
7214 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7215 (match_operand:V2DI 1 "register_operand" "0")
7216 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7217 "TARGET_SSE4_1 && TARGET_64BIT"
7219 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7220 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7222 [(set_attr "type" "sselog")
7223 (set_attr "prefix_rex" "1")
7224 (set_attr "prefix_extra" "1")
7225 (set_attr "length_immediate" "1")
7226 (set_attr "mode" "TI")])
;; Extracts one byte of a V16QI register, selected by the constant index in
;; operand 2 (0..15), into an SI general register.  Emitted as pextrb, with
;; the "%v" template prefix and the maybe_vex attribute covering the VEX
;; form.
;; NOTE(review): the widening/vec_select rtx lines and the enabling
;; condition are not visible here.
7228 (define_insn "*sse4_1_pextrb"
7229 [(set (match_operand:SI 0 "register_operand" "=r")
7232 (match_operand:V16QI 1 "register_operand" "x")
7233 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7235 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7236 [(set_attr "type" "sselog")
7237 (set_attr "prefix_extra" "1")
7238 (set_attr "length_immediate" "1")
7239 (set_attr "prefix" "maybe_vex")
7240 (set_attr "mode" "TI")])
;; Memory-destination variant of the byte extract: stores the byte of the
;; V16QI register selected by operand 2 (0..15) directly to a QI memory
;; operand.  Emitted as pextrb (optionally VEX-prefixed).
;; NOTE(review): the vec_select line and the enabling condition are not
;; visible here.
7242 (define_insn "*sse4_1_pextrb_memory"
7243 [(set (match_operand:QI 0 "memory_operand" "=m")
7245 (match_operand:V16QI 1 "register_operand" "x")
7246 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7248 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7249 [(set_attr "type" "sselog")
7250 (set_attr "prefix_extra" "1")
7251 (set_attr "length_immediate" "1")
7252 (set_attr "prefix" "maybe_vex")
7253 (set_attr "mode" "TI")])
;; Extracts one halfword of a V8HI register, selected by the constant index
;; in operand 2 (0..7), into an SI general register.  Emitted as pextrw
;; (optionally VEX-prefixed).
;; NOTE(review): the widening/vec_select rtx lines and the enabling
;; condition are not visible here.
7255 (define_insn "*sse2_pextrw"
7256 [(set (match_operand:SI 0 "register_operand" "=r")
7259 (match_operand:V8HI 1 "register_operand" "x")
7260 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7262 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7263 [(set_attr "type" "sselog")
7264 (set_attr "prefix_data16" "1")
7265 (set_attr "length_immediate" "1")
7266 (set_attr "prefix" "maybe_vex")
7267 (set_attr "mode" "TI")])
;; Memory-destination variant of the halfword extract: stores the element
;; of the V8HI register selected by operand 2 (0..7) directly to an HI
;; memory operand.  Emitted as pextrw (optionally VEX-prefixed).
;; NOTE(review): the vec_select line and the enabling condition are not
;; visible here.
7269 (define_insn "*sse4_1_pextrw_memory"
7270 [(set (match_operand:HI 0 "memory_operand" "=m")
7272 (match_operand:V8HI 1 "register_operand" "x")
7273 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7275 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7276 [(set_attr "type" "sselog")
7277 (set_attr "prefix_extra" "1")
7278 (set_attr "length_immediate" "1")
7279 (set_attr "prefix" "maybe_vex")
7280 (set_attr "mode" "TI")])
;; Extracts one doubleword of a V4SI register, selected by the constant
;; index in operand 2 (0..3), into an SI general register or memory ("rm").
;; Emitted as pextrd (optionally VEX-prefixed).
;; NOTE(review): the vec_select line and the enabling condition are not
;; visible here.
7282 (define_insn "*sse4_1_pextrd"
7283 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7285 (match_operand:V4SI 1 "register_operand" "x")
7286 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7288 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7289 [(set_attr "type" "sselog")
7290 (set_attr "prefix_extra" "1")
7291 (set_attr "length_immediate" "1")
7292 (set_attr "prefix" "maybe_vex")
7293 (set_attr "mode" "TI")])
7295 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extracts one quadword of a V2DI register, selected by the constant index
;; in operand 2 (0..1), into a DI general register or memory.  Requires
;; SSE4.1 on a 64-bit target.  Emitted as pextrq (optionally VEX-prefixed).
;; NOTE(review): the vec_select line is not visible here.
7296 (define_insn "*sse4_1_pextrq"
7297 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7299 (match_operand:V2DI 1 "register_operand" "x")
7300 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7301 "TARGET_SSE4_1 && TARGET_64BIT"
7302 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7303 [(set_attr "type" "sselog")
7304 (set_attr "prefix_rex" "1")
7305 (set_attr "prefix_extra" "1")
7306 (set_attr "length_immediate" "1")
7307 (set_attr "prefix" "maybe_vex")
7308 (set_attr "mode" "TI")])
;; Expander taking a V4SI destination, a V4SI source and an integer shuffle
;; mask (operand 2).  The mask is split into four 2-bit selectors (bits
;; 0-1, 2-3, 4-5 and 6-7), each passed as a separate constant to
;; gen_sse2_pshufd_1.
;; NOTE(review): the enabling condition is not visible here.
7310 (define_expand "sse2_pshufd"
7311 [(match_operand:V4SI 0 "register_operand" "")
7312 (match_operand:V4SI 1 "nonimmediate_operand" "")
7313 (match_operand:SI 2 "const_int_operand" "")]
7316 int mask = INTVAL (operands[2]);
7317 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7318 GEN_INT ((mask >> 0) & 3),
7319 GEN_INT ((mask >> 2) & 3),
7320 GEN_INT ((mask >> 4) & 3),
7321 GEN_INT ((mask >> 6) & 3)));
;; Insn selecting four elements of a V4SI operand, each chosen by one of
;; the constant operands 2..5 (each 0..3).  The output code packs those
;; four selectors back into a single immediate (2 bits each, operand 2 in
;; the lowest bits), overwrites operands[2] with it and prints pshufd
;; (optionally VEX-prefixed).
;; NOTE(review): the vec_select line, the declaration of `mask' and the
;; enabling condition are not visible here.
7325 (define_insn "sse2_pshufd_1"
7326 [(set (match_operand:V4SI 0 "register_operand" "=x")
7328 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7329 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7330 (match_operand 3 "const_0_to_3_operand" "")
7331 (match_operand 4 "const_0_to_3_operand" "")
7332 (match_operand 5 "const_0_to_3_operand" "")])))]
7336 mask |= INTVAL (operands[2]) << 0;
7337 mask |= INTVAL (operands[3]) << 2;
7338 mask |= INTVAL (operands[4]) << 4;
7339 mask |= INTVAL (operands[5]) << 6;
7340 operands[2] = GEN_INT (mask);
7342 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7344 [(set_attr "type" "sselog1")
7345 (set_attr "prefix_data16" "1")
7346 (set_attr "prefix" "maybe_vex")
7347 (set_attr "length_immediate" "1")
7348 (set_attr "mode" "TI")])
;; Expander for pshuflw taking a single integer shuffle mask in operand 2.
;; The mask is split into four 2-bit selectors (values 0..3) which are
;; passed individually to gen_sse2_pshuflw_1.
;; NOTE(review): the enabling condition is not visible in this copy.
7350 (define_expand "sse2_pshuflw"
7351 [(match_operand:V8HI 0 "register_operand" "")
7352 (match_operand:V8HI 1 "nonimmediate_operand" "")
7353 (match_operand:SI 2 "const_int_operand" "")]
7356 int mask = INTVAL (operands[2]);
7357 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7358 GEN_INT ((mask >> 0) & 3),
7359 GEN_INT ((mask >> 2) & 3),
7360 GEN_INT ((mask >> 4) & 3),
7361 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with four explicit selectors (operands 2..5, constants 0..3).
;; The output code packs them into one immediate (shifts 0, 2, 4, 6),
;; stores it in operands[2] and prints %vpshuflw.  The attributes force
;; prefix_data16 to "0" and set prefix_rep to "1".
;; NOTE(review): the remaining elements of the selection vector, the
;; enabling condition and the `mask' declaration are not visible here.
7365 (define_insn "sse2_pshuflw_1"
7366 [(set (match_operand:V8HI 0 "register_operand" "=x")
7368 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7369 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7370 (match_operand 3 "const_0_to_3_operand" "")
7371 (match_operand 4 "const_0_to_3_operand" "")
7372 (match_operand 5 "const_0_to_3_operand" "")
7380 mask |= INTVAL (operands[2]) << 0;
7381 mask |= INTVAL (operands[3]) << 2;
7382 mask |= INTVAL (operands[4]) << 4;
7383 mask |= INTVAL (operands[5]) << 6;
7384 operands[2] = GEN_INT (mask);
7386 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7388 [(set_attr "type" "sselog")
7389 (set_attr "prefix_data16" "0")
7390 (set_attr "prefix_rep" "1")
7391 (set_attr "prefix" "maybe_vex")
7392 (set_attr "length_immediate" "1")
7393 (set_attr "mode" "TI")])
;; Expander for pshufhw taking a single integer shuffle mask in operand 2.
;; Each 2-bit field of the mask is biased by +4 before being passed to
;; gen_sse2_pshufhw_1, so the selectors handed over are in 4..7.
;; NOTE(review): the enabling condition is not visible in this copy.
7395 (define_expand "sse2_pshufhw"
7396 [(match_operand:V8HI 0 "register_operand" "")
7397 (match_operand:V8HI 1 "nonimmediate_operand" "")
7398 (match_operand:SI 2 "const_int_operand" "")]
7401 int mask = INTVAL (operands[2]);
7402 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7403 GEN_INT (((mask >> 0) & 3) + 4),
7404 GEN_INT (((mask >> 2) & 3) + 4),
7405 GEN_INT (((mask >> 4) & 3) + 4),
7406 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with four explicit selectors (operands 2..5, constants 4..7).
;; The output code removes the +4 bias from each selector, packs the
;; results into one immediate (shifts 0, 2, 4, 6), stores it in
;; operands[2] and prints %vpshufhw.
;; NOTE(review): some fixed elements of the selection vector, the
;; enabling condition and the `mask' declaration are not visible here.
7410 (define_insn "sse2_pshufhw_1"
7411 [(set (match_operand:V8HI 0 "register_operand" "=x")
7413 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7414 (parallel [(const_int 0)
7418 (match_operand 2 "const_4_to_7_operand" "")
7419 (match_operand 3 "const_4_to_7_operand" "")
7420 (match_operand 4 "const_4_to_7_operand" "")
7421 (match_operand 5 "const_4_to_7_operand" "")])))]
7425 mask |= (INTVAL (operands[2]) - 4) << 0;
7426 mask |= (INTVAL (operands[3]) - 4) << 2;
7427 mask |= (INTVAL (operands[4]) - 4) << 4;
7428 mask |= (INTVAL (operands[5]) - 4) << 6;
7429 operands[2] = GEN_INT (mask);
7431 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7433 [(set_attr "type" "sselog")
7434 (set_attr "prefix_rep" "1")
7435 (set_attr "prefix_data16" "0")
7436 (set_attr "prefix" "maybe_vex")
7437 (set_attr "length_immediate" "1")
7438 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1) into a V4SI register.
;; The preparation statement sets operands[2] to the V4SI zero constant.
;; NOTE(review): the RTL operators of the template and the enabling
;; condition are not visible in this copy -- confirm against upstream.
7440 (define_expand "sse2_loadd"
7441 [(set (match_operand:V4SI 0 "register_operand" "")
7444 (match_operand:SI 1 "nonimmediate_operand" ""))
7448 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX form of loading an SImode scalar (operand 2) into a V4SI register.
;; Three alternatives: vmovd from memory, vmovd from a general register,
;; and vmovss combining an SSE-register scalar with vector operand 1.
;; Operand 1 is zero ("C") for the two vmovd alternatives.
;; NOTE(review): the wrapping RTL operators and the enabling condition
;; are not visible in this copy.
7450 (define_insn "*avx_loadld"
7451 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7454 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7455 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7459 vmovd\t{%2, %0|%0, %2}
7460 vmovd\t{%2, %0|%0, %2}
7461 vmovss\t{%2, %1, %0|%0, %1, %2}"
7462 [(set_attr "type" "ssemov")
7463 (set_attr "prefix" "vex")
7464 (set_attr "mode" "TI,TI,V4SF")])
;; Non-AVX form of loading an SImode scalar (operand 2) into a V4SI
;; register.  Four alternatives: movd from memory, movd from a general
;; register, movss from memory, and movss from an SSE register where
;; operand 1 is tied to the destination ("0").  Operand 1 is zero ("C")
;; in the first three alternatives.
;; NOTE(review): the wrapping RTL operators and the enabling condition
;; are not visible in this copy.
7466 (define_insn "sse2_loadld"
7467 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7470 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7471 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7475 movd\t{%2, %0|%0, %2}
7476 movd\t{%2, %0|%0, %2}
7477 movss\t{%2, %0|%0, %2}
7478 movss\t{%2, %0|%0, %2}"
7479 [(set_attr "type" "ssemov")
7480 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of a V4SI register into an SImode destination.
;; After reload, and when inter-unit moves are enabled or the destination
;; is memory or not a general register, the insn is split into a plain
;; SImode move: operand 1 is rewritten as an SImode REG with the same
;; register number as the vector source.
;; NOTE(review): the insn condition and output template are not visible
;; in this copy.
7482 (define_insn_and_split "sse2_stored"
7483 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7485 (match_operand:V4SI 1 "register_operand" "x,Yi")
7486 (parallel [(const_int 0)])))]
7489 "&& reload_completed
7490 && (TARGET_INTER_UNIT_MOVES
7491 || MEM_P (operands [0])
7492 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7493 [(set (match_dup 0) (match_dup 1))]
7495 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element operand 2 (a constant in 0..3) of a V4SI value that
;; lives in offsettable memory ("o") into a general register.  The split
;; code replaces the extraction by a scalar SImode load from the same
;; memory at byte offset i*4, where i is the element index.
;; NOTE(review): the insn/split conditions are not visible in this copy.
7498 (define_insn_and_split "*vec_ext_v4si_mem"
7499 [(set (match_operand:SI 0 "register_operand" "=r")
7501 (match_operand:V4SI 1 "memory_operand" "o")
7502 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7508 int i = INTVAL (operands[2]);
7510 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of a V2DI register (operand 1) into a
;; DImode register or memory destination (operand 0).
;; NOTE(review): the enabling condition is not visible in this copy.
7514 (define_expand "sse_storeq"
7515 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7517 (match_operand:V2DI 1 "register_operand" "")
7518 (parallel [(const_int 0)])))]
;; 64-bit variant of storing element 0 of a V2DI value to a DImode
;; destination.  Rejected when both operands are memory.  Three
;; alternatives; the last one (general register from offsettable memory)
;; is an integer move printed as %vmov{q}, the first two carry "*"
;; attributes.
;; NOTE(review): the templates of the first two alternatives are not
;; visible in this copy.
7522 (define_insn "*sse2_storeq_rex64"
7523 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7525 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7526 (parallel [(const_int 0)])))]
7527 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7531 %vmov{q}\t{%1, %0|%0, %1}"
7532 [(set_attr "type" "*,*,imov")
7533 (set_attr "prefix" "*,*,maybe_vex")
7534 (set_attr "mode" "*,*,DI")])
;; Store element 0 of a V2DI register into a DImode memory location or
;; SSE register ("=mx").
;; NOTE(review): the condition and output template of this insn are not
;; visible in this copy -- confirm against upstream.
7536 (define_insn "*sse2_storeq"
7537 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7539 (match_operand:V2DI 1 "register_operand" "x")
7540 (parallel [(const_int 0)])))]
;; NOTE(review): this looks like the body of a split for the element-0
;; V2DI store (its opening line is not visible in this copy).  When
;; inter-unit moves are enabled or the destination is memory or not a
;; general register, the extraction becomes a plain DImode move with
;; operand 1 rewritten as a DImode REG of the same register number.
7545 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7547 (match_operand:V2DI 1 "register_operand" "")
7548 (parallel [(const_int 0)])))]
7551 && (TARGET_INTER_UNIT_MOVES
7552 || MEM_P (operands [0])
7553 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7554 [(set (match_dup 0) (match_dup 1))]
7556 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 of a V2DI value into a DImode destination, VEX
;; encoded.  Rejected when both operands are memory.  Alternatives:
;; vmovhps store to memory, vpsrldq by 8 bytes register-to-register,
;; vmovq from memory via %H1, and an integer vmov{q} from memory via %H1
;; into a general register.
;; NOTE(review): the first part of the condition is not visible here.
7559 (define_insn "*vec_extractv2di_1_rex64_avx"
7560 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7562 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7563 (parallel [(const_int 1)])))]
7566 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7568 vmovhps\t{%1, %0|%0, %1}
7569 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7570 vmovq\t{%H1, %0|%0, %H1}
7571 vmov{q}\t{%H1, %0|%0, %H1}"
7572 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7573 (set_attr "length_immediate" "*,1,*,*")
7574 (set_attr "memory" "*,none,*,*")
7575 (set_attr "prefix" "vex")
7576 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI value into a DImode destination on 64-bit
;; targets (non-VEX forms).  Rejected when both operands are memory.
;; Alternatives: movhps store to memory, in-place psrldq by 8 bytes
;; (source tied to destination), movq from memory via %H1, and an
;; integer mov{q} from memory via %H1 into a general register.
7578 (define_insn "*vec_extractv2di_1_rex64"
7579 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7581 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7582 (parallel [(const_int 1)])))]
7583 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7585 movhps\t{%1, %0|%0, %1}
7586 psrldq\t{$8, %0|%0, 8}
7587 movq\t{%H1, %0|%0, %H1}
7588 mov{q}\t{%H1, %0|%0, %H1}"
7589 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7590 (set_attr "length_immediate" "*,1,*,*")
7591 (set_attr "atom_unit" "*,sishuf,*,*")
7592 (set_attr "memory" "*,none,*,*")
7593 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI value into a DImode memory location or
;; SSE register, VEX encoded.  Rejected when both operands are memory.
;; Alternatives: vmovhps store, vpsrldq by 8 bytes, vmovq from memory
;; via %H1.
;; NOTE(review): the first part of the condition is not visible here.
7595 (define_insn "*vec_extractv2di_1_avx"
7596 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7598 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7599 (parallel [(const_int 1)])))]
7602 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7604 vmovhps\t{%1, %0|%0, %1}
7605 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7606 vmovq\t{%H1, %0|%0, %H1}"
7607 [(set_attr "type" "ssemov,sseishft,ssemov")
7608 (set_attr "length_immediate" "*,1,*")
7609 (set_attr "memory" "*,none,*")
7610 (set_attr "prefix" "vex")
7611 (set_attr "mode" "V2SF,TI,TI")])
;; Extract element 1 of a V2DI value into a DImode memory location or
;; SSE register using SSE2 instructions.  Rejected when both operands are
;; memory.  Alternatives: movhps store, in-place psrldq by 8 bytes
;; (source tied to destination), movq from memory via %H1.
;; NOTE(review): the first part of the condition is not visible here.
7613 (define_insn "*vec_extractv2di_1_sse2"
7614 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7616 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7617 (parallel [(const_int 1)])))]
7619 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7621 movhps\t{%1, %0|%0, %1}
7622 psrldq\t{$8, %0|%0, 8}
7623 movq\t{%H1, %0|%0, %H1}"
7624 [(set_attr "type" "ssemov,sseishft,ssemov")
7625 (set_attr "length_immediate" "*,1,*")
7626 (set_attr "atom_unit" "*,sishuf,*")
7627 (set_attr "memory" "*,none,*")
7628 (set_attr "mode" "V2SF,TI,TI")])
;; Extract element 1 of a V2DI value when only SSE (not SSE2) is
;; available.  Rejected when both operands are memory.  Alternatives:
;; movhps store to memory, movhlps register-to-register, movlps load
;; from memory via %H1.
7630 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7631 (define_insn "*vec_extractv2di_1_sse"
7632 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7634 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7635 (parallel [(const_int 1)])))]
7636 "!TARGET_SSE2 && TARGET_SSE
7637 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7639 movhps\t{%1, %0|%0, %1}
7640 movhlps\t{%1, %0|%0, %1}
7641 movlps\t{%H1, %0|%0, %H1}"
7642 [(set_attr "type" "ssemov")
7643 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcast an SImode register value into a V4SI register.  First
;; alternative prints %vpshufd with immediate 0; second alternative
;; prints shufps with immediate 0 on the destination itself (source tied
;; to destination) and always uses the non-VEX ("orig") encoding.
;; NOTE(review): the wrapping RTL operator and the enabling condition
;; are not visible in this copy.
7645 (define_insn "*vec_dupv4si"
7646 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7648 (match_operand:SI 1 "register_operand" " Y2,0")))]
7651 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7652 shufps\t{$0, %0, %0|%0, %0, 0}"
7653 [(set_attr "type" "sselog1")
7654 (set_attr "prefix" "maybe_vex,orig")
7655 (set_attr "length_immediate" "1")
7656 (set_attr "mode" "TI,V4SF")])
;; AVX broadcast of a DImode SSE-register value into a V2DI register,
;; printed as vpunpcklqdq with operand 1 used for both sources.
;; NOTE(review): the wrapping RTL operator and the enabling condition
;; are not visible in this copy.
7658 (define_insn "*vec_dupv2di_avx"
7659 [(set (match_operand:V2DI 0 "register_operand" "=x")
7661 (match_operand:DI 1 "register_operand" "x")))]
7663 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7664 [(set_attr "type" "sselog1")
7665 (set_attr "prefix" "vex")
7666 (set_attr "mode" "TI")])
;; Non-AVX broadcast of a DImode value into a V2DI register; in both
;; alternatives the source is tied to the destination ("0").
;; NOTE(review): the output templates, the wrapping RTL operator and the
;; enabling condition are not visible in this copy -- confirm upstream.
7668 (define_insn "*vec_dupv2di"
7669 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7671 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7676 [(set_attr "type" "sselog1,ssemov")
7677 (set_attr "mode" "TI,V4SF")])
;; AVX variant of building a V2SI value from two SImode operands.
;; Alternatives 0-2 use SSE registers (vpinsrd with immediate 0x1,
;; vpunpckldq, or vmovd when operand 2 is zero); alternatives 3-4 use MMX
;; registers (punpckldq / movd) and are therefore given the "orig"
;; prefix while the others are "vex".
;; NOTE(review): the wrapping RTL operator and the enabling condition
;; are not visible in this copy.
7679 (define_insn "*vec_concatv2si_avx"
7680 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7682 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7683 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7686 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7687 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7688 vmovd\t{%1, %0|%0, %1}
7689 punpckldq\t{%2, %0|%0, %2}
7690 movd\t{%1, %0|%0, %1}"
7691 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7692 (set_attr "prefix_extra" "1,*,*,*,*")
7693 (set_attr "length_immediate" "1,*,*,*,*")
7694 (set (attr "prefix")
7695 (if_then_else (eq_attr "alternative" "3,4")
7696 (const_string "orig")
7697 (const_string "vex")))
7698 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Non-VEX variant of building a V2SI value from two SImode operands
;; that can use pinsrd.  Alternatives: pinsrd with immediate 0x1,
;; punpckldq, movd (operand 2 zero) on SSE registers, then punpckldq and
;; movd on MMX registers.  In the two-operand forms operand 1 is tied to
;; the destination.
;; NOTE(review): the wrapping RTL operator and the enabling condition
;; are not visible in this copy.
7700 (define_insn "*vec_concatv2si_sse4_1"
7701 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7703 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7704 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7707 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7708 punpckldq\t{%2, %0|%0, %2}
7709 movd\t{%1, %0|%0, %1}
7710 punpckldq\t{%2, %0|%0, %2}
7711 movd\t{%1, %0|%0, %1}"
7712 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7713 (set_attr "prefix_extra" "1,*,*,*,*")
7714 (set_attr "length_immediate" "1,*,*,*,*")
7715 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Variant of the V2SI concatenation without pinsrd: operand 2 must be a
;; register or zero (reg_or_0_operand).  Alternatives: punpckldq and
;; movd on SSE registers, then punpckldq and movd on MMX registers.
;; NOTE(review): the wrapping RTL operator and the enabling condition
;; are not visible in this copy.
7717 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7718 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7719 ;; alternatives pretty much forces the MMX alternative to be chosen.
7720 (define_insn "*vec_concatv2si_sse2"
7721 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7723 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7724 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7727 punpckldq\t{%2, %0|%0, %2}
7728 movd\t{%1, %0|%0, %1}
7729 punpckldq\t{%2, %0|%0, %2}
7730 movd\t{%1, %0|%0, %1}"
7731 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7732 (set_attr "mode" "TI,TI,DI,DI")])
;; Variant of the V2SI concatenation that uses single-precision float
;; instructions for the SSE-register alternatives (unpcklps, and movss
;; from memory when operand 2 is zero); the MMX alternatives use
;; punpckldq and movd.
;; NOTE(review): the wrapping RTL operator and the enabling condition
;; are not visible in this copy.
7734 (define_insn "*vec_concatv2si_sse"
7735 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7737 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7738 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7741 unpcklps\t{%2, %0|%0, %2}
7742 movss\t{%1, %0|%0, %1}
7743 punpckldq\t{%2, %0|%0, %2}
7744 movd\t{%1, %0|%0, %1}"
7745 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7746 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX variant of building a V4SI value from two V2SI halves.  When
;; operand 2 is a register the insn prints vpunpcklqdq; when it is memory
;; it prints vmovhps.
;; NOTE(review): the wrapping RTL operator and the enabling condition
;; are not visible in this copy.
7748 (define_insn "*vec_concatv4si_1_avx"
7749 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7751 (match_operand:V2SI 1 "register_operand" " x,x")
7752 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7755 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7756 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7757 [(set_attr "type" "sselog,ssemov")
7758 (set_attr "prefix" "vex")
7759 (set_attr "mode" "TI,V2SF")])
;; Non-AVX variant of building a V4SI value from two V2SI halves;
;; operand 1 is always tied to the destination.  Alternatives:
;; punpcklqdq, movlhps (both register sources) and movhps (memory
;; source).
;; NOTE(review): the wrapping RTL operator and the enabling condition
;; are not visible in this copy.
7761 (define_insn "*vec_concatv4si_1"
7762 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7764 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7765 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7768 punpcklqdq\t{%2, %0|%0, %2}
7769 movlhps\t{%2, %0|%0, %2}
7770 movhps\t{%2, %0|%0, %2}"
7771 [(set_attr "type" "sselog,ssemov,ssemov")
7772 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX variant of building a V2DI value from two DImode operands.
;; Alternatives: vmovq from memory (operand 2 zero), movq2dq from an MMX
;; register (operand 2 zero), vpunpcklqdq of two SSE registers, and
;; vmovhps with a memory second operand.  The movq2dq alternative keeps
;; the "orig" prefix; all others are "vex".
;; NOTE(review): the wrapping RTL operator is not visible in this copy.
7774 (define_insn "*vec_concatv2di_avx"
7775 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7777 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7778 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7779 "!TARGET_64BIT && TARGET_AVX"
7781 vmovq\t{%1, %0|%0, %1}
7782 movq2dq\t{%1, %0|%0, %1}
7783 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7784 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7785 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7786 (set (attr "prefix")
7787 (if_then_else (eq_attr "alternative" "1")
7788 (const_string "orig")
7789 (const_string "vex")))
7790 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit non-AVX variant of building a V2DI value from two DImode
;; operands.  Alternatives: movq (operand 2 zero), movq2dq from an MMX
;; register (operand 2 zero), punpcklqdq, movlhps, and movhps with a
;; memory second operand.  In the last three operand 1 is tied to the
;; destination.
;; NOTE(review): the wrapping RTL operator is not visible in this copy.
7792 (define_insn "vec_concatv2di"
7793 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7795 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7796 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7797 "!TARGET_64BIT && TARGET_SSE"
7799 movq\t{%1, %0|%0, %1}
7800 movq2dq\t{%1, %0|%0, %1}
7801 punpcklqdq\t{%2, %0|%0, %2}
7802 movlhps\t{%2, %0|%0, %2}
7803 movhps\t{%2, %0|%0, %2}"
7804 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7805 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX variant of building a V2DI value from two DImode operands.
;; Alternatives: vpinsrq with immediate 0x1, vmovq from memory, vmovq
;; from a general register, movq2dq from an MMX register (all three with
;; operand 2 zero), vpunpcklqdq, and vmovhps with a memory second
;; operand.  Only the movq2dq alternative uses the "orig" prefix.
;; NOTE(review): the wrapping RTL operator is not visible in this copy.
7807 (define_insn "*vec_concatv2di_rex64_avx"
7808 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7810 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7811 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7812 "TARGET_64BIT && TARGET_AVX"
7814 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7815 vmovq\t{%1, %0|%0, %1}
7816 vmovq\t{%1, %0|%0, %1}
7817 movq2dq\t{%1, %0|%0, %1}
7818 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7819 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7820 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7821 (set_attr "prefix_extra" "1,*,*,*,*,*")
7822 (set_attr "length_immediate" "1,*,*,*,*,*")
7823 (set (attr "prefix")
7824 (if_then_else (eq_attr "alternative" "3")
7825 (const_string "orig")
7826 (const_string "vex")))
7827 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 variant of building a V2DI value from two DImode
;; operands.  Alternatives: pinsrq with immediate 0x1, movq from
;; memory/SSE register, movq from a general register, movq2dq from an MMX
;; register (the last three with operand 2 zero), punpcklqdq, movlhps and
;; movhps.  prefix_rex is forced to "1" for the pinsrq alternative and
;; the general-register movq alternative.
;; NOTE(review): the wrapping RTL operator is not visible in this copy.
7829 (define_insn "*vec_concatv2di_rex64_sse4_1"
7830 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7832 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7833 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7834 "TARGET_64BIT && TARGET_SSE4_1"
7836 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7837 movq\t{%1, %0|%0, %1}
7838 movq\t{%1, %0|%0, %1}
7839 movq2dq\t{%1, %0|%0, %1}
7840 punpcklqdq\t{%2, %0|%0, %2}
7841 movlhps\t{%2, %0|%0, %2}
7842 movhps\t{%2, %0|%0, %2}"
7843 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7844 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7845 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7846 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7847 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit variant of building a V2DI value from two DImode operands
;; without pinsrq.  Alternatives: movq from memory/SSE register, movq
;; from a general register, movq2dq from an MMX register (all with
;; operand 2 zero), punpcklqdq, movlhps and movhps.  prefix_rex is
;; forced to "1" only for the general-register movq alternative.
;; NOTE(review): the wrapping RTL operator is not visible in this copy.
7849 (define_insn "*vec_concatv2di_rex64_sse"
7850 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7852 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7853 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7854 "TARGET_64BIT && TARGET_SSE"
7856 movq\t{%1, %0|%0, %1}
7857 movq\t{%1, %0|%0, %1}
7858 movq2dq\t{%1, %0|%0, %1}
7859 punpcklqdq\t{%2, %0|%0, %2}
7860 movlhps\t{%2, %0|%0, %2}
7861 movhps\t{%2, %0|%0, %2}"
7862 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7863 (set_attr "prefix_rex" "*,1,*,*,*,*")
7864 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander widening a V16QI register into a V8HI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true) -- presumably "unsigned" and "high half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7866 (define_expand "vec_unpacku_hi_v16qi"
7867 [(match_operand:V8HI 0 "register_operand" "")
7868 (match_operand:V16QI 1 "register_operand" "")]
7872 ix86_expand_sse4_unpack (operands, true, true);
7874 ix86_expand_sse_unpack (operands, true, true);
;; Expander widening a V16QI register into a V8HI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true) -- presumably "signed" and "high half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7878 (define_expand "vec_unpacks_hi_v16qi"
7879 [(match_operand:V8HI 0 "register_operand" "")
7880 (match_operand:V16QI 1 "register_operand" "")]
7884 ix86_expand_sse4_unpack (operands, false, true);
7886 ix86_expand_sse_unpack (operands, false, true);
;; Expander widening a V16QI register into a V8HI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false) -- presumably "unsigned" and "low half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7890 (define_expand "vec_unpacku_lo_v16qi"
7891 [(match_operand:V8HI 0 "register_operand" "")
7892 (match_operand:V16QI 1 "register_operand" "")]
7896 ix86_expand_sse4_unpack (operands, true, false);
7898 ix86_expand_sse_unpack (operands, true, false);
;; Expander widening a V16QI register into a V8HI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false) -- presumably "signed" and "low half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7902 (define_expand "vec_unpacks_lo_v16qi"
7903 [(match_operand:V8HI 0 "register_operand" "")
7904 (match_operand:V16QI 1 "register_operand" "")]
7908 ix86_expand_sse4_unpack (operands, false, false);
7910 ix86_expand_sse_unpack (operands, false, false);
;; Expander widening a V8HI register into a V4SI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true) -- presumably "unsigned" and "high half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7914 (define_expand "vec_unpacku_hi_v8hi"
7915 [(match_operand:V4SI 0 "register_operand" "")
7916 (match_operand:V8HI 1 "register_operand" "")]
7920 ix86_expand_sse4_unpack (operands, true, true);
7922 ix86_expand_sse_unpack (operands, true, true);
;; Expander widening a V8HI register into a V4SI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true) -- presumably "signed" and "high half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7926 (define_expand "vec_unpacks_hi_v8hi"
7927 [(match_operand:V4SI 0 "register_operand" "")
7928 (match_operand:V8HI 1 "register_operand" "")]
7932 ix86_expand_sse4_unpack (operands, false, true);
7934 ix86_expand_sse_unpack (operands, false, true);
;; Expander widening a V8HI register into a V4SI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false) -- presumably "unsigned" and "low half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7938 (define_expand "vec_unpacku_lo_v8hi"
7939 [(match_operand:V4SI 0 "register_operand" "")
7940 (match_operand:V8HI 1 "register_operand" "")]
7944 ix86_expand_sse4_unpack (operands, true, false);
7946 ix86_expand_sse_unpack (operands, true, false);
;; Expander widening a V8HI register into a V4SI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false) -- presumably "signed" and "low half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7950 (define_expand "vec_unpacks_lo_v8hi"
7951 [(match_operand:V4SI 0 "register_operand" "")
7952 (match_operand:V8HI 1 "register_operand" "")]
7956 ix86_expand_sse4_unpack (operands, false, false);
7958 ix86_expand_sse_unpack (operands, false, false);
;; Expander widening a V4SI register into a V2DI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true) -- presumably "unsigned" and "high half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7962 (define_expand "vec_unpacku_hi_v4si"
7963 [(match_operand:V2DI 0 "register_operand" "")
7964 (match_operand:V4SI 1 "register_operand" "")]
7968 ix86_expand_sse4_unpack (operands, true, true);
7970 ix86_expand_sse_unpack (operands, true, true);
;; Expander widening a V4SI register into a V2DI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true) -- presumably "signed" and "high half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7974 (define_expand "vec_unpacks_hi_v4si"
7975 [(match_operand:V2DI 0 "register_operand" "")
7976 (match_operand:V4SI 1 "register_operand" "")]
7980 ix86_expand_sse4_unpack (operands, false, true);
7982 ix86_expand_sse_unpack (operands, false, true);
;; Expander widening a V4SI register into a V2DI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false) -- presumably "unsigned" and "low half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7986 (define_expand "vec_unpacku_lo_v4si"
7987 [(match_operand:V2DI 0 "register_operand" "")
7988 (match_operand:V4SI 1 "register_operand" "")]
7992 ix86_expand_sse4_unpack (operands, true, false);
7994 ix86_expand_sse_unpack (operands, true, false);
;; Expander widening a V4SI register into a V2DI register.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false) -- presumably "signed" and "low half", matching the
;; name; confirm against the helpers' definitions.
;; NOTE(review): the test choosing between the two helpers and the
;; enabling condition are not visible in this copy.
7998 (define_expand "vec_unpacks_lo_v4si"
7999 [(match_operand:V2DI 0 "register_operand" "")
8000 (match_operand:V4SI 1 "register_operand" "")]
8004 ix86_expand_sse4_unpack (operands, false, false);
8006 ix86_expand_sse_unpack (operands, false, false);
8010 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8014 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI averaging pattern: two V16QI inputs and a
;; constant vector of sixteen ones appear in the template.  The
;; preparation statement calls ix86_fixup_binary_operands_no_copy with
;; PLUS and V16QImode to legitimise the operand combination.
;; NOTE(review): the arithmetic RTL operators around the operands and
;; the enabling condition are not visible in this copy.
8016 (define_expand "sse2_uavgv16qi3"
8017 [(set (match_operand:V16QI 0 "register_operand" "")
8023 (match_operand:V16QI 1 "nonimmediate_operand" ""))
8025 (match_operand:V16QI 2 "nonimmediate_operand" "")))
8026 (const_vector:V16QI [(const_int 1) (const_int 1)
8027 (const_int 1) (const_int 1)
8028 (const_int 1) (const_int 1)
8029 (const_int 1) (const_int 1)
8030 (const_int 1) (const_int 1)
8031 (const_int 1) (const_int 1)
8032 (const_int 1) (const_int 1)
8033 (const_int 1) (const_int 1)]))
8036 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX three-operand form of the V16QI averaging pattern, printed as
;; vpavgb.  Operand 1 is marked commutative ("%x"); operand 2 may be a
;; register or memory.  Enabled for TARGET_AVX when
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the arithmetic RTL operators around the operands are
;; not visible in this copy.
8038 (define_insn "*avx_uavgv16qi3"
8039 [(set (match_operand:V16QI 0 "register_operand" "=x")
8045 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
8047 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
8048 (const_vector:V16QI [(const_int 1) (const_int 1)
8049 (const_int 1) (const_int 1)
8050 (const_int 1) (const_int 1)
8051 (const_int 1) (const_int 1)
8052 (const_int 1) (const_int 1)
8053 (const_int 1) (const_int 1)
8054 (const_int 1) (const_int 1)
8055 (const_int 1) (const_int 1)]))
8057 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8058 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
8059 [(set_attr "type" "sseiadd")
8060 (set_attr "prefix" "vex")
8061 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the V16QI averaging pattern, printed as
;; pavgb.  Operand 1 is commutative and tied to the destination ("%0");
;; operand 2 may be a register or memory.  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the arithmetic RTL operators around the operands are
;; not visible in this copy.
8063 (define_insn "*sse2_uavgv16qi3"
8064 [(set (match_operand:V16QI 0 "register_operand" "=x")
8070 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
8072 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
8073 (const_vector:V16QI [(const_int 1) (const_int 1)
8074 (const_int 1) (const_int 1)
8075 (const_int 1) (const_int 1)
8076 (const_int 1) (const_int 1)
8077 (const_int 1) (const_int 1)
8078 (const_int 1) (const_int 1)
8079 (const_int 1) (const_int 1)
8080 (const_int 1) (const_int 1)]))
8082 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8083 "pavgb\t{%2, %0|%0, %2}"
8084 [(set_attr "type" "sseiadd")
8085 (set_attr "prefix_data16" "1")
8086 (set_attr "mode" "TI")])
;; Expander for the V8HI averaging pattern: two V8HI inputs and a
;; constant vector of eight ones appear in the template.  The
;; preparation statement calls ix86_fixup_binary_operands_no_copy with
;; PLUS and V8HImode to legitimise the operand combination.
;; NOTE(review): the arithmetic RTL operators around the operands and
;; the enabling condition are not visible in this copy.
8088 (define_expand "sse2_uavgv8hi3"
8089 [(set (match_operand:V8HI 0 "register_operand" "")
8095 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8097 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8098 (const_vector:V8HI [(const_int 1) (const_int 1)
8099 (const_int 1) (const_int 1)
8100 (const_int 1) (const_int 1)
8101 (const_int 1) (const_int 1)]))
8104 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX three-operand form of the V8HI averaging pattern, printed as
;; vpavgw.  Operand 1 is marked commutative ("%x"); operand 2 may be a
;; register or memory.  Enabled for TARGET_AVX when
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the arithmetic RTL operators around the operands are
;; not visible in this copy.
8106 (define_insn "*avx_uavgv8hi3"
8107 [(set (match_operand:V8HI 0 "register_operand" "=x")
8113 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8115 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8116 (const_vector:V8HI [(const_int 1) (const_int 1)
8117 (const_int 1) (const_int 1)
8118 (const_int 1) (const_int 1)
8119 (const_int 1) (const_int 1)]))
8121 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8122 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
8123 [(set_attr "type" "sseiadd")
8124 (set_attr "prefix" "vex")
8125 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the V8HI averaging pattern, printed as
;; pavgw.  Operand 1 is commutative and tied to the destination ("%0");
;; operand 2 may be a register or memory.  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the arithmetic RTL operators around the operands are
;; not visible in this copy.
8127 (define_insn "*sse2_uavgv8hi3"
8128 [(set (match_operand:V8HI 0 "register_operand" "=x")
8134 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8136 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8137 (const_vector:V8HI [(const_int 1) (const_int 1)
8138 (const_int 1) (const_int 1)
8139 (const_int 1) (const_int 1)
8140 (const_int 1) (const_int 1)]))
8142 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8143 "pavgw\t{%2, %0|%0, %2}"
8144 [(set_attr "type" "sseiadd")
8145 (set_attr "prefix_data16" "1")
8146 (set_attr "mode" "TI")])
;; AVX three-operand psadbw, modelled as an unspec over two V16QI
;; inputs producing a V2DI result rather than as explicit arithmetic.
;; NOTE(review): the unspec code and the enabling condition are not
;; visible in this copy.
8148 ;; The correct representation for this is absolutely enormous, and
8149 ;; surely not generally useful.
8150 (define_insn "*avx_psadbw"
8151 [(set (match_operand:V2DI 0 "register_operand" "=x")
8152 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
8153 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8156 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8157 [(set_attr "type" "sseiadd")
8158 (set_attr "prefix" "vex")
8159 (set_attr "mode" "TI")])
;; SSE2 two-operand psadbw, modelled as an unspec over two V16QI inputs
;; producing a V2DI result.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
;; NOTE(review): the unspec code and the enabling condition are not
;; visible in this copy.
8161 (define_insn "sse2_psadbw"
8162 [(set (match_operand:V2DI 0 "register_operand" "=x")
8163 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8164 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8167 "psadbw\t{%2, %0|%0, %2}"
8168 [(set_attr "type" "sseiadd")
8169 (set_attr "atom_unit" "simul")
8170 (set_attr "prefix_data16" "1")
8171 (set_attr "mode" "TI")])
;; 256-bit vmovmskps/vmovmskpd: takes a vector register whose mode
;; iterates over AVX256MODEF2P and writes an SImode general register.
;; The instruction suffix is chosen by <avxmodesuffixf2c>.  Enabled when
;; AVX256_VEC_FLOAT_MODE_P holds for the iterated mode.
;; NOTE(review): the unspec wrapper line is not visible in this copy.
8173 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
8174 [(set (match_operand:SI 0 "register_operand" "=r")
8176 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8178 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8179 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
8180 [(set_attr "type" "ssecvt")
8181 (set_attr "prefix" "vex")
8182 (set_attr "mode" "<MODE>")])
;; 128-bit movmskps/movmskpd (optionally VEX encoded): takes a vector
;; register whose mode iterates over SSEMODEF2P and writes an SImode
;; general register.  The instruction suffix is chosen by
;; <ssemodesuffixf2c>.  Enabled when SSE_VEC_FLOAT_MODE_P holds for the
;; iterated mode.
;; NOTE(review): the unspec wrapper line is not visible in this copy.
8184 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
8185 [(set (match_operand:SI 0 "register_operand" "=r")
8187 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8189 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8190 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8191 [(set_attr "type" "ssemov")
8192 (set_attr "prefix" "maybe_vex")
8193 (set_attr "mode" "<MODE>")])
;; pmovmskb (optionally VEX encoded): an unspec over a V16QI register
;; producing an SImode value in a general register.
;; NOTE(review): the unspec code and the enabling condition are not
;; visible in this copy.
8195 (define_insn "sse2_pmovmskb"
8196 [(set (match_operand:SI 0 "register_operand" "=r")
8197 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8200 "%vpmovmskb\t{%1, %0|%0, %1}"
8201 [(set_attr "type" "ssemov")
8202 (set_attr "prefix_data16" "1")
8203 (set_attr "prefix" "maybe_vex")
8204 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination written from an
;; unspec over two V16QI register operands.
;; NOTE(review): the rest of the unspec (remaining operand, unspec code)
;; and the enabling condition are not visible in this copy.
8206 (define_expand "sse2_maskmovdqu"
8207 [(set (match_operand:V16QI 0 "memory_operand" "")
8208 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8209 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The destination address is an SImode register
;; constrained to "D" and is not printed in the template (it is an
;; implicit operand of the instruction); the old memory contents are
;; also an input of the unspec.  Only operands 1 and 2 are printed.
;; length_vex is fixed at 3 because of the implicit address operand.
;; NOTE(review): the unspec code line is not visible in this copy.
8215 (define_insn "*sse2_maskmovdqu"
8216 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8217 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8218 (match_operand:V16QI 2 "register_operand" "x")
8219 (mem:V16QI (match_dup 0))]
8221 "TARGET_SSE2 && !TARGET_64BIT"
8222 ;; @@@ check ordering of operands in intel/nonintel syntax
8223 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8224 [(set_attr "type" "ssemov")
8225 (set_attr "prefix_data16" "1")
8226 ;; The implicit %rdi operand confuses default length_vex computation.
8227 (set_attr "length_vex" "3")
8228 (set_attr "prefix" "maybe_vex")
8229 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern but the implicit
;; destination address is a DImode register constrained to "D".
;; length_vex is computed explicitly: 3 + 1 when operand 2 is one of the
;; REX-only SSE registers (REGNO >= FIRST_REX_SSE_REG), else 2 + 1.
;; NOTE(review): the unspec code line is not visible in this copy.
8231 (define_insn "*sse2_maskmovdqu_rex64"
8232 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8233 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8234 (match_operand:V16QI 2 "register_operand" "x")
8235 (mem:V16QI (match_dup 0))]
8237 "TARGET_SSE2 && TARGET_64BIT"
8238 ;; @@@ check ordering of operands in intel/nonintel syntax
8239 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8240 [(set_attr "type" "ssemov")
8241 (set_attr "prefix_data16" "1")
8242 ;; The implicit %rdi operand confuses default length_vex computation.
8243 (set (attr "length_vex")
8244 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8245 (set_attr "prefix" "maybe_vex")
8246 (set_attr "mode" "TI")])
;; Volatile unspec taking an SImode memory operand; the attributes mark
;; it as an MXCSR-related load ("atom_sse_attr" mxcsr, "memory" load).
;; NOTE(review): the unspec code, condition and output template are not
;; visible in this copy -- by its name this is the ldmxcsr insn; confirm.
8248 (define_insn "sse_ldmxcsr"
8249 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8253 [(set_attr "type" "sse")
8254 (set_attr "atom_sse_attr" "mxcsr")
8255 (set_attr "prefix" "maybe_vex")
8256 (set_attr "memory" "load")])
;; Sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR; the attributes mark it as an MXCSR-related store.
;; NOTE(review): the condition and output template are not visible in
;; this copy -- by its name this is the stmxcsr insn; confirm.
8258 (define_insn "sse_stmxcsr"
8259 [(set (match_operand:SI 0 "memory_operand" "=m")
8260 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8263 [(set_attr "type" "sse")
8264 (set_attr "atom_sse_attr" "mxcsr")
8265 (set_attr "prefix" "maybe_vex")
8266 (set_attr "memory" "store")])
;; Expander for the store fence (UNSPEC_SFENCE).  Operand 0 is created
;; here as a volatile BLKmode MEM whose address is a SCRATCH, so the
;; fence is expressed as a read-modify-write of that memory.  Enabled
;; for TARGET_SSE or TARGET_3DNOW_A.
;; NOTE(review): the line setting (match_dup 0) is not visible here.
8268 (define_expand "sse_sfence"
8270 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8271 "TARGET_SSE || TARGET_3DNOW_A"
8273 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8274 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the store fence: a BLKmode operand set from
;; UNSPEC_SFENCE of itself.  Has no address bytes (length_address 0) and
;; an "unknown" memory effect.  Enabled for TARGET_SSE or TARGET_3DNOW_A.
;; NOTE(review): the output template is not visible in this copy.
8277 (define_insn "*sse_sfence"
8278 [(set (match_operand:BLK 0 "" "")
8279 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8280 "TARGET_SSE || TARGET_3DNOW_A"
8282 [(set_attr "type" "sse")
8283 (set_attr "length_address" "0")
8284 (set_attr "atom_sse_attr" "fence")
8285 (set_attr "memory" "unknown")])
;; Volatile unspec taking an address operand ("p"); treated like a fence
;; for scheduling ("atom_sse_attr" fence) with "unknown" memory effect.
;; NOTE(review): the unspec code, condition and output template are not
;; visible in this copy -- by its name this is the clflush insn; confirm.
8287 (define_insn "sse2_clflush"
8288 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8292 [(set_attr "type" "sse")
8293 (set_attr "atom_sse_attr" "fence")
8294 (set_attr "memory" "unknown")])
;; Expander for the full memory fence (UNSPEC_MFENCE).  Operand 0 is
;; created here as a volatile BLKmode MEM whose address is a SCRATCH.
;; NOTE(review): the line setting (match_dup 0) and the enabling
;; condition are not visible in this copy.
8296 (define_expand "sse2_mfence"
8298 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8301 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8302 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the full memory fence: a BLKmode operand set from
;; UNSPEC_MFENCE of itself.  Enabled for TARGET_64BIT or TARGET_SSE2.
;; Has no address bytes and an "unknown" memory effect.
;; NOTE(review): the output template is not visible in this copy.
8305 (define_insn "*sse2_mfence"
8306 [(set (match_operand:BLK 0 "" "")
8307 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8308 "TARGET_64BIT || TARGET_SSE2"
8310 [(set_attr "type" "sse")
8311 (set_attr "length_address" "0")
8312 (set_attr "atom_sse_attr" "fence")
8313 (set_attr "memory" "unknown")])
;; Expander for the load fence (UNSPEC_LFENCE).  Operand 0 is created
;; here as a volatile BLKmode MEM whose address is a SCRATCH.
;; NOTE(review): the line setting (match_dup 0) and the enabling
;; condition are not visible in this copy.
8315 (define_expand "sse2_lfence"
8317 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8320 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8321 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the load fence: a BLKmode operand set from UNSPEC_LFENCE
;; of itself.  Unlike the other fences its atom_sse_attr is "lfence".
;; NOTE(review): the condition and output template are not visible in
;; this copy.
8324 (define_insn "*sse2_lfence"
8325 [(set (match_operand:BLK 0 "" "")
8326 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8329 [(set_attr "type" "sse")
8330 (set_attr "length_address" "0")
8331 (set_attr "atom_sse_attr" "lfence")
8332 (set_attr "memory" "unknown")])
;; mwait as a volatile unspec.  Operand 0 must be in the "a" register
;; and operand 1 in the "c" register, both SImode; the same pattern
;; serves 32-bit and 64-bit code (see the comment inside).  The encoded
;; length is fixed at 3 bytes.
;; NOTE(review): the unspec code, condition and output template are not
;; visible in this copy.
8334 (define_insn "sse3_mwait"
8335 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8336 (match_operand:SI 1 "register_operand" "c")]
8339 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8340 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8341 ;; we only need to set up 32bit registers.
8343 [(set_attr "length" "3")])
;; 32-bit monitor as a volatile unspec.  The three SImode operands are
;; pinned to the "a", "c" and "d" registers and printed explicitly.
;; Enabled for TARGET_SSE3 && !TARGET_64BIT; encoded length is 3 bytes.
;; NOTE(review): the unspec code line is not visible in this copy.
8345 (define_insn "sse3_monitor"
8346 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8347 (match_operand:SI 1 "register_operand" "c")
8348 (match_operand:SI 2 "register_operand" "d")]
8350 "TARGET_SSE3 && !TARGET_64BIT"
8351 "monitor\t%0, %1, %2"
8352 [(set_attr "length" "3")])
;; 64-bit monitor as a volatile unspec.  Operand 0 (the "a" register) is
;; DImode, while operands 1 and 2 ("c" and "d") stay SImode for the
;; reason given in the comment inside.  Enabled for TARGET_SSE3 &&
;; TARGET_64BIT; encoded length is 3 bytes.
;; NOTE(review): the unspec code and output template are not visible in
;; this copy.
8354 (define_insn "sse3_monitor64"
8355 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8356 (match_operand:SI 1 "register_operand" "c")
8357 (match_operand:SI 2 "register_operand" "d")]
8359 "TARGET_SSE3 && TARGET_64BIT"
8360 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8361 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8362 ;; zero extended to 64bit, we only need to set up 32bit registers.
8364 [(set_attr "length" "3")])
8366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8368 ;; SSSE3 instructions
8370 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX three-operand horizontal add of V8HI, printed as vphaddw.  The
;; template pairs up adjacent HImode elements (0/1, 2/3, 4/5, 6/7) first
;; of operand 1 and then of operand 2; operand 2 may be a register or
;; memory.
;; NOTE(review): the vec_concat/plus operator lines that combine the
;; element pairs and the enabling condition are not visible in this copy.
8372 (define_insn "*avx_phaddwv8hi3"
8373 [(set (match_operand:V8HI 0 "register_operand" "=x")
8379 (match_operand:V8HI 1 "register_operand" "x")
8380 (parallel [(const_int 0)]))
8381 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8383 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8384 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8387 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8388 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8390 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8391 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8396 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8397 (parallel [(const_int 0)]))
8398 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8400 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8401 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8404 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8405 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8407 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8408 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8410 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8411 [(set_attr "type" "sseiadd")
8412 (set_attr "prefix_extra" "1")
8413 (set_attr "prefix" "vex")
8414 (set_attr "mode" "TI")])
;; Two-operand horizontal add of V8HI, printed as phaddw.  Operand 1 is
;; tied to the destination ("0"); operand 2 may be a register or memory.
;; The template pairs up adjacent HImode elements (0/1, 2/3, 4/5, 6/7) of
;; operand 1 and then of operand 2.
;; NOTE(review): the vec_concat/plus operator lines that combine the
;; element pairs and the enabling condition are not visible in this copy.
8416 (define_insn "ssse3_phaddwv8hi3"
8417 [(set (match_operand:V8HI 0 "register_operand" "=x")
8423 (match_operand:V8HI 1 "register_operand" "0")
8424 (parallel [(const_int 0)]))
8425 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8427 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8428 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8431 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8432 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8434 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8435 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8440 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8441 (parallel [(const_int 0)]))
8442 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8444 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8445 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8448 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8449 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8451 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8452 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8454 "phaddw\t{%2, %0|%0, %2}"
8455 [(set_attr "type" "sseiadd")
8456 (set_attr "atom_unit" "complex")
8457 (set_attr "prefix_data16" "1")
8458 (set_attr "prefix_extra" "1")
8459 (set_attr "mode" "TI")])
;; 64-bit PHADDW on V4HI values held in MMX registers ("y" constraints).
;; Operand 1 is tied to the destination ("0").  prefix_rex is not a
;; constant: it is computed per insn by x86_extended_reg_mentioned_p.
8461 (define_insn "ssse3_phaddwv4hi3"
8462 [(set (match_operand:V4HI 0 "register_operand" "=y")
8467 (match_operand:V4HI 1 "register_operand" "0")
8468 (parallel [(const_int 0)]))
8469 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8471 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8472 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8476 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8477 (parallel [(const_int 0)]))
8478 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8480 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8481 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8483 "phaddw\t{%2, %0|%0, %2}"
8484 [(set_attr "type" "sseiadd")
8485 (set_attr "atom_unit" "complex")
8486 (set_attr "prefix_extra" "1")
8487 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8488 (set_attr "mode" "DI")])
;; AVX encoding of the V4SI horizontal add; emits three-operand VPHADDD.
;; The template walks SI elements 0-3 of operand 1 (SSE register) and
;; then 0-3 of operand 2 (SSE register or memory), in adjacent pairs.
8490 (define_insn "*avx_phadddv4si3"
8491 [(set (match_operand:V4SI 0 "register_operand" "=x")
8496 (match_operand:V4SI 1 "register_operand" "x")
8497 (parallel [(const_int 0)]))
8498 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8500 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8501 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8505 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8506 (parallel [(const_int 0)]))
8507 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8509 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8510 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8512 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8513 [(set_attr "type" "sseiadd")
8514 (set_attr "prefix_extra" "1")
8515 (set_attr "prefix" "vex")
8516 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V4SI horizontal add; emits two-operand PHADDD.
;; Operand 1 shares the destination register (constraint "0");
;; operand 2 may be an SSE register or memory.
8518 (define_insn "ssse3_phadddv4si3"
8519 [(set (match_operand:V4SI 0 "register_operand" "=x")
8524 (match_operand:V4SI 1 "register_operand" "0")
8525 (parallel [(const_int 0)]))
8526 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8528 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8529 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8533 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8534 (parallel [(const_int 0)]))
8535 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8537 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8538 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8540 "phaddd\t{%2, %0|%0, %2}"
8541 [(set_attr "type" "sseiadd")
8542 (set_attr "atom_unit" "complex")
8543 (set_attr "prefix_data16" "1")
8544 (set_attr "prefix_extra" "1")
8545 (set_attr "mode" "TI")])
;; 64-bit PHADDD on V2SI values in MMX registers ("y" constraints).
;; Each source contributes its SI elements 0 and 1; operand 1 is tied
;; to the destination.  prefix_rex is computed per insn.
8547 (define_insn "ssse3_phadddv2si3"
8548 [(set (match_operand:V2SI 0 "register_operand" "=y")
8552 (match_operand:V2SI 1 "register_operand" "0")
8553 (parallel [(const_int 0)]))
8554 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8557 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8558 (parallel [(const_int 0)]))
8559 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8561 "phaddd\t{%2, %0|%0, %2}"
8562 [(set_attr "type" "sseiadd")
8563 (set_attr "atom_unit" "complex")
8564 (set_attr "prefix_extra" "1")
8565 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8566 (set_attr "mode" "DI")])
;; AVX encoding of the V8HI horizontal add with saturation; emits
;; three-operand VPHADDSW.  Operand layout matches the other V8HI
;; horizontal patterns: HI elements 0-7 of operand 1, then of operand 2.
8568 (define_insn "*avx_phaddswv8hi3"
8569 [(set (match_operand:V8HI 0 "register_operand" "=x")
8575 (match_operand:V8HI 1 "register_operand" "x")
8576 (parallel [(const_int 0)]))
8577 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8579 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8580 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8583 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8584 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8586 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8587 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8592 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8593 (parallel [(const_int 0)]))
8594 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8596 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8597 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8600 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8601 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8603 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8604 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8606 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8607 [(set_attr "type" "sseiadd")
8608 (set_attr "prefix_extra" "1")
8609 (set_attr "prefix" "vex")
8610 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V8HI horizontal add with saturation; emits
;; two-operand PHADDSW.  Operand 1 is tied to the destination ("0");
;; operand 2 may be an SSE register or memory ("xm").
8612 (define_insn "ssse3_phaddswv8hi3"
8613 [(set (match_operand:V8HI 0 "register_operand" "=x")
8619 (match_operand:V8HI 1 "register_operand" "0")
8620 (parallel [(const_int 0)]))
8621 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8623 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8624 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8627 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8628 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8630 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8631 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8636 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8637 (parallel [(const_int 0)]))
8638 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8640 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8641 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8644 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8645 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8647 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8648 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8650 "phaddsw\t{%2, %0|%0, %2}"
8651 [(set_attr "type" "sseiadd")
8652 (set_attr "atom_unit" "complex")
8653 (set_attr "prefix_data16" "1")
8654 (set_attr "prefix_extra" "1")
8655 (set_attr "mode" "TI")])
;; 64-bit PHADDSW on V4HI values in MMX registers ("y" constraints).
;; Operand 1 is tied to the destination; operand 2 may be an MMX
;; register or memory ("ym").  prefix_rex is computed per insn.
8657 (define_insn "ssse3_phaddswv4hi3"
8658 [(set (match_operand:V4HI 0 "register_operand" "=y")
8663 (match_operand:V4HI 1 "register_operand" "0")
8664 (parallel [(const_int 0)]))
8665 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8667 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8668 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8672 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8673 (parallel [(const_int 0)]))
8674 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8676 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8677 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8679 "phaddsw\t{%2, %0|%0, %2}"
8680 [(set_attr "type" "sseiadd")
8681 (set_attr "atom_unit" "complex")
8682 (set_attr "prefix_extra" "1")
8683 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8684 (set_attr "mode" "DI")])
;; AVX encoding of the V8HI horizontal subtract; emits three-operand
;; VPHSUBW.  Operand 1 is an SSE register, operand 2 an SSE register or
;; memory; HI elements 0-7 of each are referenced in adjacent pairs.
8686 (define_insn "*avx_phsubwv8hi3"
8687 [(set (match_operand:V8HI 0 "register_operand" "=x")
8693 (match_operand:V8HI 1 "register_operand" "x")
8694 (parallel [(const_int 0)]))
8695 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8697 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8698 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8701 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8702 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8704 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8705 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8710 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8711 (parallel [(const_int 0)]))
8712 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8714 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8715 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8718 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8719 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8721 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8722 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8724 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8725 [(set_attr "type" "sseiadd")
8726 (set_attr "prefix_extra" "1")
8727 (set_attr "prefix" "vex")
8728 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V8HI horizontal subtract; emits two-operand PHSUBW.
;; Operand 1 is tied to the destination register by constraint "0",
;; so only operands 0 and 2 appear in the assembler template.
8730 (define_insn "ssse3_phsubwv8hi3"
8731 [(set (match_operand:V8HI 0 "register_operand" "=x")
8737 (match_operand:V8HI 1 "register_operand" "0")
8738 (parallel [(const_int 0)]))
8739 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8741 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8742 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8745 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8746 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8748 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8749 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8754 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8755 (parallel [(const_int 0)]))
8756 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8758 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8759 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8762 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8763 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8765 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8766 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8768 "phsubw\t{%2, %0|%0, %2}"
8769 [(set_attr "type" "sseiadd")
8770 (set_attr "atom_unit" "complex")
8771 (set_attr "prefix_data16" "1")
8772 (set_attr "prefix_extra" "1")
8773 (set_attr "mode" "TI")])
;; 64-bit PHSUBW on V4HI values in MMX registers ("y" constraints).
;; Operand 1 is tied to the destination; operand 2 may be an MMX
;; register or memory.  prefix_rex is computed per insn.
8775 (define_insn "ssse3_phsubwv4hi3"
8776 [(set (match_operand:V4HI 0 "register_operand" "=y")
8781 (match_operand:V4HI 1 "register_operand" "0")
8782 (parallel [(const_int 0)]))
8783 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8785 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8786 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8790 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8791 (parallel [(const_int 0)]))
8792 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8794 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8795 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8797 "phsubw\t{%2, %0|%0, %2}"
8798 [(set_attr "type" "sseiadd")
8799 (set_attr "atom_unit" "complex")
8800 (set_attr "prefix_extra" "1")
8801 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8802 (set_attr "mode" "DI")])
;; AVX encoding of the V4SI horizontal subtract; emits three-operand
;; VPHSUBD.  SI elements 0-3 of operand 1 (SSE register) and of
;; operand 2 (SSE register or memory) are referenced in adjacent pairs.
8804 (define_insn "*avx_phsubdv4si3"
8805 [(set (match_operand:V4SI 0 "register_operand" "=x")
8810 (match_operand:V4SI 1 "register_operand" "x")
8811 (parallel [(const_int 0)]))
8812 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8814 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8815 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8819 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8820 (parallel [(const_int 0)]))
8821 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8823 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8824 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8826 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8827 [(set_attr "type" "sseiadd")
8828 (set_attr "prefix_extra" "1")
8829 (set_attr "prefix" "vex")
8830 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V4SI horizontal subtract; emits two-operand PHSUBD.
;; Operand 1 shares the destination register (constraint "0");
;; operand 2 may be an SSE register or memory.
8832 (define_insn "ssse3_phsubdv4si3"
8833 [(set (match_operand:V4SI 0 "register_operand" "=x")
8838 (match_operand:V4SI 1 "register_operand" "0")
8839 (parallel [(const_int 0)]))
8840 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8842 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8843 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8847 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8848 (parallel [(const_int 0)]))
8849 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8851 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8852 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8854 "phsubd\t{%2, %0|%0, %2}"
8855 [(set_attr "type" "sseiadd")
8856 (set_attr "atom_unit" "complex")
8857 (set_attr "prefix_data16" "1")
8858 (set_attr "prefix_extra" "1")
8859 (set_attr "mode" "TI")])
;; 64-bit PHSUBD on V2SI values in MMX registers ("y" constraints).
;; Each source contributes its SI elements 0 and 1; operand 1 is tied
;; to the destination.  prefix_rex is computed per insn.
8861 (define_insn "ssse3_phsubdv2si3"
8862 [(set (match_operand:V2SI 0 "register_operand" "=y")
8866 (match_operand:V2SI 1 "register_operand" "0")
8867 (parallel [(const_int 0)]))
8868 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8871 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8872 (parallel [(const_int 0)]))
8873 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8875 "phsubd\t{%2, %0|%0, %2}"
8876 [(set_attr "type" "sseiadd")
8877 (set_attr "atom_unit" "complex")
8878 (set_attr "prefix_extra" "1")
8879 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8880 (set_attr "mode" "DI")])
;; AVX encoding of the V8HI horizontal subtract with saturation; emits
;; three-operand VPHSUBSW.  Operand 1 is an SSE register, operand 2 an
;; SSE register or memory; HI elements 0-7 of each are referenced.
8882 (define_insn "*avx_phsubswv8hi3"
8883 [(set (match_operand:V8HI 0 "register_operand" "=x")
8889 (match_operand:V8HI 1 "register_operand" "x")
8890 (parallel [(const_int 0)]))
8891 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8893 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8894 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8897 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8898 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8900 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8901 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8906 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8907 (parallel [(const_int 0)]))
8908 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8910 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8911 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8914 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8915 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8917 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8918 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8920 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8921 [(set_attr "type" "sseiadd")
8922 (set_attr "prefix_extra" "1")
8923 (set_attr "prefix" "vex")
8924 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V8HI horizontal subtract with saturation; emits
;; two-operand PHSUBSW.  Operand 1 is tied to the destination ("0");
;; operand 2 may be an SSE register or memory ("xm").
8926 (define_insn "ssse3_phsubswv8hi3"
8927 [(set (match_operand:V8HI 0 "register_operand" "=x")
8933 (match_operand:V8HI 1 "register_operand" "0")
8934 (parallel [(const_int 0)]))
8935 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8937 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8938 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8941 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8942 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8944 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8945 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8950 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8951 (parallel [(const_int 0)]))
8952 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8954 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8955 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8958 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8959 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8961 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8962 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8964 "phsubsw\t{%2, %0|%0, %2}"
8965 [(set_attr "type" "sseiadd")
8966 (set_attr "atom_unit" "complex")
8967 (set_attr "prefix_data16" "1")
8968 (set_attr "prefix_extra" "1")
8969 (set_attr "mode" "TI")])
;; 64-bit PHSUBSW on V4HI values in MMX registers ("y" constraints).
;; Operand 1 is tied to the destination; operand 2 may be an MMX
;; register or memory ("ym").  prefix_rex is computed per insn.
8971 (define_insn "ssse3_phsubswv4hi3"
8972 [(set (match_operand:V4HI 0 "register_operand" "=y")
8977 (match_operand:V4HI 1 "register_operand" "0")
8978 (parallel [(const_int 0)]))
8979 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8981 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8982 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8986 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8987 (parallel [(const_int 0)]))
8988 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8990 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8991 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8993 "phsubsw\t{%2, %0|%0, %2}"
8994 [(set_attr "type" "sseiadd")
8995 (set_attr "atom_unit" "complex")
8996 (set_attr "prefix_extra" "1")
8997 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8998 (set_attr "mode" "DI")])
;; 128-bit PMADDUBSW: V16QI sources, V8HI result.  The AVX pattern emits
;; three-operand VPMADDUBSW; the SSSE3 pattern emits two-operand
;; PMADDUBSW with operand 1 tied to the destination ("0").
;; Both patterns must describe exactly the same RTL, because the RTL
;; generated through the named SSSE3 pattern is also what the AVX
;; pattern has to recognize; keep any change to one mirrored in the
;; other.
;; The selections of the odd-numbered bytes (1 ... 15) pick eight QI
;; elements out of a V16QI operand, so their vec_select mode is V8QI:
;; the mode of a vec_select must have as many elements as its parallel.
9000 (define_insn "*avx_pmaddubsw128"
9001 [(set (match_operand:V8HI 0 "register_operand" "=x")
9006 (match_operand:V16QI 1 "register_operand" "x")
9007 (parallel [(const_int 0)
9017 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9018 (parallel [(const_int 0)
9028 (vec_select:V8QI (match_dup 1)
9029 (parallel [(const_int 1)
9038 (vec_select:V8QI (match_dup 2)
9039 (parallel [(const_int 1)
9046 (const_int 15)]))))))]
9048 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9049 [(set_attr "type" "sseiadd")
9050 (set_attr "prefix_extra" "1")
9051 (set_attr "prefix" "vex")
9052 (set_attr "mode" "TI")])
9054 (define_insn "ssse3_pmaddubsw128"
9055 [(set (match_operand:V8HI 0 "register_operand" "=x")
9060 (match_operand:V16QI 1 "register_operand" "0")
9061 (parallel [(const_int 0)
9071 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9072 (parallel [(const_int 0)
9082 (vec_select:V8QI (match_dup 1)
9083 (parallel [(const_int 1)
9092 (vec_select:V8QI (match_dup 2)
9093 (parallel [(const_int 1)
9100 (const_int 15)]))))))]
9102 "pmaddubsw\t{%2, %0|%0, %2}"
9103 [(set_attr "type" "sseiadd")
9104 (set_attr "atom_unit" "simul")
9105 (set_attr "prefix_data16" "1")
9106 (set_attr "prefix_extra" "1")
9107 (set_attr "mode" "TI")])
;; 64-bit PMADDUBSW: V8QI sources in MMX registers ("y"), V4HI result.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an MMX
;; register or memory ("ym").  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
;; The selections of the odd-numbered bytes (1 ... 7) pick four QI
;; elements out of a V8QI operand, so their vec_select mode is V4QI:
;; the mode of a vec_select must have as many elements as its parallel.
9109 (define_insn "ssse3_pmaddubsw"
9110 [(set (match_operand:V4HI 0 "register_operand" "=y")
9115 (match_operand:V8QI 1 "register_operand" "0")
9116 (parallel [(const_int 0)
9122 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9123 (parallel [(const_int 0)
9129 (vec_select:V4QI (match_dup 1)
9130 (parallel [(const_int 1)
9135 (vec_select:V4QI (match_dup 2)
9136 (parallel [(const_int 1)
9139 (const_int 7)]))))))]
9141 "pmaddubsw\t{%2, %0|%0, %2}"
9142 [(set_attr "type" "sseiadd")
9143 (set_attr "atom_unit" "simul")
9144 (set_attr "prefix_extra" "1")
9145 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9146 (set_attr "mode" "DI")])
;; Expander for the V8HI PMULHRSW patterns.  Operands carry no
;; constraints here; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for a MULT in V8HImode before the
;; insn is generated.  The all-ones V8HI const_vector is part of the
;; template (presumably the rounding term -- confirm against the
;; instruction definition).
9148 (define_expand "ssse3_pmulhrswv8hi3"
9149 [(set (match_operand:V8HI 0 "register_operand" "")
9156 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9158 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9160 (const_vector:V8HI [(const_int 1) (const_int 1)
9161 (const_int 1) (const_int 1)
9162 (const_int 1) (const_int 1)
9163 (const_int 1) (const_int 1)]))
9166 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX encoding of V8HI PMULHRSW; emits three-operand VPMULHRSW.
;; Enabled only when TARGET_AVX holds and ix86_binary_operator_ok
;; accepts the operands for MULT.  The "%" in operand 1's constraint
;; marks operands 1 and 2 as commutative.
9168 (define_insn "*avx_pmulhrswv8hi3"
9169 [(set (match_operand:V8HI 0 "register_operand" "=x")
9176 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9178 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9180 (const_vector:V8HI [(const_int 1) (const_int 1)
9181 (const_int 1) (const_int 1)
9182 (const_int 1) (const_int 1)
9183 (const_int 1) (const_int 1)]))
9185 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9186 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9187 [(set_attr "type" "sseimul")
9188 (set_attr "prefix_extra" "1")
9189 (set_attr "prefix" "vex")
9190 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V8HI PMULHRSW; emits two-operand PMULHRSW.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts
;; the operands for MULT.  "%0" ties operand 1 to the destination and
;; marks it commutative with operand 2.
9192 (define_insn "*ssse3_pmulhrswv8hi3"
9193 [(set (match_operand:V8HI 0 "register_operand" "=x")
9200 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9202 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9204 (const_vector:V8HI [(const_int 1) (const_int 1)
9205 (const_int 1) (const_int 1)
9206 (const_int 1) (const_int 1)
9207 (const_int 1) (const_int 1)]))
9209 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9210 "pmulhrsw\t{%2, %0|%0, %2}"
9211 [(set_attr "type" "sseimul")
9212 (set_attr "prefix_data16" "1")
9213 (set_attr "prefix_extra" "1")
9214 (set_attr "mode" "TI")])
;; Expander for the V4HI (64-bit) PMULHRSW pattern.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy for a MULT in
;; V4HImode.  The all-ones V4HI const_vector is part of the template
;; (presumably the rounding term -- confirm against the instruction
;; definition).
9216 (define_expand "ssse3_pmulhrswv4hi3"
9217 [(set (match_operand:V4HI 0 "register_operand" "")
9224 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9226 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9228 (const_vector:V4HI [(const_int 1) (const_int 1)
9229 (const_int 1) (const_int 1)]))
9232 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; 64-bit PMULHRSW on V4HI values in MMX registers ("y" constraints).
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts
;; the operands for MULT in V4HImode.  prefix_rex is computed per insn.
9234 (define_insn "*ssse3_pmulhrswv4hi3"
9235 [(set (match_operand:V4HI 0 "register_operand" "=y")
9242 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9244 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9246 (const_vector:V4HI [(const_int 1) (const_int 1)
9247 (const_int 1) (const_int 1)]))
9249 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9250 "pmulhrsw\t{%2, %0|%0, %2}"
9251 [(set_attr "type" "sseimul")
9252 (set_attr "prefix_extra" "1")
9253 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9254 (set_attr "mode" "DI")])
;; AVX encoding of the V16QI byte shuffle; emits three-operand VPSHUFB.
;; The operation is modelled as an unspec over operands 1 and 2 rather
;; than as explicit RTL.  The ";" after the output template starts an
;; (empty) md comment; it is not part of the template.
9256 (define_insn "*avx_pshufbv16qi3"
9257 [(set (match_operand:V16QI 0 "register_operand" "=x")
9258 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9259 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9262 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9263 [(set_attr "type" "sselog1")
9264 (set_attr "prefix_extra" "1")
9265 (set_attr "prefix" "vex")
9266 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V16QI byte shuffle; emits two-operand PSHUFB.
;; Modelled as an unspec; operand 1 is tied to the destination ("0").
;; The ";" after the output template starts an (empty) md comment.
9268 (define_insn "ssse3_pshufbv16qi3"
9269 [(set (match_operand:V16QI 0 "register_operand" "=x")
9270 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9271 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9274 "pshufb\t{%2, %0|%0, %2}";
9275 [(set_attr "type" "sselog1")
9276 (set_attr "prefix_data16" "1")
9277 (set_attr "prefix_extra" "1")
9278 (set_attr "mode" "TI")])
;; 64-bit PSHUFB on V8QI values in MMX registers ("y" constraints).
;; Modelled as an unspec; operand 1 is tied to the destination.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9280 (define_insn "ssse3_pshufbv8qi3"
9281 [(set (match_operand:V8QI 0 "register_operand" "=y")
9282 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9283 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9286 "pshufb\t{%2, %0|%0, %2}";
9287 [(set_attr "type" "sselog1")
9288 (set_attr "prefix_extra" "1")
9289 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9290 (set_attr "mode" "DI")])
;; AVX encoding of PSIGN, instantiated for every mode in SSEMODE124
;; (V16QI, V8HI, V4SI).  Emits three-operand VPSIGNB/W/D; the element
;; size suffix comes from the <ssevecsize> mode attribute.
9292 (define_insn "*avx_psign<mode>3"
9293 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9295 [(match_operand:SSEMODE124 1 "register_operand" "x")
9296 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9299 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9300 [(set_attr "type" "sselog1")
9301 (set_attr "prefix_extra" "1")
9302 (set_attr "prefix" "vex")
9303 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) PSIGN for the SSEMODE124 modes (V16QI, V8HI, V4SI).
;; Emits two-operand PSIGNB/W/D via <ssevecsize>; operand 1 is tied to
;; the destination register ("0").
9305 (define_insn "ssse3_psign<mode>3"
9306 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9308 [(match_operand:SSEMODE124 1 "register_operand" "0")
9309 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9312 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9313 [(set_attr "type" "sselog1")
9314 (set_attr "prefix_data16" "1")
9315 (set_attr "prefix_extra" "1")
9316 (set_attr "mode" "TI")])
;; 64-bit PSIGN for the MMXMODEI modes, in MMX registers ("y").
;; The element size suffix comes from <mmxvecsize>.  It shares the
;; "ssse3_psign<mode>3" name template with the SSE pattern but iterates
;; over a different mode set.  prefix_rex is computed per insn.
9318 (define_insn "ssse3_psign<mode>3"
9319 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9321 [(match_operand:MMXMODEI 1 "register_operand" "0")
9322 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9325 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9326 [(set_attr "type" "sselog1")
9327 (set_attr "prefix_extra" "1")
9328 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9329 (set_attr "mode" "DI")])
;; AVX encoding of the 128-bit (TImode) PALIGNR; emits four-operand
;; VPALIGNR.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing it as the immediate
;; (presumably converting a bit count to the byte count the
;; instruction encodes -- confirm against callers).
9331 (define_insn "*avx_palignrti"
9332 [(set (match_operand:TI 0 "register_operand" "=x")
9333 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9334 (match_operand:TI 2 "nonimmediate_operand" "xm")
9335 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9339 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9340 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9342 [(set_attr "type" "sseishft")
9343 (set_attr "prefix_extra" "1")
9344 (set_attr "length_immediate" "1")
9345 (set_attr "prefix" "vex")
9346 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) 128-bit (TImode) PALIGNR; operand 1 is tied to the
;; destination ("0").  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 by the output code
;; before being printed as the immediate.
9348 (define_insn "ssse3_palignrti"
9349 [(set (match_operand:TI 0 "register_operand" "=x")
9350 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9351 (match_operand:TI 2 "nonimmediate_operand" "xm")
9352 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9356 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9357 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9359 [(set_attr "type" "sseishft")
9360 (set_attr "atom_unit" "sishuf")
9361 (set_attr "prefix_data16" "1")
9362 (set_attr "prefix_extra" "1")
9363 (set_attr "length_immediate" "1")
9364 (set_attr "mode" "TI")])
;; 64-bit (DImode) PALIGNR in MMX registers ("y" constraints).
;; As in the 128-bit forms, operand 3 is divided by 8 by the output
;; code before being printed as the immediate.  prefix_rex is computed
;; per insn by x86_extended_reg_mentioned_p.
9366 (define_insn "ssse3_palignrdi"
9367 [(set (match_operand:DI 0 "register_operand" "=y")
9368 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9369 (match_operand:DI 2 "nonimmediate_operand" "ym")
9370 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9374 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9375 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9377 [(set_attr "type" "sseishft")
9378 (set_attr "atom_unit" "sishuf")
9379 (set_attr "prefix_extra" "1")
9380 (set_attr "length_immediate" "1")
9381 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9382 (set_attr "mode" "DI")])
;; Element-wise absolute value for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI), expressed directly with the RTL "abs" code.  Emits
;; PABSB/W/D; the "%v" in the template together with prefix
;; "maybe_vex" lets this one pattern cover both the legacy and the
;; VEX-encoded form.
9384 (define_insn "abs<mode>2"
9385 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9386 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9388 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9389 [(set_attr "type" "sselog1")
9390 (set_attr "prefix_data16" "1")
9391 (set_attr "prefix_extra" "1")
9392 (set_attr "prefix" "maybe_vex")
9393 (set_attr "mode" "TI")])
;; Element-wise absolute value for the MMXMODEI modes in MMX registers
;; ("y"); emits PABS with the <mmxvecsize> suffix.  prefix_rep is
;; pinned to 0 and prefix_rex is computed per insn.
9395 (define_insn "abs<mode>2"
9396 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9397 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9399 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9400 [(set_attr "type" "sselog1")
9401 (set_attr "prefix_rep" "0")
9402 (set_attr "prefix_extra" "1")
9403 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9404 (set_attr "mode" "DI")])
9406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9408 ;; AMD SSE4A instructions
9410 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store for the MODEF modes: emits MOVNTSS/MOVNTSD (the
;; suffix comes from <ssemodefsuffix>).  The destination must be memory
;; ("=m") and the source an SSE register ("x").
9412 (define_insn "sse4a_movnt<mode>"
9413 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9415 [(match_operand:MODEF 1 "register_operand" "x")]
9418 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9419 [(set_attr "type" "ssemov")
9420 (set_attr "mode" "<MODE>")])
;; Vector-source variant of the SSE4A MOVNTSS/MOVNTSD store: operand 1
;; is a full SSEMODEF2P vector register and the pattern stores its
;; element 0 (vec_select with index 0) to a scalar-mode memory operand.
9422 (define_insn "sse4a_vmmovnt<mode>"
9423 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9424 (unspec:<ssescalarmode>
9425 [(vec_select:<ssescalarmode>
9426 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9427 (parallel [(const_int 0)]))]
9430 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9431 [(set_attr "type" "ssemov")
9432 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A EXTRQ, immediate form: operands 2 and 3 are integer constants
;; printed as the instruction's two immediates (length_immediate is 2).
;; Operand 1 is tied to the destination register ("0").
9434 (define_insn "sse4a_extrqi"
9435 [(set (match_operand:V2DI 0 "register_operand" "=x")
9436 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9437 (match_operand 2 "const_int_operand" "")
9438 (match_operand 3 "const_int_operand" "")]
9441 "extrq\t{%3, %2, %0|%0, %2, %3}"
9442 [(set_attr "type" "sse")
9443 (set_attr "prefix_data16" "1")
9444 (set_attr "length_immediate" "2")
9445 (set_attr "mode" "TI")])
;; SSE4A EXTRQ, register form: the control value comes from operand 2,
;; a V16QI SSE register, instead of immediates.  Operand 1 is tied to
;; the destination register ("0").
9447 (define_insn "sse4a_extrq"
9448 [(set (match_operand:V2DI 0 "register_operand" "=x")
9449 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9450 (match_operand:V16QI 2 "register_operand" "x")]
9453 "extrq\t{%2, %0|%0, %2}"
9454 [(set_attr "type" "sse")
9455 (set_attr "prefix_data16" "1")
9456 (set_attr "mode" "TI")])
;; SSE4A INSERTQ, immediate form: operand 2 is the source register and
;; operands 3 and 4 are integer constants printed as the two
;; immediates (length_immediate is 2).  The attributes pin
;; prefix_data16 to 0 and prefix_rep to 1 for this encoding.
9458 (define_insn "sse4a_insertqi"
9459 [(set (match_operand:V2DI 0 "register_operand" "=x")
9460 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9461 (match_operand:V2DI 2 "register_operand" "x")
9462 (match_operand 3 "const_int_operand" "")
9463 (match_operand 4 "const_int_operand" "")]
9466 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9467 [(set_attr "type" "sseins")
9468 (set_attr "prefix_data16" "0")
9469 (set_attr "prefix_rep" "1")
9470 (set_attr "length_immediate" "2")
9471 (set_attr "mode" "TI")])
;; SSE4A INSERTQ, register-only form: both inputs are V2DI SSE
;; registers and there are no immediates.  Operand 1 is tied to the
;; destination; prefix_data16 is pinned to 0 and prefix_rep to 1.
9473 (define_insn "sse4a_insertq"
9474 [(set (match_operand:V2DI 0 "register_operand" "=x")
9475 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9476 (match_operand:V2DI 2 "register_operand" "x")]
9479 "insertq\t{%2, %0|%0, %2}"
9480 [(set_attr "type" "sseins")
9481 (set_attr "prefix_data16" "0")
9482 (set_attr "prefix_rep" "1")
9483 (set_attr "mode" "TI")])
9485 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9487 ;; Intel SSE4.1 instructions
9489 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX immediate blend for the AVXMODEF2P modes; emits VBLENDPS/PD.
;; Expressed as a vec_merge whose first input is operand 2 and second
;; input is operand 1, with operand 3 as the constant selection mask.
;; The mask predicate's upper bound is supplied per mode by the
;; <blendbits> attribute.
9491 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9492 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9493 (vec_merge:AVXMODEF2P
9494 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9495 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9496 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9498 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9499 [(set_attr "type" "ssemov")
9500 (set_attr "prefix_extra" "1")
9501 (set_attr "length_immediate" "1")
9502 (set_attr "prefix" "vex")
9503 (set_attr "mode" "<avxvecmode>")])
;; AVX variable blend for the AVXMODEF2P modes; emits VBLENDVPS/PD.
;; Unlike the immediate blend, the selector (operand 3) is a vector
;; register of the same mode.  length_immediate is still 1
;; (NOTE(review): presumably because the VEX form encodes the fourth
;; register in an immediate byte -- confirm against the encoding).
9505 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9506 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9508 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9509 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9510 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9513 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9514 [(set_attr "type" "ssemov")
9515 (set_attr "prefix_extra" "1")
9516 (set_attr "length_immediate" "1")
9517 (set_attr "prefix" "vex")
9518 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 (non-VEX) immediate blend for the SSEMODEF2P modes; emits
;; BLENDPS/PD.  Same vec_merge shape as the AVX pattern (operand 2
;; first, operand 1 second, operand 3 the constant mask), but operand 1
;; is tied to the destination register ("0").
9520 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9521 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9522 (vec_merge:SSEMODEF2P
9523 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9524 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9525 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9527 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9528 [(set_attr "type" "ssemov")
9529 (set_attr "prefix_data16" "1")
9530 (set_attr "prefix_extra" "1")
9531 (set_attr "length_immediate" "1")
9532 (set_attr "mode" "<MODE>")])
;; SSE4.1 (non-VEX) variable blend for the SSEMODEF2P modes; emits
;; BLENDVPS/PD.  The selector (operand 3) uses the "Yz" constraint,
;; while operands 0-2 use *_not_xmm0 predicates
;; (NOTE(review): "Yz" is understood to be the xmm0-only register
;; class, matching the instruction's implicit xmm0 selector -- confirm
;; in constraints.md).
9534 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9535 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9537 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9538 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9539 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9542 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9543 [(set_attr "type" "ssemov")
9544 (set_attr "prefix_data16" "1")
9545 (set_attr "prefix_extra" "1")
9546 (set_attr "mode" "<MODE>")])
;; AVX dot product for the AVXMODEF2P modes; emits four-operand
;; VDPPS/PD.  Operand 3 is an 8-bit constant (0-255) printed as the
;; immediate.  The "%" in operand 1's constraint marks operands 1 and 2
;; as commutative.
9548 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9549 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9551 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9552 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9553 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9556 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9557 [(set_attr "type" "ssemul")
9558 (set_attr "prefix" "vex")
9559 (set_attr "prefix_extra" "1")
9560 (set_attr "length_immediate" "1")
9561 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 dot product of packed floats (dpp<ssemodesuffixf2c>).  Operand 1 is
;; tied to the destination and commutative with operand 2 ("%0"); operand 2
;; may be a register or memory; operand 3 is an immediate in 0..255.
9563 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9564 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9566 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9567 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9568 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9571 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9572 [(set_attr "type" "ssemul")
9573 (set_attr "prefix_data16" "1")
9574 (set_attr "prefix_extra" "1")
9575 (set_attr "length_immediate" "1")
9576 (set_attr "mode" "<MODE>")])
;; movntdqa: loads a V2DI value into an SSE register; operand 1 must be a
;; memory operand.  The load is wrapped in an unspec.  The "%v" in the
;; template together with prefix "maybe_vex" also covers the VEX spelling
;; (NOTE(review): presumably selected when AVX is enabled -- confirm).
9578 (define_insn "sse4_1_movntdqa"
9579 [(set (match_operand:V2DI 0 "register_operand" "=x")
9580 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9583 "%vmovntdqa\t{%1, %0|%0, %1}"
9584 [(set_attr "type" "ssemov")
9585 (set_attr "prefix_extra" "1")
9586 (set_attr "prefix" "maybe_vex")
9587 (set_attr "mode" "TI")])
;; AVX form of mpsadbw on V16QI: the destination is separate from the
;; sources, operand 1 is a register, operand 2 a register or memory, and
;; operand 3 an immediate in 0..255.
9589 (define_insn "*avx_mpsadbw"
9590 [(set (match_operand:V16QI 0 "register_operand" "=x")
9591 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9592 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9593 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9596 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9597 [(set_attr "type" "sselog1")
9598 (set_attr "prefix" "vex")
9599 (set_attr "prefix_extra" "1")
9600 (set_attr "length_immediate" "1")
9601 (set_attr "mode" "TI")])
;; SSE4.1 form of mpsadbw on V16QI.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 may be a register or memory, and operand 3 is
;; an immediate in 0..255.
9603 (define_insn "sse4_1_mpsadbw"
9604 [(set (match_operand:V16QI 0 "register_operand" "=x")
9605 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9606 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9607 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9610 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9611 [(set_attr "type" "sselog1")
9612 (set_attr "prefix_extra" "1")
9613 (set_attr "length_immediate" "1")
9614 (set_attr "mode" "TI")])
;; AVX form of packusdw: combines two V4SI inputs (operand 1 in a register,
;; operand 2 in a register or memory) into a V8HI result held in a register
;; separate from the inputs.
9616 (define_insn "*avx_packusdw"
9617 [(set (match_operand:V8HI 0 "register_operand" "=x")
9620 (match_operand:V4SI 1 "register_operand" "x"))
9622 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9624 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9625 [(set_attr "type" "sselog")
9626 (set_attr "prefix_extra" "1")
9627 (set_attr "prefix" "vex")
9628 (set_attr "mode" "TI")])
;; SSE4.1 form of packusdw: combines two V4SI inputs into a V8HI result.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
9630 (define_insn "sse4_1_packusdw"
9631 [(set (match_operand:V8HI 0 "register_operand" "=x")
9634 (match_operand:V4SI 1 "register_operand" "0"))
9636 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9638 "packusdw\t{%2, %0|%0, %2}"
9639 [(set_attr "type" "sselog")
9640 (set_attr "prefix_extra" "1")
9641 (set_attr "mode" "TI")])
;; AVX form of the variable byte blend on V16QI: emits the four-operand
;; vpblendvb.  Operands 1 and 3 are registers, operand 2 may be a register
;; or memory, and the destination is not tied to any input.
9643 (define_insn "*avx_pblendvb"
9644 [(set (match_operand:V16QI 0 "register_operand" "=x")
9645 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9646 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9647 (match_operand:V16QI 3 "register_operand" "x")]
9650 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9651 [(set_attr "type" "ssemov")
9652 (set_attr "prefix_extra" "1")
9653 (set_attr "length_immediate" "1")
9654 (set_attr "prefix" "vex")
9655 (set_attr "mode" "TI")])
;; SSE4.1 variable byte blend on V16QI.  Operand 1 is tied to the
;; destination; the destination, operand 1 and operand 2 use predicates that
;; exclude xmm0, and the selector (operand 3) uses the Yz constraint.
;; NOTE(review): Yz is presumably the xmm0-only register class -- confirm in
;; the i386 constraint definitions.
9657 (define_insn "sse4_1_pblendvb"
9658 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9659 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9660 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9661 (match_operand:V16QI 3 "register_operand" "Yz")]
9664 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9665 [(set_attr "type" "ssemov")
9666 (set_attr "prefix_extra" "1")
9667 (set_attr "mode" "TI")])
;; AVX form of the immediate-controlled word blend on V8HI (vpblendw).
;; Operand 2 (register or memory) and operand 1 (register) are the two
;; sources; operand 3 is the immediate mask in 0..255.
9669 (define_insn "*avx_pblendw"
9670 [(set (match_operand:V8HI 0 "register_operand" "=x")
9672 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9673 (match_operand:V8HI 1 "register_operand" "x")
9674 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9676 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9677 [(set_attr "type" "ssemov")
9678 (set_attr "prefix" "vex")
9679 (set_attr "prefix_extra" "1")
9680 (set_attr "length_immediate" "1")
9681 (set_attr "mode" "TI")])
;; SSE4.1 immediate-controlled word blend on V8HI (pblendw).  Operand 1 is
;; tied to the destination, operand 2 may be a register or memory, and
;; operand 3 is the immediate mask in 0..255.
9683 (define_insn "sse4_1_pblendw"
9684 [(set (match_operand:V8HI 0 "register_operand" "=x")
9686 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9687 (match_operand:V8HI 1 "register_operand" "0")
9688 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9690 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9691 [(set_attr "type" "ssemov")
9692 (set_attr "prefix_extra" "1")
9693 (set_attr "length_immediate" "1")
9694 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single source that
;; may be a register or memory.  The "%v" template prefix and the
;; "maybe_vex" attribute let one pattern cover the VEX spelling too.
9696 (define_insn "sse4_1_phminposuw"
9697 [(set (match_operand:V8HI 0 "register_operand" "=x")
9698 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9699 UNSPEC_PHMINPOSUW))]
9701 "%vphminposuw\t{%1, %0|%0, %1}"
9702 [(set_attr "type" "sselog1")
9703 (set_attr "prefix_extra" "1")
9704 (set_attr "prefix" "maybe_vex")
9705 (set_attr "mode" "TI")])
;; Sign-extending move pmovsxbw: V16QI register source, V8HI register
;; destination.  This is the named pattern and accepts only a register
;; source.
9707 (define_insn "sse4_1_extendv8qiv8hi2"
9708 [(set (match_operand:V8HI 0 "register_operand" "=x")
9711 (match_operand:V16QI 1 "register_operand" "x")
9712 (parallel [(const_int 0)
9721 "%vpmovsxbw\t{%1, %0|%0, %1}"
9722 [(set_attr "type" "ssemov")
9723 (set_attr "prefix_extra" "1")
9724 (set_attr "prefix" "maybe_vex")
9725 (set_attr "mode" "TI")])
;; Variant of sse4_1_extendv8qiv8hi2 whose source is a V8QI operand
;; (register or memory) wrapped in vec_duplicate:V16QI, so pmovsxbw can be
;; matched with a narrow, possibly in-memory source.
9727 (define_insn "*sse4_1_extendv8qiv8hi2"
9728 [(set (match_operand:V8HI 0 "register_operand" "=x")
9731 (vec_duplicate:V16QI
9732 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9733 (parallel [(const_int 0)
9742 "%vpmovsxbw\t{%1, %0|%0, %1}"
9743 [(set_attr "type" "ssemov")
9744 (set_attr "prefix_extra" "1")
9745 (set_attr "prefix" "maybe_vex")
9746 (set_attr "mode" "TI")])
;; Sign-extending move pmovsxbd: V16QI register source, V4SI register
;; destination.  Named pattern; register source only.
9748 (define_insn "sse4_1_extendv4qiv4si2"
9749 [(set (match_operand:V4SI 0 "register_operand" "=x")
9752 (match_operand:V16QI 1 "register_operand" "x")
9753 (parallel [(const_int 0)
9758 "%vpmovsxbd\t{%1, %0|%0, %1}"
9759 [(set_attr "type" "ssemov")
9760 (set_attr "prefix_extra" "1")
9761 (set_attr "prefix" "maybe_vex")
9762 (set_attr "mode" "TI")])
;; Variant of sse4_1_extendv4qiv4si2 whose source is a V4QI operand
;; (register or memory) wrapped in vec_duplicate:V16QI.
9764 (define_insn "*sse4_1_extendv4qiv4si2"
9765 [(set (match_operand:V4SI 0 "register_operand" "=x")
9768 (vec_duplicate:V16QI
9769 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9770 (parallel [(const_int 0)
9775 "%vpmovsxbd\t{%1, %0|%0, %1}"
9776 [(set_attr "type" "ssemov")
9777 (set_attr "prefix_extra" "1")
9778 (set_attr "prefix" "maybe_vex")
9779 (set_attr "mode" "TI")])
;; Sign-extending move pmovsxbq: V16QI register source, V2DI register
;; destination.  Named pattern; register source only.
9781 (define_insn "sse4_1_extendv2qiv2di2"
9782 [(set (match_operand:V2DI 0 "register_operand" "=x")
9785 (match_operand:V16QI 1 "register_operand" "x")
9786 (parallel [(const_int 0)
9789 "%vpmovsxbq\t{%1, %0|%0, %1}"
9790 [(set_attr "type" "ssemov")
9791 (set_attr "prefix_extra" "1")
9792 (set_attr "prefix" "maybe_vex")
9793 (set_attr "mode" "TI")])
;; Variant of sse4_1_extendv2qiv2di2 whose source is a V2QI operand
;; (register or memory) wrapped in vec_duplicate:V16QI.
9795 (define_insn "*sse4_1_extendv2qiv2di2"
9796 [(set (match_operand:V2DI 0 "register_operand" "=x")
9799 (vec_duplicate:V16QI
9800 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9801 (parallel [(const_int 0)
9804 "%vpmovsxbq\t{%1, %0|%0, %1}"
9805 [(set_attr "type" "ssemov")
9806 (set_attr "prefix_extra" "1")
9807 (set_attr "prefix" "maybe_vex")
9808 (set_attr "mode" "TI")])
;; Sign-extending move pmovsxwd: V8HI register source, V4SI register
;; destination.  Named pattern; register source only.
9810 (define_insn "sse4_1_extendv4hiv4si2"
9811 [(set (match_operand:V4SI 0 "register_operand" "=x")
9814 (match_operand:V8HI 1 "register_operand" "x")
9815 (parallel [(const_int 0)
9820 "%vpmovsxwd\t{%1, %0|%0, %1}"
9821 [(set_attr "type" "ssemov")
9822 (set_attr "prefix_extra" "1")
9823 (set_attr "prefix" "maybe_vex")
9824 (set_attr "mode" "TI")])
;; Variant of sse4_1_extendv4hiv4si2 whose source is a narrow operand that
;; may live in memory.  pmovsxwd produces four SI elements from four HI
;; elements, so the narrow operand is V4HI -- the same mode used by
;; *sse4_1_zero_extendv4hiv4si2 and implied by this pattern's name.  A V2HI
;; operand would describe only half of the data the instruction reads.
9826 (define_insn "*sse4_1_extendv4hiv4si2"
9827 [(set (match_operand:V4SI 0 "register_operand" "=x")
9831 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9832 (parallel [(const_int 0)
9837 "%vpmovsxwd\t{%1, %0|%0, %1}"
9838 [(set_attr "type" "ssemov")
9839 (set_attr "prefix_extra" "1")
9840 (set_attr "prefix" "maybe_vex")
9841 (set_attr "mode" "TI")])
;; Sign-extending move pmovsxwq: V8HI register source, V2DI register
;; destination.  Named pattern; register source only.
9843 (define_insn "sse4_1_extendv2hiv2di2"
9844 [(set (match_operand:V2DI 0 "register_operand" "=x")
9847 (match_operand:V8HI 1 "register_operand" "x")
9848 (parallel [(const_int 0)
9851 "%vpmovsxwq\t{%1, %0|%0, %1}"
9852 [(set_attr "type" "ssemov")
9853 (set_attr "prefix_extra" "1")
9854 (set_attr "prefix" "maybe_vex")
9855 (set_attr "mode" "TI")])
;; Variant of sse4_1_extendv2hiv2di2 whose source is a narrow operand that
;; may live in memory.  pmovsxwq produces two DI elements from two HI
;; elements, so the narrow operand is V2HI -- the same mode used by
;; *sse4_1_zero_extendv2hiv2di2 and implied by this pattern's name.  The
;; full-width V8HI mode belongs only on the register-source named pattern.
9857 (define_insn "*sse4_1_extendv2hiv2di2"
9858 [(set (match_operand:V2DI 0 "register_operand" "=x")
9862 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9863 (parallel [(const_int 0)
9866 "%vpmovsxwq\t{%1, %0|%0, %1}"
9867 [(set_attr "type" "ssemov")
9868 (set_attr "prefix_extra" "1")
9869 (set_attr "prefix" "maybe_vex")
9870 (set_attr "mode" "TI")])
;; Sign-extending move pmovsxdq: V4SI register source, V2DI register
;; destination.  Named pattern; register source only.
9872 (define_insn "sse4_1_extendv2siv2di2"
9873 [(set (match_operand:V2DI 0 "register_operand" "=x")
9876 (match_operand:V4SI 1 "register_operand" "x")
9877 (parallel [(const_int 0)
9880 "%vpmovsxdq\t{%1, %0|%0, %1}"
9881 [(set_attr "type" "ssemov")
9882 (set_attr "prefix_extra" "1")
9883 (set_attr "prefix" "maybe_vex")
9884 (set_attr "mode" "TI")])
;; Variant of sse4_1_extendv2siv2di2 whose source is a V2SI operand that may
;; be a register or memory.
9886 (define_insn "*sse4_1_extendv2siv2di2"
9887 [(set (match_operand:V2DI 0 "register_operand" "=x")
9891 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9892 (parallel [(const_int 0)
9895 "%vpmovsxdq\t{%1, %0|%0, %1}"
9896 [(set_attr "type" "ssemov")
9897 (set_attr "prefix_extra" "1")
9898 (set_attr "prefix" "maybe_vex")
9899 (set_attr "mode" "TI")])
;; Zero-extending move pmovzxbw: V16QI register source, V8HI register
;; destination.  Named pattern; register source only.
9901 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9902 [(set (match_operand:V8HI 0 "register_operand" "=x")
9905 (match_operand:V16QI 1 "register_operand" "x")
9906 (parallel [(const_int 0)
9915 "%vpmovzxbw\t{%1, %0|%0, %1}"
9916 [(set_attr "type" "ssemov")
9917 (set_attr "prefix_extra" "1")
9918 (set_attr "prefix" "maybe_vex")
9919 (set_attr "mode" "TI")])
;; Variant of sse4_1_zero_extendv8qiv8hi2 whose source is a V8QI operand
;; (register or memory) wrapped in vec_duplicate:V16QI.
9921 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9922 [(set (match_operand:V8HI 0 "register_operand" "=x")
9925 (vec_duplicate:V16QI
9926 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9927 (parallel [(const_int 0)
9936 "%vpmovzxbw\t{%1, %0|%0, %1}"
9937 [(set_attr "type" "ssemov")
9938 (set_attr "prefix_extra" "1")
9939 (set_attr "prefix" "maybe_vex")
9940 (set_attr "mode" "TI")])
;; Zero-extending move pmovzxbd: V16QI register source, V4SI register
;; destination.  Named pattern; register source only.
9942 (define_insn "sse4_1_zero_extendv4qiv4si2"
9943 [(set (match_operand:V4SI 0 "register_operand" "=x")
9946 (match_operand:V16QI 1 "register_operand" "x")
9947 (parallel [(const_int 0)
9952 "%vpmovzxbd\t{%1, %0|%0, %1}"
9953 [(set_attr "type" "ssemov")
9954 (set_attr "prefix_extra" "1")
9955 (set_attr "prefix" "maybe_vex")
9956 (set_attr "mode" "TI")])
;; Variant of sse4_1_zero_extendv4qiv4si2 whose source is a V4QI operand
;; (register or memory) wrapped in vec_duplicate:V16QI.
9958 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9959 [(set (match_operand:V4SI 0 "register_operand" "=x")
9962 (vec_duplicate:V16QI
9963 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9964 (parallel [(const_int 0)
9969 "%vpmovzxbd\t{%1, %0|%0, %1}"
9970 [(set_attr "type" "ssemov")
9971 (set_attr "prefix_extra" "1")
9972 (set_attr "prefix" "maybe_vex")
9973 (set_attr "mode" "TI")])
;; Zero-extending move pmovzxbq: V16QI register source, V2DI register
;; destination.  Named pattern; register source only.
9975 (define_insn "sse4_1_zero_extendv2qiv2di2"
9976 [(set (match_operand:V2DI 0 "register_operand" "=x")
9979 (match_operand:V16QI 1 "register_operand" "x")
9980 (parallel [(const_int 0)
9983 "%vpmovzxbq\t{%1, %0|%0, %1}"
9984 [(set_attr "type" "ssemov")
9985 (set_attr "prefix_extra" "1")
9986 (set_attr "prefix" "maybe_vex")
9987 (set_attr "mode" "TI")])
;; Variant of sse4_1_zero_extendv2qiv2di2 whose source is a V2QI operand
;; (register or memory) wrapped in vec_duplicate:V16QI.
9989 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9990 [(set (match_operand:V2DI 0 "register_operand" "=x")
9993 (vec_duplicate:V16QI
9994 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9995 (parallel [(const_int 0)
9998 "%vpmovzxbq\t{%1, %0|%0, %1}"
9999 [(set_attr "type" "ssemov")
10000 (set_attr "prefix_extra" "1")
10001 (set_attr "prefix" "maybe_vex")
10002 (set_attr "mode" "TI")])
;; Zero-extending move pmovzxwd: V8HI register source, V4SI register
;; destination.  Named pattern; register source only.
10004 (define_insn "sse4_1_zero_extendv4hiv4si2"
10005 [(set (match_operand:V4SI 0 "register_operand" "=x")
10008 (match_operand:V8HI 1 "register_operand" "x")
10009 (parallel [(const_int 0)
10012 (const_int 3)]))))]
10014 "%vpmovzxwd\t{%1, %0|%0, %1}"
10015 [(set_attr "type" "ssemov")
10016 (set_attr "prefix_extra" "1")
10017 (set_attr "prefix" "maybe_vex")
10018 (set_attr "mode" "TI")])
;; Variant of sse4_1_zero_extendv4hiv4si2 whose source is a V4HI operand
;; (register or memory) wrapped in vec_duplicate:V8HI.
10020 (define_insn "*sse4_1_zero_extendv4hiv4si2"
10021 [(set (match_operand:V4SI 0 "register_operand" "=x")
10024 (vec_duplicate:V8HI
10025 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
10026 (parallel [(const_int 0)
10029 (const_int 3)]))))]
10031 "%vpmovzxwd\t{%1, %0|%0, %1}"
10032 [(set_attr "type" "ssemov")
10033 (set_attr "prefix_extra" "1")
10034 (set_attr "prefix" "maybe_vex")
10035 (set_attr "mode" "TI")])
;; Zero-extending move pmovzxwq: V8HI register source (elements 0 and 1
;; selected), V2DI register destination.  Named pattern; register source
;; only.
10037 (define_insn "sse4_1_zero_extendv2hiv2di2"
10038 [(set (match_operand:V2DI 0 "register_operand" "=x")
10041 (match_operand:V8HI 1 "register_operand" "x")
10042 (parallel [(const_int 0)
10043 (const_int 1)]))))]
10045 "%vpmovzxwq\t{%1, %0|%0, %1}"
10046 [(set_attr "type" "ssemov")
10047 (set_attr "prefix_extra" "1")
10048 (set_attr "prefix" "maybe_vex")
10049 (set_attr "mode" "TI")])
;; Variant of sse4_1_zero_extendv2hiv2di2 whose source is a V2HI operand
;; (register or memory) wrapped in vec_duplicate:V8HI; elements 0 and 1 are
;; selected.
10051 (define_insn "*sse4_1_zero_extendv2hiv2di2"
10052 [(set (match_operand:V2DI 0 "register_operand" "=x")
10055 (vec_duplicate:V8HI
10056 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
10057 (parallel [(const_int 0)
10058 (const_int 1)]))))]
10060 "%vpmovzxwq\t{%1, %0|%0, %1}"
10061 [(set_attr "type" "ssemov")
10062 (set_attr "prefix_extra" "1")
10063 (set_attr "prefix" "maybe_vex")
10064 (set_attr "mode" "TI")])
;; Zero-extending move pmovzxdq: V4SI register source (elements 0 and 1
;; selected), V2DI register destination.  Named pattern; register source
;; only.
10066 (define_insn "sse4_1_zero_extendv2siv2di2"
10067 [(set (match_operand:V2DI 0 "register_operand" "=x")
10070 (match_operand:V4SI 1 "register_operand" "x")
10071 (parallel [(const_int 0)
10072 (const_int 1)]))))]
10074 "%vpmovzxdq\t{%1, %0|%0, %1}"
10075 [(set_attr "type" "ssemov")
10076 (set_attr "prefix_extra" "1")
10077 (set_attr "prefix" "maybe_vex")
10078 (set_attr "mode" "TI")])
;; Variant of sse4_1_zero_extendv2siv2di2 whose source is a V2SI operand
;; (register or memory) wrapped in vec_duplicate:V4SI; elements 0 and 1 are
;; selected.
10080 (define_insn "*sse4_1_zero_extendv2siv2di2"
10081 [(set (match_operand:V2DI 0 "register_operand" "=x")
10084 (vec_duplicate:V4SI
10085 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
10086 (parallel [(const_int 0)
10087 (const_int 1)]))))]
10089 "%vpmovzxdq\t{%1, %0|%0, %1}"
10090 [(set_attr "type" "ssemov")
10091 (set_attr "prefix_extra" "1")
10092 (set_attr "prefix" "maybe_vex")
10093 (set_attr "mode" "TI")])
10095 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
10096 ;; setting FLAGS_REG, but they are not really compare instructions.
;; Sets FLAGS_REG (mode CC) from an unspec of two packed floating-point
;; vectors and emits vtestp<avxmodesuffixf2c>.  Operand 0 must be a
;; register; operand 1 may be a register or memory.  There is no register
;; result, only the flags.
10097 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
10098 [(set (reg:CC FLAGS_REG)
10099 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
10100 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
10103 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
10104 [(set_attr "type" "ssecomi")
10105 (set_attr "prefix_extra" "1")
10106 (set_attr "prefix" "vex")
10107 (set_attr "mode" "<MODE>")])
10109 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
10110 ;; but it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (mode CC) from an unspec of two V4DI
;; operands.  Operand 0 must be a register; operand 1 may be a register or
;; memory.  The insn mode is OI and the encoding is always VEX.
10111 (define_insn "avx_ptest256"
10112 [(set (reg:CC FLAGS_REG)
10113 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
10114 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
10117 "vptest\t{%1, %0|%0, %1}"
10118 [(set_attr "type" "ssecomi")
10119 (set_attr "prefix_extra" "1")
10120 (set_attr "prefix" "vex")
10121 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (mode CC) from an unspec of two V2DI
;; operands.  Operand 0 must be a register; operand 1 may be a register or
;; memory.  "%v" plus prefix "maybe_vex" covers the VEX spelling as well.
10123 (define_insn "sse4_1_ptest"
10124 [(set (reg:CC FLAGS_REG)
10125 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
10126 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10129 "%vptest\t{%1, %0|%0, %1}"
10130 [(set_attr "type" "ssecomi")
10131 (set_attr "prefix_extra" "1")
10132 (set_attr "prefix" "maybe_vex")
10133 (set_attr "mode" "TI")])
;; 256-bit packed rounding: emits vroundp<avxmodesuffixf2c> on a source that
;; may be a register or memory.  Operand 2 is the immediate control,
;; restricted to 0..15 by const_0_to_15_operand.
10135 (define_insn "avx_roundp<avxmodesuffixf2c>256"
10136 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
10137 (unspec:AVX256MODEF2P
10138 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
10139 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10142 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
10143 [(set_attr "type" "ssecvt")
10144 (set_attr "prefix_extra" "1")
10145 (set_attr "length_immediate" "1")
10146 (set_attr "prefix" "vex")
10147 (set_attr "mode" "<MODE>")])
;; 128-bit packed rounding: emits roundp<ssemodesuffixf2c> (or its VEX
;; spelling via "%v") on a source that may be a register or memory.
;; Operand 2 is the immediate control, restricted to 0..15.
10149 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
10150 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10152 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
10153 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10156 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
10157 [(set_attr "type" "ssecvt")
10158 (set_attr "prefix_data16" "1")
10159 (set_attr "prefix_extra" "1")
10160 (set_attr "length_immediate" "1")
10161 (set_attr "prefix" "maybe_vex")
10162 (set_attr "mode" "<MODE>")])
;; AVX scalar rounding (vrounds<ssemodesuffixf2c>), written as a vec_merge
;; of the rounded operand 2 with operand 1.  All vector operands are
;; registers and the destination is not tied to an input; operand 3 is the
;; immediate control in 0..15.
10164 (define_insn "*avx_rounds<ssemodesuffixf2c>"
10165 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10166 (vec_merge:SSEMODEF2P
10168 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10169 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10171 (match_operand:SSEMODEF2P 1 "register_operand" "x")
10174 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10175 [(set_attr "type" "ssecvt")
10176 (set_attr "prefix_extra" "1")
10177 (set_attr "length_immediate" "1")
10178 (set_attr "prefix" "vex")
10179 (set_attr "mode" "<MODE>")])
;; SSE4.1 scalar rounding (rounds<ssemodesuffixf2c>), written as a vec_merge
;; of the rounded operand 2 with operand 1.  Operand 1 is tied to the
;; destination, so only operand 2 and the immediate control (operand 3,
;; 0..15) appear as sources in the template.
10181 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
10182 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10183 (vec_merge:SSEMODEF2P
10185 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10186 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10188 (match_operand:SSEMODEF2P 1 "register_operand" "0")
10191 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
10192 [(set_attr "type" "ssecvt")
10193 (set_attr "prefix_data16" "1")
10194 (set_attr "prefix_extra" "1")
10195 (set_attr "length_immediate" "1")
10196 (set_attr "mode" "<MODE>")])
10198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10200 ;; Intel SSE4.2 string/text processing instructions
10202 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern: one comparison that sets an SI result in the c
;; register (operand 0), a V16QI result in the Yz register (operand 1) and
;; FLAGS_REG.  It is split while pseudos can still be created.  The split
;; code uses the REG_UNUSED notes on curr_insn to work out which of the
;; three results are still needed (locals ecx, xmm0, flags) and emits
;; gen_sse4_2_pcmpestri, gen_sse4_2_pcmpestrm and/or
;; gen_sse4_2_pcmpestr_cconly; the cconly form is emitted only when the
;; flags are needed and neither register result is.
10204 (define_insn_and_split "sse4_2_pcmpestr"
10205 [(set (match_operand:SI 0 "register_operand" "=c,c")
10207 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10208 (match_operand:SI 3 "register_operand" "a,a")
10209 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10210 (match_operand:SI 5 "register_operand" "d,d")
10211 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10213 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10221 (set (reg:CC FLAGS_REG)
10230 && can_create_pseudo_p ()"
10235 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10236 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10237 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10240 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10241 operands[3], operands[4],
10242 operands[5], operands[6]));
10244 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10245 operands[3], operands[4],
10246 operands[5], operands[6]));
10247 if (flags && !(ecx || xmm0))
10248 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10249 operands[2], operands[3],
10250 operands[4], operands[5],
10254 [(set_attr "type" "sselog")
10255 (set_attr "prefix_data16" "1")
10256 (set_attr "prefix_extra" "1")
10257 (set_attr "length_immediate" "1")
10258 (set_attr "memory" "none,load")
10259 (set_attr "mode" "TI")])
;; pcmpestri: string compare that delivers its SI result in the c register
;; (operand 0) and also sets FLAGS_REG.  Operands 2 and 4 are SI values
;; pinned to the a and d registers (the explicit string lengths, per the
;; instruction definition); operand 3 may be a register or memory, which the
;; "memory" attribute records per alternative; operand 5 is the immediate.
10261 (define_insn "sse4_2_pcmpestri"
10262 [(set (match_operand:SI 0 "register_operand" "=c,c")
10264 [(match_operand:V16QI 1 "register_operand" "x,x")
10265 (match_operand:SI 2 "register_operand" "a,a")
10266 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10267 (match_operand:SI 4 "register_operand" "d,d")
10268 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10270 (set (reg:CC FLAGS_REG)
10279 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10280 [(set_attr "type" "sselog")
10281 (set_attr "prefix_data16" "1")
10282 (set_attr "prefix_extra" "1")
10283 (set_attr "prefix" "maybe_vex")
10284 (set_attr "length_immediate" "1")
10285 (set_attr "memory" "none,load")
10286 (set_attr "mode" "TI")])
;; pcmpestrm: string compare that delivers its V16QI result in the Yz
;; register (operand 0) and also sets FLAGS_REG.  Operands 2 and 4 are SI
;; values pinned to the a and d registers; operand 3 may be a register or
;; memory; operand 5 is the immediate.
;; NOTE(review): Yz is presumably the xmm0-only class -- confirm.
10288 (define_insn "sse4_2_pcmpestrm"
10289 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10291 [(match_operand:V16QI 1 "register_operand" "x,x")
10292 (match_operand:SI 2 "register_operand" "a,a")
10293 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10294 (match_operand:SI 4 "register_operand" "d,d")
10295 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10297 (set (reg:CC FLAGS_REG)
10306 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10307 [(set_attr "type" "sselog")
10308 (set_attr "prefix_data16" "1")
10309 (set_attr "prefix_extra" "1")
10310 (set_attr "length_immediate" "1")
10311 (set_attr "prefix" "maybe_vex")
10312 (set_attr "memory" "none,load")
10313 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the only set is FLAGS_REG; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0-1 clobber a Yz register and print
;; pcmpestrm; alternatives 2-3 clobber the c register and print pcmpestri.
;; Within each pair operand 4 is a register, then memory, as recorded by
;; the "memory" attribute.
10315 (define_insn "sse4_2_pcmpestr_cconly"
10316 [(set (reg:CC FLAGS_REG)
10318 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10319 (match_operand:SI 3 "register_operand" "a,a,a,a")
10320 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10321 (match_operand:SI 5 "register_operand" "d,d,d,d")
10322 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10324 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10325 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10328 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10329 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10330 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10331 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10332 [(set_attr "type" "sselog")
10333 (set_attr "prefix_data16" "1")
10334 (set_attr "prefix_extra" "1")
10335 (set_attr "length_immediate" "1")
10336 (set_attr "memory" "none,load,none,load")
10337 (set_attr "prefix" "maybe_vex")
10338 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern: one comparison that sets an SI result in the c
;; register (operand 0), a V16QI result in the Yz register (operand 1) and
;; FLAGS_REG.  It is split while pseudos can still be created.  The split
;; code uses the REG_UNUSED notes on curr_insn to work out which results are
;; still needed (locals ecx, xmm0, flags) and emits gen_sse4_2_pcmpistri,
;; gen_sse4_2_pcmpistrm and/or gen_sse4_2_pcmpistr_cconly; the cconly form
;; is emitted only when the flags are needed and neither register result is.
10340 (define_insn_and_split "sse4_2_pcmpistr"
10341 [(set (match_operand:SI 0 "register_operand" "=c,c")
10343 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10344 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10345 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10347 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10353 (set (reg:CC FLAGS_REG)
10360 && can_create_pseudo_p ()"
10365 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10366 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10367 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10370 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10371 operands[3], operands[4]));
10373 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10374 operands[3], operands[4]));
10375 if (flags && !(ecx || xmm0))
10376 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10377 operands[2], operands[3],
10381 [(set_attr "type" "sselog")
10382 (set_attr "prefix_data16" "1")
10383 (set_attr "prefix_extra" "1")
10384 (set_attr "length_immediate" "1")
10385 (set_attr "memory" "none,load")
10386 (set_attr "mode" "TI")])
;; pcmpistri: string compare that delivers its SI result in the c register
;; (operand 0) and also sets FLAGS_REG.  Operand 1 is a register, operand 2
;; a register or memory (see the "memory" attribute), and operand 3 the
;; immediate in 0..255.
10388 (define_insn "sse4_2_pcmpistri"
10389 [(set (match_operand:SI 0 "register_operand" "=c,c")
10391 [(match_operand:V16QI 1 "register_operand" "x,x")
10392 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10393 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10395 (set (reg:CC FLAGS_REG)
10402 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10403 [(set_attr "type" "sselog")
10404 (set_attr "prefix_data16" "1")
10405 (set_attr "prefix_extra" "1")
10406 (set_attr "length_immediate" "1")
10407 (set_attr "prefix" "maybe_vex")
10408 (set_attr "memory" "none,load")
10409 (set_attr "mode" "TI")])
;; pcmpistrm: string compare that delivers its V16QI result in the Yz
;; register (operand 0) and also sets FLAGS_REG.  Operand 1 is a register,
;; operand 2 a register or memory, and operand 3 the immediate in 0..255.
;; NOTE(review): Yz is presumably the xmm0-only class -- confirm.
10411 (define_insn "sse4_2_pcmpistrm"
10412 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10414 [(match_operand:V16QI 1 "register_operand" "x,x")
10415 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10416 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10418 (set (reg:CC FLAGS_REG)
10425 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10426 [(set_attr "type" "sselog")
10427 (set_attr "prefix_data16" "1")
10428 (set_attr "prefix_extra" "1")
10429 (set_attr "length_immediate" "1")
10430 (set_attr "prefix" "maybe_vex")
10431 (set_attr "memory" "none,load")
10432 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the only set is FLAGS_REG; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0-1 clobber a Yz register and print
;; pcmpistrm; alternatives 2-3 clobber the c register and print pcmpistri.
;; Within each pair operand 3 is a register, then memory, as recorded by
;; the "memory" attribute.
10434 (define_insn "sse4_2_pcmpistr_cconly"
10435 [(set (reg:CC FLAGS_REG)
10437 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10438 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10439 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10441 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10442 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10445 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10446 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10447 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10448 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10449 [(set_attr "type" "sselog")
10450 (set_attr "prefix_data16" "1")
10451 (set_attr "prefix_extra" "1")
10452 (set_attr "length_immediate" "1")
10453 (set_attr "memory" "none,load,none,load")
10454 (set_attr "prefix" "maybe_vex")
10455 (set_attr "mode" "TI")])
10457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10459 ;; XOP instructions
10461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10463 ;; XOP parallel integer multiply/add instructions.
10464 ;; Note that the instruction does not allow the value being added to be a
10465 ;; memory operand.  However, pretending via the nonimmediate_operand predicate
10466 ;; that it does, and splitting it later, allows the following to be recognized:
10467 ;; a[i] = b[i] * c[i] + d[i];
;; XOP multiply/accumulate on V8HI (vpmacsww).  Operands 1 and 2 are the
;; commutative multiplicands, with at most one of them in memory per
;; alternative; operand 3 is the register addend.  The second alternative
;; swaps %1 and %2 in the template so the memory operand is printed in the
;; same source position as in the first.  The insn is enabled by TARGET_XOP
;; and ix86_fma4_valid_op_p.
;; NOTE(review): the numeric argument 2 passed to ix86_fma4_valid_op_p
;; presumably is the number of memory operands tolerated -- confirm.
10468 (define_insn "xop_pmacsww"
10469 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10472 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10473 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x"))
10474 (match_operand:V8HI 3 "register_operand" "x,x")))]
10475 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
10477 vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10478 vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10479 [(set_attr "type" "ssemuladd")
10480 (set_attr "mode" "TI")])
10482 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The split applies only when ix86_fma4_valid_op_p rejects the operands
;; with argument 1 but accepts them with argument 2, and the destination is
;; not mentioned in any of the three inputs.  It calls
;; ix86_expand_fma4_multiple_memory on the operands and then emits
;; gen_xop_pmacsww.
10484 [(set (match_operand:V8HI 0 "register_operand" "")
10486 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
10487 (match_operand:V8HI 2 "nonimmediate_operand" ""))
10488 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
10490 && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
10491 && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
10492 && !reg_mentioned_p (operands[0], operands[1])
10493 && !reg_mentioned_p (operands[0], operands[2])
10494 && !reg_mentioned_p (operands[0], operands[3])"
10497 ix86_expand_fma4_multiple_memory (operands, 4, V8HImode);
10498 emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2],
;; XOP multiply/accumulate on V8HI emitting vpmacssww.  Operands 1 and 2 are
;; the commutative multiplicands (mult:V8HI), with at most one in memory per
;; alternative; operand 3 is the register addend.  The second alternative
;; swaps %1 and %2 in the template.  Enabled by TARGET_XOP and
;; ix86_fma4_valid_op_p.
10503 (define_insn "xop_pmacssww"
10504 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10506 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10507 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x"))
10508 (match_operand:V8HI 3 "register_operand" "x,x")))]
10509 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10511 vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10512 vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10513 [(set_attr "type" "ssemuladd")
10514 (set_attr "mode" "TI")])
10516 ;; Note that the instruction does not allow the value being added to be a
10517 ;; memory operand.  However, pretending via the nonimmediate_operand predicate
10518 ;; that it does, and splitting it later, allows the following to be recognized:
10519 ;; a[i] = b[i] * c[i] + d[i];
;; XOP multiply/accumulate on V4SI (vpmacsdd).  Operands 1 and 2 are the
;; commutative multiplicands, with at most one of them in memory per
;; alternative; operand 3 is the register addend.  The second alternative
;; swaps %1 and %2 in the template so the memory operand is printed in the
;; same source position as in the first.  Enabled by TARGET_XOP and
;; ix86_fma4_valid_op_p.
10520 (define_insn "xop_pmacsdd"
10521 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10524 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10525 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x"))
10526 (match_operand:V4SI 3 "register_operand" "x,x")))]
10527 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
10529 vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10530 vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10531 [(set_attr "type" "ssemuladd")
10532 (set_attr "mode" "TI")])
10534 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The split applies only when ix86_fma4_valid_op_p rejects the operands
;; with argument 1 but accepts them with argument 2, and the destination is
;; not mentioned in any of the three inputs.  It calls
;; ix86_expand_fma4_multiple_memory on the operands and then emits
;; gen_xop_pmacsdd.
10536 [(set (match_operand:V4SI 0 "register_operand" "")
10538 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
10539 (match_operand:V4SI 2 "nonimmediate_operand" ""))
10540 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
10542 && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
10543 && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
10544 && !reg_mentioned_p (operands[0], operands[1])
10545 && !reg_mentioned_p (operands[0], operands[2])
10546 && !reg_mentioned_p (operands[0], operands[3])"
10549 ix86_expand_fma4_multiple_memory (operands, 4, V4SImode);
10550 emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2],
;; XOP multiply/accumulate on V4SI emitting vpmacssdd.  Operands 1 and 2 are
;; the commutative multiplicands (mult:V4SI), with at most one in memory per
;; alternative; operand 3 is the register addend.  The second alternative
;; swaps %1 and %2 in the template.  Enabled by TARGET_XOP and
;; ix86_fma4_valid_op_p.
10555 (define_insn "xop_pmacssdd"
10556 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10558 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10559 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x"))
10560 (match_operand:V4SI 3 "register_operand" "x,x")))]
10561 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10563 vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10564 vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10565 [(set_attr "type" "ssemuladd")
10566 (set_attr "mode" "TI")])
;; XOP vpmacssdql: V2DI result from V4SI multiplicands (operands 1 and 2,
;; commutative, at most one in memory per alternative) plus a V2DI register
;; addend (operand 3).  The element selectors on both multiplicands start at
;; index 1.  The second alternative swaps %1 and %2 in the template.
10568 (define_insn "xop_pmacssdql"
10569 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10574 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10575 (parallel [(const_int 1)
10578 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10579 (parallel [(const_int 1)
10581 (match_operand:V2DI 3 "register_operand" "x,x")))]
10582 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10584 vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10585 vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10586 [(set_attr "type" "ssemuladd")
10587 (set_attr "mode" "TI")])
;; XOP vpmacssdqh: V2DI result from V4SI multiplicands (operands 1 and 2,
;; commutative, at most one in memory per alternative) plus a V2DI register
;; addend (operand 3).  The element selectors on both multiplicands start at
;; index 0.  The second alternative swaps %1 and %2 in the template.
10589 (define_insn "xop_pmacssdqh"
10590 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10595 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10596 (parallel [(const_int 0)
10600 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10601 (parallel [(const_int 0)
10603 (match_operand:V2DI 3 "register_operand" "x,x")))]
10604 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10606 vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10607 vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10608 [(set_attr "type" "ssemuladd")
10609 (set_attr "mode" "TI")])
;; XOP vpmacsdql: V2DI result from V4SI multiplicands (operands 1 and 2,
;; commutative, at most one in memory per alternative) plus a V2DI register
;; addend (operand 3).  The element selectors on both multiplicands start at
;; index 1.  The second alternative swaps %1 and %2 in the template.
10611 (define_insn "xop_pmacsdql"
10612 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10617 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10618 (parallel [(const_int 1)
10622 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10623 (parallel [(const_int 1)
10625 (match_operand:V2DI 3 "register_operand" "x,x")))]
10626 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10628 vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10629 vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10630 [(set_attr "type" "ssemuladd")
10631 (set_attr "mode" "TI")])
;; Variant of xop_pmacsdql whose addend (operand 3) is a memory operand.
;; The destination is earlyclobber ("=&x").  The pattern is split once
;; reload has completed, or earlier if the destination is not mentioned in
;; operand 1 or operand 2; the replacement sequence begins by setting
;; operand 0.
10633 (define_insn_and_split "*xop_pmacsdql_mem"
10634 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x")
10639 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10640 (parallel [(const_int 1)
10644 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10645 (parallel [(const_int 1)
10647 (match_operand:V2DI 3 "memory_operand" "m,m")))]
10648 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
10650 "&& (reload_completed
10651 || (!reg_mentioned_p (operands[0], operands[1])
10652 && !reg_mentioned_p (operands[0], operands[2])))"
10653 [(set (match_dup 0)
10661 (parallel [(const_int 1)
10666 (parallel [(const_int 1)
10670 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10671 ;; fake it with a multiply/add. In general, we expect the define_split to
10672 ;; occur before register allocation, so we have to handle the corner case where
10673 ;; the target is the same as operands 1/2
;; V2DI product of elements 1 and 3 of two V4SI operands.  The destination
;; is earlyclobber; operand 1 is a register and operand 2 a register or
;; memory.  The pattern is split once reload has completed, or earlier if
;; the destination is not mentioned in operand 1 or 2.  The preparation code
;; sets operands[3] to the V2DI zero constant (presumably the addend of the
;; multiply/add it is rewritten into -- confirm against the split template).
10674 (define_insn_and_split "xop_mulv2div2di3_low"
10675 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10679 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10680 (parallel [(const_int 1)
10684 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10685 (parallel [(const_int 1)
10686 (const_int 3)])))))]
10689 "&& (reload_completed
10690 || (!reg_mentioned_p (operands[0], operands[1])
10691 && !reg_mentioned_p (operands[0], operands[2])))"
10692 [(set (match_dup 0)
10700 (parallel [(const_int 1)
10705 (parallel [(const_int 1)
10709 operands[3] = CONST0_RTX (V2DImode);
10711 [(set_attr "type" "ssemuladd")
10712 (set_attr "mode" "TI")])
;; XOP vpmacsdqh pattern.  Same operand layout as xop_pmacsdql (two
;; commutative V4SI inputs, a V2DI register input, V2DI result), but the
;; element selection on the V4SI inputs starts at index 0.  The second
;; alternative takes operand 1 from memory and prints operands 1 and 2
;; in swapped order.
10714 (define_insn "xop_pmacsdqh"
10715 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10720 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10721 (parallel [(const_int 0)
10725 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10726 (parallel [(const_int 0)
10728 (match_operand:V2DI 3 "register_operand" "x,x")))]
10729 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10731 vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10732 vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10733 [(set_attr "type" "ssemuladd")
10734 (set_attr "mode" "TI")])
;; Variant of xop_pmacsdqh whose operand 3 is a memory operand.  The
;; destination is earlyclobber ("&").  The pattern is split once reload
;; has completed, or earlier when operand 0 is not mentioned in operand 1
;; or operand 2.
;; NOTE(review): presumably the split loads operand 3 into operand 0
;; before the multiply/add -- confirm against the full split template.
10736 (define_insn_and_split "*xop_pmacsdqh_mem"
10737 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x")
10742 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10743 (parallel [(const_int 0)
10747 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10748 (parallel [(const_int 0)
10750 (match_operand:V2DI 3 "memory_operand" "m,m")))]
10751 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
10753 "&& (reload_completed
10754 || (!reg_mentioned_p (operands[0], operands[1])
10755 && !reg_mentioned_p (operands[0], operands[2])))"
10756 [(set (match_dup 0)
10764 (parallel [(const_int 0)
10769 (parallel [(const_int 0)
10773 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10774 ;; fake it with a multiply/add. In general, we expect the define_split to
10775 ;; occur before register allocation, so we have to handle the corner case where
10776 ;; the target is the same as either operands[1] or operands[2]
;; V2DI product of elements 0 and 2 of the two V4SI inputs.  The split
;; fires after reload, or earlier when operand 0 does not overlap
;; operands 1 and 2, and supplies an all-zero V2DI vector as operand 3 of
;; the replacement pattern.
10777 (define_insn_and_split "xop_mulv2div2di3_high"
10778 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10782 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10783 (parallel [(const_int 0)
10787 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10788 (parallel [(const_int 0)
10789 (const_int 2)])))))]
10792 "&& (reload_completed
10793 || (!reg_mentioned_p (operands[0], operands[1])
10794 && !reg_mentioned_p (operands[0], operands[2])))"
10795 [(set (match_dup 0)
10803 (parallel [(const_int 0)
10808 (parallel [(const_int 0)
10812 operands[3] = CONST0_RTX (V2DImode);
10814 [(set_attr "type" "ssemuladd")
10815 (set_attr "mode" "TI")])
10817 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; XOP vpmacsswd pattern: two commutative V8HI inputs (element selection
;; starting at index 1), a V4SI register input as operand 3, V4SI result.
;; The second alternative takes operand 1 from memory and prints
;; operands 1 and 2 in swapped order.
;; NOTE(review): the extra "s" presumably denotes the saturating form --
;; confirm against the XOP instruction reference.
10818 (define_insn "xop_pmacsswd"
10819 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10824 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10825 (parallel [(const_int 1)
10831 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10832 (parallel [(const_int 1)
10836 (match_operand:V4SI 3 "register_operand" "x,x")))]
10837 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10839 vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10840 vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10841 [(set_attr "type" "ssemuladd")
10842 (set_attr "mode" "TI")])
;; XOP vpmacswd pattern: two commutative V8HI inputs (element selection
;; starting at index 1), a V4SI register input as operand 3, V4SI result.
;; The second alternative takes operand 1 from memory and prints
;; operands 1 and 2 in swapped order.
10844 (define_insn "xop_pmacswd"
10845 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10850 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10851 (parallel [(const_int 1)
10857 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10858 (parallel [(const_int 1)
10862 (match_operand:V4SI 3 "register_operand" "x,x")))]
10863 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10865 vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10866 vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10867 [(set_attr "type" "ssemuladd")
10868 (set_attr "mode" "TI")])
;; XOP vpmadcsswd pattern: two commutative V8HI inputs, a V4SI register
;; input as operand 3, V4SI result.  The RTL uses two groups of element
;; selections, one starting at index 0 and one starting at index 1 (the
;; latter ending at index 7).  The second alternative takes operand 1
;; from memory and prints operands 1 and 2 in swapped order.
10870 (define_insn "xop_pmadcsswd"
10871 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10877 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10878 (parallel [(const_int 0)
10884 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10885 (parallel [(const_int 0)
10893 (parallel [(const_int 1)
10900 (parallel [(const_int 1)
10903 (const_int 7)])))))
10904 (match_operand:V4SI 3 "register_operand" "x,x")))]
10905 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10907 vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10908 vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10909 [(set_attr "type" "ssemuladd")
10910 (set_attr "mode" "TI")])
;; XOP vpmadcswd pattern: two commutative V8HI inputs, a V4SI register
;; input as operand 3, V4SI result.  The RTL uses two groups of element
;; selections, one starting at index 0 and one starting at index 1 (the
;; latter ending at index 7).  The second alternative takes operand 1
;; from memory and prints operands 1 and 2 in swapped order.
10912 (define_insn "xop_pmadcswd"
10913 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10919 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10920 (parallel [(const_int 0)
10926 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10927 (parallel [(const_int 0)
10935 (parallel [(const_int 1)
10942 (parallel [(const_int 1)
10945 (const_int 7)])))))
10946 (match_operand:V4SI 3 "register_operand" "x,x")))]
10947 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10949 vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10950 vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10951 [(set_attr "type" "ssemuladd")
10952 (set_attr "mode" "TI")])
10954 ;; XOP parallel XMM conditional moves
;; vpcmov for every 16-byte SSE vector mode.  Operand 3 is the
;; if_then_else condition; operands 1 and 2 are the two arms.  Each of
;; the three alternatives allows at most one memory operand.
10955 (define_insn "xop_pcmov_<mode>"
10956 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x")
10957 (if_then_else:SSEMODE
10958 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,m")
10959 (match_operand:SSEMODE 1 "vector_move_operand" "x,m,x")
10960 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))]
10961 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
10962 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10963 [(set_attr "type" "sse4arg")])
;; vpcmov for the 32-byte AVX vector modes.  Operand 3 is the
;; if_then_else condition; operands 1 and 2 are the two arms.  Each of
;; the three alternatives allows at most one memory operand.
10965 (define_insn "xop_pcmov_<mode>256"
10966 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
10967 (if_then_else:AVX256MODE
10968 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,m")
10969 (match_operand:AVX256MODE 1 "vector_move_operand" "x,m,x")
10970 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))]
10971 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
10972 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10973 [(set_attr "type" "sse4arg")])
10975 ;; XOP horizontal add/subtract instructions
;; vphaddbw: horizontal add from a V16QI source (register or memory) to
;; a V8HI result.  The RTL combines two element selections of operand 1,
;; one starting at index 0 and one starting at index 1.
10976 (define_insn "xop_phaddbw"
10977 [(set (match_operand:V8HI 0 "register_operand" "=x")
10981 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10982 (parallel [(const_int 0)
10993 (parallel [(const_int 1)
11000 (const_int 15)])))))]
11002 "vphaddbw\t{%1, %0|%0, %1}"
11003 [(set_attr "type" "sseiadd1")])
;; vphaddbd: horizontal add from a V16QI source (register or memory) to
;; a V4SI result.  The RTL combines four element selections of operand 1,
;; starting at indices 0, 1, 2 and 3.
11005 (define_insn "xop_phaddbd"
11006 [(set (match_operand:V4SI 0 "register_operand" "=x")
11011 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11012 (parallel [(const_int 0)
11019 (parallel [(const_int 1)
11022 (const_int 13)]))))
11027 (parallel [(const_int 2)
11034 (parallel [(const_int 3)
11037 (const_int 15)]))))))]
11039 "vphaddbd\t{%1, %0|%0, %1}"
11040 [(set_attr "type" "sseiadd1")])
;; vphaddbq: horizontal add from a V16QI source (register or memory) to
;; a V2DI result.  The RTL combines eight element selections of
;; operand 1, starting at indices 0-3 and 8-11.
11042 (define_insn "xop_phaddbq"
11043 [(set (match_operand:V2DI 0 "register_operand" "=x")
11049 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11050 (parallel [(const_int 0)
11055 (parallel [(const_int 1)
11061 (parallel [(const_int 2)
11066 (parallel [(const_int 3)
11067 (const_int 7)])))))
11073 (parallel [(const_int 8)
11078 (parallel [(const_int 9)
11079 (const_int 13)]))))
11084 (parallel [(const_int 10)
11089 (parallel [(const_int 11)
11090 (const_int 15)])))))))]
11092 "vphaddbq\t{%1, %0|%0, %1}"
11093 [(set_attr "type" "sseiadd1")])
;; vphaddwd: horizontal add from a V8HI source (register or memory) to
;; a V4SI result.  The RTL combines two element selections of operand 1,
;; one starting at index 0 and one starting at index 1.
11095 (define_insn "xop_phaddwd"
11096 [(set (match_operand:V4SI 0 "register_operand" "=x")
11100 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11101 (parallel [(const_int 0)
11108 (parallel [(const_int 1)
11111 (const_int 7)])))))]
11113 "vphaddwd\t{%1, %0|%0, %1}"
11114 [(set_attr "type" "sseiadd1")])
;; vphaddwq: horizontal add from a V8HI source (register or memory) to
;; a V2DI result.  The RTL combines four element selections of operand 1,
;; starting at indices 0, 1, 2 and 3.
11116 (define_insn "xop_phaddwq"
11117 [(set (match_operand:V2DI 0 "register_operand" "=x")
11122 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11123 (parallel [(const_int 0)
11128 (parallel [(const_int 1)
11134 (parallel [(const_int 2)
11139 (parallel [(const_int 3)
11140 (const_int 7)]))))))]
11142 "vphaddwq\t{%1, %0|%0, %1}"
11143 [(set_attr "type" "sseiadd1")])
;; vphadddq: horizontal add from a V4SI source (register or memory) to
;; a V2DI result.  The RTL combines two element selections of operand 1,
;; one starting at index 0 and one consisting of indices 1 and 3.
11145 (define_insn "xop_phadddq"
11146 [(set (match_operand:V2DI 0 "register_operand" "=x")
11150 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11151 (parallel [(const_int 0)
11156 (parallel [(const_int 1)
11157 (const_int 3)])))))]
11159 "vphadddq\t{%1, %0|%0, %1}"
11160 [(set_attr "type" "sseiadd1")])
;; vphaddubw: horizontal add from a V16QI source (register or memory) to
;; a V8HI result, using element selections starting at indices 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of xop_phaddbw --
;; the extension code is not visible in this excerpt; confirm.
11162 (define_insn "xop_phaddubw"
11163 [(set (match_operand:V8HI 0 "register_operand" "=x")
11167 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11168 (parallel [(const_int 0)
11179 (parallel [(const_int 1)
11186 (const_int 15)])))))]
11188 "vphaddubw\t{%1, %0|%0, %1}"
11189 [(set_attr "type" "sseiadd1")])
;; vphaddubd: horizontal add from a V16QI source (register or memory) to
;; a V4SI result, using element selections starting at indices 0-3.
;; NOTE(review): presumably the unsigned counterpart of xop_phaddbd --
;; the extension code is not visible in this excerpt; confirm.
11191 (define_insn "xop_phaddubd"
11192 [(set (match_operand:V4SI 0 "register_operand" "=x")
11197 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11198 (parallel [(const_int 0)
11205 (parallel [(const_int 1)
11208 (const_int 13)]))))
11213 (parallel [(const_int 2)
11220 (parallel [(const_int 3)
11223 (const_int 15)]))))))]
11225 "vphaddubd\t{%1, %0|%0, %1}"
11226 [(set_attr "type" "sseiadd1")])
;; vphaddubq: horizontal add from a V16QI source (register or memory) to
;; a V2DI result, using element selections starting at indices 0-3 and
;; 8-11.
;; NOTE(review): presumably the unsigned counterpart of xop_phaddbq --
;; the extension code is not visible in this excerpt; confirm.
11228 (define_insn "xop_phaddubq"
11229 [(set (match_operand:V2DI 0 "register_operand" "=x")
11235 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11236 (parallel [(const_int 0)
11241 (parallel [(const_int 1)
11247 (parallel [(const_int 2)
11252 (parallel [(const_int 3)
11253 (const_int 7)])))))
11259 (parallel [(const_int 8)
11264 (parallel [(const_int 9)
11265 (const_int 13)]))))
11270 (parallel [(const_int 10)
11275 (parallel [(const_int 11)
11276 (const_int 15)])))))))]
11278 "vphaddubq\t{%1, %0|%0, %1}"
11279 [(set_attr "type" "sseiadd1")])
;; vphadduwd: horizontal add from a V8HI source (register or memory) to
;; a V4SI result, using element selections starting at indices 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of xop_phaddwd --
;; the extension code is not visible in this excerpt; confirm.
11281 (define_insn "xop_phadduwd"
11282 [(set (match_operand:V4SI 0 "register_operand" "=x")
11286 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11287 (parallel [(const_int 0)
11294 (parallel [(const_int 1)
11297 (const_int 7)])))))]
11299 "vphadduwd\t{%1, %0|%0, %1}"
11300 [(set_attr "type" "sseiadd1")])
;; vphadduwq: horizontal add from a V8HI source (register or memory) to
;; a V2DI result, using element selections starting at indices 0-3.
;; NOTE(review): presumably the unsigned counterpart of xop_phaddwq --
;; the extension code is not visible in this excerpt; confirm.
11302 (define_insn "xop_phadduwq"
11303 [(set (match_operand:V2DI 0 "register_operand" "=x")
11308 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11309 (parallel [(const_int 0)
11314 (parallel [(const_int 1)
11320 (parallel [(const_int 2)
11325 (parallel [(const_int 3)
11326 (const_int 7)]))))))]
11328 "vphadduwq\t{%1, %0|%0, %1}"
11329 [(set_attr "type" "sseiadd1")])
;; vphaddudq: horizontal add from a V4SI source (register or memory) to
;; a V2DI result, using one selection starting at index 0 and one made
;; of indices 1 and 3.
;; NOTE(review): presumably the unsigned counterpart of xop_phadddq --
;; the extension code is not visible in this excerpt; confirm.
11331 (define_insn "xop_phaddudq"
11332 [(set (match_operand:V2DI 0 "register_operand" "=x")
11336 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11337 (parallel [(const_int 0)
11342 (parallel [(const_int 1)
11343 (const_int 3)])))))]
11345 "vphaddudq\t{%1, %0|%0, %1}"
11346 [(set_attr "type" "sseiadd1")])
;; vphsubbw: horizontal subtract from a V16QI source (register or
;; memory) to a V8HI result.  The RTL combines two element selections of
;; operand 1, one starting at index 0 and one starting at index 1.
11348 (define_insn "xop_phsubbw"
11349 [(set (match_operand:V8HI 0 "register_operand" "=x")
11353 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11354 (parallel [(const_int 0)
11365 (parallel [(const_int 1)
11372 (const_int 15)])))))]
11374 "vphsubbw\t{%1, %0|%0, %1}"
11375 [(set_attr "type" "sseiadd1")])
;; vphsubwd: horizontal subtract from a V8HI source (register or memory)
;; to a V4SI result.  The RTL combines two element selections of
;; operand 1, one starting at index 0 and one starting at index 1.
11377 (define_insn "xop_phsubwd"
11378 [(set (match_operand:V4SI 0 "register_operand" "=x")
11382 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11383 (parallel [(const_int 0)
11390 (parallel [(const_int 1)
11393 (const_int 7)])))))]
11395 "vphsubwd\t{%1, %0|%0, %1}"
11396 [(set_attr "type" "sseiadd1")])
;; vphsubdq: horizontal subtract from a V4SI source (register or memory)
;; to a V2DI result.  The RTL combines two element selections of
;; operand 1, one starting at index 0 and one made of indices 1 and 3.
11398 (define_insn "xop_phsubdq"
11399 [(set (match_operand:V2DI 0 "register_operand" "=x")
11403 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11404 (parallel [(const_int 0)
11409 (parallel [(const_int 1)
11410 (const_int 3)])))))]
11412 "vphsubdq\t{%1, %0|%0, %1}"
11413 [(set_attr "type" "sseiadd1")])
11415 ;; XOP permute instructions
;; vpperm on V16QI, modelled as UNSPEC_XOP_PERMUTE over three V16QI
;; inputs.  Each of the three alternatives allows at most one memory
;; operand.
11416 (define_insn "xop_pperm"
11417 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
11419 [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,m")
11420 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x")
11421 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")]
11422 UNSPEC_XOP_PERMUTE))]
11423 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11424 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11425 [(set_attr "type" "sse4arg")
11426 (set_attr "mode" "TI")])
11428 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result with
;; vpperm.  Operand 3 is a V16QI input that appears only in a (use ...)
;; and is printed as the first AT&T operand of vpperm.  Each alternative
;; allows at most one memory operand.
11429 (define_insn "xop_pperm_pack_v2di_v4si"
11430 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
11433 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,m"))
11435 (match_operand:V2DI 2 "nonimmediate_operand" "x,m,x"))))
11436 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11437 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11438 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11439 [(set_attr "type" "sse4arg")
11440 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result with
;; vpperm.  Operand 3 is a V16QI input that appears only in a (use ...)
;; and is printed as the first AT&T operand of vpperm.  Each alternative
;; allows at most one memory operand.
11442 (define_insn "xop_pperm_pack_v4si_v8hi"
11443 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
11446 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m"))
11448 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))))
11449 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11450 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11451 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11452 [(set_attr "type" "sse4arg")
11453 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result with
;; vpperm.  Operand 3 is a V16QI input that appears only in a (use ...)
;; and is printed as the first AT&T operand of vpperm.  Each alternative
;; allows at most one memory operand.
11455 (define_insn "xop_pperm_pack_v8hi_v16qi"
11456 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
11459 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m"))
11461 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))))
11462 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11463 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11464 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11465 [(set_attr "type" "sse4arg")
11466 (set_attr "mode" "TI")])
11468 ;; XOP packed rotate instructions
;; Expander for a vector rotate left by an SImode count.  When the count
;; is not an in-range constant (const_0_to_<sserotatemax>_operand), the
;; scalar is converted to the element mode if needed, replicated into
;; every element of a fresh vector via vec_init, and the variable-count
;; pattern xop_vrotl<mode>3 is emitted with that vector as the count.
11469 (define_expand "rotl<mode>3"
11470 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11471 (rotate:SSEMODE1248
11472 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11473 (match_operand:SI 2 "general_operand")))]
11476 /* If we were given a scalar, convert it to parallel */
11477 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11479 rtvec vs = rtvec_alloc (<ssescalarnum>);
11480 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11481 rtx reg = gen_reg_rtx (<MODE>mode);
11482 rtx op2 = operands[2];
11485 if (GET_MODE (op2) != <ssescalarmode>mode)
11487 op2 = gen_reg_rtx (<ssescalarmode>mode);
11488 convert_move (op2, operands[2], false);
11491 for (i = 0; i < <ssescalarnum>; i++)
11492 RTVEC_ELT (vs, i) = op2;
11494 emit_insn (gen_vec_init<mode> (reg, par));
11495 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate right by an SImode count.  When the
;; count is not an in-range constant, the scalar is converted to the
;; element mode if needed and replicated into a vector via vec_init.
;; That vector is then negated, and xop_vrotl<mode>3 is emitted with the
;; negated vector as the count.
11500 (define_expand "rotr<mode>3"
11501 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11502 (rotatert:SSEMODE1248
11503 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11504 (match_operand:SI 2 "general_operand")))]
11507 /* If we were given a scalar, convert it to parallel */
11508 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11510 rtvec vs = rtvec_alloc (<ssescalarnum>);
11511 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11512 rtx neg = gen_reg_rtx (<MODE>mode);
11513 rtx reg = gen_reg_rtx (<MODE>mode);
11514 rtx op2 = operands[2];
11517 if (GET_MODE (op2) != <ssescalarmode>mode)
11519 op2 = gen_reg_rtx (<ssescalarmode>mode);
11520 convert_move (op2, operands[2], false);
11523 for (i = 0; i < <ssescalarnum>; i++)
11524 RTVEC_ELT (vs, i) = op2;
11526 emit_insn (gen_vec_init<mode> (reg, par));
11527 emit_insn (gen_neg<mode>2 (neg, reg));
11528 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: vprot<ssevecsize> with a constant count
;; (operand 2) restricted by const_0_to_<sserotatemax>_operand.  The
;; source vector may be a register or memory.
11533 (define_insn "xop_rotl<mode>3"
11534 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11535 (rotate:SSEMODE1248
11536 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11537 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11539 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11540 [(set_attr "type" "sseishft")
11541 (set_attr "length_immediate" "1")
11542 (set_attr "mode" "TI")])
;; Rotate right by an immediate.  This is printed as a vprot (rotate
;; left) by the complementary count: element width in bits minus
;; operand 2.  The width must come from the element mode.  The previous
;; expression, <ssescalarnum> * 8, used the number of elements (see
;; rtvec_alloc (<ssescalarnum>) in the rotate expanders) and so produced
;; 128/64/32/16 for V16QI/V8HI/V4SI/V2DI instead of 8/16/32/64, which
;; gives a wrong rotate for V2DI in particular.
11544 (define_insn "xop_rotr<mode>3"
11545 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11546 (rotatert:SSEMODE1248
11547 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11548 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11551 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11552 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11554 [(set_attr "type" "sseishft")
11555 (set_attr "length_immediate" "1")
11556 (set_attr "mode" "TI")])
;; Expander for a rotate right by a vector of counts: negate the count
;; vector (operand 2) into a fresh register and emit xop_vrotl<mode>3
;; with the negated counts.
11558 (define_expand "vrotr<mode>3"
11559 [(match_operand:SSEMODE1248 0 "register_operand" "")
11560 (match_operand:SSEMODE1248 1 "register_operand" "")
11561 (match_operand:SSEMODE1248 2 "register_operand" "")]
11564 rtx reg = gen_reg_rtx (<MODE>mode);
11565 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11566 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a rotate left by a vector of counts: forwards all three
;; operands unchanged to xop_vrotl<mode>3.
11570 (define_expand "vrotl<mode>3"
11571 [(match_operand:SSEMODE1248 0 "register_operand" "")
11572 (match_operand:SSEMODE1248 1 "register_operand" "")
11573 (match_operand:SSEMODE1248 2 "register_operand" "")]
11576 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count vprot<ssevecsize>.  The RTL is an if_then_else whose
;; arms are a rotate of operand 1 and a rotatert by the negation of
;; operand 2; the selecting condition is not visible in this excerpt.
;; Either operand 1 or operand 2 (not both) may be in memory.
;; NOTE(review): presumably the condition tests the sign of operand 2 --
;; confirm against the full pattern.
11580 (define_insn "xop_vrotl<mode>3"
11581 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11582 (if_then_else:SSEMODE1248
11584 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11586 (rotate:SSEMODE1248
11587 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11589 (rotatert:SSEMODE1248
11591 (neg:SSEMODE1248 (match_dup 2)))))]
11592 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11593 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11594 [(set_attr "type" "sseishft")
11595 (set_attr "prefix_data16" "0")
11596 (set_attr "prefix_extra" "2")
11597 (set_attr "mode" "TI")])
11599 ;; XOP packed shift instructions.
11600 ;; FIXME: add V2DI back in
;; Expander for a logical shift right by a vector of counts (V16QI, V8HI
;; and V4SI only): negate the count vector and emit xop_lshl<mode>3 with
;; the negated counts.
11601 (define_expand "vlshr<mode>3"
11602 [(match_operand:SSEMODE124 0 "register_operand" "")
11603 (match_operand:SSEMODE124 1 "register_operand" "")
11604 (match_operand:SSEMODE124 2 "register_operand" "")]
11607 rtx neg = gen_reg_rtx (<MODE>mode);
11608 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11609 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for an arithmetic shift right by a vector of counts (V16QI,
;; V8HI and V4SI only): negate the count vector and emit
;; xop_ashl<mode>3 with the negated counts.
11613 (define_expand "vashr<mode>3"
11614 [(match_operand:SSEMODE124 0 "register_operand" "")
11615 (match_operand:SSEMODE124 1 "register_operand" "")
11616 (match_operand:SSEMODE124 2 "register_operand" "")]
11619 rtx neg = gen_reg_rtx (<MODE>mode);
11620 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11621 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a shift left by a vector of counts (V16QI, V8HI and V4SI
;; only): forwards all three operands unchanged to xop_ashl<mode>3.
11625 (define_expand "vashl<mode>3"
11626 [(match_operand:SSEMODE124 0 "register_operand" "")
11627 (match_operand:SSEMODE124 1 "register_operand" "")
11628 (match_operand:SSEMODE124 2 "register_operand" "")]
11631 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count vpsha<ssevecsize>.  The RTL is an if_then_else whose
;; arms are an ashift of operand 1 and an ashiftrt by the negation of
;; operand 2; the selecting condition is not visible in this excerpt.
;; Either operand 1 or operand 2 (not both) may be in memory.
;; NOTE(review): presumably the condition tests the sign of operand 2 --
;; confirm against the full pattern.
11635 (define_insn "xop_ashl<mode>3"
11636 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11637 (if_then_else:SSEMODE1248
11639 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11641 (ashift:SSEMODE1248
11642 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11644 (ashiftrt:SSEMODE1248
11646 (neg:SSEMODE1248 (match_dup 2)))))]
11647 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11648 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11649 [(set_attr "type" "sseishft")
11650 (set_attr "prefix_data16" "0")
11651 (set_attr "prefix_extra" "2")
11652 (set_attr "mode" "TI")])
;; Variable-count vpshl<ssevecsize>.  The RTL is an if_then_else whose
;; arms are an ashift of operand 1 and an lshiftrt by the negation of
;; operand 2; the selecting condition is not visible in this excerpt.
;; Either operand 1 or operand 2 (not both) may be in memory.
;; NOTE(review): presumably the condition tests the sign of operand 2 --
;; confirm against the full pattern.
11654 (define_insn "xop_lshl<mode>3"
11655 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11656 (if_then_else:SSEMODE1248
11658 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11660 (ashift:SSEMODE1248
11661 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11663 (lshiftrt:SSEMODE1248
11665 (neg:SSEMODE1248 (match_dup 2)))))]
11666 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11667 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11668 [(set_attr "type" "sseishft")
11669 (set_attr "prefix_data16" "0")
11670 (set_attr "prefix_extra" "2")
11671 (set_attr "mode" "TI")])
11673 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Expander for a V16QI shift left by a scalar count: replicate
;; operand 2 into all 16 elements of a fresh V16QI register via
;; vec_initv16qi, then emit xop_ashlv16qi3 with that vector as the count.
11674 (define_expand "ashlv16qi3"
11675 [(match_operand:V16QI 0 "register_operand" "")
11676 (match_operand:V16QI 1 "register_operand" "")
11677 (match_operand:SI 2 "nonmemory_operand" "")]
11680 rtvec vs = rtvec_alloc (16);
11681 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11682 rtx reg = gen_reg_rtx (V16QImode);
11684 for (i = 0; i < 16; i++)
11685 RTVEC_ELT (vs, i) = operands[2];
11687 emit_insn (gen_vec_initv16qi (reg, par));
11688 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI logical shift by a scalar count: replicate
;; operand 2 into all 16 elements of a fresh V16QI register via
;; vec_initv16qi, then emit xop_lshlv16qi3 with that vector as the count.
11692 (define_expand "lshlv16qi3"
11693 [(match_operand:V16QI 0 "register_operand" "")
11694 (match_operand:V16QI 1 "register_operand" "")
11695 (match_operand:SI 2 "nonmemory_operand" "")]
11698 rtvec vs = rtvec_alloc (16);
11699 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11700 rtx reg = gen_reg_rtx (V16QImode);
11702 for (i = 0; i < 16; i++)
11703 RTVEC_ELT (vs, i) = operands[2];
11705 emit_insn (gen_vec_initv16qi (reg, par));
11706 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI arithmetic shift right by a scalar count,
;; implemented through xop_ashlv16qi3 with a negated count.  A constant
;; count is negated at expand time before being replicated into the
;; count vector.  A non-constant count is replicated first and the
;; resulting vector is negated with negv16qi2.
11710 (define_expand "ashrv16qi3"
11711 [(match_operand:V16QI 0 "register_operand" "")
11712 (match_operand:V16QI 1 "register_operand" "")
11713 (match_operand:SI 2 "nonmemory_operand" "")]
11716 rtvec vs = rtvec_alloc (16);
11717 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11718 rtx reg = gen_reg_rtx (V16QImode);
11720 rtx ele = ((CONST_INT_P (operands[2]))
11721 ? GEN_INT (- INTVAL (operands[2]))
11724 for (i = 0; i < 16; i++)
11725 RTVEC_ELT (vs, i) = ele;
11727 emit_insn (gen_vec_initv16qi (reg, par));
11729 if (!CONST_INT_P (operands[2]))
11731 rtx neg = gen_reg_rtx (V16QImode);
11732 emit_insn (gen_negv16qi2 (neg, reg));
11733 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11736 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V2DI arithmetic shift right by a scalar count,
;; implemented through xop_ashlv2di3 with a negated count.  The scalar
;; is negated first (at expand time for a constant, otherwise with
;; negdi2 after converting to DImode if necessary) and the negated value
;; is placed in both elements of the count vector.
11741 (define_expand "ashrv2di3"
11742 [(match_operand:V2DI 0 "register_operand" "")
11743 (match_operand:V2DI 1 "register_operand" "")
11744 (match_operand:DI 2 "nonmemory_operand" "")]
11747 rtvec vs = rtvec_alloc (2);
11748 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11749 rtx reg = gen_reg_rtx (V2DImode);
11752 if (CONST_INT_P (operands[2]))
11753 ele = GEN_INT (- INTVAL (operands[2]));
11754 else if (GET_MODE (operands[2]) != DImode)
11756 rtx move = gen_reg_rtx (DImode);
11757 ele = gen_reg_rtx (DImode);
11758 convert_move (move, operands[2], false);
11759 emit_insn (gen_negdi2 (ele, move));
11763 ele = gen_reg_rtx (DImode);
11764 emit_insn (gen_negdi2 (ele, operands[2]));
11767 RTVEC_ELT (vs, 0) = ele;
11768 RTVEC_ELT (vs, 1) = ele;
11769 emit_insn (gen_vec_initv2di (reg, par));
11770 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11774 ;; XOP FRCZ support
;; Packed vfrcz on the SSEMODEF2P modes: one source (register or memory),
;; result in a register of the same mode.
11776 (define_insn "xop_frcz<mode>2"
11777 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11779 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11782 "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11783 [(set_attr "type" "ssecvt1")
11784 (set_attr "mode" "<MODE>")])
;; Scalar vfrcz (<ssemodesuffixf2s> suffix), modelled as a vec_merge.
;; Operand 2 is the source (register or memory); operand 1 is tied to
;; the destination by the "0" constraint and supplies the other
;; vec_merge input.
11787 (define_insn "xop_vmfrcz<mode>2"
11788 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11789 (vec_merge:SSEMODEF2P
11791 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11793 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11796 "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11797 [(set_attr "type" "ssecvt1")
11798 (set_attr "mode" "<MODE>")])
;; Packed vfrcz on the 256-bit float modes of FMA4MODEF4 (V8SF, V4DF):
;; one source (register or memory), result in a register of the same
;; mode.
11800 (define_insn "xop_frcz<mode>2256"
11801 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11803 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11806 "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
11807 [(set_attr "type" "ssecvt1")
11808 (set_attr "mode" "<MODE>")])
;; Integer vector compare producing a vector result: vpcom with the
;; condition printed from operator 1 (%Y1), which must satisfy
;; ix86_comparison_int_operator.  Operand 2 must be a register;
;; operand 3 may be a register or memory.
11810 (define_insn "xop_maskcmp<mode>3"
11811 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11812 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11813 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11814 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11816 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11817 [(set_attr "type" "sse4arg")
11818 (set_attr "prefix_data16" "0")
11819 (set_attr "prefix_rep" "0")
11820 (set_attr "prefix_extra" "2")
11821 (set_attr "length_immediate" "1")
11822 (set_attr "mode" "TI")])
;; Unsigned integer vector compare: vpcom...u with the condition printed
;; from operator 1 (%Y1), which must satisfy
;; ix86_comparison_uns_operator.  Operand 2 must be a register;
;; operand 3 may be a register or memory.
11824 (define_insn "xop_maskcmp_uns<mode>3"
11825 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11826 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11827 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11828 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11830 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11831 [(set_attr "type" "ssecmp")
11832 (set_attr "prefix_data16" "0")
11833 (set_attr "prefix_rep" "0")
11834 (set_attr "prefix_extra" "2")
11835 (set_attr "length_immediate" "1")
11836 (set_attr "mode" "TI")])
11838 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11839 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11840 ;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP.
11841 (define_insn "xop_maskcmp_uns2<mode>3"
11842 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11843 (unspec:SSEMODE1248
11844 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11845 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11846 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11847 UNSPEC_XOP_UNSIGNED_CMP))]
11849 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11850 [(set_attr "type" "ssecmp")
11851 (set_attr "prefix_data16" "0")
11852 (set_attr "prefix_extra" "2")
11853 (set_attr "length_immediate" "1")
11854 (set_attr "mode" "TI")])
11856 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11857 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse, modelled as UNSPEC_XOP_TRUEFALSE.  Operand 3
;; is a constant integer selector: a nonzero value prints vpcomtrue and
;; zero prints vpcomfalse.  Operand 1 must be a register; operand 2 may
;; be a register or memory.
11858 (define_insn "xop_pcom_tf<mode>3"
11859 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11860 (unspec:SSEMODE1248
11861 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11862 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11863 (match_operand:SI 3 "const_int_operand" "n")]
11864 UNSPEC_XOP_TRUEFALSE))]
11867 return ((INTVAL (operands[3]) != 0)
11868 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11869 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11871 [(set_attr "type" "ssecmp")
11872 (set_attr "prefix_data16" "0")
11873 (set_attr "prefix_extra" "2")
11874 (set_attr "length_immediate" "1")
11875 (set_attr "mode" "TI")])
11877 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX (VEX-encoded) form of aesenc: three-operand vaesenc on V2DI,
;; enabled when both TARGET_AES and TARGET_AVX hold.  Operand 1 must be
;; a register; operand 2 may be a register or memory.
11878 (define_insn "*avx_aesenc"
11879 [(set (match_operand:V2DI 0 "register_operand" "=x")
11880 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11881 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11883 "TARGET_AES && TARGET_AVX"
11884 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11885 [(set_attr "type" "sselog1")
11886 (set_attr "prefix_extra" "1")
11887 (set_attr "prefix" "vex")
11888 (set_attr "mode" "TI")])
;; Two-operand aesenc on V2DI.  Operand 1 is tied to the destination
;; ("0" constraint); operand 2 may be a register or memory.
11890 (define_insn "aesenc"
11891 [(set (match_operand:V2DI 0 "register_operand" "=x")
11892 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11893 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11896 "aesenc\t{%2, %0|%0, %2}"
11897 [(set_attr "type" "sselog1")
11898 (set_attr "prefix_extra" "1")
11899 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) form of aesenclast: three-operand vaesenclast on
;; V2DI (UNSPEC_AESENCLAST), enabled when both TARGET_AES and TARGET_AVX
;; hold.  Operand 2 may be a register or memory.
11901 (define_insn "*avx_aesenclast"
11902 [(set (match_operand:V2DI 0 "register_operand" "=x")
11903 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11904 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11905 UNSPEC_AESENCLAST))]
11906 "TARGET_AES && TARGET_AVX"
11907 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11908 [(set_attr "type" "sselog1")
11909 (set_attr "prefix_extra" "1")
11910 (set_attr "prefix" "vex")
11911 (set_attr "mode" "TI")])
;; Two-operand aesenclast on V2DI (UNSPEC_AESENCLAST).  Operand 1 is
;; tied to the destination ("0" constraint); operand 2 may be a register
;; or memory.
11913 (define_insn "aesenclast"
11914 [(set (match_operand:V2DI 0 "register_operand" "=x")
11915 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11916 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11917 UNSPEC_AESENCLAST))]
11919 "aesenclast\t{%2, %0|%0, %2}"
11920 [(set_attr "type" "sselog1")
11921 (set_attr "prefix_extra" "1")
11922 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) form of aesdec: three-operand vaesdec on V2DI,
;; enabled when both TARGET_AES and TARGET_AVX hold.  Operand 1 must be
;; a register; operand 2 may be a register or memory.
11924 (define_insn "*avx_aesdec"
11925 [(set (match_operand:V2DI 0 "register_operand" "=x")
11926 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11927 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11929 "TARGET_AES && TARGET_AVX"
11930 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11931 [(set_attr "type" "sselog1")
11932 (set_attr "prefix_extra" "1")
11933 (set_attr "prefix" "vex")
11934 (set_attr "mode" "TI")])
;; Two-operand aesdec on V2DI.  Operand 1 is tied to the destination
;; ("0" constraint); operand 2 may be a register or memory.
11936 (define_insn "aesdec"
11937 [(set (match_operand:V2DI 0 "register_operand" "=x")
11938 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11939 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11942 "aesdec\t{%2, %0|%0, %2}"
11943 [(set_attr "type" "sselog1")
11944 (set_attr "prefix_extra" "1")
11945 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) form of aesdeclast: three-operand vaesdeclast on
;; V2DI (UNSPEC_AESDECLAST), enabled when both TARGET_AES and TARGET_AVX
;; hold.  Operand 2 may be a register or memory.
11947 (define_insn "*avx_aesdeclast"
11948 [(set (match_operand:V2DI 0 "register_operand" "=x")
11949 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11950 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11951 UNSPEC_AESDECLAST))]
11952 "TARGET_AES && TARGET_AVX"
11953 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11954 [(set_attr "type" "sselog1")
11955 (set_attr "prefix_extra" "1")
11956 (set_attr "prefix" "vex")
11957 (set_attr "mode" "TI")])
;; Two-operand aesdeclast on V2DI (UNSPEC_AESDECLAST).  Operand 1 is
;; tied to the destination ("0" constraint); operand 2 may be a register
;; or memory.
11959 (define_insn "aesdeclast"
11960 [(set (match_operand:V2DI 0 "register_operand" "=x")
11961 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11962 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11963 UNSPEC_AESDECLAST))]
11965 "aesdeclast\t{%2, %0|%0, %2}"
11966 [(set_attr "type" "sselog1")
11967 (set_attr "prefix_extra" "1")
11968 (set_attr "mode" "TI")])
;; aesimc on V2DI with a single source (register or memory).  The "%v"
;; in the template together with the maybe_vex prefix attribute lets one
;; pattern serve both the legacy and the VEX encoding.
11970 (define_insn "aesimc"
11971 [(set (match_operand:V2DI 0 "register_operand" "=x")
11972 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11975 "%vaesimc\t{%1, %0|%0, %1}"
11976 [(set_attr "type" "sselog1")
11977 (set_attr "prefix_extra" "1")
11978 (set_attr "prefix" "maybe_vex")
11979 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI (UNSPEC_AESKEYGENASSIST): one source
;; (register or memory) plus an 8-bit immediate in operand 2.  The "%v"
;; in the template together with the maybe_vex prefix attribute lets one
;; pattern serve both the legacy and the VEX encoding.
11981 (define_insn "aeskeygenassist"
11982 [(set (match_operand:V2DI 0 "register_operand" "=x")
11983 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11984 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11985 UNSPEC_AESKEYGENASSIST))]
11987 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11988 [(set_attr "type" "sselog1")
11989 (set_attr "prefix_extra" "1")
11990 (set_attr "length_immediate" "1")
11991 (set_attr "prefix" "maybe_vex")
11992 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) form of pclmulqdq: vpclmulqdq on V2DI with two
;; vector sources and an 8-bit immediate in operand 3, enabled when both
;; TARGET_PCLMUL and TARGET_AVX hold.  Operand 2 may be a register or
;; memory.
11994 (define_insn "*vpclmulqdq"
11995 [(set (match_operand:V2DI 0 "register_operand" "=x")
11996 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11997 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11998 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12000 "TARGET_PCLMUL && TARGET_AVX"
12001 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12002 [(set_attr "type" "sselog1")
12003 (set_attr "prefix_extra" "1")
12004 (set_attr "length_immediate" "1")
12005 (set_attr "prefix" "vex")
12006 (set_attr "mode" "TI")])
;; Legacy-encoded pclmulqdq on V2DI.  Operand 1 is tied to the
;; destination ("0" constraint), operand 2 may be a register or memory,
;; and operand 3 is an 8-bit immediate.
12008 (define_insn "pclmulqdq"
12009 [(set (match_operand:V2DI 0 "register_operand" "=x")
12010 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
12011 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12012 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12015 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
12016 [(set_attr "type" "sselog1")
12017 (set_attr "prefix_extra" "1")
12018 (set_attr "length_immediate" "1")
12019 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall.  Element 0 is an
;; unspec_volatile; it is followed by one SET per SSE register (16 when
;; TARGET_64BIT, otherwise 8), each storing the zero constant to the
;; register in V8SImode.
12021 (define_expand "avx_vzeroall"
12022 [(match_par_dup 0 [(const_int 0)])]
12025 int nregs = TARGET_64BIT ? 16 : 8;
12028 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
12030 XVECEXP (operands[0], 0, 0)
12031 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
12034 for (regno = 0; regno < nregs; regno++)
12035 XVECEXP (operands[0], 0, regno + 1)
12036 = gen_rtx_SET (VOIDmode,
12037 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
12038 CONST0_RTX (V8SImode));
;; Matcher for a parallel accepted by the vzeroall_operation predicate.
;; The elements spelled out here are the UNSPECV_VZEROALL unspec_volatile
;; and a SET of a register operand to a const0 operand.
;; Attributes declare no ModRM byte, no memory access and a VEX prefix.
12041 (define_insn "*avx_vzeroall"
12042 [(match_parallel 0 "vzeroall_operation"
12043 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
12044 (set (match_operand 1 "register_operand" "=x")
12045 (match_operand 2 "const0_operand" "X"))])]
12048 [(set_attr "type" "sse")
12049 (set_attr "modrm" "0")
12050 (set_attr "memory" "none")
12051 (set_attr "prefix" "vex")
12052 (set_attr "mode" "OI")])
12054 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Variant for "TARGET_AVX && !TARGET_64BIT": the volatile
;; UNSPECV_VZEROUPPER is paired with V8SI clobbers of XMM0_REG through
;; XMM7_REG only.  The TARGET_64BIT case is covered by the separate
;; avx_vzeroupper_rex64 pattern.
12055 (define_insn "avx_vzeroupper"
12056 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
12057 (clobber (reg:V8SI XMM0_REG))
12058 (clobber (reg:V8SI XMM1_REG))
12059 (clobber (reg:V8SI XMM2_REG))
12060 (clobber (reg:V8SI XMM3_REG))
12061 (clobber (reg:V8SI XMM4_REG))
12062 (clobber (reg:V8SI XMM5_REG))
12063 (clobber (reg:V8SI XMM6_REG))
12064 (clobber (reg:V8SI XMM7_REG))]
12065 "TARGET_AVX && !TARGET_64BIT"
12067 [(set_attr "type" "sse")
12068 (set_attr "modrm" "0")
12069 (set_attr "memory" "none")
12070 (set_attr "prefix" "vex")
12071 (set_attr "mode" "OI")])
;; Variant of avx_vzeroupper for "TARGET_AVX && TARGET_64BIT".
;; Same volatile UNSPECV_VZEROUPPER, but the clobber list covers all of
;; XMM0_REG through XMM15_REG (each as a V8SI register).
12073 (define_insn "avx_vzeroupper_rex64"
12074 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
12075 (clobber (reg:V8SI XMM0_REG))
12076 (clobber (reg:V8SI XMM1_REG))
12077 (clobber (reg:V8SI XMM2_REG))
12078 (clobber (reg:V8SI XMM3_REG))
12079 (clobber (reg:V8SI XMM4_REG))
12080 (clobber (reg:V8SI XMM5_REG))
12081 (clobber (reg:V8SI XMM6_REG))
12082 (clobber (reg:V8SI XMM7_REG))
12083 (clobber (reg:V8SI XMM8_REG))
12084 (clobber (reg:V8SI XMM9_REG))
12085 (clobber (reg:V8SI XMM10_REG))
12086 (clobber (reg:V8SI XMM11_REG))
12087 (clobber (reg:V8SI XMM12_REG))
12088 (clobber (reg:V8SI XMM13_REG))
12089 (clobber (reg:V8SI XMM14_REG))
12090 (clobber (reg:V8SI XMM15_REG))]
12091 "TARGET_AVX && TARGET_64BIT"
12093 [(set_attr "type" "sse")
12094 (set_attr "modrm" "0")
12095 (set_attr "memory" "none")
12096 (set_attr "prefix" "vex")
12097 (set_attr "mode" "OI")])
;; Immediate-controlled in-lane permute (vpermilps / vpermilpd).
;; Operand 0 is the destination register, operand 1 the source vector and
;; operand 2 an immediate whose upper bound comes from <vpermilbits>.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint: with register_operand the "m" alternative could
;; never be matched directly and a memory source was only reachable
;; through reload.
12099 (define_insn "avx_vpermil<mode>"
12100 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12102 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
12103 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
12106 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12107 [(set_attr "type" "sselog")
12108 (set_attr "prefix_extra" "1")
12109 (set_attr "length_immediate" "1")
12110 (set_attr "prefix" "vex")
12111 (set_attr "mode" "<MODE>")])
;; Variable-control form of vpermilp: the selector is operand 2, a vector
;; in <avxpermvecmode> that may live in a register or in memory, instead
;; of an immediate.  Consequently there is no length_immediate attribute.
;; Operand 1 (the data source) must be a register.
12113 (define_insn "avx_vpermilvar<mode>3"
12114 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12116 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12117 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
12120 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12121 [(set_attr "type" "sselog")
12122 (set_attr "prefix_extra" "1")
12123 (set_attr "prefix" "vex")
12124 (set_attr "mode" "<MODE>")])
;; vperm2f128 over the AVX256MODE2P iterator, modelled as
;; UNSPEC_VPERMIL2F128 of operand 1 (register), operand 2 (register or
;; memory) and a 0..255 immediate (operand 3).
;; The "mode" attribute is fixed to V8SF for every iterated mode.
12126 (define_insn "avx_vperm2f128<mode>3"
12127 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12128 (unspec:AVX256MODE2P
12129 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12130 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12131 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12132 UNSPEC_VPERMIL2F128))]
12134 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12135 [(set_attr "type" "sselog")
12136 (set_attr "prefix_extra" "1")
12137 (set_attr "length_immediate" "1")
12138 (set_attr "prefix" "vex")
12139 (set_attr "mode" "V8SF")])
;; vbroadcasts{s,d}: the source (operand 1) is a scalar in
;; <avxscalarmode> and must be a memory operand; the destination is a
;; vector register.  The RTL describes the result as a vec_concat of
;; <avxhalfvecmode> halves, each itself a vec_concat.
;; The "mode" attribute is the scalar mode, not the vector mode.
12141 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
12142 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
12143 (vec_concat:AVXMODEF4P
12144 (vec_concat:<avxhalfvecmode>
12145 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
12147 (vec_concat:<avxhalfvecmode>
12151 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
12152 [(set_attr "type" "ssemov")
12153 (set_attr "prefix_extra" "1")
12154 (set_attr "prefix" "vex")
12155 (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss with a V8SF destination register.  The SF source
;; (operand 1) must be a memory operand.
12157 (define_insn "avx_vbroadcastss256"
12158 [(set (match_operand:V8SF 0 "register_operand" "=x")
12162 (match_operand:SF 1 "memory_operand" "m")
12175 "vbroadcastss\t{%1, %0|%0, %1}"
12176 [(set_attr "type" "ssemov")
12177 (set_attr "prefix_extra" "1")
12178 (set_attr "prefix" "vex")
12179 (set_attr "mode" "SF")])
;; vbroadcastf128: builds a 256-bit vector (AVX256MODEF2P) as a
;; vec_concat whose visible input is operand 1, an <avxhalfvecmode>
;; value that must be in memory.  The "mode" attribute is fixed to V4SF.
12181 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
12182 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
12183 (vec_concat:AVX256MODEF2P
12184 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
12187 "vbroadcastf128\t{%1, %0|%0, %1}"
12188 [(set_attr "type" "ssemov")
12189 (set_attr "prefix_extra" "1")
12190 (set_attr "prefix" "vex")
12191 (set_attr "mode" "V4SF")])
;; Expander for inserting a 128-bit half (operand 2) into a 256-bit
;; vector (operand 1), writing operand 0.  It switches on the value of
;; operand 3 (matched by const_0_to_1_operand) and emits either
;; gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>; any other value
;; reaches gcc_unreachable ().
;; NOTE(review): presumably 0 selects the "lo" insn and 1 the "hi" insn,
;; matching the vinsertf128 immediates used by those patterns - confirm.
12193 (define_expand "avx_vinsertf128<mode>"
12194 [(match_operand:AVX256MODE 0 "register_operand" "")
12195 (match_operand:AVX256MODE 1 "register_operand" "")
12196 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12197 (match_operand:SI 3 "const_0_to_1_operand" "")]
12200 switch (INTVAL (operands[3]))
12203 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12207 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12211 gcc_unreachable ();
;; Replace the low half of a vector in an AVX256MODE4P mode.
;; The result is the vec_concat of operand 2 (the new half, register or
;; memory) with elements 2 and 3 selected from operand 1, i.e. the other
;; half of operand 1 is carried over.  Emitted as vinsertf128 with
;; immediate 0.
12216 (define_insn "vec_set_lo_<mode>"
12217 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12218 (vec_concat:AVX256MODE4P
12219 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12220 (vec_select:<avxhalfvecmode>
12221 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12222 (parallel [(const_int 2) (const_int 3)]))))]
12224 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12225 [(set_attr "type" "sselog")
12226 (set_attr "prefix_extra" "1")
12227 (set_attr "length_immediate" "1")
12228 (set_attr "prefix" "vex")
12229 (set_attr "mode" "V8SF")])
;; Replace the high half of a vector in an AVX256MODE4P mode.
;; The result is the vec_concat of elements 0 and 1 selected from
;; operand 1 with operand 2 (the new half, register or memory).
;; Emitted as vinsertf128 with immediate 1.
12231 (define_insn "vec_set_hi_<mode>"
12232 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12233 (vec_concat:AVX256MODE4P
12234 (vec_select:<avxhalfvecmode>
12235 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12236 (parallel [(const_int 0) (const_int 1)]))
12237 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12239 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12240 [(set_attr "type" "sselog")
12241 (set_attr "prefix_extra" "1")
12242 (set_attr "length_immediate" "1")
12243 (set_attr "prefix" "vex")
12244 (set_attr "mode" "V8SF")])
;; Replace the low half of a vector in an AVX256MODE8P mode.
;; Operand 2 (register or memory) supplies the new half; elements 4..7
;; of operand 1 are carried over through the vec_select.
;; Emitted as vinsertf128 with immediate 0.
12246 (define_insn "vec_set_lo_<mode>"
12247 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12248 (vec_concat:AVX256MODE8P
12249 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12250 (vec_select:<avxhalfvecmode>
12251 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12252 (parallel [(const_int 4) (const_int 5)
12253 (const_int 6) (const_int 7)]))))]
12255 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12256 [(set_attr "type" "sselog")
12257 (set_attr "prefix_extra" "1")
12258 (set_attr "length_immediate" "1")
12259 (set_attr "prefix" "vex")
12260 (set_attr "mode" "V8SF")])
;; Replace the high half of a vector in an AVX256MODE8P mode.
;; Elements 0..3 of operand 1 are carried over through the vec_select;
;; operand 2 (register or memory) supplies the new half.
;; Emitted as vinsertf128 with immediate 1.
12262 (define_insn "vec_set_hi_<mode>"
12263 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12264 (vec_concat:AVX256MODE8P
12265 (vec_select:<avxhalfvecmode>
12266 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12267 (parallel [(const_int 0) (const_int 1)
12268 (const_int 2) (const_int 3)]))
12269 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12271 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12272 [(set_attr "type" "sselog")
12273 (set_attr "prefix_extra" "1")
12274 (set_attr "length_immediate" "1")
12275 (set_attr "prefix" "vex")
12276 (set_attr "mode" "V8SF")])
;; V16HI counterpart of the vec_set_lo patterns: operand 2 (V8HI,
;; register or memory) supplies the new half, and elements 8..15 of
;; operand 1 are listed in the selection parallel.
;; Emitted as vinsertf128 with immediate 0; "mode" is V8SF.
12278 (define_insn "vec_set_lo_v16hi"
12279 [(set (match_operand:V16HI 0 "register_operand" "=x")
12281 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12283 (match_operand:V16HI 1 "register_operand" "x")
12284 (parallel [(const_int 8) (const_int 9)
12285 (const_int 10) (const_int 11)
12286 (const_int 12) (const_int 13)
12287 (const_int 14) (const_int 15)]))))]
12289 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12290 [(set_attr "type" "sselog")
12291 (set_attr "prefix_extra" "1")
12292 (set_attr "length_immediate" "1")
12293 (set_attr "prefix" "vex")
12294 (set_attr "mode" "V8SF")])
;; V16HI counterpart of the vec_set_hi patterns: elements 0..7 of
;; operand 1 are listed in the selection parallel, and operand 2 (V8HI,
;; register or memory) supplies the new half.
;; Emitted as vinsertf128 with immediate 1; "mode" is V8SF.
12296 (define_insn "vec_set_hi_v16hi"
12297 [(set (match_operand:V16HI 0 "register_operand" "=x")
12300 (match_operand:V16HI 1 "register_operand" "x")
12301 (parallel [(const_int 0) (const_int 1)
12302 (const_int 2) (const_int 3)
12303 (const_int 4) (const_int 5)
12304 (const_int 6) (const_int 7)]))
12305 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12307 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12308 [(set_attr "type" "sselog")
12309 (set_attr "prefix_extra" "1")
12310 (set_attr "length_immediate" "1")
12311 (set_attr "prefix" "vex")
12312 (set_attr "mode" "V8SF")])
;; V32QI counterpart of the vec_set_lo patterns: operand 2 (V16QI,
;; register or memory) supplies the new half, and elements 16..31 of
;; operand 1 are listed in the selection parallel.
;; Emitted as vinsertf128 with immediate 0; "mode" is V8SF.
12314 (define_insn "vec_set_lo_v32qi"
12315 [(set (match_operand:V32QI 0 "register_operand" "=x")
12317 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12319 (match_operand:V32QI 1 "register_operand" "x")
12320 (parallel [(const_int 16) (const_int 17)
12321 (const_int 18) (const_int 19)
12322 (const_int 20) (const_int 21)
12323 (const_int 22) (const_int 23)
12324 (const_int 24) (const_int 25)
12325 (const_int 26) (const_int 27)
12326 (const_int 28) (const_int 29)
12327 (const_int 30) (const_int 31)]))))]
12329 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12330 [(set_attr "type" "sselog")
12331 (set_attr "prefix_extra" "1")
12332 (set_attr "length_immediate" "1")
12333 (set_attr "prefix" "vex")
12334 (set_attr "mode" "V8SF")])
;; V32QI counterpart of the vec_set_hi patterns: elements 0..15 of
;; operand 1 are listed in the selection parallel, and operand 2 (V16QI,
;; register or memory) supplies the new half.
;; Emitted as vinsertf128 with immediate 1; "mode" is V8SF.
12336 (define_insn "vec_set_hi_v32qi"
12337 [(set (match_operand:V32QI 0 "register_operand" "=x")
12340 (match_operand:V32QI 1 "register_operand" "x")
12341 (parallel [(const_int 0) (const_int 1)
12342 (const_int 2) (const_int 3)
12343 (const_int 4) (const_int 5)
12344 (const_int 6) (const_int 7)
12345 (const_int 8) (const_int 9)
12346 (const_int 10) (const_int 11)
12347 (const_int 12) (const_int 13)
12348 (const_int 14) (const_int 15)]))
12349 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12351 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12352 [(set_attr "type" "sselog")
12353 (set_attr "prefix_extra" "1")
12354 (set_attr "length_immediate" "1")
12355 (set_attr "prefix" "vex")
12356 (set_attr "mode" "V8SF")])
;; vmaskmovp{s,d} used as a load: operand 1 is the memory source,
;; operand 2 a register, and operand 0 the destination register.
;; In Intel syntax the operands print as %0, %2, %1, so operand 2 sits
;; between the destination and the memory source.
;; NOTE(review): operand 2 is presumably the mask vector - confirm.
12358 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12359 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12361 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12362 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12366 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
12367 [(set_attr "type" "sselog1")
12368 (set_attr "prefix_extra" "1")
12369 (set_attr "prefix" "vex")
12370 (set_attr "mode" "<MODE>")])
;; vmaskmovp{s,d} used as a store: operand 0 is the memory destination
;; and operands 1 and 2 are both registers, combined under
;; UNSPEC_MASKSTORE.  In Intel syntax the operands print as %0, %1, %2,
;; i.e. memory first.
;; NOTE(review): which of operands 1 and 2 is the mask and which the
;; data cannot be read off this pattern alone - confirm against callers.
12372 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12373 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12375 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12376 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12378 UNSPEC_MASKSTORE))]
12380 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12381 [(set_attr "type" "sselog1")
12382 (set_attr "prefix_extra" "1")
12383 (set_attr "prefix" "vex")
12384 (set_attr "mode" "<MODE>")])
;; Cast from a half-width vector (<avxhalfvecmode>, operand 1) to the
;; full 256-bit mode (AVX256MODE2P, operand 0), modelled as an unspec.
;; Two alternatives: in alternative 0 the source is tied to the
;; destination ("0"); in alternative 1 it is any register or memory.
;; The output code switches on which_alternative and then on the insn's
;; mode attribute to pick vmovaps, vmovapd or vmovdqa, writing %x0.
;; The "length" attribute is special-cased for alternative 0 and left at
;; the default ("*") otherwise.
;; NOTE(review): %x0 presumably prints the 128-bit name of the
;; destination register - confirm against the operand printer.
12386 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12387 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
12388 (unspec:AVX256MODE2P
12389 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
12393 switch (which_alternative)
12398 switch (get_attr_mode (insn))
12401 return "vmovaps\t{%1, %x0|%x0, %1}";
12403 return "vmovapd\t{%1, %x0|%x0, %1}";
12405 return "vmovdqa\t{%1, %x0|%x0, %1}";
12412 gcc_unreachable ();
12414 [(set_attr "type" "ssemov")
12415 (set_attr "prefix" "vex")
12416 (set_attr "mode" "<avxvecmode>")
12417 (set (attr "length")
12418 (if_then_else (eq_attr "alternative" "0")
12420 (const_string "*")))])
;; Cast from the full 256-bit mode (AVX256MODE2P, operand 1) down to the
;; half-width vector (<avxhalfvecmode>, operand 0), modelled as an unspec.
;; Two alternatives: in alternative 0 the source is tied to the
;; destination ("0"); in alternative 1 it is any register or memory.
;; The output code switches on which_alternative and then on the insn's
;; mode attribute to pick vmovaps, vmovapd or vmovdqa, reading %x1.
;; The "length" attribute is special-cased for alternative 0 and left at
;; the default ("*") otherwise.
;; NOTE(review): %x1 presumably prints the 128-bit name of the source
;; register - confirm against the operand printer.
12422 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
12423 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
12424 (unspec:<avxhalfvecmode>
12425 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
12429 switch (which_alternative)
12434 switch (get_attr_mode (insn))
12437 return "vmovaps\t{%x1, %0|%0, %x1}";
12439 return "vmovapd\t{%x1, %0|%0, %x1}";
12441 return "vmovdqa\t{%x1, %0|%0, %x1}";
12448 gcc_unreachable ();
12450 [(set_attr "type" "ssemov")
12451 (set_attr "prefix" "vex")
12452 (set_attr "mode" "<avxvecmode>")
12453 (set (attr "length")
12454 (if_then_else (eq_attr "alternative" "0")
12456 (const_string "*")))])
;; Vector initialisation for every mode in AVX256MODE.  All work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, the destination register (operand 0) and the unconstrained
;; initialiser (operand 1).
12458 (define_expand "vec_init<mode>"
12459 [(match_operand:AVX256MODE 0 "register_operand" "")
12460 (match_operand 1 "" "")]
12463 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two half-width vectors into a 256-bit AVX256MODE vector.
;; Operand 1 (low half) is always a register.  Operand 2 (high half) is
;; either a register/memory ("xm") or matches constraint "C" under the
;; vector_move_operand predicate.
;; The output code switches on which_alternative: one arm emits
;; vinsertf128 with immediate 1 (operand 1 printed via %t1), the other
;; picks vmovaps/vmovapd/vmovdqa by mode attribute and writes %x0.
;; The per-alternative attributes ("sselog,ssemov", prefix_extra "1,*",
;; length_immediate "1,*") line up with vinsertf128 being alternative 0
;; and the plain move being alternative 1.
12467 (define_insn "*vec_concat<mode>_avx"
12468 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12469 (vec_concat:AVX256MODE
12470 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12471 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12474 switch (which_alternative)
12477 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12479 switch (get_attr_mode (insn))
12482 return "vmovaps\t{%1, %x0|%x0, %1}";
12484 return "vmovapd\t{%1, %x0|%x0, %1}";
12486 return "vmovdqa\t{%1, %x0|%x0, %1}";
12489 gcc_unreachable ();
12492 [(set_attr "type" "sselog,ssemov")
12493 (set_attr "prefix_extra" "1,*")
12494 (set_attr "length_immediate" "1,*")
12495 (set_attr "prefix" "vex")
12496 (set_attr "mode" "<avxvecmode>")])