1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Mode iterators: each names a list of machine modes; a pattern written
;; with an iterator is instantiated once for every mode in its list.
22 ;; 16 byte integral modes handled by SSE
23 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
25 ;; All 16-byte vector modes handled by SSE
26 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Same list as SSEMODE with V1TI added.
27 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
;; AVX iterators: the 256-bit-only lists come first, followed by lists
;; that mix 128-bit and 256-bit modes.
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE
43 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Same list as AVXMODE with V1TI added.
44 (define_mode_iterator AVXMODE16
45 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Mix-n-match subsets of the lists above.
;; In the SSEMODE<digits> names each digit is an element size in bytes:
;; 1 = QI, 2 = HI, 4 = SI, 8 = DI elements (e.g. SSEMODE248 = V8HI V4SI V2DI).
48 (define_mode_iterator SSEMODE12 [V16QI V8HI])
49 (define_mode_iterator SSEMODE24 [V8HI V4SI])
50 (define_mode_iterator SSEMODE14 [V16QI V4SI])
51 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
52 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
53 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point lists: SSEMODEF4 also contains the scalar modes SF and DF;
;; the others contain vector modes only.
54 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
55 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
56 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; 256-bit lists; the 2P/24P/4P/8P variants mix integer and float modes
;; with the same element count or element width.
58 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
59 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
60 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
61 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
62 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; Float lists spanning both 128-bit and 256-bit widths.
63 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
64 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
65 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
66 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; Per their names these serve the dq<->ps conversions: one lists the
;; single-float vector modes, the other the 32-bit integer vector modes.
67 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
68 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
70 ;; Int-float size matches
;; Each list pairs the float and the integer 128-bit vector mode that have
;; the same element width (32-bit, then 64-bit).
71 (define_mode_iterator SSEMODE4S [V4SF V4SI])
72 (define_mode_iterator SSEMODE2D [V2DF V2DI])
74 ;; Modes handled by integer vcond pattern
;; V2DI carries a condition string, so it is only instantiated when
;; TARGET_SSE4_2 holds; the other three modes are unconditional.
75 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
76 (V2DI "TARGET_SSE4_2")])
;; Modes handled by vec_extract_even/odd pattern.
;; Every entry pairs a mode with the target condition under which patterns
;; are instantiated for it.  The list opener and the two 128-bit float
;; entries were missing, leaving the bracket unbalanced and V4SF/V2DF
;; unsupported although their 256-bit counterparts V8SF/V4DF were listed.
(define_mode_iterator SSEMODE_EO
  [(V4SF "TARGET_SSE")
   (V2DF "TARGET_SSE2")
   (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
   (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
   (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
;; Mode attributes: <name> inside a pattern is replaced by the string
;; associated with the mode currently selected by the pattern's iterator.
86 ;; Mapping from float mode to required SSE level
87 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
89 ;; Mapping from integer vector mode to mnemonic suffix
90 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
92 ;; Mapping of the fma4 suffix
93 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
;; Scalar suffix ("ss"/"sd") for scalar and 128-bit vector float modes alike.
94 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
95 (V4SF "ss") (V2DF "sd")])
97 ;; Mapping of the avx suffix
;; Scalar modes map to "ss"/"sd", vector modes to the packed "ps"/"pd".
98 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
99 (V4SF "ps") (V2DF "pd")])
;; Single precision letter: "s" for V4SF, "d" for V2DF.  Patterns below
;; append it to stems such as "movup", "divp" or "sqrts".
101 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
;; Scalar suffix for the 4 x 32-bit modes: "ss" for float, "d" for integer.
103 (define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")])
105 ;; Mapping of the max integer size for xop rotate immediate constraint
;; The value is the element width in bits minus one (7, 15, 31, 63).
106 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
108 ;; Mapping of vector modes back to the scalar modes
;; Covers the 128-bit modes only; avxscalarmode further down also has the
;; 256-bit entries.
109 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
110 (V16QI "QI") (V8HI "HI")
111 (V4SI "SI") (V2DI "DI")])
113 ;; Mapping of vector modes to a vector mode of double size
;; Same element type, twice the element count: 128-bit modes map to
;; 256-bit ones and 256-bit modes to 512-bit ones.
114 (define_mode_attr ssedoublesizemode
115 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
116 (V8HI "V16HI") (V16QI "V32QI")
117 (V4DF "V8DF") (V8SF "V16SF")
118 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
120 ;; Number of scalar elements in each vector type
;; The string is the element count that already appears in the mode name.
121 (define_mode_attr ssescalarnum
122 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
123 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value used for the "mode" insn attribute: every 128-bit integer vector
;; maps to TI, every 256-bit integer vector to OI, and float vectors map to
;; themselves.
126 (define_mode_attr avxvecmode
127 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
128 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
129 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Single-precision float vector mode of the same total size as the
;; integer vector mode.
130 (define_mode_attr avxvecpsmode
131 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
132 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Vector mode with the same element type and half the element count.
133 (define_mode_attr avxhalfvecmode
134 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
135 (V8SF "V4SF") (V4DF "V2DF")
136 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Element (scalar) mode of each 128-bit and 256-bit vector mode.
137 (define_mode_attr avxscalarmode
138 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
139 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; Float <-> integer vector mode with the same element count and width
;; (32-bit elements only); the mapping goes both ways.
140 (define_mode_attr avxcvtvecmode
141 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Integer vector mode with the same shape as the float vector mode.
142 (define_mode_attr avxpermvecmode
143 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Precision letter: "s" for 32-bit-element modes, "d" for 64-bit-element
;; modes, including the integer modes V8SI and V4DI.
144 (define_mode_attr avxmodesuffixf2c
145 [(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")])
;; Packed-form suffix: "ps"/"pd" for float vectors, "si" for 32-bit
;; integer vectors.  The V4DF entry and the list terminator were missing,
;; which left the definition unbalanced and V4DF (a member of the 256-bit
;; float iterators) without a suffix.
(define_mode_attr avxmodesuffixp
  [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
   (V4DF "pd")])
;; Name suffix distinguishing widths: empty for the listed 128-bit modes,
;; "256" for the listed 256-bit modes.  Used in pattern names such as
;; avx_movdqu<avxmodesuffix>.
149 (define_mode_attr avxmodesuffix
150 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
151 (V8SI "256") (V8SF "256") (V4DF "256")])
153 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 where N is the element count of the mode,
;; i.e. one mask bit per element.
154 (define_mode_attr blendbits
155 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
157 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 2^(N-1) where N is the element count of the mode.
158 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
160 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for whole-vector moves in the 256-bit modes of AVX256MODE.
;; Both operands accept register or memory; all of the work is handed to
;; ix86_expand_vector_move.
;; NOTE(review): the enabling condition and the end of the preparation
;; statements are not visible in this copy -- confirm against the full file.
168 (define_expand "mov<mode>"
169 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
170 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
173 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX vector move insn for every mode in AVXMODE16.
;; Alternatives: (0) constant "C" -> register, (1) register/memory ->
;; register, (2) register -> memory.  At least one operand must be a
;; register.  The output code switches on which_alternative: one path
;; returns standard_sse_constant_opcode, the other picks vmovaps, vmovapd
;; or vmovdqa from the insn's "mode" attribute (<avxvecmode>).
;; NOTE(review): the first part of the condition and the case labels are
;; not visible here, so the alternative-to-path mapping is presumed.
177 (define_insn "*avx_mov<mode>_internal"
178 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
179 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
181 && (register_operand (operands[0], <MODE>mode)
182 || register_operand (operands[1], <MODE>mode))"
184 switch (which_alternative)
187 return standard_sse_constant_opcode (insn, operands[1]);
190 switch (get_attr_mode (insn))
194 return "vmovaps\t{%1, %0|%0, %1}";
197 return "vmovapd\t{%1, %0|%0, %1}";
199 return "vmovdqa\t{%1, %0|%0, %1}";
205 [(set_attr "type" "sselog1,ssemov,ssemov")
206 (set_attr "prefix" "vex")
207 (set_attr "mode" "<avxvecmode>")])
209 ;; All of these patterns are enabled for SSE1 as well as SSE2.
210 ;; This is essential for maintaining stable calling conventions.
;; Expander for whole-vector moves in the 128-bit modes of SSEMODE16;
;; delegates to ix86_expand_vector_move.
;; NOTE(review): the enabling condition is not visible in this copy.
212 (define_expand "mov<mode>"
213 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
214 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
217 ix86_expand_vector_move (<MODE>mode, operands);
;; Non-AVX counterpart of the vector move insn, for SSEMODE16.
;; Same three alternatives (constant -> reg, reg/mem -> reg, reg -> mem)
;; and the same requirement that one operand be a register.  The output
;; code returns movaps, movapd or movdqa according to the "mode" attribute.
;; The "mode" attribute is computed by the cond below:
;;   - V4SF when optimizing the function for size, when TARGET_SSE2 is
;;     off, or for alternative 2 when TARGET_SSE_TYPELESS_STORES is set;
;;   - otherwise V4SF for V4SFmode and V2DF for V2DFmode;
;;   - TI for everything else.
;; NOTE(review): the start of the condition, the case labels and the
;; (set (attr "mode") opener are not visible in this copy.
221 (define_insn "*mov<mode>_internal"
222 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
223 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
225 && (register_operand (operands[0], <MODE>mode)
226 || register_operand (operands[1], <MODE>mode))"
228 switch (which_alternative)
231 return standard_sse_constant_opcode (insn, operands[1]);
234 switch (get_attr_mode (insn))
237 return "movaps\t{%1, %0|%0, %1}";
239 return "movapd\t{%1, %0|%0, %1}";
241 return "movdqa\t{%1, %0|%0, %1}";
247 [(set_attr "type" "sselog1,ssemov,ssemov")
249 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
250 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
251 (and (eq_attr "alternative" "2")
252 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
254 (const_string "V4SF")
255 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
256 (const_string "V4SF")
257 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
258 (const_string "V2DF")
260 (const_string "TI")))])
262 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
263 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
264 ;; from memory, we'd prefer to load the memory directly into the %xmm
265 ;; register. To facilitate this happy circumstance, this pattern won't
266 ;; split until after register allocation. If the 64-bit value didn't
267 ;; come from memory, this is the best we can do. This is much better
268 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from that temporary.
;;
;; Operands: 0 = V4SI destination register; 1 = DI source, either a
;; general register pair ("r") or memory ("m"); 2 = V4SI scratch, an
;; early-clobber xmm register for the register alternative and unused
;; ("X") for the memory alternative.
;; Enabled only for 32-bit targets with SSE2 and inter-unit moves.
;; After reload the split emits:
;;   - register source: two sse2_loadld insns that place the low SImode
;;     half (subreg offset 0) in operand 0 and the high half (offset 4)
;;     in the scratch, followed by vec_interleave_lowv4si;
;;   - memory source: a single vec_concatv2di of the DI value with zero.
;; NOTE(review): the remaining interleave arguments and the fall-through
;; branch are not visible in this copy.
271 (define_insn_and_split "movdi_to_sse"
273 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
274 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
275 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
276 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
278 "&& reload_completed"
281 if (register_operand (operands[1], DImode))
283 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
284 Assemble the 64-bit DImode value in an xmm register. */
285 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
286 gen_rtx_SUBREG (SImode, operands[1], 0)));
287 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
288 gen_rtx_SUBREG (SImode, operands[1], 4)));
289 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
292 else if (memory_operand (operands[1], DImode))
293 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
294 operands[1], const0_rtx));
;; Post-reload rewrite of a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The source is narrowed to its SFmode
;; subreg at offset 0 and duplicated; operand 2 is set to the V4SF zero
;; vector for use by the replacement RTL.
;; NOTE(review): the pattern keyword line (presumably define_split) and
;; most of the replacement template are not visible in this copy.
300 [(set (match_operand:V4SF 0 "register_operand" "")
301 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
302 "TARGET_SSE && reload_completed"
305 (vec_duplicate:V4SF (match_dup 1))
309 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
310 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF analogue of the rewrite above (requires TARGET_SSE2): after reload,
;; a load matching zero_extended_scalar_load_operand becomes a vec_concat
;; of the DFmode subreg at offset 0 of the source with a DFmode zero.
;; NOTE(review): the pattern keyword line (presumably define_split) is not
;; visible in this copy.
314 [(set (match_operand:V2DF 0 "register_operand" "")
315 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
316 "TARGET_SSE2 && reload_completed"
317 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
319 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
320 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a 256-bit vector register (AVX256MODE); the actual
;; sequence is produced by ix86_expand_push.
;; NOTE(review): the enabling condition is not visible in this copy.
323 (define_expand "push<mode>1"
324 [(match_operand:AVX256MODE 0 "register_operand" "")]
327 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a 128-bit vector register (SSEMODE16); the actual
;; sequence is produced by ix86_expand_push.
;; NOTE(review): the enabling condition is not visible in this copy.
331 (define_expand "push<mode>1"
332 [(match_operand:SSEMODE16 0 "register_operand" "")]
335 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the 256-bit modes (AVX256MODE); delegates
;; to ix86_expand_vector_move_misalign.
;; NOTE(review): the enabling condition is not visible in this copy.
339 (define_expand "movmisalign<mode>"
340 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
341 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
344 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the 128-bit modes (SSEMODE16); delegates
;; to ix86_expand_vector_move_misalign.
;; NOTE(review): the enabling condition is not visible in this copy.
348 (define_expand "movmisalign<mode>"
349 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
350 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
353 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; AVX packed-float move emitting vmovups/vmovupd for the modes of
;; AVXMODEF2P.  Alternatives: reg/mem -> reg, reg -> mem; the condition
;; rejects memory-to-memory.  The insn is tagged with the "movu" attribute.
;; NOTE(review): the wrapper around operand 1 (an unspec, judging by the
;; bracketed operand vector) and its code are not visible in this copy.
357 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
358 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
360 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
362 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
363 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
364 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
365 [(set_attr "type" "ssemov")
366 (set_attr "movu" "1")
367 (set_attr "prefix" "vex")
368 (set_attr "mode" "<MODE>")])
;; movq between V2DI operands: selects element 0 of operand 1 (register or
;; memory) into the V2DI destination register.  "%v" in the template and
;; prefix "maybe_vex" let the same pattern serve the VEX-encoded form.
;; NOTE(review): the outer RTL (how the upper element is filled) and the
;; enabling condition are not visible in this copy.
370 (define_insn "sse2_movq128"
371 [(set (match_operand:V2DI 0 "register_operand" "=x")
374 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
375 (parallel [(const_int 0)]))
378 "%vmovq\t{%1, %0|%0, %1}"
379 [(set_attr "type" "ssemov")
380 (set_attr "prefix" "maybe_vex")
381 (set_attr "mode" "TI")])
;; Non-AVX packed-float move emitting movups/movupd for V4SF/V2DF.  The
;; pattern name is prefixed by the <sse> attribute ("sse" or "sse2").
;; Alternatives: reg/mem -> reg, reg -> mem; memory-to-memory is rejected.
;; NOTE(review): the wrapper around operand 1 and its code are not visible
;; in this copy.
383 (define_insn "<sse>_movup<ssemodesuffixf2c>"
384 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
386 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
388 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
389 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
390 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
391 [(set_attr "type" "ssemov")
392 (set_attr "movu" "1")
393 (set_attr "mode" "<MODE>")])
;; AVX integer vector move emitting vmovdqu for V32QI/V16QI (AVXMODEQI).
;; Alternatives: reg/mem -> reg, reg -> mem; memory-to-memory is rejected.
;; NOTE(review): the wrapper around operand 1 and its code are not visible
;; in this copy.
395 (define_insn "avx_movdqu<avxmodesuffix>"
396 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
398 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
400 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
401 "vmovdqu\t{%1, %0|%0, %1}"
402 [(set_attr "type" "ssemov")
403 (set_attr "movu" "1")
404 (set_attr "prefix" "vex")
405 (set_attr "mode" "<avxvecmode>")])
;; SSE2 integer vector move emitting movdqu on V16QI operands, expressed as
;; an unspec of operand 1.  Alternatives: reg/mem -> reg, reg -> mem;
;; memory-to-memory is rejected.  prefix_data16 is set to 1.
;; NOTE(review): the unspec code is not visible in this copy.
407 (define_insn "sse2_movdqu"
408 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
409 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
411 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
412 "movdqu\t{%1, %0|%0, %1}"
413 [(set_attr "type" "ssemov")
414 (set_attr "movu" "1")
415 (set_attr "prefix_data16" "1")
416 (set_attr "mode" "TI")])
;; AVX store of a float vector register to memory using vmovntps/vmovntpd
;; (the non-temporal store mnemonics) for the modes of AVXMODEF2P.
;; NOTE(review): the wrapper around operand 1 and its code are not visible
;; in this copy.
418 (define_insn "avx_movnt<mode>"
419 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
421 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
423 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
424 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
425 [(set_attr "type" "ssemov")
426 (set_attr "prefix" "vex")
427 (set_attr "mode" "<MODE>")])
;; Non-AVX store of a V4SF/V2DF register to memory using movntps/movntpd.
;; The pattern name is prefixed by the <sse> attribute ("sse" or "sse2").
;; NOTE(review): the wrapper around operand 1 and its code are not visible
;; in this copy.
429 (define_insn "<sse>_movnt<mode>"
430 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
432 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
434 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
435 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
436 [(set_attr "type" "ssemov")
437 (set_attr "mode" "<MODE>")])
;; AVX store of a V4DI/V2DI register to memory using vmovntdq.
;; NOTE(review): "type" is "ssecvt" here although every sibling store
;; pattern in this group (avx_movnt<mode> for floats, sse2_movntv2di) uses
;; "ssemov" -- looks like an inconsistency; confirm before relying on it.
;; NOTE(review): the wrapper around operand 1, its code and the enabling
;; condition are not visible in this copy.
439 (define_insn "avx_movnt<mode>"
440 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
442 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
445 "vmovntdq\t{%1, %0|%0, %1}"
446 [(set_attr "type" "ssecvt")
447 (set_attr "prefix" "vex")
448 (set_attr "mode" "<avxvecmode>")])
;; Store of a V2DI register to memory using movntdq, expressed as an unspec
;; of operand 1.  prefix_data16 is set to 1 and the insn mode is TI.
;; NOTE(review): the unspec code and the enabling condition are not visible
;; in this copy.
450 (define_insn "sse2_movntv2di"
451 [(set (match_operand:V2DI 0 "memory_operand" "=m")
452 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
455 "movntdq\t{%1, %0|%0, %1}"
456 [(set_attr "type" "ssemov")
457 (set_attr "prefix_data16" "1")
458 (set_attr "mode" "TI")])
;; Store of a 32-bit general register to memory using movnti.
;;   operand 0: SImode memory destination ("m")
;;   operand 1: SImode general register source ("r")
;; The "mode" attribute is SI so that it describes the SImode integer
;; operands; it previously read V2DF, a vector-float mode unrelated to this
;; general-register store.  prefix_data16 stays explicitly 0.
;; NOTE(review): UNSPEC_MOVNT and the TARGET_SSE2 condition were restored
;; because the corresponding lines were absent -- confirm against the
;; unspec constants defined for this port.
(define_insn "sse2_movntsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "SI")])
;; AVX load of a V32QI/V16QI value from memory into a register using
;; vlddqu.  Tagged with the "movu" attribute.
;; NOTE(review): "type" is "ssecvt" here while the SSE3 twin sse3_lddqu
;; uses "ssemov" -- possible inconsistency, confirm.
;; NOTE(review): the wrapper around operand 1, its code and the enabling
;; condition are not visible in this copy.
470 (define_insn "avx_lddqu<avxmodesuffix>"
471 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
473 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
476 "vlddqu\t{%1, %0|%0, %1}"
477 [(set_attr "type" "ssecvt")
478 (set_attr "movu" "1")
479 (set_attr "prefix" "vex")
480 (set_attr "mode" "<avxvecmode>")])
;; Load of a V16QI value from memory into a register using lddqu,
;; expressed as an unspec of operand 1.  Encoding attributes: no data16
;; prefix (prefix_data16 0) and a rep prefix (prefix_rep 1).
;; NOTE(review): the unspec code and the enabling condition are not visible
;; in this copy.
482 (define_insn "sse3_lddqu"
483 [(set (match_operand:V16QI 0 "register_operand" "=x")
484 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
487 "lddqu\t{%1, %0|%0, %1}"
488 [(set_attr "type" "ssemov")
489 (set_attr "movu" "1")
490 (set_attr "prefix_data16" "0")
491 (set_attr "prefix_rep" "1")
492 (set_attr "mode" "TI")])
494 ; Expand patterns for non-temporal stores. At the moment, only those
495 ; that directly map to insns are defined; it would be possible to
496 ; define patterns for other modes that would expand to several insns.
;; storent for V4SF/V2DF: memory destination, register source.  No
;; preparation code is visible, so the template itself is what gets emitted.
;; NOTE(review): the wrapper around operand 1 and its code are not visible
;; in this copy.
498 (define_expand "storent<mode>"
499 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
501 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
503 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent for the modes of the MODEF iterator (defined outside this
;; section): memory destination, register source.
;; NOTE(review): the wrapper around operand 1, its code and the enabling
;; condition are not visible in this copy.
506 (define_expand "storent<mode>"
507 [(set (match_operand:MODEF 0 "memory_operand" "")
509 [(match_operand:MODEF 1 "register_operand" "")]
;; storent for V2DI: memory destination, register source, expressed as an
;; unspec of operand 1 with the same operand shapes as sse2_movntv2di.
;; NOTE(review): the unspec code and the enabling condition are not visible
;; in this copy.
514 (define_expand "storentv2di"
515 [(set (match_operand:V2DI 0 "memory_operand" "")
516 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storent for SI: memory destination, register source, expressed as an
;; unspec of operand 1 with the same operand shapes as sse2_movntsi.
;; NOTE(review): the unspec code and the enabling condition are not visible
;; in this copy.
521 (define_expand "storentsi"
522 [(set (match_operand:SI 0 "memory_operand" "")
523 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
528 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
530 ;; Parallel floating point arithmetic
532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on V4SF/V2DF named after a code iterator (<code>).  The
;; whole expansion is done by ix86_expand_fp_absneg_operator, after which
;; DONE suppresses the default template.
;; NOTE(review): the RTL operator line is not visible; presumably an
;; abs/neg code iterator, going by the helper's name -- confirm.
534 (define_expand "<code><mode>2"
535 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
537 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
538 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
539 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander (plusminus code iterator) for V8SF/V4DF.  Operands
;; are first adjusted by ix86_fixup_binary_operands_no_copy; the template
;; is then emitted as written (there is no DONE).
541 (define_expand "<plusminus_insn><mode>3"
542 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
543 (plusminus:AVX256MODEF2P
544 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
545 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
546 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
547 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed add/subtract insn for all four float vector modes
;; (AVXMODEF2P).  Three-operand form: %0 = %1 op %2, where operand 2 may be
;; memory.  <comm> supplies the commutativity marker for operand 1's
;; constraint, per operator.
549 (define_insn "*avx_<plusminus_insn><mode>3"
550 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
551 (plusminus:AVXMODEF2P
552 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
553 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
554 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
555 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
556 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
557 [(set_attr "type" "sseadd")
558 (set_attr "prefix" "vex")
559 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for V4SF/V2DF; same structure as the 256-bit
;; expander: fix up the operands, then emit the template.
561 (define_expand "<plusminus_insn><mode>3"
562 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
563 (plusminus:SSEMODEF2P
564 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
565 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
566 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
567 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-AVX packed add/subtract insn for V4SF/V2DF.  Two-operand form:
;; operand 1 is tied to the destination (constraint "0"), so the template
;; only prints %0 and %2.
569 (define_insn "*<plusminus_insn><mode>3"
570 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
571 (plusminus:SSEMODEF2P
572 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
573 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
574 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
575 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
576 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sseadd")
578 (set_attr "mode" "<MODE>")])
;; AVX scalar add/subtract on 128-bit float vectors (vaddss/vaddsd etc.):
;; a vec_merge of the element-wise result with another vector.  The insn
;; mode is the scalar mode (<ssescalarmode>).
;; NOTE(review): the remaining vec_merge arguments (merge source and mask)
;; are not visible in this copy.
580 (define_insn "*avx_vm<plusminus_insn><mode>3"
581 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
582 (vec_merge:SSEMODEF2P
583 (plusminus:SSEMODEF2P
584 (match_operand:SSEMODEF2P 1 "register_operand" "x")
585 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
588 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
589 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
590 [(set_attr "type" "sseadd")
591 (set_attr "prefix" "vex")
592 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar add/subtract on 128-bit float vectors (addss/addsd etc.).
;; Operand 1 is tied to the destination; the insn mode is the scalar mode.
;; NOTE(review): the remaining vec_merge arguments are not visible in this
;; copy.
594 (define_insn "<sse>_vm<plusminus_insn><mode>3"
595 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
596 (vec_merge:SSEMODEF2P
597 (plusminus:SSEMODEF2P
598 (match_operand:SSEMODEF2P 1 "register_operand" "0")
599 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
602 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
603 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
604 [(set_attr "type" "sseadd")
605 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for V8SF/V4DF: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy for MULT, then the template is
;; emitted.
;; NOTE(review): the operator line of the template is not visible here.
607 (define_expand "mul<mode>3"
608 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
610 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
611 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
612 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
613 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; AVX packed multiply insn (vmulps/vmulpd) for all four float vector
;; modes.  "%" on operand 1 marks operands 1 and 2 as commutative.
;; NOTE(review): the operator line of the template is not visible here.
615 (define_insn "*avx_mul<mode>3"
616 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
618 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
619 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
620 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
621 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
622 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
623 [(set_attr "type" "ssemul")
624 (set_attr "prefix" "vex")
625 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for V4SF/V2DF; fixes up the operands for MULT and
;; then emits the template.
;; NOTE(review): the operator line of the template is not visible here.
627 (define_expand "mul<mode>3"
628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
630 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
633 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Non-AVX packed multiply insn (mulps/mulpd).  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
;; NOTE(review): the operator line of the template is not visible here.
635 (define_insn "*mul<mode>3"
636 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
638 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
639 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
640 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
641 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
642 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
643 [(set_attr "type" "ssemul")
644 (set_attr "mode" "<MODE>")])
;; AVX scalar multiply on 128-bit float vectors (vmulss/vmulsd), written
;; as a vec_merge; the insn mode is the scalar mode.
;; NOTE(review): this pattern tests AVX_VEC_FLOAT_MODE_P whereas the
;; neighbouring *avx_vm add/sub/div patterns test AVX128_VEC_FLOAT_MODE_P;
;; confirm the difference is intended.
;; NOTE(review): the operator line and the remaining vec_merge arguments
;; are not visible in this copy.
646 (define_insn "*avx_vmmul<mode>3"
647 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
648 (vec_merge:SSEMODEF2P
650 (match_operand:SSEMODEF2P 1 "register_operand" "x")
651 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
654 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
655 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
656 [(set_attr "type" "ssemul")
657 (set_attr "prefix" "vex")
658 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar multiply on 128-bit float vectors (mulss/mulsd); operand
;; 1 is tied to the destination and the insn mode is the scalar mode.
;; NOTE(review): the operator line and the remaining vec_merge arguments
;; are not visible in this copy.
660 (define_insn "<sse>_vmmul<mode>3"
661 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
662 (vec_merge:SSEMODEF2P
664 (match_operand:SSEMODEF2P 1 "register_operand" "0")
665 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
668 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
669 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
670 [(set_attr "type" "ssemul")
671 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.  ix86_emit_swdivsf is then called
;; when all of these hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing
;; the insn for size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.  (Presumably that helper emits a
;; reciprocal-based software division sequence -- confirm.)
;; NOTE(review): the enabling condition and the statements following the
;; helper call are not visible in this copy.
673 (define_expand "divv8sf3"
674 [(set (match_operand:V8SF 0 "register_operand" "")
675 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
676 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
679 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
681 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
682 && flag_finite_math_only && !flag_trapping_math
683 && flag_unsafe_math_optimizations)
685 ix86_emit_swdivsf (operands[0], operands[1],
686 operands[2], V8SFmode);
;; V4DF division expander: only fixes up the operands for DIV and emits
;; the template; no alternative sequence is attempted here.
;; NOTE(review): the enabling condition is not visible in this copy.
691 (define_expand "divv4df3"
692 [(set (match_operand:V4DF 0 "register_operand" "")
693 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
694 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
696 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; AVX packed divide insn (vdivps/vdivpd) over AVXMODEF2P.  Operand 1 must
;; be a register; operand 2 may be memory.
;; NOTE(review): the operator line of the template is not visible here.
698 (define_insn "avx_div<mode>3"
699 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
701 (match_operand:AVXMODEF2P 1 "register_operand" "x")
702 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
703 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
704 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
705 [(set_attr "type" "ssediv")
706 (set_attr "prefix" "vex")
707 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  ix86_emit_swdivsf is called when all of these
;; hold: TARGET_SSE_MATH, TARGET_RECIP, optimizing the insn for speed,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.  Unlike divv8sf3, no operand fix-up
;; call is visible before the test.
;; NOTE(review): the enabling condition and the statements following the
;; helper call are not visible in this copy.
709 (define_expand "divv4sf3"
710 [(set (match_operand:V4SF 0 "register_operand" "")
711 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
712 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
715 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
716 && flag_finite_math_only && !flag_trapping_math
717 && flag_unsafe_math_optimizations)
719 ix86_emit_swdivsf (operands[0], operands[1],
720 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory divisor.
;; NOTE(review): the enabling condition and any preparation statements are
;; not visible in this copy.
725 (define_expand "divv2df3"
726 [(set (match_operand:V2DF 0 "register_operand" "")
727 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
728 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Unnamed AVX packed divide insn restricted to the 128-bit float modes
;; (SSEMODEF2P under AVX128_VEC_FLOAT_MODE_P); emits vdivps/vdivpd in
;; three-operand form.
;; NOTE(review): the operator line of the template is not visible here.
732 (define_insn "*avx_div<mode>3"
733 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
735 (match_operand:SSEMODEF2P 1 "register_operand" "x")
736 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
737 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
738 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
739 [(set_attr "type" "ssediv")
740 (set_attr "prefix" "vex")
741 (set_attr "mode" "<MODE>")])
;; Non-AVX packed divide insn (divps/divpd); operand 1 is tied to the
;; destination.
;; NOTE(review): the operator line of the template is not visible here.
743 (define_insn "<sse>_div<mode>3"
744 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
746 (match_operand:SSEMODEF2P 1 "register_operand" "0")
747 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
748 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
749 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
750 [(set_attr "type" "ssediv")
751 (set_attr "mode" "<MODE>")])
;; AVX scalar divide on 128-bit float vectors (vdivss/vdivsd), written as
;; a vec_merge; the insn mode is the scalar mode.
;; NOTE(review): the operator line and the remaining vec_merge arguments
;; are not visible in this copy.
753 (define_insn "*avx_vmdiv<mode>3"
754 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
755 (vec_merge:SSEMODEF2P
757 (match_operand:SSEMODEF2P 1 "register_operand" "x")
758 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
761 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
762 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
763 [(set_attr "type" "ssediv")
764 (set_attr "prefix" "vex")
765 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar divide on 128-bit float vectors (divss/divsd); operand 1
;; is tied to the destination and the insn mode is the scalar mode.
;; NOTE(review): the operator line and the remaining vec_merge arguments
;; are not visible in this copy.
767 (define_insn "<sse>_vmdiv<mode>3"
768 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
769 (vec_merge:SSEMODEF2P
771 (match_operand:SSEMODEF2P 1 "register_operand" "0")
772 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
775 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
776 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
777 [(set_attr "type" "ssediv")
778 (set_attr "mode" "<ssescalarmode>")])
;; 256-bit vrcpps: the UNSPEC_RCP operation on a V8SF register-or-memory
;; source, result in a register.
;; NOTE(review): the unspec opener line and the enabling condition are not
;; visible in this copy.
780 (define_insn "avx_rcpv8sf2"
781 [(set (match_operand:V8SF 0 "register_operand" "=x")
783 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
785 "vrcpps\t{%1, %0|%0, %1}"
786 [(set_attr "type" "sse")
787 (set_attr "prefix" "vex")
788 (set_attr "mode" "V8SF")])
;; 128-bit rcpps (or vrcpps via the "%v" prefix): UNSPEC_RCP on a V4SF
;; register-or-memory source.  Carries atom_sse_attr "rcp".
;; NOTE(review): the unspec opener line and the enabling condition are not
;; visible in this copy.
790 (define_insn "sse_rcpv4sf2"
791 [(set (match_operand:V4SF 0 "register_operand" "=x")
793 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
795 "%vrcpps\t{%1, %0|%0, %1}"
796 [(set_attr "type" "sse")
797 (set_attr "atom_sse_attr" "rcp")
798 (set_attr "prefix" "maybe_vex")
799 (set_attr "mode" "V4SF")])
;; AVX scalar vrcpss: an unspec of operand 1 combined with register
;; operand 2.  The template prints operand 1 as the source and operand 2
;; as the extra VEX source register; the insn mode is SF.
;; NOTE(review): the vec_merge wrapper, the unspec code, the merge mask and
;; the enabling condition are not visible in this copy.
801 (define_insn "*avx_vmrcpv4sf2"
802 [(set (match_operand:V4SF 0 "register_operand" "=x")
804 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
806 (match_operand:V4SF 2 "register_operand" "x")
809 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
810 [(set_attr "type" "sse")
811 (set_attr "prefix" "vex")
812 (set_attr "mode" "SF")])
;; Non-AVX scalar rcpss: an unspec of operand 1 combined with operand 2,
;; which is tied to the destination ("0") and therefore not printed.
;; NOTE(review): the vec_merge wrapper, the unspec code, the merge mask and
;; the enabling condition are not visible in this copy.
814 (define_insn "sse_vmrcpv4sf2"
815 [(set (match_operand:V4SF 0 "register_operand" "=x")
817 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
819 (match_operand:V4SF 2 "register_operand" "0")
822 "rcpss\t{%1, %0|%0, %1}"
823 [(set_attr "type" "sse")
824 (set_attr "atom_sse_attr" "rcp")
825 (set_attr "mode" "SF")])
;; V8SF square-root expander.  ix86_emit_swsqrtsf (last argument 0) is
;; called when all of these hold: TARGET_SSE_MATH, TARGET_RECIP, not
;; optimizing the insn for size, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
;; NOTE(review): the enabling condition and the statements after the
;; helper call are not visible in this copy.
827 (define_expand "sqrtv8sf2"
828 [(set (match_operand:V8SF 0 "register_operand" "")
829 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
832 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
833 && flag_finite_math_only && !flag_trapping_math
834 && flag_unsafe_math_optimizations)
836 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; 256-bit vsqrtps insn: sqrt of a V8SF register-or-memory source.
;; NOTE(review): the enabling condition is not visible in this copy.
841 (define_insn "avx_sqrtv8sf2"
842 [(set (match_operand:V8SF 0 "register_operand" "=x")
843 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
845 "vsqrtps\t{%1, %0|%0, %1}"
846 [(set_attr "type" "sse")
847 (set_attr "prefix" "vex")
848 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  ix86_emit_swsqrtsf (last argument 0) is
;; called when all of these hold: TARGET_SSE_MATH, TARGET_RECIP,
;; optimizing the insn for speed, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
;; NOTE(review): the enabling condition and the statements after the
;; helper call are not visible in this copy.
850 (define_expand "sqrtv4sf2"
851 [(set (match_operand:V4SF 0 "register_operand" "")
852 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
855 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
856 && flag_finite_math_only && !flag_trapping_math
857 && flag_unsafe_math_optimizations)
859 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; 128-bit sqrtps insn (vsqrtps via "%v"): sqrt of a V4SF
;; register-or-memory source.  Carries atom_sse_attr "sqrt".
;; NOTE(review): the enabling condition is not visible in this copy.
864 (define_insn "sse_sqrtv4sf2"
865 [(set (match_operand:V4SF 0 "register_operand" "=x")
866 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
868 "%vsqrtps\t{%1, %0|%0, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "atom_sse_attr" "sqrt")
871 (set_attr "prefix" "maybe_vex")
872 (set_attr "mode" "V4SF")])
;; 256-bit vsqrtpd insn: sqrt of a V4DF register-or-memory source.  This
;; is a named insn, so it serves directly as the sqrt pattern for V4DF.
;; NOTE(review): the enabling condition is not visible in this copy.
874 (define_insn "sqrtv4df2"
875 [(set (match_operand:V4DF 0 "register_operand" "=x")
876 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
878 "vsqrtpd\t{%1, %0|%0, %1}"
879 [(set_attr "type" "sse")
880 (set_attr "prefix" "vex")
881 (set_attr "mode" "V4DF")])
;; 128-bit sqrtpd insn (vsqrtpd via "%v"): sqrt of a V2DF
;; register-or-memory source.
;; NOTE(review): the enabling condition is not visible in this copy.
883 (define_insn "sqrtv2df2"
884 [(set (match_operand:V2DF 0 "register_operand" "=x")
885 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
887 "%vsqrtpd\t{%1, %0|%0, %1}"
888 [(set_attr "type" "sse")
889 (set_attr "prefix" "maybe_vex")
890 (set_attr "mode" "V2DF")])
;; AVX scalar square root on 128-bit float vectors (vsqrtss/vsqrtsd): a
;; vec_merge of an operation on operand 1 with register operand 2, which
;; the template prints as the extra VEX source.  Insn mode is the scalar
;; mode.
;; NOTE(review): tests AVX_VEC_FLOAT_MODE_P rather than the
;; AVX128_VEC_FLOAT_MODE_P used by most *avx_vm patterns -- confirm.
;; NOTE(review): the operator line and the merge mask are not visible in
;; this copy.
892 (define_insn "*avx_vmsqrt<mode>2"
893 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
894 (vec_merge:SSEMODEF2P
896 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
897 (match_operand:SSEMODEF2P 2 "register_operand" "x")
899 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
900 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "prefix" "vex")
903 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar square root on 128-bit float vectors (sqrtss/sqrtsd):
;; operand 2 is tied to the destination ("0") and is not printed.  Carries
;; atom_sse_attr "sqrt"; insn mode is the scalar mode.
;; NOTE(review): the operator line and the merge mask are not visible in
;; this copy.
905 (define_insn "<sse>_vmsqrt<mode>2"
906 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
907 (vec_merge:SSEMODEF2P
909 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
910 (match_operand:SSEMODEF2P 2 "register_operand" "0")
912 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
913 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
914 [(set_attr "type" "sse")
915 (set_attr "atom_sse_attr" "sqrt")
916 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander (UNSPEC_RSQRT), enabled for
;; TARGET_AVX && TARGET_SSE_MATH.  No conditional is visible around the
;; call to ix86_emit_swsqrtsf, which is passed 1 as its last argument (the
;; sqrt expanders pass 0).
;; NOTE(review): the unspec opener and the end of the preparation
;; statements are not visible in this copy.
918 (define_expand "rsqrtv8sf2"
919 [(set (match_operand:V8SF 0 "register_operand" "")
921 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
922 "TARGET_AVX && TARGET_SSE_MATH"
924 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; 256-bit vrsqrtps insn: UNSPEC_RSQRT on a V8SF register-or-memory source.
;; NOTE(review): the unspec opener line and the enabling condition are not
;; visible in this copy.
928 (define_insn "avx_rsqrtv8sf2"
929 [(set (match_operand:V8SF 0 "register_operand" "=x")
931 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
933 "vrsqrtps\t{%1, %0|%0, %1}"
934 [(set_attr "type" "sse")
935 (set_attr "prefix" "vex")
936 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT); calls
;; ix86_emit_swsqrtsf with 1 as its last argument.
;; NOTE(review): the unspec opener, the enabling condition and the end of
;; the preparation statements are not visible in this copy.
938 (define_expand "rsqrtv4sf2"
939 [(set (match_operand:V4SF 0 "register_operand" "")
941 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
944 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; 128-bit rsqrtps insn (vrsqrtps via "%v"): UNSPEC_RSQRT on a V4SF
;; register-or-memory source.
;; NOTE(review): the unspec opener line and the enabling condition are not
;; visible in this copy.
948 (define_insn "sse_rsqrtv4sf2"
949 [(set (match_operand:V4SF 0 "register_operand" "=x")
951 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
953 "%vrsqrtps\t{%1, %0|%0, %1}"
954 [(set_attr "type" "sse")
955 (set_attr "prefix" "maybe_vex")
956 (set_attr "mode" "V4SF")])
;; AVX scalar vrsqrtss: an unspec of operand 1 combined with register
;; operand 2, which the template prints as the extra VEX source.  Insn
;; mode is SF.
;; NOTE(review): the vec_merge wrapper, the unspec code, the merge mask and
;; the enabling condition are not visible in this copy.
958 (define_insn "*avx_vmrsqrtv4sf2"
959 [(set (match_operand:V4SF 0 "register_operand" "=x")
961 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
963 (match_operand:V4SF 2 "register_operand" "x")
966 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
967 [(set_attr "type" "sse")
968 (set_attr "prefix" "vex")
969 (set_attr "mode" "SF")])
;; Non-AVX scalar rsqrtss: an unspec of operand 1 combined with operand 2,
;; which is tied to the destination ("0") and not printed.  Insn mode SF.
;; NOTE(review): the vec_merge wrapper, the unspec code, the merge mask and
;; the enabling condition are not visible in this copy.
971 (define_insn "sse_vmrsqrtv4sf2"
972 [(set (match_operand:V4SF 0 "register_operand" "=x")
974 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
976 (match_operand:V4SF 2 "register_operand" "0")
979 "rsqrtss\t{%1, %0|%0, %1}"
980 [(set_attr "type" "sse")
981 (set_attr "mode" "SF")])
983 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
984 ;; isn't really correct, as those rtl operators aren't defined when
985 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Max/min expander (smaxmin code iterator) for V8SF/V4DF.  When
;; flag_finite_math_only is off, operand 1 is forced into a register
;; before the usual binary-operand fix-up.
;; NOTE(review): the end of the preparation statements is not visible in
;; this copy.
987 (define_expand "<code><mode>3"
988 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
989 (smaxmin:AVX256MODEF2P
990 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
991 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
992 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
994 if (!flag_finite_math_only)
995 operands[1] = force_reg (<MODE>mode, operands[1]);
996 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander for min/max on the SSEMODEF2P modes, enabled when
;; SSE_VEC_FLOAT_MODE_P holds.  When flag_finite_math_only is clear,
;; operand 1 is first forced into a register; the operands are then passed
;; to ix86_fixup_binary_operands_no_copy.
999 (define_expand "<code><mode>3"
1000 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1002 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1003 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1004 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1006 if (!flag_finite_math_only)
1007 operands[1] = force_reg (<MODE>mode, operands[1]);
1008 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max, finite-math variant: only matches when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts the
;; operands.  The "%" on operand 1 marks operands 1 and 2 as commutative.
;; VEX three-operand template.
1011 (define_insn "*avx_<code><mode>3_finite"
1012 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1014 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1015 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1016 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1017 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1018 "v<maxmin_float>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1019 [(set_attr "type" "sseadd")
1020 (set_attr "prefix" "vex")
1021 (set_attr "mode" "<MODE>")])
;; SSE packed min/max, finite-math variant: only matches when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is tied to the destination and, via "%", may be
;; swapped with operand 2.  Two-operand template.
1023 (define_insn "*<code><mode>3_finite"
1024 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1026 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1027 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1028 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1029 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1030 "<maxmin_float>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1031 [(set_attr "type" "sseadd")
1032 (set_attr "mode" "<MODE>")])
;; AVX packed min/max for the general (not finite-math-only) case.
;; Operand 1 must NOT carry the "%" commutative modifier here: the hardware
;; min/max instructions are order-sensitive when an input is a NaN or when
;; comparing +0.0 with -0.0, so letting the register allocator swap
;; operands 1 and 2 could change the result.  Only the _finite variant of
;; this pattern is allowed to treat the operands as commutative, matching
;; the non-AVX "*<code><mode>3" pattern, which has no "%" either.
1034 (define_insn "*avx_<code><mode>3"
1035 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1037 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "x")
1038 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1039 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1040 "v<maxmin_float>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1041 [(set_attr "type" "sseadd")
1042 (set_attr "prefix" "vex")
1043 (set_attr "mode" "<avxvecmode>")])
;; SSE packed min/max for the general case.  Operand 1 must be a register
;; tied to the destination ("0") and is not marked commutative; operand 2 may
;; be a register or memory.
1045 (define_insn "*<code><mode>3"
1046 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1048 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1049 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1050 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1051 "<maxmin_float>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1052 [(set_attr "type" "sseadd")
1053 (set_attr "mode" "<MODE>")])
;; Scalar min/max expressed as a vec_merge on the SSEMODEF2P vector modes,
;; VEX three-operand form (v...s<suffix>).  Enabled under
;; AVX128_VEC_FLOAT_MODE_P; the insn mode attribute is the scalar mode.
1055 (define_insn "*avx_vm<code><mode>3"
1056 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1057 (vec_merge:SSEMODEF2P
1059 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1060 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1063 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1064 "v<maxmin_float>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "type" "sse")
1066 (set_attr "prefix" "vex")
1067 (set_attr "mode" "<ssescalarmode>")])
;; Scalar min/max expressed as a vec_merge on the SSEMODEF2P vector modes,
;; legacy two-operand form: operand 1 is tied to the destination ("0").
;; The insn mode attribute is the scalar mode.
1069 (define_insn "<sse>_vm<code><mode>3"
1070 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1071 (vec_merge:SSEMODEF2P
1073 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1077 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1078 "<maxmin_float>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1079 [(set_attr "type" "sseadd")
1080 (set_attr "mode" "<ssescalarmode>")])
1082 ;; These versions of the min/max patterns implement exactly the operations
1083 ;; min = (op1 < op2 ? op1 : op2)
1084 ;; max = (!(op1 < op2) ? op1 : op2)
1085 ;; Their operands are not commutative, and thus they may be used in the
1086 ;; presence of -0.0 and NaN.
;; Order-preserving (non-commutative) packed minimum, AVX form: emits
;; vminp<suffix> with operand 1 in a register and operand 2 in a register or
;; memory.  No "%" modifier, so operand order is fixed.
1088 (define_insn "*avx_ieee_smin<mode>3"
1089 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1091 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1092 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1094 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1095 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1096 [(set_attr "type" "sseadd")
1097 (set_attr "prefix" "vex")
1098 (set_attr "mode" "<avxvecmode>")])
;; Order-preserving (non-commutative) packed maximum, AVX form: emits
;; vmaxp<suffix> with operand 1 in a register and operand 2 in a register or
;; memory.  No "%" modifier, so operand order is fixed.
1100 (define_insn "*avx_ieee_smax<mode>3"
1101 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1103 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1104 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1106 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1107 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1108 [(set_attr "type" "sseadd")
1109 (set_attr "prefix" "vex")
1110 (set_attr "mode" "<avxvecmode>")])
;; Order-preserving (non-commutative) packed minimum, legacy SSE form:
;; operand 1 is tied to the destination ("0") and minp<suffix> reads
;; operand 2 from a register or memory.
1112 (define_insn "*ieee_smin<mode>3"
1113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1115 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1116 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1118 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1119 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1120 [(set_attr "type" "sseadd")
1121 (set_attr "mode" "<MODE>")])
;; Order-preserving (non-commutative) packed maximum, legacy SSE form:
;; operand 1 is tied to the destination ("0") and maxp<suffix> reads
;; operand 2 from a register or memory.
1123 (define_insn "*ieee_smax<mode>3"
1124 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1126 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1127 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1129 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1130 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1131 [(set_attr "type" "sseadd")
1132 (set_attr "mode" "<MODE>")])
;; 256-bit single-precision add/subtract insn (vaddsubps, VEX three-operand).
;; The minus arm reuses operands 1 and 2 through match_dup, so both arms of
;; the RTL read the same two inputs.
1134 (define_insn "avx_addsubv8sf3"
1135 [(set (match_operand:V8SF 0 "register_operand" "=x")
1138 (match_operand:V8SF 1 "register_operand" "x")
1139 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1140 (minus:V8SF (match_dup 1) (match_dup 2))
1143 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1144 [(set_attr "type" "sseadd")
1145 (set_attr "prefix" "vex")
1146 (set_attr "mode" "V8SF")])
;; 256-bit double-precision add/subtract insn (vaddsubpd, VEX three-operand).
;; The minus arm reuses operands 1 and 2 through match_dup.
1148 (define_insn "avx_addsubv4df3"
1149 [(set (match_operand:V4DF 0 "register_operand" "=x")
1152 (match_operand:V4DF 1 "register_operand" "x")
1153 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1154 (minus:V4DF (match_dup 1) (match_dup 2))
1157 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1158 [(set_attr "type" "sseadd")
1159 (set_attr "prefix" "vex")
1160 (set_attr "mode" "V4DF")])
;; 128-bit single-precision add/subtract, VEX three-operand encoding
;; (vaddsubps); operand 1 may be any SSE register rather than being tied to
;; the destination.
1162 (define_insn "*avx_addsubv4sf3"
1163 [(set (match_operand:V4SF 0 "register_operand" "=x")
1166 (match_operand:V4SF 1 "register_operand" "x")
1167 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1168 (minus:V4SF (match_dup 1) (match_dup 2))
1171 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1172 [(set_attr "type" "sseadd")
1173 (set_attr "prefix" "vex")
1174 (set_attr "mode" "V4SF")])
;; 128-bit single-precision add/subtract, legacy two-operand encoding
;; (addsubps); operand 1 is tied to the destination ("0").  The prefix_rep
;; attribute is set to "1" for this instruction.
1176 (define_insn "sse3_addsubv4sf3"
1177 [(set (match_operand:V4SF 0 "register_operand" "=x")
1180 (match_operand:V4SF 1 "register_operand" "0")
1181 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1182 (minus:V4SF (match_dup 1) (match_dup 2))
1185 "addsubps\t{%2, %0|%0, %2}"
1186 [(set_attr "type" "sseadd")
1187 (set_attr "prefix_rep" "1")
1188 (set_attr "mode" "V4SF")])
;; 128-bit double-precision add/subtract, VEX three-operand encoding
;; (vaddsubpd); operand 1 may be any SSE register.
1190 (define_insn "*avx_addsubv2df3"
1191 [(set (match_operand:V2DF 0 "register_operand" "=x")
1194 (match_operand:V2DF 1 "register_operand" "x")
1195 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1196 (minus:V2DF (match_dup 1) (match_dup 2))
1199 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1200 [(set_attr "type" "sseadd")
1201 (set_attr "prefix" "vex")
1202 (set_attr "mode" "V2DF")])
;; 128-bit double-precision add/subtract, legacy two-operand encoding
;; (addsubpd); operand 1 is tied to the destination ("0").  Marked as using
;; the "complex" atom_unit for scheduling.
1204 (define_insn "sse3_addsubv2df3"
1205 [(set (match_operand:V2DF 0 "register_operand" "=x")
1208 (match_operand:V2DF 1 "register_operand" "0")
1209 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1210 (minus:V2DF (match_dup 1) (match_dup 2))
1213 "addsubpd\t{%2, %0|%0, %2}"
1214 [(set_attr "type" "sseadd")
1215 (set_attr "atom_unit" "complex")
1216 (set_attr "mode" "V2DF")])
;; 256-bit double-precision horizontal add/subtract (vhaddpd / vhsubpd via
;; the plusminus iterator).  The RTL picks adjacent element pairs (0,1) and
;; (2,3) out of operand 1 and then out of operand 2 with vec_select.
;; VEX three-operand template; operand 2 may be in memory.
1218 (define_insn "avx_h<plusminus_insn>v4df3"
1219 [(set (match_operand:V4DF 0 "register_operand" "=x")
1224 (match_operand:V4DF 1 "register_operand" "x")
1225 (parallel [(const_int 0)]))
1226 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1228 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1229 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1233 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1234 (parallel [(const_int 0)]))
1235 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1237 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1238 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1240 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1241 [(set_attr "type" "sseadd")
1242 (set_attr "prefix" "vex")
1243 (set_attr "mode" "V4DF")])
;; 256-bit single-precision horizontal add/subtract (vhaddps / vhsubps).
;; The RTL is written in two halves: first the adjacent pairs taken from
;; elements 0-3 of operand 1 and of operand 2, then the pairs taken from
;; elements 4-7 of operand 1 and of operand 2.
1245 (define_insn "avx_h<plusminus_insn>v8sf3"
1246 [(set (match_operand:V8SF 0 "register_operand" "=x")
1252 (match_operand:V8SF 1 "register_operand" "x")
1253 (parallel [(const_int 0)]))
1254 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1256 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1257 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1261 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1262 (parallel [(const_int 0)]))
1263 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1265 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1266 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1270 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1271 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1273 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1274 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1277 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1283 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1284 [(set_attr "type" "sseadd")
1285 (set_attr "prefix" "vex")
1286 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/subtract, VEX three-operand
;; encoding.  Adjacent element pairs (0,1) and (2,3) are selected from
;; operand 1 and then from operand 2.
1288 (define_insn "*avx_h<plusminus_insn>v4sf3"
1289 [(set (match_operand:V4SF 0 "register_operand" "=x")
1294 (match_operand:V4SF 1 "register_operand" "x")
1295 (parallel [(const_int 0)]))
1296 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1298 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1299 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1303 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1304 (parallel [(const_int 0)]))
1305 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1307 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1308 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1310 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1311 [(set_attr "type" "sseadd")
1312 (set_attr "prefix" "vex")
1313 (set_attr "mode" "V4SF")])
;; 128-bit single-precision horizontal add/subtract, legacy two-operand
;; encoding: operand 1 is tied to the destination ("0").  Carries both the
;; "complex" atom_unit and prefix_rep "1" attributes.
1315 (define_insn "sse3_h<plusminus_insn>v4sf3"
1316 [(set (match_operand:V4SF 0 "register_operand" "=x")
1321 (match_operand:V4SF 1 "register_operand" "0")
1322 (parallel [(const_int 0)]))
1323 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1325 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1326 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1330 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1331 (parallel [(const_int 0)]))
1332 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1334 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1335 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1337 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1338 [(set_attr "type" "sseadd")
1339 (set_attr "atom_unit" "complex")
1340 (set_attr "prefix_rep" "1")
1341 (set_attr "mode" "V4SF")])
;; 128-bit double-precision horizontal add/subtract, VEX three-operand
;; encoding.  Elements 0 and 1 are selected from operand 1 and then from
;; operand 2.
1343 (define_insn "*avx_h<plusminus_insn>v2df3"
1344 [(set (match_operand:V2DF 0 "register_operand" "=x")
1348 (match_operand:V2DF 1 "register_operand" "x")
1349 (parallel [(const_int 0)]))
1350 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1353 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1354 (parallel [(const_int 0)]))
1355 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1357 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1358 [(set_attr "type" "sseadd")
1359 (set_attr "prefix" "vex")
1360 (set_attr "mode" "V2DF")])
;; 128-bit double-precision horizontal add/subtract, legacy two-operand
;; encoding: operand 1 is tied to the destination ("0").  This is the
;; pattern behind gen_sse3_haddv2df3.
1362 (define_insn "sse3_h<plusminus_insn>v2df3"
1363 [(set (match_operand:V2DF 0 "register_operand" "=x")
1367 (match_operand:V2DF 1 "register_operand" "0")
1368 (parallel [(const_int 0)]))
1369 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1372 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1373 (parallel [(const_int 0)]))
1374 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1376 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1377 [(set_attr "type" "sseadd")
1378 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF vector (operand 1) into operand 0.  One path
;; applies the SSE3 horizontal add twice through a fresh temporary register;
;; the other path calls ix86_expand_reduc_v4sf with gen_addv4sf3 as the
;; combining operation.
1380 (define_expand "reduc_splus_v4sf"
1381 [(match_operand:V4SF 0 "register_operand" "")
1382 (match_operand:V4SF 1 "register_operand" "")]
1387 rtx tmp = gen_reg_rtx (V4SFmode);
1388 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1389 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1392 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF vector: a single SSE3 horizontal add of operand 1
;; with itself, written to operand 0.
1396 (define_expand "reduc_splus_v2df"
1397 [(match_operand:V2DF 0 "register_operand" "")
1398 (match_operand:V2DF 1 "register_operand" "")]
1401 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining operation.
1405 (define_expand "reduc_smax_v4sf"
1406 [(match_operand:V4SF 0 "register_operand" "")
1407 (match_operand:V4SF 1 "register_operand" "")]
1410 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining operation.
1414 (define_expand "reduc_smin_v4sf"
1415 [(match_operand:V4SF 0 "register_operand" "")
1416 (match_operand:V4SF 1 "register_operand" "")]
1419 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1425 ;; Parallel floating point comparisons
1427 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit predicate: operand 3 is an immediate
;; accepted by const_0_to_31_operand and is printed as the first AT&T
;; operand of vcmpp<suffix>.  length_immediate is 1 for that byte.
1429 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1430 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1432 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1433 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1434 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1437 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1438 [(set_attr "type" "ssecmp")
1439 (set_attr "length_immediate" "1")
1440 (set_attr "prefix" "vex")
1441 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an explicit predicate: operand 3 is an immediate
;; accepted by const_0_to_31_operand.  The scalar result is merged into a
;; vector (vec_merge), and the insn mode attribute is the scalar mode.
;; Operand 0 is the output and must be declared with "=x": with an empty
;; constraint it is neither marked as written nor restricted to SSE
;; registers.
1443 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1444 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1445 (vec_merge:SSEMODEF2P
1447 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1448 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1449 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1454 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1455 [(set_attr "type" "ssecmp")
1456 (set_attr "length_immediate" "1")
1457 (set_attr "prefix" "vex")
1458 (set_attr "mode" "<ssescalarmode>")])
1460 ;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1461 ;; may generate 256-bit vector compare instructions.
;; AVX packed mask compare driven by an RTL comparison operator (operand 3,
;; accepted by avx_comparison_float_operator).  "%D3" in the template prints
;; the predicate derived from that operator between "vcmp" and the "p<suffix>"
;; part of the mnemonic.
1462 (define_insn "*avx_maskcmp<mode>3"
1463 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1464 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1465 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1466 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1467 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1468 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1469 [(set_attr "type" "ssecmp")
1470 (set_attr "prefix" "vex")
1471 (set_attr "length_immediate" "1")
1472 (set_attr "mode" "<avxvecmode>")])
;; Legacy SSE mask compare over the SSEMODEF4 modes, driven by comparison
;; operator 3 (sse_comparison_operator).  The condition accepts modes for
;; which SSE_FLOAT_MODE_P or SSE_VEC_FLOAT_MODE_P holds.  Operand 1 is tied
;; to the destination ("0").
1474 (define_insn "<sse>_maskcmp<mode>3"
1475 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1476 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1477 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1478 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1480 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1481 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1482 [(set_attr "type" "ssecmp")
1483 (set_attr "length_immediate" "1")
1484 (set_attr "mode" "<MODE>")])
;; AVX scalar mask compare: comparison operator 3 (sse_comparison_operator)
;; applied to operands 1 and 2 and merged into a vector (vec_merge).
;; VEX three-operand template; the insn mode attribute is the scalar mode.
;; The comparison predicate is encoded as an immediate byte, so
;; length_immediate is set to "1" exactly as in the packed AVX and the
;; legacy scalar variants of this pattern.
1486 (define_insn "*avx_vmmaskcmp<mode>3"
1487 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1488 (vec_merge:SSEMODEF2P
1489 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1490 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1491 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1494 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1495 "vcmp%D3s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1496 [(set_attr "type" "ssecmp")
1497 (set_attr "prefix" "vex")
(set_attr "length_immediate" "1")
1498 (set_attr "mode" "<ssescalarmode>")])
;; Legacy SSE scalar mask compare: comparison operator 3 applied to
;; operands 1 and 2 and merged into a vector (vec_merge).  Operand 1 is tied
;; to the destination ("0"); the insn mode attribute is the scalar mode.
1500 (define_insn "<sse>_vmmaskcmp<mode>3"
1501 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1502 (vec_merge:SSEMODEF2P
1503 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1504 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1505 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1508 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1509 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1510 [(set_attr "type" "ssecmp")
1511 (set_attr "length_immediate" "1")
1512 (set_attr "mode" "<ssescalarmode>")])
;; Scalar ordered compare that sets the flags register in CCFP mode.  Both
;; inputs are vectors of which only element 0 is selected; operand 1 may be
;; in memory.  "%v" plus "maybe_vex" cover the legacy and VEX encodings.
;; prefix_data16 depends on whether the insn mode is DF.
1514 (define_insn "<sse>_comi"
1515 [(set (reg:CCFP FLAGS_REG)
1518 (match_operand:<ssevecmode> 0 "register_operand" "x")
1519 (parallel [(const_int 0)]))
1521 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1522 (parallel [(const_int 0)]))))]
1523 "SSE_FLOAT_MODE_P (<MODE>mode)"
1524 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1525 [(set_attr "type" "ssecomi")
1526 (set_attr "prefix" "maybe_vex")
1527 (set_attr "prefix_rep" "0")
1528 (set (attr "prefix_data16")
1529 (if_then_else (eq_attr "mode" "DF")
1531 (const_string "0")))
1532 (set_attr "mode" "<MODE>")])
;; Scalar unordered compare that sets the flags register in CCFPU mode.
;; Both inputs are vectors of which only element 0 is selected; operand 1
;; may be in memory.  Attributes mirror the comi pattern: maybe_vex prefix,
;; prefix_rep "0", and a prefix_data16 that depends on the DF mode.
1534 (define_insn "<sse>_ucomi"
1535 [(set (reg:CCFPU FLAGS_REG)
1538 (match_operand:<ssevecmode> 0 "register_operand" "x")
1539 (parallel [(const_int 0)]))
1541 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1542 (parallel [(const_int 0)]))))]
1543 "SSE_FLOAT_MODE_P (<MODE>mode)"
1544 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1545 [(set_attr "type" "ssecomi")
1546 (set_attr "prefix" "maybe_vex")
1547 (set_attr "prefix_rep" "0")
1548 (set (attr "prefix_data16")
1549 (if_then_else (eq_attr "mode" "DF")
1551 (const_string "0")))
1552 (set_attr "mode" "<MODE>")])
;; Vector conditional select for the AVXMODEF2P float modes: operand 0
;; receives operand 1 or operand 2 depending on comparison operator 3
;; applied to operands 4 and 5.  Enabled for both SSE and AVX vector float
;; modes; the expansion itself is delegated to ix86_expand_fp_vcond, whose
;; boolean result is captured in "ok".
1554 (define_expand "vcond<mode>"
1555 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1556 (if_then_else:AVXMODEF2P
1557 (match_operator 3 ""
1558 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1559 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1560 (match_operand:AVXMODEF2P 1 "general_operand" "")
1561 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1562 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1563 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1565 bool ok = ix86_expand_fp_vcond (operands);
1570 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1572 ;; Parallel floating point logical operations
1574 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed and-not on float vector modes: emits vandnp<suffix> in VEX
;; three-operand form.  Operand 1 must be a register; operand 2 may be a
;; register or memory.
1576 (define_insn "avx_andnot<mode>3"
1577 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1580 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1581 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1582 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1583 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1584 [(set_attr "type" "sselog")
1585 (set_attr "prefix" "vex")
1586 (set_attr "mode" "<avxvecmode>")])
;; Legacy SSE packed and-not on float vector modes: emits andnp<suffix>
;; with operand 1 tied to the destination ("0") and operand 2 in a register
;; or memory.
1588 (define_insn "<sse>_andnot<mode>3"
1589 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1592 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1593 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1594 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1595 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1596 [(set_attr "type" "sselog")
1597 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic operations on 256-bit AVX float vector modes.
;; The only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy on the operand array.
1599 (define_expand "<code><mode>3"
1600 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1601 (any_logic:AVX256MODEF2P
1602 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1603 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1604 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1605 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed logical operation (any_logic iterator) on float vector modes.
;; Operands 1 and 2 are commutative ("%"); the pattern additionally requires
;; ix86_binary_operator_ok.  VEX three-operand template v<logic>p<suffix>.
1607 (define_insn "*avx_<code><mode>3"
1608 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1609 (any_logic:AVXMODEF2P
1610 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1611 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1612 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1613 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1614 "v<logic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1615 [(set_attr "type" "sselog")
1616 (set_attr "prefix" "vex")
1617 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic operations on 128-bit SSE float vector modes.
;; The only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy on the operand array.
1619 (define_expand "<code><mode>3"
1620 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1621 (any_logic:SSEMODEF2P
1622 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1623 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1624 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1625 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Legacy SSE packed logical operation (any_logic iterator) on float vector
;; modes.  Operand 1 is tied to the destination and commutative with
;; operand 2 ("%0"); the pattern additionally requires
;; ix86_binary_operator_ok.
1627 (define_insn "*<code><mode>3"
1628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1629 (any_logic:SSEMODEF2P
1630 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1633 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1634 "<logic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1635 [(set_attr "type" "sselog")
1636 (set_attr "mode" "<MODE>")])
;; Vector copysign built from logical operations.  Operand 3 is a mask
;; produced by ix86_build_signbit_mask for the scalar element mode;
;; operands 4 and 5 are fresh pseudo registers.  The RTL combines
;; (and (not mask) operand1) with (and mask operand2) and ORs the two
;; temporaries into operand 0.
1638 (define_expand "copysign<mode>3"
1641 (not:SSEMODEF2P (match_dup 3))
1642 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1644 (and:SSEMODEF2P (match_dup 3)
1645 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1646 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1647 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1648 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1650 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1652 operands[4] = gen_reg_rtx (<MODE>mode);
1653 operands[5] = gen_reg_rtx (<MODE>mode);
1656 ;; Also define scalar versions. These are used for abs, neg, and
1657 ;; conditional move. Using subregs into vector modes causes register
1658 ;; allocation lossage. These patterns do not allow memory operands
1659 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) and-not, AVX form.  All three operands are registers
;; (no memory alternative); the instruction emitted is the packed
;; vandnp<suffix> and the insn mode attribute is the containing vector mode.
1661 (define_insn "*avx_andnot<mode>3"
1662 [(set (match_operand:MODEF 0 "register_operand" "=x")
1665 (match_operand:MODEF 1 "register_operand" "x"))
1666 (match_operand:MODEF 2 "register_operand" "x")))]
1667 "AVX_FLOAT_MODE_P (<MODE>mode)"
1668 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1669 [(set_attr "type" "sselog")
1670 (set_attr "prefix" "vex")
1671 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) and-not, legacy SSE form.  Register operands only;
;; operand 1 is tied to the destination ("0").  Emits the packed
;; andnp<suffix>, with the insn mode attribute set to the vector mode.
1673 (define_insn "*andnot<mode>3"
1674 [(set (match_operand:MODEF 0 "register_operand" "=x")
1677 (match_operand:MODEF 1 "register_operand" "0"))
1678 (match_operand:MODEF 2 "register_operand" "x")))]
1679 "SSE_FLOAT_MODE_P (<MODE>mode)"
1680 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1681 [(set_attr "type" "sselog")
1682 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logical operation, AVX form.  Register operands only;
;; emits the packed v<logic>p<suffix> in VEX three-operand form, with the
;; insn mode attribute set to the vector mode.
1684 (define_insn "*avx_<code><mode>3"
1685 [(set (match_operand:MODEF 0 "register_operand" "=x")
1687 (match_operand:MODEF 1 "register_operand" "x")
1688 (match_operand:MODEF 2 "register_operand" "x")))]
1689 "AVX_FLOAT_MODE_P (<MODE>mode)"
1690 "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1691 [(set_attr "type" "sselog")
1692 (set_attr "prefix" "vex")
1693 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logical operation, legacy SSE form.  Register
;; operands only; operand 1 is tied to the destination ("0").  Emits the
;; packed <logic>p<suffix>, with the insn mode attribute set to the vector
;; mode.
1695 (define_insn "*<code><mode>3"
1696 [(set (match_operand:MODEF 0 "register_operand" "=x")
1698 (match_operand:MODEF 1 "register_operand" "0")
1699 (match_operand:MODEF 2 "register_operand" "x")))]
1700 "SSE_FLOAT_MODE_P (<MODE>mode)"
1701 "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1702 [(set_attr "type" "sselog")
1703 (set_attr "mode" "<ssevecmode>")])
1705 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1707 ;; FMA4 floating point multiply/accumulate instructions, covering both
1708 ;; the scalar and the vector versions of the instructions.
1711 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1713 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1714 ;; combine to generate a multiply/add with two memory references. We then
1715 ;; split this insn, into loading up the destination register with one of the
1716 ;; memory operations. If we don't manage to split the insn, reload will
1717 ;; generate the appropriate moves. This is needed because combine
1718 ;; has already folded one of the memory references into both the multiply and
1719 ;; add insns, and it can't generate a new pseudo. I.e.:
1720 ;; (set (reg1) (mem (addr1)))
1721 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1722 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; FMA4 multiply-add on the FMA4MODEF4 modes (vfmadd<suffix>).  Two
;; alternatives: in the first, operand 3 (the addend) may be in memory; in
;; the second, operand 2 (a multiplicand) may be in memory.  Operands 1 and 2
;; are commutative ("%").  Requires TARGET_FMA4 && TARGET_FUSED_MADD.
1724 (define_insn "fma4_fmadd<mode>4256"
1725 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1728 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1729 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1730 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1731 "TARGET_FMA4 && TARGET_FUSED_MADD"
1732 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1733 [(set_attr "type" "ssemuladd")
1734 (set_attr "mode" "<MODE>")])
1736 ;; Floating multiply and subtract.
;; FMA4 multiply-subtract on the FMA4MODEF4 modes (vfmsub<suffix>).  Either
;; operand 3 (first alternative) or operand 2 (second alternative) may be
;; in memory; operands 1 and 2 are commutative ("%").
1737 (define_insn "fma4_fmsub<mode>4256"
1738 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1741 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1742 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1743 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1744 "TARGET_FMA4 && TARGET_FUSED_MADD"
1745 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1746 [(set_attr "type" "ssemuladd")
1747 (set_attr "mode" "<MODE>")])
1749 ;; Floating point negative multiply and add.
1750 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4 negated multiply-add on the FMA4MODEF4 modes (vfnmadd<suffix>).
;; Operand 3 appears first in the RTL, ahead of the product of operands 1
;; and 2, while the assembler template keeps the usual operand order.
1751 (define_insn "fma4_fnmadd<mode>4256"
1752 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1754 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1756 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1757 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1758 "TARGET_FMA4 && TARGET_FUSED_MADD"
1759 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1760 [(set_attr "type" "ssemuladd")
1761 (set_attr "mode" "<MODE>")])
1763 ;; Floating point negative multiply and subtract.
;; FMA4 negated multiply-subtract on the FMA4MODEF4 modes
;; (vfnmsub<suffix>).  Operand 1 sits inside its own sub-expression before
;; being combined with operand 2; operand 3 is the final term.  Either
;; operand 3 or operand 2 may be in memory, depending on the alternative.
1764 (define_insn "fma4_fnmsub<mode>4256"
1765 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1769 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1770 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1771 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1772 "TARGET_FMA4 && TARGET_FUSED_MADD"
1773 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1774 [(set_attr "type" "ssemuladd")
1775 (set_attr "mode" "<MODE>")])
;; FMA4 multiply-add on the SSEMODEF4 modes (vfmadd<suffix>).  Either
;; operand 3 (first alternative) or operand 2 (second alternative) may be
;; in memory; operands 1 and 2 are commutative ("%").
1777 (define_insn "fma4_fmadd<mode>4"
1778 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1781 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1782 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1783 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1784 "TARGET_FMA4 && TARGET_FUSED_MADD"
1785 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1786 [(set_attr "type" "ssemuladd")
1787 (set_attr "mode" "<MODE>")])
1789 ;; For the scalar operations, use operand1 for the upper words that aren't
1790 ;; modified, so restrict the forms that are generated.
1791 ;; Scalar version of fmadd.
;; vec_merge form of FMA4 multiply-add on the SSEMODEF2P vector modes; the
;; mnemonic uses the <ssemodesuffixf2s> suffix.  Same two-alternative
;; memory-operand scheme as the packed pattern.
1792 (define_insn "fma4_vmfmadd<mode>4"
1793 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1794 (vec_merge:SSEMODEF2P
1797 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1798 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1799 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1802 "TARGET_FMA4 && TARGET_FUSED_MADD"
1803 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1804 [(set_attr "type" "ssemuladd")
1805 (set_attr "mode" "<MODE>")])
1807 ;; Floating multiply and subtract.
1808 ;; Allow two memory operands the same as fmadd.
;; FMA4 multiply-subtract on the SSEMODEF4 modes (vfmsub<suffix>).  Either
;; operand 3 (first alternative) or operand 2 (second alternative) may be
;; in memory; operands 1 and 2 are commutative ("%").
1809 (define_insn "fma4_fmsub<mode>4"
1810 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1813 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1814 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1815 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1816 "TARGET_FMA4 && TARGET_FUSED_MADD"
1817 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1818 [(set_attr "type" "ssemuladd")
1819 (set_attr "mode" "<MODE>")])
1821 ;; For the scalar operations, use operand1 for the upper words that aren't
1822 ;; modified, so restrict the forms that are generated.
1823 ;; Scalar version of fmsub.
;; vec_merge form of FMA4 multiply-subtract on the SSEMODEF2P vector modes;
;; the mnemonic uses the <ssemodesuffixf2s> suffix.  Same two-alternative
;; memory-operand scheme as the packed pattern.
1824 (define_insn "fma4_vmfmsub<mode>4"
1825 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1826 (vec_merge:SSEMODEF2P
1829 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1830 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1831 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1834 "TARGET_FMA4 && TARGET_FUSED_MADD"
1835 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1836 [(set_attr "type" "ssemuladd")
1837 (set_attr "mode" "<MODE>")])
1839 ;; Floating point negative multiply and add.
1840 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4 negated multiply-add on the SSEMODEF4 modes (vfnmadd<suffix>).
;; Operand 3 appears first in the RTL, ahead of the product of operands 1
;; and 2, while the assembler template keeps the usual operand order.
1841 (define_insn "fma4_fnmadd<mode>4"
1842 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1844 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1846 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1847 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1848 "TARGET_FMA4 && TARGET_FUSED_MADD"
1849 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1850 [(set_attr "type" "ssemuladd")
1851 (set_attr "mode" "<MODE>")])
1853 ;; For the scalar operations, use operand1 for the upper words that aren't
1854 ;; modified, so restrict the forms that are generated.
1855 ;; Scalar version of fnmadd.
;; vec_merge form of FMA4 negated multiply-add on the SSEMODEF2P vector
;; modes; the mnemonic uses the <ssemodesuffixf2s> suffix.  Operand 3
;; precedes the product of operands 1 and 2 in the RTL.
1856 (define_insn "fma4_vmfnmadd<mode>4"
1857 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1858 (vec_merge:SSEMODEF2P
1860 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1862 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1863 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1866 "TARGET_FMA4 && TARGET_FUSED_MADD"
1867 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1868 [(set_attr "type" "ssemuladd")
1869 (set_attr "mode" "<MODE>")])
1871 ;; Floating point negative multiply and subtract.
1872 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; FMA4 negated multiply-subtract on the SSEMODEF4 modes (vfnmsub<suffix>).
;; Operand 1 sits inside its own sub-expression before being combined with
;; operand 2; operand 3 is the final term.  Either operand 3 or operand 2
;; may be in memory, depending on the alternative.
1873 (define_insn "fma4_fnmsub<mode>4"
1874 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1878 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1879 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1880 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1881 "TARGET_FMA4 && TARGET_FUSED_MADD"
1882 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1883 [(set_attr "type" "ssemuladd")
1884 (set_attr "mode" "<MODE>")])
1886 ;; For the scalar operations, use operand1 for the upper words that aren't
1887 ;; modified, so restrict the forms that are generated.
1888 ;; Scalar version of fnmsub.
;; vec_merge form of FMA4 negated multiply-subtract on the SSEMODEF2P
;; vector modes; the mnemonic uses the <ssemodesuffixf2s> suffix.  Same
;; operand layout and alternatives as the packed fnmsub pattern.
1889 (define_insn "fma4_vmfnmsub<mode>4"
1890 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1891 (vec_merge:SSEMODEF2P
1895 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1896 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1897 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1900 "TARGET_FMA4 && TARGET_FUSED_MADD"
1901 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1902 [(set_attr "type" "ssemuladd")
1903 (set_attr "mode" "<MODE>")])
;; Intrinsic variant of the FMA4MODEF4 multiply-add: the arithmetic is
;; wrapped in UNSPEC_FMA4_INTRINSIC.  It emits the same vfmadd<suffix>
;; template with the same two operand alternatives as the plain pattern.
1905 (define_insn "fma4i_fmadd<mode>4256"
1906 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1910 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1911 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1912 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1913 UNSPEC_FMA4_INTRINSIC))]
1914 "TARGET_FMA4 && TARGET_FUSED_MADD"
1915 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1916 [(set_attr "type" "ssemuladd")
1917 (set_attr "mode" "<MODE>")])
;; Pattern tagged with UNSPEC_FMA4_INTRINSIC over the FMA4MODEF4 modes;
;; emits vfmsub<fma4modesuffixf4> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
1919 (define_insn "fma4i_fmsub<mode>4256"
1920 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1924 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1925 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1926 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1927 UNSPEC_FMA4_INTRINSIC))]
1928 "TARGET_FMA4 && TARGET_FUSED_MADD"
1929 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1930 [(set_attr "type" "ssemuladd")
1931 (set_attr "mode" "<MODE>")])
;; Pattern tagged with UNSPEC_FMA4_INTRINSIC over the FMA4MODEF4 modes;
;; emits vfnmadd<fma4modesuffixf4> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 is written first in the template, ahead of operands 1 and 2;
;; the assembler output still prints operands in 0..3 (Intel) order.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
1933 (define_insn "fma4i_fnmadd<mode>4256"
1934 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1937 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1939 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1940 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1941 UNSPEC_FMA4_INTRINSIC))]
1942 "TARGET_FMA4 && TARGET_FUSED_MADD"
1943 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1944 [(set_attr "type" "ssemuladd")
1945 (set_attr "mode" "<MODE>")])
;; Pattern tagged with UNSPEC_FMA4_INTRINSIC over the FMA4MODEF4 modes;
;; emits vfnmsub<fma4modesuffixf4> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
1947 (define_insn "fma4i_fnmsub<mode>4256"
1948 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1953 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1954 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1955 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1956 UNSPEC_FMA4_INTRINSIC))]
1957 "TARGET_FMA4 && TARGET_FUSED_MADD"
1958 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1959 [(set_attr "type" "ssemuladd")
1960 (set_attr "mode" "<MODE>")])
;; Pattern tagged with UNSPEC_FMA4_INTRINSIC over the SSEMODEF2P modes;
;; emits vfmadd<ssemodesuffixf4> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
1962 (define_insn "fma4i_fmadd<mode>4"
1963 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1967 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1968 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1969 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1970 UNSPEC_FMA4_INTRINSIC))]
1971 "TARGET_FMA4 && TARGET_FUSED_MADD"
1972 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1973 [(set_attr "type" "ssemuladd")
1974 (set_attr "mode" "<MODE>")])
;; Pattern tagged with UNSPEC_FMA4_INTRINSIC over the SSEMODEF2P modes;
;; emits vfmsub<ssemodesuffixf4> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
1976 (define_insn "fma4i_fmsub<mode>4"
1977 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1981 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1982 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1983 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1984 UNSPEC_FMA4_INTRINSIC))]
1985 "TARGET_FMA4 && TARGET_FUSED_MADD"
1986 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1987 [(set_attr "type" "ssemuladd")
1988 (set_attr "mode" "<MODE>")])
;; Pattern tagged with UNSPEC_FMA4_INTRINSIC over the SSEMODEF2P modes;
;; emits vfnmadd<ssemodesuffixf4> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 is written first in the template, ahead of operands 1 and 2;
;; the assembler output still prints operands in 0..3 (Intel) order.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
1990 (define_insn "fma4i_fnmadd<mode>4"
1991 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1994 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1996 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1997 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
1998 UNSPEC_FMA4_INTRINSIC))]
1999 "TARGET_FMA4 && TARGET_FUSED_MADD"
2000 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2001 [(set_attr "type" "ssemuladd")
2002 (set_attr "mode" "<MODE>")])
;; Pattern tagged with UNSPEC_FMA4_INTRINSIC over the SSEMODEF2P modes;
;; emits vfnmsub<ssemodesuffixf4> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2004 (define_insn "fma4i_fnmsub<mode>4"
2005 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2010 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2011 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2012 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2013 UNSPEC_FMA4_INTRINSIC))]
2014 "TARGET_FMA4 && TARGET_FUSED_MADD"
2015 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2016 [(set_attr "type" "ssemuladd")
2017 (set_attr "mode" "<MODE>")])
2019 ;; For the scalar operations, use operand1 for the upper words that aren't
2020 ;; modified, so restrict the forms that are accepted.
;; vec_merge form tagged with UNSPEC_FMA4_INTRINSIC over SSEMODEF2P;
;; emits vfmadd<ssemodesuffixf2s> under TARGET_FMA4 && TARGET_FUSED_MADD
;; and reports its "mode" attribute as <ssescalarmode>.
;; NOTE(review): the vec_merge expression is not fully visible here
;; (parentheses do not balance) -- verify against upstream GCC sources.
2021 (define_insn "fma4i_vmfmadd<mode>4"
2022 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2024 [(vec_merge:SSEMODEF2P
2027 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2028 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2029 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2032 UNSPEC_FMA4_INTRINSIC))]
2033 "TARGET_FMA4 && TARGET_FUSED_MADD"
2034 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2035 [(set_attr "type" "ssemuladd")
2036 (set_attr "mode" "<ssescalarmode>")])
;; vec_merge form tagged with UNSPEC_FMA4_INTRINSIC over SSEMODEF2P;
;; emits vfmsub<ssemodesuffixf2s> under TARGET_FMA4 && TARGET_FUSED_MADD
;; and reports its "mode" attribute as <ssescalarmode>.
;; NOTE(review): the vec_merge expression is not fully visible here
;; (parentheses do not balance) -- verify against upstream GCC sources.
2038 (define_insn "fma4i_vmfmsub<mode>4"
2039 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2041 [(vec_merge:SSEMODEF2P
2044 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2045 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2046 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2049 UNSPEC_FMA4_INTRINSIC))]
2050 "TARGET_FMA4 && TARGET_FUSED_MADD"
2051 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2052 [(set_attr "type" "ssemuladd")
2053 (set_attr "mode" "<ssescalarmode>")])
;; vec_merge form tagged with UNSPEC_FMA4_INTRINSIC over SSEMODEF2P;
;; emits vfnmadd<ssemodesuffixf2s> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 is written first in the template, ahead of operands 1 and 2.
;; NOTE(review): the vec_merge expression is not fully visible here
;; (parentheses do not balance) -- verify against upstream GCC sources.
2055 (define_insn "fma4i_vmfnmadd<mode>4"
2056 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2058 [(vec_merge:SSEMODEF2P
2060 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2062 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2063 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2066 UNSPEC_FMA4_INTRINSIC))]
2067 "TARGET_FMA4 && TARGET_FUSED_MADD"
2068 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2069 [(set_attr "type" "ssemuladd")
2070 (set_attr "mode" "<ssescalarmode>")])
;; vec_merge form tagged with UNSPEC_FMA4_INTRINSIC over SSEMODEF2P;
;; emits vfnmsub<ssemodesuffixf2s> under TARGET_FMA4 && TARGET_FUSED_MADD
;; and reports its "mode" attribute as <ssescalarmode>.
;; NOTE(review): the vec_merge expression is not fully visible here
;; (parentheses do not balance) -- verify against upstream GCC sources.
2072 (define_insn "fma4i_vmfnmsub<mode>4"
2073 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2075 [(vec_merge:SSEMODEF2P
2079 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2080 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2081 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2084 UNSPEC_FMA4_INTRINSIC))]
2085 "TARGET_FMA4 && TARGET_FUSED_MADD"
2086 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2087 [(set_attr "type" "ssemuladd")
2088 (set_attr "mode" "<ssescalarmode>")])
2090 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2092 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2094 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V8SF pattern emitting vfmaddsubps under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2096 (define_insn "fma4_fmaddsubv8sf4"
2097 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2101 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2102 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2103 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2110 "TARGET_FMA4 && TARGET_FUSED_MADD"
2111 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2112 [(set_attr "type" "ssemuladd")
2113 (set_attr "mode" "V8SF")])
;; V4DF pattern emitting vfmaddsubpd under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2115 (define_insn "fma4_fmaddsubv4df4"
2116 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2120 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2121 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2122 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2129 "TARGET_FMA4 && TARGET_FUSED_MADD"
2130 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2131 [(set_attr "type" "ssemuladd")
2132 (set_attr "mode" "V4DF")])
;; V4SF pattern emitting vfmaddsubps under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2134 (define_insn "fma4_fmaddsubv4sf4"
2135 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2139 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2140 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2141 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2148 "TARGET_FMA4 && TARGET_FUSED_MADD"
2149 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2150 [(set_attr "type" "ssemuladd")
2151 (set_attr "mode" "V4SF")])
;; V2DF pattern emitting vfmaddsubpd under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2153 (define_insn "fma4_fmaddsubv2df4"
2154 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2158 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2159 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2160 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2167 "TARGET_FMA4 && TARGET_FUSED_MADD"
2168 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2169 [(set_attr "type" "ssemuladd")
2170 (set_attr "mode" "V2DF")])
;; V8SF pattern emitting vfmsubaddps under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2172 (define_insn "fma4_fmsubaddv8sf4"
2173 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2177 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2178 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2179 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2186 "TARGET_FMA4 && TARGET_FUSED_MADD"
2187 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2188 [(set_attr "type" "ssemuladd")
2189 (set_attr "mode" "V8SF")])
;; V4DF pattern emitting vfmsubaddpd under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2191 (define_insn "fma4_fmsubaddv4df4"
2192 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2196 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2197 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2198 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2205 "TARGET_FMA4 && TARGET_FUSED_MADD"
2206 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2207 [(set_attr "type" "ssemuladd")
2208 (set_attr "mode" "V4DF")])
;; V4SF pattern emitting vfmsubaddps under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2210 (define_insn "fma4_fmsubaddv4sf4"
2211 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2215 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2216 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2217 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2224 "TARGET_FMA4 && TARGET_FUSED_MADD"
2225 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2226 [(set_attr "type" "ssemuladd")
2227 (set_attr "mode" "V4SF")])
;; V2DF pattern emitting vfmsubaddpd under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 may be memory in alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2229 (define_insn "fma4_fmsubaddv2df4"
2230 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2234 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2235 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2236 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2243 "TARGET_FMA4 && TARGET_FUSED_MADD"
2244 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2245 [(set_attr "type" "ssemuladd")
2246 (set_attr "mode" "V2DF")])
;; V8SF pattern tagged with UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps
;; under TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 3 may be memory in
;; alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2248 (define_insn "fma4i_fmaddsubv8sf4"
2249 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2254 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2255 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2256 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2263 UNSPEC_FMA4_INTRINSIC))]
2264 "TARGET_FMA4 && TARGET_FUSED_MADD"
2265 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2266 [(set_attr "type" "ssemuladd")
2267 (set_attr "mode" "V8SF")])
;; V4DF pattern tagged with UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd
;; under TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 3 may be memory in
;; alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2269 (define_insn "fma4i_fmaddsubv4df4"
2270 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2275 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2276 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2277 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2284 UNSPEC_FMA4_INTRINSIC))]
2285 "TARGET_FMA4 && TARGET_FUSED_MADD"
2286 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2287 [(set_attr "type" "ssemuladd")
2288 (set_attr "mode" "V4DF")])
;; V4SF pattern tagged with UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps
;; under TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 3 may be memory in
;; alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2290 (define_insn "fma4i_fmaddsubv4sf4"
2291 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2296 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2297 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2298 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2305 UNSPEC_FMA4_INTRINSIC))]
2306 "TARGET_FMA4 && TARGET_FUSED_MADD"
2307 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2308 [(set_attr "type" "ssemuladd")
2309 (set_attr "mode" "V4SF")])
;; V2DF pattern tagged with UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd
;; under TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 3 may be memory in
;; alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2311 (define_insn "fma4i_fmaddsubv2df4"
2312 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2317 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2318 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2319 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2326 UNSPEC_FMA4_INTRINSIC))]
2327 "TARGET_FMA4 && TARGET_FUSED_MADD"
2328 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2329 [(set_attr "type" "ssemuladd")
2330 (set_attr "mode" "V2DF")])
;; V8SF pattern tagged with UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps
;; under TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 3 may be memory in
;; alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2332 (define_insn "fma4i_fmsubaddv8sf4"
2333 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2338 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2339 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2340 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2347 UNSPEC_FMA4_INTRINSIC))]
2348 "TARGET_FMA4 && TARGET_FUSED_MADD"
2349 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2350 [(set_attr "type" "ssemuladd")
2351 (set_attr "mode" "V8SF")])
;; V4DF pattern tagged with UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd
;; under TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 3 may be memory in
;; alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2353 (define_insn "fma4i_fmsubaddv4df4"
2354 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2359 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2360 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2361 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2368 UNSPEC_FMA4_INTRINSIC))]
2369 "TARGET_FMA4 && TARGET_FUSED_MADD"
2370 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2371 [(set_attr "type" "ssemuladd")
2372 (set_attr "mode" "V4DF")])
;; V4SF pattern tagged with UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps
;; under TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 3 may be memory in
;; alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2374 (define_insn "fma4i_fmsubaddv4sf4"
2375 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2380 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2381 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2382 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2389 UNSPEC_FMA4_INTRINSIC))]
2390 "TARGET_FMA4 && TARGET_FUSED_MADD"
2391 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2392 [(set_attr "type" "ssemuladd")
2393 (set_attr "mode" "V4SF")])
;; V2DF pattern tagged with UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd
;; under TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 3 may be memory in
;; alternative 0, operand 2 in alternative 1.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2395 (define_insn "fma4i_fmsubaddv2df4"
2396 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2401 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2402 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2403 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2410 UNSPEC_FMA4_INTRINSIC))]
2411 "TARGET_FMA4 && TARGET_FUSED_MADD"
2412 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2413 [(set_attr "type" "ssemuladd")
2414 (set_attr "mode" "V2DF")])
2416 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2418 ;; Parallel single-precision floating point conversion operations
2420 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits cvtpi2ps: V2SI operand 2 (MMX register or memory, "ym") is
;; converted with float:V2SF; operand 1 is tied to the destination ("0").
;; NOTE(review): the RTL template is not closed and no condition string
;; appears before the output template -- verify against upstream.
2422 (define_insn "sse_cvtpi2ps"
2423 [(set (match_operand:V4SF 0 "register_operand" "=x")
2426 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2427 (match_operand:V4SF 1 "register_operand" "0")
2430 "cvtpi2ps\t{%2, %0|%0, %2}"
2431 [(set_attr "type" "ssecvt")
2432 (set_attr "mode" "V4SF")])
;; Emits cvtps2pi: a V2SI result in an MMX register ("=y") taken from
;; elements 0 and 1 of an unspec:V4SI of V4SF operand 1.
;; NOTE(review): the unspec code and the selecting expression are not
;; visible, and no condition string is present -- verify upstream.
2434 (define_insn "sse_cvtps2pi"
2435 [(set (match_operand:V2SI 0 "register_operand" "=y")
2437 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2439 (parallel [(const_int 0) (const_int 1)])))]
2441 "cvtps2pi\t{%1, %0|%0, %1}"
2442 [(set_attr "type" "ssecvt")
2443 (set_attr "unit" "mmx")
2444 (set_attr "mode" "DI")])
;; Emits cvttps2pi: a V2SI result in an MMX register ("=y") taken from
;; elements 0 and 1 of fix:V4SI applied to V4SF operand 1.
;; NOTE(review): the RTL template does not balance its parentheses and
;; no condition string is present -- verify against upstream.
2446 (define_insn "sse_cvttps2pi"
2447 [(set (match_operand:V2SI 0 "register_operand" "=y")
2449 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2450 (parallel [(const_int 0) (const_int 1)])))]
2452 "cvttps2pi\t{%1, %0|%0, %1}"
2453 [(set_attr "type" "ssecvt")
2454 (set_attr "unit" "mmx")
2455 (set_attr "prefix_rep" "0")
2456 (set_attr "mode" "SF")])
;; Three-operand VEX form: emits vcvtsi2ss with SI operand 2 (register or
;; memory) converted by float:SF and V4SF operand 1 as a separate source.
;; NOTE(review): the RTL template is not closed and no condition string
;; appears before the output template -- verify against upstream.
2458 (define_insn "*avx_cvtsi2ss"
2459 [(set (match_operand:V4SF 0 "register_operand" "=x")
2462 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2463 (match_operand:V4SF 1 "register_operand" "x")
2466 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2467 [(set_attr "type" "sseicvt")
2468 (set_attr "prefix" "vex")
2469 (set_attr "mode" "SF")])
;; Emits cvtsi2ss: SI operand 2 (register in alternative 0, memory in
;; alternative 1) converted by float:SF; operand 1 is tied to operand 0.
;; The decode attributes give one value per alternative.
;; NOTE(review): the RTL template is not closed and no condition string
;; appears before the output template -- verify against upstream.
2471 (define_insn "sse_cvtsi2ss"
2472 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2475 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2476 (match_operand:V4SF 1 "register_operand" "0,0")
2479 "cvtsi2ss\t{%2, %0|%0, %2}"
2480 [(set_attr "type" "sseicvt")
2481 (set_attr "athlon_decode" "vector,double")
2482 (set_attr "amdfam10_decode" "vector,double")
2483 (set_attr "mode" "SF")])
;; 64-bit VEX form: emits vcvtsi2ssq with DI operand 2 converted by
;; float:SF; enabled only when TARGET_AVX && TARGET_64BIT.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2485 (define_insn "*avx_cvtsi2ssq"
2486 [(set (match_operand:V4SF 0 "register_operand" "=x")
2489 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2490 (match_operand:V4SF 1 "register_operand" "x")
2492 "TARGET_AVX && TARGET_64BIT"
2493 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2494 [(set_attr "type" "sseicvt")
2495 (set_attr "length_vex" "4")
2496 (set_attr "prefix" "vex")
2497 (set_attr "mode" "SF")])
;; Emits cvtsi2ssq with DI operand 2 converted by float:SF; operand 1 is
;; tied to operand 0.  Enabled only when TARGET_SSE && TARGET_64BIT.
;; NOTE(review): operand 2 uses constraints "r,rm" here whereas the
;; sibling SI pattern uses "r,m" -- confirm the overlap is intended.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2499 (define_insn "sse_cvtsi2ssq"
2500 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2503 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2504 (match_operand:V4SF 1 "register_operand" "0,0")
2506 "TARGET_SSE && TARGET_64BIT"
2507 "cvtsi2ssq\t{%2, %0|%0, %2}"
2508 [(set_attr "type" "sseicvt")
2509 (set_attr "prefix_rex" "1")
2510 (set_attr "athlon_decode" "vector,double")
2511 (set_attr "amdfam10_decode" "vector,double")
2512 (set_attr "mode" "SF")])
;; Emits %vcvtss2si: SI result in a general register from element 0 of
;; V4SF operand 1, tagged UNSPEC_FIX_NOTRUNC.
;; NOTE(review): the RTL template does not balance its parentheses and
;; no condition string is present -- verify against upstream.
2514 (define_insn "sse_cvtss2si"
2515 [(set (match_operand:SI 0 "register_operand" "=r,r")
2518 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2519 (parallel [(const_int 0)]))]
2520 UNSPEC_FIX_NOTRUNC))]
2522 "%vcvtss2si\t{%1, %0|%0, %1}"
2523 [(set_attr "type" "sseicvt")
2524 (set_attr "athlon_decode" "double,vector")
2525 (set_attr "prefix_rep" "1")
2526 (set_attr "prefix" "maybe_vex")
2527 (set_attr "mode" "SI")])
;; Scalar-operand variant: SI result from an SF operand (SSE register or
;; memory) via UNSPEC_FIX_NOTRUNC; emits %vcvtss2si.
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2529 (define_insn "sse_cvtss2si_2"
2530 [(set (match_operand:SI 0 "register_operand" "=r,r")
2531 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2532 UNSPEC_FIX_NOTRUNC))]
2534 "%vcvtss2si\t{%1, %0|%0, %1}"
2535 [(set_attr "type" "sseicvt")
2536 (set_attr "athlon_decode" "double,vector")
2537 (set_attr "amdfam10_decode" "double,double")
2538 (set_attr "prefix_rep" "1")
2539 (set_attr "prefix" "maybe_vex")
2540 (set_attr "mode" "SI")])
;; 64-bit variant: DI result from element 0 of V4SF operand 1, tagged
;; UNSPEC_FIX_NOTRUNC; emits %vcvtss2si{q} when TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2542 (define_insn "sse_cvtss2siq"
2543 [(set (match_operand:DI 0 "register_operand" "=r,r")
2546 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2547 (parallel [(const_int 0)]))]
2548 UNSPEC_FIX_NOTRUNC))]
2549 "TARGET_SSE && TARGET_64BIT"
2550 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2551 [(set_attr "type" "sseicvt")
2552 (set_attr "athlon_decode" "double,vector")
2553 (set_attr "prefix_rep" "1")
2554 (set_attr "prefix" "maybe_vex")
2555 (set_attr "mode" "DI")])
;; Scalar-operand 64-bit variant: DI result in a general register from an
;; SF operand (SSE register in alternative 0, memory in alternative 1)
;; via UNSPEC_FIX_NOTRUNC.  Emits %vcvtss2si{q}; enabled only when
;; TARGET_SSE && TARGET_64BIT.  Decode attributes are per alternative.
2557 (define_insn "sse_cvtss2siq_2"
2558 [(set (match_operand:DI 0 "register_operand" "=r,r")
2559 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2560 UNSPEC_FIX_NOTRUNC))]
2561 "TARGET_SSE && TARGET_64BIT"
2562 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2563 [(set_attr "type" "sseicvt")
2564 (set_attr "athlon_decode" "double,vector")
2565 (set_attr "amdfam10_decode" "double,double")
2566 (set_attr "prefix_rep" "1")
2567 (set_attr "prefix" "maybe_vex")
2568 (set_attr "mode" "DI")])
;; Emits %vcvttss2si: SI result in a general register derived from
;; element 0 of V4SF operand 1 (SSE register or memory).
;; NOTE(review): the RTL template does not balance its parentheses and
;; no condition string is present -- verify against upstream.
2570 (define_insn "sse_cvttss2si"
2571 [(set (match_operand:SI 0 "register_operand" "=r,r")
2574 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2575 (parallel [(const_int 0)]))))]
2577 "%vcvttss2si\t{%1, %0|%0, %1}"
2578 [(set_attr "type" "sseicvt")
2579 (set_attr "athlon_decode" "double,vector")
2580 (set_attr "amdfam10_decode" "double,double")
2581 (set_attr "prefix_rep" "1")
2582 (set_attr "prefix" "maybe_vex")
2583 (set_attr "mode" "SI")])
;; 64-bit variant: emits %vcvttss2si{q} with a DI result derived from
;; element 0 of V4SF operand 1; enabled when TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2585 (define_insn "sse_cvttss2siq"
2586 [(set (match_operand:DI 0 "register_operand" "=r,r")
2589 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2590 (parallel [(const_int 0)]))))]
2591 "TARGET_SSE && TARGET_64BIT"
2592 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2593 [(set_attr "type" "sseicvt")
2594 (set_attr "athlon_decode" "double,vector")
2595 (set_attr "amdfam10_decode" "double,double")
2596 (set_attr "prefix_rep" "1")
2597 (set_attr "prefix" "maybe_vex")
2598 (set_attr "mode" "DI")])
;; Emits vcvtdq2ps: float conversion into an AVXMODEDCVTDQ2PS-mode
;; register from an <avxcvtvecmode> operand (SSE register or memory).
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2600 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2601 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2602 (float:AVXMODEDCVTDQ2PS
2603 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2605 "vcvtdq2ps\t{%1, %0|%0, %1}"
2606 [(set_attr "type" "ssecvt")
2607 (set_attr "prefix" "vex")
2608 (set_attr "mode" "<avxvecmode>")])
;; Emits cvtdq2ps: float:V4SF of a V4SI operand (SSE register or memory).
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2610 (define_insn "sse2_cvtdq2ps"
2611 [(set (match_operand:V4SF 0 "register_operand" "=x")
2612 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2614 "cvtdq2ps\t{%1, %0|%0, %1}"
2615 [(set_attr "type" "ssecvt")
2616 (set_attr "mode" "V4SF")])
;; Expander whose visible steps are: float:V4SF of V4SI operand 1, an
;; lt:V4SF of operand 5 against operand 3, an and:V4SF of operand 6 with
;; operand 4, and a final plus:V4SF of operands 5 and 7 into operand 0.
;; The preparation code computes 1.0 scaled by 2^32 with real_ldexp,
;; turns it into an SFmode constant, forces a V4SF zero into operand 3
;; and a V4SF constant built from that value into operand 4, then
;; allocates fresh V4SF pseudo registers for operands 5 through 7.
;; NOTE(review): the set destinations of the first three steps, the
;; condition string, the braces and the declarations of `x' and `i' are
;; not visible here -- verify against the upstream GCC sources.
2618 (define_expand "sse2_cvtudq2ps"
2620 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2622 (lt:V4SF (match_dup 5) (match_dup 3)))
2624 (and:V4SF (match_dup 6) (match_dup 4)))
2625 (set (match_operand:V4SF 0 "register_operand" "")
2626 (plus:V4SF (match_dup 5) (match_dup 7)))]
2629 REAL_VALUE_TYPE TWO32r;
2633 real_ldexp (&TWO32r, &dconst1, 32);
2634 x = const_double_from_real_value (TWO32r, SFmode);
2636 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2637 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2639 for (i = 5; i < 8; i++)
2640 operands[i] = gen_reg_rtx (V4SFmode);
;; Emits vcvtps2dq: an AVXMODEDCVTPS2DQ-mode result from an
;; <avxcvtvecmode> operand (SSE register or memory) via UNSPEC_FIX_NOTRUNC.
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2643 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2644 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2645 (unspec:AVXMODEDCVTPS2DQ
2646 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2647 UNSPEC_FIX_NOTRUNC))]
2649 "vcvtps2dq\t{%1, %0|%0, %1}"
2650 [(set_attr "type" "ssecvt")
2651 (set_attr "prefix" "vex")
2652 (set_attr "mode" "<avxvecmode>")])
;; Emits cvtps2dq: V4SI result from a V4SF operand (SSE register or
;; memory) via UNSPEC_FIX_NOTRUNC; sets prefix_data16 to 1.
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2654 (define_insn "sse2_cvtps2dq"
2655 [(set (match_operand:V4SI 0 "register_operand" "=x")
2656 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2657 UNSPEC_FIX_NOTRUNC))]
2659 "cvtps2dq\t{%1, %0|%0, %1}"
2660 [(set_attr "type" "ssecvt")
2661 (set_attr "prefix_data16" "1")
2662 (set_attr "mode" "TI")])
;; Emits vcvttps2dq: fix conversion into an AVXMODEDCVTPS2DQ-mode
;; register from an <avxcvtvecmode> operand (SSE register or memory).
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2664 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2665 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2666 (fix:AVXMODEDCVTPS2DQ
2667 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2669 "vcvttps2dq\t{%1, %0|%0, %1}"
2670 [(set_attr "type" "ssecvt")
2671 (set_attr "prefix" "vex")
2672 (set_attr "mode" "<avxvecmode>")])
;; Emits cvttps2dq: fix:V4SI of a V4SF operand (SSE register or memory);
;; sets prefix_rep to 1 and prefix_data16 to 0.
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2674 (define_insn "sse2_cvttps2dq"
2675 [(set (match_operand:V4SI 0 "register_operand" "=x")
2676 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2678 "cvttps2dq\t{%1, %0|%0, %1}"
2679 [(set_attr "type" "ssecvt")
2680 (set_attr "prefix_rep" "1")
2681 (set_attr "prefix_data16" "0")
2682 (set_attr "mode" "TI")])
2684 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2686 ;; Parallel double-precision floating point conversion operations
2688 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits cvtpi2pd: float:V2DF of a V2SI operand that is an MMX register
;; in alternative 0 and memory in alternative 1; the "unit" and
;; "prefix_data16" attributes are set only for the MMX-register case.
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2690 (define_insn "sse2_cvtpi2pd"
2691 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2692 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2694 "cvtpi2pd\t{%1, %0|%0, %1}"
2695 [(set_attr "type" "ssecvt")
2696 (set_attr "unit" "mmx,*")
2697 (set_attr "prefix_data16" "1,*")
2698 (set_attr "mode" "V2DF")])
;; Emits cvtpd2pi: V2SI result in an MMX register ("=y") from a V2DF
;; operand (SSE register or memory) via UNSPEC_FIX_NOTRUNC.
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2700 (define_insn "sse2_cvtpd2pi"
2701 [(set (match_operand:V2SI 0 "register_operand" "=y")
2702 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2703 UNSPEC_FIX_NOTRUNC))]
2705 "cvtpd2pi\t{%1, %0|%0, %1}"
2706 [(set_attr "type" "ssecvt")
2707 (set_attr "unit" "mmx")
2708 (set_attr "prefix_data16" "1")
2709 (set_attr "mode" "DI")])
;; Emits cvttpd2pi: fix:V2SI of a V2DF operand (SSE register or memory)
;; into an MMX register ("=y").
;; NOTE(review): "mode" is TI here while sse2_cvtpd2pi uses DI -- confirm.
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2711 (define_insn "sse2_cvttpd2pi"
2712 [(set (match_operand:V2SI 0 "register_operand" "=y")
2713 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2715 "cvttpd2pi\t{%1, %0|%0, %1}"
2716 [(set_attr "type" "ssecvt")
2717 (set_attr "unit" "mmx")
2718 (set_attr "prefix_data16" "1")
2719 (set_attr "mode" "TI")])
;; Three-operand VEX form: emits vcvtsi2sd with SI operand 2 (register or
;; memory) converted by float:DF and V2DF operand 1 as a separate source.
;; NOTE(review): the RTL template is not closed and no condition string
;; appears before the output template -- verify against upstream.
2721 (define_insn "*avx_cvtsi2sd"
2722 [(set (match_operand:V2DF 0 "register_operand" "=x")
2725 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2726 (match_operand:V2DF 1 "register_operand" "x")
2729 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2730 [(set_attr "type" "sseicvt")
2731 (set_attr "prefix" "vex")
2732 (set_attr "mode" "DF")])
;; Emits cvtsi2sd: SI operand 2 (register in alternative 0, memory in
;; alternative 1) converted by float:DF; operand 1 is tied to operand 0.
;; The decode attributes give one value per alternative.
;; NOTE(review): the RTL template is not closed and no condition string
;; appears before the output template -- verify against upstream.
2734 (define_insn "sse2_cvtsi2sd"
2735 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2738 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2739 (match_operand:V2DF 1 "register_operand" "0,0")
2742 "cvtsi2sd\t{%2, %0|%0, %2}"
2743 [(set_attr "type" "sseicvt")
2744 (set_attr "mode" "DF")
2745 (set_attr "athlon_decode" "double,direct")
2746 (set_attr "amdfam10_decode" "vector,double")])
;; 64-bit VEX form: emits vcvtsi2sdq with DI operand 2 converted by
;; float:DF; enabled only when TARGET_AVX && TARGET_64BIT.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2748 (define_insn "*avx_cvtsi2sdq"
2749 [(set (match_operand:V2DF 0 "register_operand" "=x")
2752 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2753 (match_operand:V2DF 1 "register_operand" "x")
2755 "TARGET_AVX && TARGET_64BIT"
2756 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2757 [(set_attr "type" "sseicvt")
2758 (set_attr "length_vex" "4")
2759 (set_attr "prefix" "vex")
2760 (set_attr "mode" "DF")])
;; Emits cvtsi2sdq with DI operand 2 (register or memory, one per
;; alternative) converted by float:DF; operand 1 is tied to operand 0.
;; Enabled only when TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): the RTL template is not closed before the condition
;; string; lines appear to be missing -- verify against upstream.
2762 (define_insn "sse2_cvtsi2sdq"
2763 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2766 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2767 (match_operand:V2DF 1 "register_operand" "0,0")
2769 "TARGET_SSE2 && TARGET_64BIT"
2770 "cvtsi2sdq\t{%2, %0|%0, %2}"
2771 [(set_attr "type" "sseicvt")
2772 (set_attr "prefix_rex" "1")
2773 (set_attr "mode" "DF")
2774 (set_attr "athlon_decode" "double,direct")
2775 (set_attr "amdfam10_decode" "vector,double")])
;; Emits %vcvtsd2si: SI result in a general register from element 0 of
;; V2DF operand 1, tagged UNSPEC_FIX_NOTRUNC.
;; NOTE(review): the RTL template does not balance its parentheses and
;; no condition string is present -- verify against upstream.
2777 (define_insn "sse2_cvtsd2si"
2778 [(set (match_operand:SI 0 "register_operand" "=r,r")
2781 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2782 (parallel [(const_int 0)]))]
2783 UNSPEC_FIX_NOTRUNC))]
2785 "%vcvtsd2si\t{%1, %0|%0, %1}"
2786 [(set_attr "type" "sseicvt")
2787 (set_attr "athlon_decode" "double,vector")
2788 (set_attr "prefix_rep" "1")
2789 (set_attr "prefix" "maybe_vex")
2790 (set_attr "mode" "SI")])
;; Scalar-operand variant: SI result from a DF operand (SSE register or
;; memory) via UNSPEC_FIX_NOTRUNC; emits %vcvtsd2si.
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2792 (define_insn "sse2_cvtsd2si_2"
2793 [(set (match_operand:SI 0 "register_operand" "=r,r")
2794 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2795 UNSPEC_FIX_NOTRUNC))]
2797 "%vcvtsd2si\t{%1, %0|%0, %1}"
2798 [(set_attr "type" "sseicvt")
2799 (set_attr "athlon_decode" "double,vector")
2800 (set_attr "amdfam10_decode" "double,double")
2801 (set_attr "prefix_rep" "1")
2802 (set_attr "prefix" "maybe_vex")
2803 (set_attr "mode" "SI")])
;; 64-bit variant: DI result from element 0 of V2DF operand 1, tagged
;; UNSPEC_FIX_NOTRUNC; emits %vcvtsd2siq when TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2805 (define_insn "sse2_cvtsd2siq"
2806 [(set (match_operand:DI 0 "register_operand" "=r,r")
2809 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2810 (parallel [(const_int 0)]))]
2811 UNSPEC_FIX_NOTRUNC))]
2812 "TARGET_SSE2 && TARGET_64BIT"
2813 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2814 [(set_attr "type" "sseicvt")
2815 (set_attr "athlon_decode" "double,vector")
2816 (set_attr "prefix_rep" "1")
2817 (set_attr "prefix" "maybe_vex")
2818 (set_attr "mode" "DI")])
;; Scalar-operand 64-bit variant: DI result in a general register from a
;; DF operand (SSE register in alternative 0, memory in alternative 1)
;; via UNSPEC_FIX_NOTRUNC.  Emits %vcvtsd2siq; enabled only when
;; TARGET_SSE2 && TARGET_64BIT.  Decode attributes are per alternative.
2820 (define_insn "sse2_cvtsd2siq_2"
2821 [(set (match_operand:DI 0 "register_operand" "=r,r")
2822 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2823 UNSPEC_FIX_NOTRUNC))]
2824 "TARGET_SSE2 && TARGET_64BIT"
2825 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2826 [(set_attr "type" "sseicvt")
2827 (set_attr "athlon_decode" "double,vector")
2828 (set_attr "amdfam10_decode" "double,double")
2829 (set_attr "prefix_rep" "1")
2830 (set_attr "prefix" "maybe_vex")
2831 (set_attr "mode" "DI")])
;; Emits %vcvttsd2si: SI result in a general register derived from
;; element 0 of V2DF operand 1 (SSE register or memory).
;; NOTE(review): the RTL template does not balance its parentheses and
;; no condition string is present -- verify against upstream.
2833 (define_insn "sse2_cvttsd2si"
2834 [(set (match_operand:SI 0 "register_operand" "=r,r")
2837 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2838 (parallel [(const_int 0)]))))]
2840 "%vcvttsd2si\t{%1, %0|%0, %1}"
2841 [(set_attr "type" "sseicvt")
2842 (set_attr "prefix_rep" "1")
2843 (set_attr "prefix" "maybe_vex")
2844 (set_attr "mode" "SI")
2845 (set_attr "athlon_decode" "double,vector")
2846 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit variant: emits %vcvttsd2siq with a DI result derived from
;; element 0 of V2DF operand 1; enabled when TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): the RTL template does not balance its parentheses;
;; lines appear to be missing -- verify against the upstream GCC sources.
2848 (define_insn "sse2_cvttsd2siq"
2849 [(set (match_operand:DI 0 "register_operand" "=r,r")
2852 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2853 (parallel [(const_int 0)]))))]
2854 "TARGET_SSE2 && TARGET_64BIT"
2855 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2856 [(set_attr "type" "sseicvt")
2857 (set_attr "prefix_rep" "1")
2858 (set_attr "prefix" "maybe_vex")
2859 (set_attr "mode" "DI")
2860 (set_attr "athlon_decode" "double,vector")
2861 (set_attr "amdfam10_decode" "double,double")])
;; Emits vcvtdq2pd: float:V4DF of a V4SI operand (SSE register or memory).
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2863 (define_insn "avx_cvtdq2pd256"
2864 [(set (match_operand:V4DF 0 "register_operand" "=x")
2865 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2867 "vcvtdq2pd\t{%1, %0|%0, %1}"
2868 [(set_attr "type" "ssecvt")
2869 (set_attr "prefix" "vex")
2870 (set_attr "mode" "V4DF")])
;; Emits %vcvtdq2pd: V2DF result derived from elements 0 and 1 of V4SI
;; operand 1 (SSE register or memory).
;; NOTE(review): the RTL template does not balance its parentheses and
;; no condition string is present -- verify against upstream.
2872 (define_insn "sse2_cvtdq2pd"
2873 [(set (match_operand:V2DF 0 "register_operand" "=x")
2876 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2877 (parallel [(const_int 0) (const_int 1)]))))]
2879 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2880 [(set_attr "type" "ssecvt")
2881 (set_attr "prefix" "maybe_vex")
2882 (set_attr "mode" "V2DF")])
;; Emits vcvtpd2dq{y}: V4SI result from a V4DF operand (SSE register or
;; memory) via UNSPEC_FIX_NOTRUNC.
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2884 (define_insn "avx_cvtpd2dq256"
2885 [(set (match_operand:V4SI 0 "register_operand" "=x")
2886 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2887 UNSPEC_FIX_NOTRUNC))]
2889 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2890 [(set_attr "type" "ssecvt")
2891 (set_attr "prefix" "vex")
2892 (set_attr "mode" "OI")])
;; Expander: V4SI operand 0 is set from an unspec:V2SI of V2DF operand 1;
;; the preparation statement supplies a V2SImode zero as operand 2.
;; NOTE(review): the rest of the template (where operand 2 is used, the
;; unspec code and the condition string) is not visible -- verify.
2894 (define_expand "sse2_cvtpd2dq"
2895 [(set (match_operand:V4SI 0 "register_operand" "")
2897 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2901 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvtpd2dq name: V4SI result built from an
;; unspec:V2SI of V2DF operand 1 and a V2SI operand 2 that must satisfy
;; const0_operand.  The output code returns "vcvtpd2dq{x}" under
;; TARGET_AVX and "cvtpd2dq" otherwise.
;; NOTE(review): the unspec code, the enclosing expression and the insn
;; condition string are not visible here -- verify against upstream.
2903 (define_insn "*sse2_cvtpd2dq"
2904 [(set (match_operand:V4SI 0 "register_operand" "=x")
2906 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2908 (match_operand:V2SI 2 "const0_operand" "")))]
2910 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2911 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2912 [(set_attr "type" "ssecvt")
2913 (set_attr "prefix_rep" "1")
2914 (set_attr "prefix_data16" "0")
2915 (set_attr "prefix" "maybe_vex")
2916 (set_attr "mode" "TI")
2917 (set_attr "amdfam10_decode" "double")])
;; Emits vcvttpd2dq{y}: fix:V4SI of a V4DF operand (SSE register or
;; memory).
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
2919 (define_insn "avx_cvttpd2dq256"
2920 [(set (match_operand:V4SI 0 "register_operand" "=x")
2921 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2923 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2924 [(set_attr "type" "ssecvt")
2925 (set_attr "prefix" "vex")
2926 (set_attr "mode" "OI")])
;; Expander: V4SI operand 0 is set from fix:V2SI of V2DF operand 1; the
;; preparation statement supplies a V2SImode zero as operand 2.
;; NOTE(review): the rest of the template (where operand 2 is used, and
;; the condition string) is not visible -- verify against upstream.
2928 (define_expand "sse2_cvttpd2dq"
2929 [(set (match_operand:V4SI 0 "register_operand" "")
2931 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2934 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvttpd2dq name: V4SI result built from fix:V2SI
;; of V2DF operand 1 and a V2SI operand 2 that must satisfy
;; const0_operand.  The output code returns "vcvttpd2dq{x}" under
;; TARGET_AVX and "cvttpd2dq" otherwise.
;; NOTE(review): the RTL template does not balance its parentheses and
;; no condition string is present -- verify against upstream.
2936 (define_insn "*sse2_cvttpd2dq"
2937 [(set (match_operand:V4SI 0 "register_operand" "=x")
2939 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2940 (match_operand:V2SI 2 "const0_operand" "")))]
2942 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2943 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2944 [(set_attr "type" "ssecvt")
2945 (set_attr "prefix" "maybe_vex")
2946 (set_attr "mode" "TI")
2947 (set_attr "amdfam10_decode" "double")])
;; Three-operand VEX form: emits vcvtsd2ss using float_truncate:V2SF of
;; V2DF operand 2 (SSE register or memory) and V4SF operand 1.
;; NOTE(review): the RTL template is not closed and no condition string
;; appears before the output template -- verify against upstream.
2949 (define_insn "*avx_cvtsd2ss"
2950 [(set (match_operand:V4SF 0 "register_operand" "=x")
2953 (float_truncate:V2SF
2954 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2955 (match_operand:V4SF 1 "register_operand" "x")
2958 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2959 [(set_attr "type" "ssecvt")
2960 (set_attr "prefix" "vex")
2961 (set_attr "mode" "SF")])
;; Emits cvtsd2ss using float_truncate:V2SF of V2DF operand 2 (register
;; in alternative 0, memory in alternative 1); operand 1 is tied to
;; operand 0.  The decode attributes give one value per alternative.
;; NOTE(review): the RTL template is not closed and no condition string
;; appears before the output template -- verify against upstream.
2963 (define_insn "sse2_cvtsd2ss"
2964 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2967 (float_truncate:V2SF
2968 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2969 (match_operand:V4SF 1 "register_operand" "0,0")
2972 "cvtsd2ss\t{%2, %0|%0, %2}"
2973 [(set_attr "type" "ssecvt")
2974 (set_attr "athlon_decode" "vector,double")
2975 (set_attr "amdfam10_decode" "vector,double")
2976 (set_attr "mode" "SF")])
;; Three-operand VEX form: emits vcvtss2sd using elements 0 and 1 of V4SF
;; operand 2 (SSE register or memory) and V2DF operand 1.
;; NOTE(review): the RTL template does not balance its parentheses and
;; no condition string is present -- verify against upstream.
2978 (define_insn "*avx_cvtss2sd"
2979 [(set (match_operand:V2DF 0 "register_operand" "=x")
2983 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2984 (parallel [(const_int 0) (const_int 1)])))
2985 (match_operand:V2DF 1 "register_operand" "x")
2988 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2989 [(set_attr "type" "ssecvt")
2990 (set_attr "prefix" "vex")
2991 (set_attr "mode" "DF")])
;; Emits cvtss2sd using elements 0 and 1 of V4SF operand 2 (register in
;; alternative 0, memory in alternative 1); operand 1 is tied to
;; operand 0.
;; NOTE(review): the RTL template does not balance its parentheses and
;; no condition string is present -- verify against upstream.
2993 (define_insn "sse2_cvtss2sd"
2994 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2998 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2999 (parallel [(const_int 0) (const_int 1)])))
3000 (match_operand:V2DF 1 "register_operand" "0,0")
3003 "cvtss2sd\t{%2, %0|%0, %2}"
3004 [(set_attr "type" "ssecvt")
3005 (set_attr "amdfam10_decode" "vector,double")
3006 (set_attr "mode" "DF")])
;; Emits vcvtpd2ps{y}: float_truncate:V4SF of a V4DF operand (SSE
;; register or memory).
;; NOTE(review): no insn condition string appears between the RTL
;; template and the output template -- verify against upstream.
3008 (define_insn "avx_cvtpd2ps256"
3009 [(set (match_operand:V4SF 0 "register_operand" "=x")
3010 (float_truncate:V4SF
3011 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3013 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3014 [(set_attr "type" "ssecvt")
3015 (set_attr "prefix" "vex")
3016 (set_attr "mode" "V4SF")])
;; Expander: V4SF operand 0 is set from float_truncate:V2SF of V2DF
;; operand 1; the preparation statement supplies a V2SFmode zero as
;; operand 2.
;; NOTE(review): the rest of the template (where operand 2 is used, and
;; the condition string) is not visible -- verify against upstream.
3018 (define_expand "sse2_cvtpd2ps"
3019 [(set (match_operand:V4SF 0 "register_operand" "")
3021 (float_truncate:V2SF
3022 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3025 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for the V2DF -> V4SF packed truncation.  Operand 2 must be
;; the V2SF zero constant (const0_operand).  The C output statement picks
;; the VEX mnemonic vcvtpd2ps{x} when TARGET_AVX is set and plain cvtpd2ps
;; otherwise, which is why the prefix attribute is "maybe_vex".
3027 (define_insn "*sse2_cvtpd2ps"
3028 [(set (match_operand:V4SF 0 "register_operand" "=x")
3030 (float_truncate:V2SF
3031 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3032 (match_operand:V2SF 2 "const0_operand" "")))]
3034 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3035 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3036 [(set_attr "type" "ssecvt")
3037 (set_attr "prefix_data16" "1")
3038 (set_attr "prefix" "maybe_vex")
3039 (set_attr "mode" "V4SF")
3040 (set_attr "amdfam10_decode" "double")])
;; Convert a V4SF (register or memory) into a V4DF register with vcvtps2pd.
3042 (define_insn "avx_cvtps2pd256"
3043 [(set (match_operand:V4DF 0 "register_operand" "=x")
3045 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3047 "vcvtps2pd\t{%1, %0|%0, %1}"
3048 [(set_attr "type" "ssecvt")
3049 (set_attr "prefix" "vex")
3050 (set_attr "mode" "V4DF")])
;; Convert elements 0 and 1 of a V4SF (register or memory) into a V2DF
;; register.  The template uses the %v prefix together with the "maybe_vex"
;; prefix attribute, so one pattern serves both encodings.
3052 (define_insn "sse2_cvtps2pd"
3053 [(set (match_operand:V2DF 0 "register_operand" "=x")
3056 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3057 (parallel [(const_int 0) (const_int 1)]))))]
3059 "%vcvtps2pd\t{%1, %0|%0, %1}"
3060 [(set_attr "type" "ssecvt")
3061 (set_attr "prefix" "maybe_vex")
3062 (set_attr "mode" "V2DF")
3063 (set_attr "prefix_data16" "0")
3064 (set_attr "amdfam10_decode" "direct")])
;; Expander taking a V4SF input (operand 1) and producing a V2DF result
;; (operand 0).  The preparation code allocates operand 2 as a fresh V4SF
;; pseudo; the final set builds the result from a selection of elements
;; 0 and 1.
3066 (define_expand "vec_unpacks_hi_v4sf"
3071 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3072 (parallel [(const_int 6)
3076 (set (match_operand:V2DF 0 "register_operand" "")
3080 (parallel [(const_int 0) (const_int 1)]))))]
3083 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF result (operand 0) from a selection of
;; elements 0 and 1 of the V4SF operand 1.
3086 (define_expand "vec_unpacks_lo_v4sf"
3087 [(set (match_operand:V2DF 0 "register_operand" "")
3090 (match_operand:V4SF 1 "nonimmediate_operand" "")
3091 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF: unpack with gen_vec_unpacks_hi_v8hi into a V4SI temporary,
;; then convert that temporary to float with gen_sse2_cvtdq2ps.
3094 (define_expand "vec_unpacks_float_hi_v8hi"
3095 [(match_operand:V4SF 0 "register_operand" "")
3096 (match_operand:V8HI 1 "register_operand" "")]
3099 rtx tmp = gen_reg_rtx (V4SImode);
3101 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3102 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack with gen_vec_unpacks_lo_v8hi into a V4SI temporary,
;; then convert that temporary to float with gen_sse2_cvtdq2ps.
3106 (define_expand "vec_unpacks_float_lo_v8hi"
3107 [(match_operand:V4SF 0 "register_operand" "")
3108 (match_operand:V8HI 1 "register_operand" "")]
3111 rtx tmp = gen_reg_rtx (V4SImode);
3113 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3114 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack with gen_vec_unpacku_hi_v8hi into a V4SI temporary,
;; then convert that temporary to float with gen_sse2_cvtdq2ps.
3118 (define_expand "vec_unpacku_float_hi_v8hi"
3119 [(match_operand:V4SF 0 "register_operand" "")
3120 (match_operand:V8HI 1 "register_operand" "")]
3123 rtx tmp = gen_reg_rtx (V4SImode);
3125 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3126 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack with gen_vec_unpacku_lo_v8hi into a V4SI temporary,
;; then convert that temporary to float with gen_sse2_cvtdq2ps.
3130 (define_expand "vec_unpacku_float_lo_v8hi"
3131 [(match_operand:V4SF 0 "register_operand" "")
3132 (match_operand:V8HI 1 "register_operand" "")]
3135 rtx tmp = gen_reg_rtx (V4SImode);
3137 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3138 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V4SI -> V2DF expander.  Operand 2 is a V4SI temporary allocated by the
;; preparation statement; the final set produces the V2DF result from a
;; selection of elements 0 and 1.
3142 (define_expand "vec_unpacks_float_hi_v4si"
3145 (match_operand:V4SI 1 "nonimmediate_operand" "")
3146 (parallel [(const_int 2)
3150 (set (match_operand:V2DF 0 "register_operand" "")
3154 (parallel [(const_int 0) (const_int 1)]))))]
3156 "operands[2] = gen_reg_rtx (V4SImode);")
;; V4SI -> V2DF expander: the result is built from a selection of elements
;; 0 and 1 of operand 1.
3158 (define_expand "vec_unpacks_float_lo_v4si"
3159 [(set (match_operand:V2DF 0 "register_operand" "")
3162 (match_operand:V4SI 1 "nonimmediate_operand" "")
3163 (parallel [(const_int 0) (const_int 1)]))))]
;; V4SI -> V2DF expander with a fix-up sequence.
;; Operands set up by the preparation code:
;;   3    - V2DF zero, forced into a register
;;   4    - V2DF constant built from 2^32 (real_ldexp of dconst1 by 32)
;;   5    - V4SI temporary
;;   6..8 - V2DF temporaries
;; Visible steps: operand 6 is compared (lt) against the zero vector,
;; operand 7 is ANDed with the 2^32 vector, and the result is operand 6
;; plus operand 8.
;; NOTE(review): presumably operand 7 holds the lt mask and operand 8 the
;; masked 2^32, so 2^32 is added back to lanes that converted negative --
;; confirm against the full pattern.
3166 (define_expand "vec_unpacku_float_hi_v4si"
3169 (match_operand:V4SI 1 "nonimmediate_operand" "")
3170 (parallel [(const_int 2)
3178 (parallel [(const_int 0) (const_int 1)]))))
3180 (lt:V2DF (match_dup 6) (match_dup 3)))
3182 (and:V2DF (match_dup 7) (match_dup 4)))
3183 (set (match_operand:V2DF 0 "register_operand" "")
3184 (plus:V2DF (match_dup 6) (match_dup 8)))]
3187 REAL_VALUE_TYPE TWO32r;
3191 real_ldexp (&TWO32r, &dconst1, 32);
3192 x = const_double_from_real_value (TWO32r, DFmode);
3194 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3195 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3197 operands[5] = gen_reg_rtx (V4SImode);
3199 for (i = 6; i < 9; i++)
3200 operands[i] = gen_reg_rtx (V2DFmode);
;; V4SI -> V2DF expander with a fix-up sequence (low-element variant).
;; Operands set up by the preparation code:
;;   3    - V2DF zero, forced into a register
;;   4    - V2DF constant built from 2^32 (real_ldexp of dconst1 by 32)
;;   5..7 - V2DF temporaries
;; Visible steps: operand 5 is compared (lt) against the zero vector,
;; operand 6 is ANDed with the 2^32 vector, and the result is operand 5
;; plus operand 7.
;; NOTE(review): presumably operand 6 holds the lt mask and operand 7 the
;; masked 2^32 -- confirm against the full pattern.
3203 (define_expand "vec_unpacku_float_lo_v4si"
3207 (match_operand:V4SI 1 "nonimmediate_operand" "")
3208 (parallel [(const_int 0) (const_int 1)]))))
3210 (lt:V2DF (match_dup 5) (match_dup 3)))
3212 (and:V2DF (match_dup 6) (match_dup 4)))
3213 (set (match_operand:V2DF 0 "register_operand" "")
3214 (plus:V2DF (match_dup 5) (match_dup 7)))]
3217 REAL_VALUE_TYPE TWO32r;
3221 real_ldexp (&TWO32r, &dconst1, 32);
3222 x = const_double_from_real_value (TWO32r, DFmode);
3224 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3225 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3227 for (i = 5; i < 8; i++)
3228 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V2DF inputs into one V4SF: each input is converted separately
;; with gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the two
;; temporaries are then combined into operand 0 with gen_sse_movlhps.
3231 (define_expand "vec_pack_trunc_v2df"
3232 [(match_operand:V4SF 0 "register_operand" "")
3233 (match_operand:V2DF 1 "nonimmediate_operand" "")
3234 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3239 r1 = gen_reg_rtx (V4SFmode);
3240 r2 = gen_reg_rtx (V4SFmode);
3242 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3243 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3244 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF inputs into one V4SI: each input is converted with
;; gen_sse2_cvttpd2dq into a V4SI temporary, then the two temporaries are
;; combined with gen_vec_interleave_lowv2di, with all three vectors viewed
;; as V2DI through gen_lowpart.
3248 (define_expand "vec_pack_sfix_trunc_v2df"
3249 [(match_operand:V4SI 0 "register_operand" "")
3250 (match_operand:V2DF 1 "nonimmediate_operand" "")
3251 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3256 r1 = gen_reg_rtx (V4SImode);
3257 r2 = gen_reg_rtx (V4SImode);
3259 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3260 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3261 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3262 gen_lowpart (V2DImode, r1),
3263 gen_lowpart (V2DImode, r2)));
;; Same shape as vec_pack_sfix_trunc_v2df, but the per-input conversion is
;; gen_sse2_cvtpd2dq instead of gen_sse2_cvttpd2dq.  The two V4SI
;; temporaries are combined with gen_vec_interleave_lowv2di on V2DI views.
3267 (define_expand "vec_pack_sfix_v2df"
3268 [(match_operand:V4SI 0 "register_operand" "")
3269 (match_operand:V2DF 1 "nonimmediate_operand" "")
3270 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3275 r1 = gen_reg_rtx (V4SImode);
3276 r2 = gen_reg_rtx (V4SImode);
3278 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3279 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3280 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3281 gen_lowpart (V2DImode, r1),
3282 gen_lowpart (V2DImode, r2)));
3286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3288 ;; Parallel single-precision floating point element swizzling
3290 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for the movhlps patterns.  All three operands may be
;; register or memory here; ix86_fixup_binary_operands is called on them
;; before the insn is generated.
3292 (define_expand "sse_movhlps_exp"
3293 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3296 (match_operand:V4SF 1 "nonimmediate_operand" "")
3297 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3298 (parallel [(const_int 6)
3303 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps family.  The condition rejects the case where operands 1 and
;; 2 are both memory.  Alternatives:
;;   0: all registers                      -> vmovhlps
;;   1: operand 2 in offsettable memory    -> vmovlps reading %H2
;;   2: destination in memory (op 1 tied)  -> vmovhps store of operand 2
3305 (define_insn "*avx_movhlps"
3306 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3309 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3310 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3311 (parallel [(const_int 6)
3315 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3317 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3318 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3319 vmovhps\t{%2, %0|%0, %2}"
3320 [(set_attr "type" "ssemov")
3321 (set_attr "prefix" "vex")
3322 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movhlps family; operand 1 is tied to the destination in every
;; alternative.  The condition rejects two memory inputs.  Alternatives:
;;   0: register source               -> movhlps
;;   1: offsettable memory source     -> movlps reading %H2
;;   2: destination in memory         -> movhps store of operand 2
3324 (define_insn "sse_movhlps"
3325 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3328 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3329 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3330 (parallel [(const_int 6)
3334 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3336 movhlps\t{%2, %0|%0, %2}
3337 movlps\t{%H2, %0|%0, %H2}
3338 movhps\t{%2, %0|%0, %2}"
3339 [(set_attr "type" "ssemov")
3340 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for the movlhps patterns.  All three operands may be
;; register or memory here; ix86_fixup_binary_operands is called on them
;; before the insn is generated.
3342 (define_expand "sse_movlhps_exp"
3343 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3346 (match_operand:V4SF 1 "nonimmediate_operand" "")
3347 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3348 (parallel [(const_int 0)
3353 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps family, valid when ix86_binary_operator_ok accepts the
;; operands.  Alternatives:
;;   0: all registers                                -> vmovlhps
;;   1: operand 2 in memory                          -> three-operand vmovhps
;;   2: destination in offsettable memory (op 1 tied) -> vmovlps store to %H0
3355 (define_insn "*avx_movlhps"
3356 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3359 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3360 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3361 (parallel [(const_int 0)
3365 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3367 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3368 vmovhps\t{%2, %1, %0|%0, %1, %2}
3369 vmovlps\t{%2, %H0|%H0, %2}"
3370 [(set_attr "type" "ssemov")
3371 (set_attr "prefix" "vex")
3372 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movlhps family; operand 1 is tied to the destination in every
;; alternative.  Alternatives:
;;   0: register source                       -> movlhps
;;   1: memory source                         -> movhps load
;;   2: destination in offsettable memory     -> movlps store to %H0
3374 (define_insn "sse_movlhps"
3375 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3378 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3379 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3380 (parallel [(const_int 0)
3384 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3386 movlhps\t{%2, %0|%0, %2}
3387 movhps\t{%2, %0|%0, %2}
3388 movlps\t{%2, %H0|%H0, %2}"
3389 [(set_attr "type" "ssemov")
3390 (set_attr "mode" "V4SF,V2SF,V2SF")])
3392 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Indices below refer to the pair (operand 1, operand 2): 0-7 come from
;; operand 1 and 8-15 from operand 2.  The selection pairs elements 2,3
;; and 6,7 of operand 1 with the same positions of operand 2.
3393 (define_insn "avx_unpckhps256"
3394 [(set (match_operand:V8SF 0 "register_operand" "=x")
3397 (match_operand:V8SF 1 "register_operand" "x")
3398 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3399 (parallel [(const_int 2) (const_int 10)
3400 (const_int 3) (const_int 11)
3401 (const_int 6) (const_int 14)
3402 (const_int 7) (const_int 15)])))]
3404 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3405 [(set_attr "type" "sselog")
3406 (set_attr "prefix" "vex")
3407 (set_attr "mode" "V8SF")])
;; AVX three-operand interleave of the high elements: selects indices
;; 2, 6, 3, 7 from the pair (operand 1, operand 2) and emits vunpckhps.
3409 (define_insn "*avx_interleave_highv4sf"
3410 [(set (match_operand:V4SF 0 "register_operand" "=x")
3413 (match_operand:V4SF 1 "register_operand" "x")
3414 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3415 (parallel [(const_int 2) (const_int 6)
3416 (const_int 3) (const_int 7)])))]
3418 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3419 [(set_attr "type" "sselog")
3420 (set_attr "prefix" "vex")
3421 (set_attr "mode" "V4SF")])
;; Legacy SSE interleave of the high elements (indices 2, 6, 3, 7).
;; Operand 1 is tied to the destination, so unpckhps names only %2 and %0.
3423 (define_insn "vec_interleave_highv4sf"
3424 [(set (match_operand:V4SF 0 "register_operand" "=x")
3427 (match_operand:V4SF 1 "register_operand" "0")
3428 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3429 (parallel [(const_int 2) (const_int 6)
3430 (const_int 3) (const_int 7)])))]
3432 "unpckhps\t{%2, %0|%0, %2}"
3433 [(set_attr "type" "sselog")
3434 (set_attr "mode" "V4SF")])
3436 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Indices below refer to the pair (operand 1, operand 2): 0-7 come from
;; operand 1 and 8-15 from operand 2.  The selection pairs elements 0,1
;; and 4,5 of operand 1 with the same positions of operand 2.
3437 (define_insn "avx_unpcklps256"
3438 [(set (match_operand:V8SF 0 "register_operand" "=x")
3441 (match_operand:V8SF 1 "register_operand" "x")
3442 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3443 (parallel [(const_int 0) (const_int 8)
3444 (const_int 1) (const_int 9)
3445 (const_int 4) (const_int 12)
3446 (const_int 5) (const_int 13)])))]
3448 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3449 [(set_attr "type" "sselog")
3450 (set_attr "prefix" "vex")
3451 (set_attr "mode" "V8SF")])
;; AVX three-operand interleave of the low elements: selects indices
;; 0, 4, 1, 5 from the pair (operand 1, operand 2) and emits vunpcklps.
3453 (define_insn "*avx_interleave_lowv4sf"
3454 [(set (match_operand:V4SF 0 "register_operand" "=x")
3457 (match_operand:V4SF 1 "register_operand" "x")
3458 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3459 (parallel [(const_int 0) (const_int 4)
3460 (const_int 1) (const_int 5)])))]
3462 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3463 [(set_attr "type" "sselog")
3464 (set_attr "prefix" "vex")
3465 (set_attr "mode" "V4SF")])
;; Legacy SSE interleave of the low elements (indices 0, 4, 1, 5).
;; Operand 1 is tied to the destination, so unpcklps names only %2 and %0.
3467 (define_insn "vec_interleave_lowv4sf"
3468 [(set (match_operand:V4SF 0 "register_operand" "=x")
3471 (match_operand:V4SF 1 "register_operand" "0")
3472 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3473 (parallel [(const_int 0) (const_int 4)
3474 (const_int 1) (const_int 5)])))]
3476 "unpcklps\t{%2, %0|%0, %2}"
3477 [(set_attr "type" "sselog")
3478 (set_attr "mode" "V4SF")])
3480 ;; These are modeled with the same vec_concat as the others so that we
3481 ;; capture users of shufps that can use the new instructions
;; 256-bit form: every odd-indexed element (1, 3, 5, 7) is selected twice,
;; and the insn is emitted as vmovshdup.
3482 (define_insn "avx_movshdup256"
3483 [(set (match_operand:V8SF 0 "register_operand" "=x")
3486 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3488 (parallel [(const_int 1) (const_int 1)
3489 (const_int 3) (const_int 3)
3490 (const_int 5) (const_int 5)
3491 (const_int 7) (const_int 7)])))]
3493 "vmovshdup\t{%1, %0|%0, %1}"
3494 [(set_attr "type" "sse")
3495 (set_attr "prefix" "vex")
3496 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on a V4SF register or memory source.  The %v prefix
;; with prefix "maybe_vex" lets the same pattern emit either encoding.
3498 (define_insn "sse3_movshdup"
3499 [(set (match_operand:V4SF 0 "register_operand" "=x")
3502 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3504 (parallel [(const_int 1)
3509 "%vmovshdup\t{%1, %0|%0, %1}"
3510 [(set_attr "type" "sse")
3511 (set_attr "prefix_rep" "1")
3512 (set_attr "prefix" "maybe_vex")
3513 (set_attr "mode" "V4SF")])
;; 256-bit form: every even-indexed element (0, 2, 4, 6) is selected twice,
;; and the insn is emitted as vmovsldup.
3515 (define_insn "avx_movsldup256"
3516 [(set (match_operand:V8SF 0 "register_operand" "=x")
3519 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3521 (parallel [(const_int 0) (const_int 0)
3522 (const_int 2) (const_int 2)
3523 (const_int 4) (const_int 4)
3524 (const_int 6) (const_int 6)])))]
3526 "vmovsldup\t{%1, %0|%0, %1}"
3527 [(set_attr "type" "sse")
3528 (set_attr "prefix" "vex")
3529 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on a V4SF register or memory source.  The %v prefix
;; with prefix "maybe_vex" lets the same pattern emit either encoding.
3531 (define_insn "sse3_movsldup"
3532 [(set (match_operand:V4SF 0 "register_operand" "=x")
3535 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3537 (parallel [(const_int 0)
3542 "%vmovsldup\t{%1, %0|%0, %1}"
3543 [(set_attr "type" "sse")
3544 (set_attr "prefix_rep" "1")
3545 (set_attr "prefix" "maybe_vex")
3546 (set_attr "mode" "V4SF")])
;; Expander that turns the 8-bit shufps immediate (operand 3) into the
;; eight explicit element indices taken by avx_shufps256_1.  Each 2-bit
;; field of the mask is used twice: once as-is (with +8 for the two fields
;; that pick from operand 2) and once more with an extra +4.
3548 (define_expand "avx_shufps256"
3549 [(match_operand:V8SF 0 "register_operand" "")
3550 (match_operand:V8SF 1 "register_operand" "")
3551 (match_operand:V8SF 2 "nonimmediate_operand" "")
3552 (match_operand:SI 3 "const_int_operand" "")]
3555 int mask = INTVAL (operands[3]);
3556 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3557 GEN_INT ((mask >> 0) & 3),
3558 GEN_INT ((mask >> 2) & 3),
3559 GEN_INT (((mask >> 4) & 3) + 8),
3560 GEN_INT (((mask >> 6) & 3) + 8),
3561 GEN_INT (((mask >> 0) & 3) + 4),
3562 GEN_INT (((mask >> 2) & 3) + 4),
3563 GEN_INT (((mask >> 4) & 3) + 12),
3564 GEN_INT (((mask >> 6) & 3) + 12)));
3568 ;; Each 2-bit field of the mask selects one element in both 128-bit lanes.
;; Operands 3-6 are the indices for the first four result elements and
;; operands 7-10 those for the last four.  The condition requires each of
;; 7-10 to equal the matching one of 3-6 plus 4, so only operands 3-6 are
;; needed to rebuild the 8-bit immediate: operands 3 and 4 go in unchanged,
;; operands 5 and 6 have their +8 bias removed.
3569 (define_insn "avx_shufps256_1"
3570 [(set (match_operand:V8SF 0 "register_operand" "=x")
3573 (match_operand:V8SF 1 "register_operand" "x")
3574 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3575 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3576 (match_operand 4 "const_0_to_3_operand" "")
3577 (match_operand 5 "const_8_to_11_operand" "")
3578 (match_operand 6 "const_8_to_11_operand" "")
3579 (match_operand 7 "const_4_to_7_operand" "")
3580 (match_operand 8 "const_4_to_7_operand" "")
3581 (match_operand 9 "const_12_to_15_operand" "")
3582 (match_operand 10 "const_12_to_15_operand" "")])))]
3584 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3585 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3586 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3587 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3590 mask = INTVAL (operands[3]);
3591 mask |= INTVAL (operands[4]) << 2;
3592 mask |= (INTVAL (operands[5]) - 8) << 4;
3593 mask |= (INTVAL (operands[6]) - 8) << 6;
3594 operands[3] = GEN_INT (mask);
3596 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3598 [(set_attr "type" "sselog")
3599 (set_attr "length_immediate" "1")
3600 (set_attr "prefix" "vex")
3601 (set_attr "mode" "V8SF")])
;; Expander that splits the 8-bit shufps immediate (operand 3) into four
;; explicit element indices for gen_sse_shufps_v4sf.  The two low fields
;; are passed as 0-3; the two high fields get +4 added.
3603 (define_expand "sse_shufps"
3604 [(match_operand:V4SF 0 "register_operand" "")
3605 (match_operand:V4SF 1 "register_operand" "")
3606 (match_operand:V4SF 2 "nonimmediate_operand" "")
3607 (match_operand:SI 3 "const_int_operand" "")]
3610 int mask = INTVAL (operands[3]);
3611 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3612 GEN_INT ((mask >> 0) & 3),
3613 GEN_INT ((mask >> 2) & 3),
3614 GEN_INT (((mask >> 4) & 3) + 4),
3615 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX shufps over the SSEMODE4S modes.  Operands 3 and 4 index into
;; operand 1 (0-3) and operands 5 and 6 into operand 2 (4-7 in the
;; concatenation).  The output code folds the four indices back into the
;; 8-bit immediate, removing the +4 bias from operands 5 and 6, and reuses
;; operands[3] to hold it.
3619 (define_insn "*avx_shufps_<mode>"
3620 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3621 (vec_select:SSEMODE4S
3622 (vec_concat:<ssedoublesizemode>
3623 (match_operand:SSEMODE4S 1 "register_operand" "x")
3624 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3625 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3626 (match_operand 4 "const_0_to_3_operand" "")
3627 (match_operand 5 "const_4_to_7_operand" "")
3628 (match_operand 6 "const_4_to_7_operand" "")])))]
3632 mask |= INTVAL (operands[3]) << 0;
3633 mask |= INTVAL (operands[4]) << 2;
3634 mask |= (INTVAL (operands[5]) - 4) << 4;
3635 mask |= (INTVAL (operands[6]) - 4) << 6;
3636 operands[3] = GEN_INT (mask);
3638 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3640 [(set_attr "type" "sselog")
3641 (set_attr "length_immediate" "1")
3642 (set_attr "prefix" "vex")
3643 (set_attr "mode" "V4SF")])
;; Legacy SSE shufps over the SSEMODE4S modes; operand 1 is tied to the
;; destination.  The output code folds the four index operands back into
;; the 8-bit immediate, removing the +4 bias from operands 5 and 6, and
;; reuses operands[3] to hold it.
3645 (define_insn "sse_shufps_<mode>"
3646 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3647 (vec_select:SSEMODE4S
3648 (vec_concat:<ssedoublesizemode>
3649 (match_operand:SSEMODE4S 1 "register_operand" "0")
3650 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3651 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3652 (match_operand 4 "const_0_to_3_operand" "")
3653 (match_operand 5 "const_4_to_7_operand" "")
3654 (match_operand 6 "const_4_to_7_operand" "")])))]
3658 mask |= INTVAL (operands[3]) << 0;
3659 mask |= INTVAL (operands[4]) << 2;
3660 mask |= (INTVAL (operands[5]) - 4) << 4;
3661 mask |= (INTVAL (operands[6]) - 4) << 6;
3662 operands[3] = GEN_INT (mask);
3664 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3666 [(set_attr "type" "sselog")
3667 (set_attr "length_immediate" "1")
3668 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF into a V2SF destination.
;;   alt 0: register -> memory             : movhps store
;;   alt 1: register -> register           : movhlps (destination printed %d0)
;;   alt 2: offsettable memory -> register : movlps reading %H1
;; All three templates use the %v prefix (prefix attribute "maybe_vex").
3670 (define_insn "sse_storehps"
3671 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3673 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3674 (parallel [(const_int 2) (const_int 3)])))]
3677 %vmovhps\t{%1, %0|%0, %1}
3678 %vmovhlps\t{%1, %d0|%d0, %1}
3679 %vmovlps\t{%H1, %d0|%d0, %H1}"
3680 [(set_attr "type" "ssemov")
3681 (set_attr "prefix" "maybe_vex")
3682 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadhps patterns: the result combines
;; elements 0 and 1 of operand 1 with the V2SF operand 2.
;; ix86_fixup_binary_operands is called on the operands first.
3684 (define_expand "sse_loadhps_exp"
3685 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3688 (match_operand:V4SF 1 "nonimmediate_operand" "")
3689 (parallel [(const_int 0) (const_int 1)]))
3690 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3692 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps: keep elements 0 and 1 of operand 1 and combine them with
;; the V2SF operand 2.
;;   alt 0: operand 2 in memory                      -> vmovhps
;;   alt 1: operand 2 in a register                  -> vmovlhps
;;   alt 2: destination in offsettable memory (op 1 tied) -> vmovlps to %H0
3694 (define_insn "*avx_loadhps"
3695 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3698 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3699 (parallel [(const_int 0) (const_int 1)]))
3700 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3703 vmovhps\t{%2, %1, %0|%0, %1, %2}
3704 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3705 vmovlps\t{%2, %H0|%H0, %2}"
3706 [(set_attr "type" "ssemov")
3707 (set_attr "prefix" "vex")
3708 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE loadhps; operand 1 is tied to the destination throughout.
;;   alt 0: operand 2 in memory                 -> movhps
;;   alt 1: operand 2 in a register             -> movlhps
;;   alt 2: destination in offsettable memory   -> movlps store to %H0
3710 (define_insn "sse_loadhps"
3711 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3714 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3715 (parallel [(const_int 0) (const_int 1)]))
3716 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3719 movhps\t{%2, %0|%0, %2}
3720 movlhps\t{%2, %0|%0, %2}
3721 movlps\t{%2, %H0|%H0, %2}"
3722 [(set_attr "type" "ssemov")
3723 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX extraction of elements 0 and 1 of a V4SF into a V2SF destination.
;;   alt 0: register -> memory   : vmovlps store
;;   alt 1: register -> register : whole-register vmovaps copy
;;   alt 2: memory -> register   : three-operand vmovlps with %0 as both
;;                                 the destination and the first source
;; NOTE(review): alternative 1 emits vmovaps but its mode attribute is
;; V2DF, while sse_storelps uses V4SF for the same alternative -- confirm
;; whether the difference is intentional.
3725 (define_insn "*avx_storelps"
3726 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3728 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3729 (parallel [(const_int 0) (const_int 1)])))]
3732 vmovlps\t{%1, %0|%0, %1}
3733 vmovaps\t{%1, %0|%0, %1}
3734 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3735 [(set_attr "type" "ssemov")
3736 (set_attr "prefix" "vex")
3737 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Legacy SSE extraction of elements 0 and 1 of a V4SF into a V2SF.
;;   alt 0: register -> memory   : movlps store
;;   alt 1: register -> register : whole-register movaps copy
;;   alt 2: memory -> register   : movlps load
3739 (define_insn "sse_storelps"
3740 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3742 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3743 (parallel [(const_int 0) (const_int 1)])))]
3746 movlps\t{%1, %0|%0, %1}
3747 movaps\t{%1, %0|%0, %1}
3748 movlps\t{%1, %0|%0, %1}"
3749 [(set_attr "type" "ssemov")
3750 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadlps patterns: the result combines the
;; V2SF operand 2 with elements 2 and 3 of operand 1.
;; ix86_fixup_binary_operands is called on the operands first.
3752 (define_expand "sse_loadlps_exp"
3753 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3755 (match_operand:V2SF 2 "nonimmediate_operand" "")
3757 (match_operand:V4SF 1 "nonimmediate_operand" "")
3758 (parallel [(const_int 2) (const_int 3)]))))]
3760 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps: combine the V2SF operand 2 with elements 2 and 3 of the
;; V4SF operand 1.
;;   alt 0: both inputs in registers  -> vshufps with immediate 0xe4
;;   alt 1: operand 2 in memory       -> three-operand vmovlps load
;;   alt 2: destination in memory (operand 1 tied to it) -> vmovlps store
;; Every alternative has prefix "vex", so alternative 0 must use the VEX
;; mnemonic: the legacy "shufps" spelling cannot take the three-register
;; operand list written in the template.
3762 (define_insn "*avx_loadlps"
3763 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3765 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3767 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3768 (parallel [(const_int 2) (const_int 3)]))))]
3771 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3772 vmovlps\t{%2, %1, %0|%0, %1, %2}
3773 vmovlps\t{%2, %0|%0, %2}"
3774 [(set_attr "type" "sselog,ssemov,ssemov")
3775 (set_attr "length_immediate" "1,*,*")
3776 (set_attr "prefix" "vex")
3777 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE loadlps: combine the V2SF operand 2 with elements 2 and 3 of
;; the V4SF operand 1.
;;   alt 0: operand 2 tied to the destination, operand 1 in a register
;;          -> shufps $0xe4 with operand 1 as the source
;;   alt 1: operand 1 tied, operand 2 in memory -> movlps load
;;   alt 2: destination in memory (operand 1 tied) -> movlps store
3779 (define_insn "sse_loadlps"
3780 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3782 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3784 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3785 (parallel [(const_int 2) (const_int 3)]))))]
3788 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3789 movlps\t{%2, %0|%0, %2}
3790 movlps\t{%2, %0|%0, %2}"
3791 [(set_attr "type" "sselog,ssemov,ssemov")
3792 (set_attr "length_immediate" "1,*,*")
3793 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX register-only movss: three-operand vmovss with operands 1 and 2
;; both V4SF registers.
3795 (define_insn "*avx_movss"
3796 [(set (match_operand:V4SF 0 "register_operand" "=x")
3798 (match_operand:V4SF 2 "register_operand" "x")
3799 (match_operand:V4SF 1 "register_operand" "x")
3802 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3803 [(set_attr "type" "ssemov")
3804 (set_attr "prefix" "vex")
3805 (set_attr "mode" "SF")])
;; Legacy SSE register-only movss; operand 1 is tied to the destination,
;; so the template names only %2 and %0.
3807 (define_insn "sse_movss"
3808 [(set (match_operand:V4SF 0 "register_operand" "=x")
3810 (match_operand:V4SF 2 "register_operand" "x")
3811 (match_operand:V4SF 1 "register_operand" "0")
3814 "movss\t{%2, %0|%0, %2}"
3815 [(set_attr "type" "ssemov")
3816 (set_attr "mode" "SF")])
;; Expander for vec_dupv4sf: V4SF result (operand 0) from an SF scalar
;; (operand 1, register or memory).  The preparation statement forces
;; operand 1 into a register.  Operand 1 is an SF value, so the register
;; must be requested in SFmode; asking for V4SFmode would hand force_reg a
;; mode that disagrees with the operand being copied.
3818 (define_expand "vec_dupv4sf"
3819 [(set (match_operand:V4SF 0 "register_operand" "")
3821 (match_operand:SF 1 "nonimmediate_operand" "")))]
3825 operands[1] = force_reg (SFmode, operands[1]);
;; AVX insn for duplicating an SF scalar into a V4SF register.
;;   alt 0: scalar in a register -> vshufps $0 with %1 as both sources
;;   alt 1: scalar in memory     -> vbroadcastss
3828 (define_insn "*vec_dupv4sf_avx"
3829 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3831 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3834 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3835 vbroadcastss\t{%1, %0|%0, %1}"
3836 [(set_attr "type" "sselog1,ssemov")
3837 (set_attr "length_immediate" "1,0")
3838 (set_attr "prefix_extra" "0,1")
3839 (set_attr "prefix" "vex")
3840 (set_attr "mode" "V4SF")])
;; Legacy SSE insn for duplicating an SF scalar.  The scalar must already
;; be in the destination register (constraint "0"); shufps $0 is applied
;; with %0 as both source and destination.
3842 (define_insn "*vec_dupv4sf"
3843 [(set (match_operand:V4SF 0 "register_operand" "=x")
3845 (match_operand:SF 1 "register_operand" "0")))]
3847 "shufps\t{$0, %0, %0|%0, %0, 0}"
3848 [(set_attr "type" "sselog1")
3849 (set_attr "length_immediate" "1")
3850 (set_attr "mode" "V4SF")])
;; Build a V2SF from two SF scalars (AVX variant).
;;   alt 0: both in SSE registers        -> vunpcklps
;;   alt 1: operand 2 in memory          -> vinsertps $0x10
;;   alt 2: operand 1 in memory, op 2 = C -> vmovss load
;;   alt 3: MMX destination (*y)         -> punpckldq
;;   alt 4: MMX destination, op 2 = C    -> movd
;; The two MMX alternatives (3 and 4) get prefix "orig"; the rest use "vex".
3852 (define_insn "*vec_concatv2sf_avx"
3853 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3855 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3856 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3859 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3860 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3861 vmovss\t{%1, %0|%0, %1}
3862 punpckldq\t{%2, %0|%0, %2}
3863 movd\t{%1, %0|%0, %1}"
3864 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3865 (set_attr "length_immediate" "*,1,*,*,*")
3866 (set_attr "prefix_extra" "*,1,*,*,*")
3867 (set (attr "prefix")
3868 (if_then_else (eq_attr "alternative" "3,4")
3869 (const_string "orig")
3870 (const_string "vex")))
3871 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3873 ;; Although insertps takes register source, we prefer
3874 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF scalars (SSE4.1 variant).  Operand 1 is tied
;; to the destination wherever it is a register.
;;   alt 0: operand 2 in a register -> unpcklps
;;   alt 1: operand 2 in memory     -> insertps $0x10
;;   alt 2: operand 1 in memory, op 2 = C -> movss load
;;   alt 3/4: MMX destination       -> punpckldq / movd
3875 (define_insn "*vec_concatv2sf_sse4_1"
3876 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3878 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3879 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3882 unpcklps\t{%2, %0|%0, %2}
3883 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3884 movss\t{%1, %0|%0, %1}
3885 punpckldq\t{%2, %0|%0, %2}
3886 movd\t{%1, %0|%0, %1}"
3887 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3888 (set_attr "prefix_data16" "*,1,*,*,*")
3889 (set_attr "prefix_extra" "*,1,*,*,*")
3890 (set_attr "length_immediate" "*,1,*,*,*")
3891 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3893 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3894 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3895 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF scalars (plain SSE variant); operand 2 is
;; restricted to a register or zero (reg_or_0_operand).
;;   alt 0: unpcklps with operand 1 tied to the destination
;;   alt 1: movss load of operand 1 from memory, operand 2 = C
;;   alt 2/3: MMX destination -> punpckldq / movd
3896 (define_insn "*vec_concatv2sf_sse"
3897 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3899 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3900 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3903 unpcklps\t{%2, %0|%0, %2}
3904 movss\t{%1, %0|%0, %1}
3905 punpckldq\t{%2, %0|%0, %2}
3906 movd\t{%1, %0|%0, %1}"
3907 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3908 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF halves (AVX variant): vmovlhps when operand 2
;; is in a register, vmovhps when it is in memory.
3910 (define_insn "*vec_concatv4sf_avx"
3911 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3913 (match_operand:V2SF 1 "register_operand" " x,x")
3914 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3917 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3918 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3919 [(set_attr "type" "ssemov")
3920 (set_attr "prefix" "vex")
3921 (set_attr "mode" "V4SF,V2SF")])
;; Build a V4SF from two V2SF halves (legacy SSE variant).  Operand 1 is
;; tied to the destination; movlhps for a register operand 2, movhps for a
;; memory operand 2.
3923 (define_insn "*vec_concatv4sf_sse"
3924 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3926 (match_operand:V2SF 1 "register_operand" " 0,0")
3927 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3930 movlhps\t{%2, %0|%0, %2}
3931 movhps\t{%2, %0|%0, %2}"
3932 [(set_attr "type" "ssemov")
3933 (set_attr "mode" "V4SF,V2SF")])
;; vec_init for every SSEMODE vector mode: all the work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
3935 (define_expand "vec_init<mode>"
3936 [(match_operand:SSEMODE 0 "register_operand" "")
3937 (match_operand 1 "" "")]
3940 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX variant of the vec_set<mode>_0 patterns: merge the scalar operand 2
;; with the vector operand 1 over the SSEMODE4S modes.
;; In the first three alternatives operand 1 is the constant C and the
;; scalar is loaded with vinsertps, vmov<suffix> or vmovd.  In the next two
;; operand 1 is a register and the scalar is merged with vmovss or vpinsrd.
;; The last alternative has a memory destination with operand 1 tied to it.
3944 (define_insn "*vec_set<mode>_0_avx"
3945 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3946 (vec_merge:SSEMODE4S
3947 (vec_duplicate:SSEMODE4S
3948 (match_operand:<ssescalarmode> 2
3949 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3950 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3954 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3955 vmov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3956 vmovd\t{%2, %0|%0, %2}
3957 vmovss\t{%2, %1, %0|%0, %1, %2}
3958 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3960 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3961 (set_attr "prefix_extra" "*,*,*,*,1,*")
3962 (set_attr "length_immediate" "*,*,*,*,1,*")
3963 (set_attr "prefix" "vex")
3964 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE4.1 variant: same alternative layout as the AVX pattern, but with
;; two-operand mnemonics, so a register operand 1 is tied to the
;; destination (constraint "0") in the movss and pinsrd alternatives.
3966 (define_insn "*vec_set<mode>_0_sse4_1"
3967 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3968 (vec_merge:SSEMODE4S
3969 (vec_duplicate:SSEMODE4S
3970 (match_operand:<ssescalarmode> 2
3971 "general_operand" " x,m,*r,x,*rm,*rfF"))
3972 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
3976 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
3977 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3978 movd\t{%2, %0|%0, %2}
3979 movss\t{%2, %0|%0, %2}
3980 pinsrd\t{$0, %2, %0|%0, %2, 0}
3982 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3983 (set_attr "prefix_extra" "*,*,*,*,1,*")
3984 (set_attr "length_immediate" "*,*,*,*,1,*")
3985 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE2 variant with four alternatives: load the scalar from memory or a
;; general register when operand 1 is the constant C, merge with movss
;; when operand 1 is tied to the destination register, and a memory
;; destination form.
3987 (define_insn "*vec_set<mode>_0_sse2"
3988 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
3989 (vec_merge:SSEMODE4S
3990 (vec_duplicate:SSEMODE4S
3991 (match_operand:<ssescalarmode> 2
3992 "general_operand" " m,*r,x,x*rfF"))
3993 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
3997 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3998 movd\t{%2, %0|%0, %2}
3999 movss\t{%2, %0|%0, %2}
4001 [(set_attr "type" "ssemov")
4002 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Baseline variant with three alternatives: movss load from memory when
;; operand 1 is the constant C, movss register merge when operand 1 is
;; tied to the destination, and a memory destination form.
4004 (define_insn "vec_set<mode>_0"
4005 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4006 (vec_merge:SSEMODE4S
4007 (vec_duplicate:SSEMODE4S
4008 (match_operand:<ssescalarmode> 2
4009 "general_operand" " m,x,x*rfF"))
4010 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4014 movss\t{%2, %0|%0, %2}
4015 movss\t{%2, %0|%0, %2}
4017 [(set_attr "type" "ssemov")
4018 (set_attr "mode" "SF")])
4020 ;; A subset is vec_setv4sf.
;; Operand 3 arrives as a power of two between 1 and 8.  The output code
;; rewrites it as exact_log2 (operand 3) shifted left by 4, which is the
;; immediate passed to vinsertps.
4021 (define_insn "*vec_setv4sf_avx"
4022 [(set (match_operand:V4SF 0 "register_operand" "=x")
4025 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4026 (match_operand:V4SF 1 "register_operand" "x")
4027 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4030 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4031 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4033 [(set_attr "type" "sselog")
4034 (set_attr "prefix_extra" "1")
4035 (set_attr "length_immediate" "1")
4036 (set_attr "prefix" "vex")
4037 (set_attr "mode" "V4SF")])
;; SSE4.1 two-operand counterpart: operand 1 is tied to the destination.
;; Operand 3 (a power of two between 1 and 8) is rewritten as
;; exact_log2 (operand 3) shifted left by 4 before insertps is emitted.
4039 (define_insn "*vec_setv4sf_sse4_1"
4040 [(set (match_operand:V4SF 0 "register_operand" "=x")
4043 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4044 (match_operand:V4SF 1 "register_operand" "0")
4045 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4048 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4049 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4051 [(set_attr "type" "sselog")
4052 (set_attr "prefix_data16" "1")
4053 (set_attr "prefix_extra" "1")
4054 (set_attr "length_immediate" "1")
4055 (set_attr "mode" "V4SF")])
;; AVX insertps modeled as an unspec over operand 2 (V4SF, register or
;; memory), operand 1 (V4SF register) and an 8-bit immediate (operand 3),
;; which is passed through to vinsertps unchanged.
4057 (define_insn "*avx_insertps"
4058 [(set (match_operand:V4SF 0 "register_operand" "=x")
4059 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4060 (match_operand:V4SF 1 "register_operand" "x")
4061 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4064 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4065 [(set_attr "type" "sselog")
4066 (set_attr "prefix" "vex")
4067 (set_attr "prefix_extra" "1")
4068 (set_attr "length_immediate" "1")
4069 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps modeled as an unspec.  Operand 1 is tied to the
;; destination and operand 2 must be a register here (the AVX pattern also
;; accepts memory).  The 8-bit immediate is passed through unchanged.
4071 (define_insn "sse4_1_insertps"
4072 [(set (match_operand:V4SF 0 "register_operand" "=x")
4073 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4074 (match_operand:V4SF 1 "register_operand" "0")
4075 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4078 "insertps\t{%3, %2, %0|%0, %2, %3}";
4079 [(set_attr "type" "sselog")
4080 (set_attr "prefix_data16" "1")
4081 (set_attr "prefix_extra" "1")
4082 (set_attr "length_immediate" "1")
4083 (set_attr "mode" "V4SF")])
;; Post-reload split (the condition requires reload_completed) for a V4SF
;; memory destination and a non-memory SF source: the scalar is stored with
;; an ordinary move to the SFmode slot at offset 0 of the memory operand.
4086 [(set (match_operand:V4SF 0 "memory_operand" "")
4089 (match_operand:SF 1 "nonmemory_operand" ""))
4092 "TARGET_SSE && reload_completed"
4095 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; vec_set for every SSEMODE vector mode: operand 1 is the scalar and
;; operand 2 a constant element index.  The work is delegated to
;; ix86_expand_vector_set, called with false as its first argument.
4099 (define_expand "vec_set<mode>"
4100 [(match_operand:SSEMODE 0 "register_operand" "")
4101 (match_operand:<ssescalarmode> 1 "register_operand" "")
4102 (match_operand 2 "const_int_operand" "")]
4105 ix86_expand_vector_set (false, operands[0], operands[1],
4106 INTVAL (operands[2]));
;; Extract element 0 of a V4SF into an SF destination (SSE register,
;; memory, x87 register or general register).  The insn condition rejects
;; memory-to-memory.  After reload it is split into a plain SF move:
;; operand 1 is re-expressed in SFmode, either as the same hard register
;; number or through gen_lowpart, and moved into operand 0.
4110 (define_insn_and_split "*vec_extractv4sf_0"
4111 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4113 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4114 (parallel [(const_int 0)])))]
4115 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4117 "&& reload_completed"
4120 rtx op1 = operands[1];
4122 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4124 op1 = gen_lowpart (SFmode, op1);
4125 emit_move_insn (operands[0], op1);
;; Expander for extracting one 128-bit half of a 256-bit vector.  It
;; switches on operand 2 (a constant 0 or 1) and emits either
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>.
4129 (define_expand "avx_vextractf128<mode>"
4130 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4131 (match_operand:AVX256MODE 1 "register_operand" "")
4132 (match_operand:SI 2 "const_0_to_1_operand" "")]
4135 switch (INTVAL (operands[2]))
4138 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4141 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low half of a 256-bit vector in an AVX256MODE4P mode: selects elements 0
;; and 1 and emits vextractf128 with immediate 0.  Alternative 0 writes an
;; SSE register, alternative 1 stores to memory (see the "memory" attribute).
4149 (define_insn "vec_extract_lo_<mode>"
4150 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4151 (vec_select:<avxhalfvecmode>
4152 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4153 (parallel [(const_int 0) (const_int 1)])))]
4155 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4156 [(set_attr "type" "sselog")
4157 (set_attr "prefix_extra" "1")
4158 (set_attr "length_immediate" "1")
4159 (set_attr "memory" "none,store")
4160 (set_attr "prefix" "vex")
4161 (set_attr "mode" "V8SF")])
;; High half of a 256-bit vector in an AVX256MODE4P mode: selects elements 2
;; and 3 and emits vextractf128 with immediate 1.  Alternative 0 writes an
;; SSE register, alternative 1 stores to memory.
4163 (define_insn "vec_extract_hi_<mode>"
4164 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4165 (vec_select:<avxhalfvecmode>
4166 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4167 (parallel [(const_int 2) (const_int 3)])))]
4169 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4170 [(set_attr "type" "sselog")
4171 (set_attr "prefix_extra" "1")
4172 (set_attr "length_immediate" "1")
4173 (set_attr "memory" "none,store")
4174 (set_attr "prefix" "vex")
4175 (set_attr "mode" "V8SF")])
;; Low half of a 256-bit vector in an AVX256MODE8P mode: selects elements
;; 0..3, i.e. the low 128 bits, so vextractf128 must be emitted with
;; immediate 0.  (Immediate 1 returns the upper 128 bits, which is what the
;; matching vec_extract_hi_<mode> pattern emits for elements 4..7.)
;; Alternative 0 writes an SSE register, alternative 1 stores to memory.
4177 (define_insn "vec_extract_lo_<mode>"
4178 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4179 (vec_select:<avxhalfvecmode>
4180 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4181 (parallel [(const_int 0) (const_int 1)
4182 (const_int 2) (const_int 3)])))]
4184 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4185 [(set_attr "type" "sselog")
4186 (set_attr "prefix_extra" "1")
4187 (set_attr "length_immediate" "1")
4188 (set_attr "memory" "none,store")
4189 (set_attr "prefix" "vex")
4190 (set_attr "mode" "V8SF")])
;; High half of a 256-bit vector in an AVX256MODE8P mode: selects elements
;; 4..7 and emits vextractf128 with immediate 1.  Alternative 0 writes an
;; SSE register, alternative 1 stores to memory.
4192 (define_insn "vec_extract_hi_<mode>"
4193 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4194 (vec_select:<avxhalfvecmode>
4195 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4196 (parallel [(const_int 4) (const_int 5)
4197 (const_int 6) (const_int 7)])))]
4199 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4200 [(set_attr "type" "sselog")
4201 (set_attr "prefix_extra" "1")
4202 (set_attr "length_immediate" "1")
4203 (set_attr "memory" "none,store")
4204 (set_attr "prefix" "vex")
4205 (set_attr "mode" "V8SF")])
;; Low half of a V16HI register: selects elements 0..7, i.e. the low 128
;; bits, so vextractf128 must be emitted with immediate 0.  (Immediate 1
;; returns the upper 128 bits, which is what vec_extract_hi_v16hi emits for
;; elements 8..15.)  Alternative 0 writes an SSE register, alternative 1
;; stores to memory.
4207 (define_insn "vec_extract_lo_v16hi"
4208 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4210 (match_operand:V16HI 1 "register_operand" "x,x")
4211 (parallel [(const_int 0) (const_int 1)
4212 (const_int 2) (const_int 3)
4213 (const_int 4) (const_int 5)
4214 (const_int 6) (const_int 7)])))]
4216 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4217 [(set_attr "type" "sselog")
4218 (set_attr "prefix_extra" "1")
4219 (set_attr "length_immediate" "1")
4220 (set_attr "memory" "none,store")
4221 (set_attr "prefix" "vex")
4222 (set_attr "mode" "V8SF")])
;; High half of a V16HI register: selects elements 8..15 and emits
;; vextractf128 with immediate 1.  Alternative 0 writes an SSE register,
;; alternative 1 stores to memory.
4224 (define_insn "vec_extract_hi_v16hi"
4225 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4227 (match_operand:V16HI 1 "register_operand" "x,x")
4228 (parallel [(const_int 8) (const_int 9)
4229 (const_int 10) (const_int 11)
4230 (const_int 12) (const_int 13)
4231 (const_int 14) (const_int 15)])))]
4233 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4234 [(set_attr "type" "sselog")
4235 (set_attr "prefix_extra" "1")
4236 (set_attr "length_immediate" "1")
4237 (set_attr "memory" "none,store")
4238 (set_attr "prefix" "vex")
4239 (set_attr "mode" "V8SF")])
;; Low half of a V32QI register: selects elements 0..15, i.e. the low 128
;; bits, so vextractf128 must be emitted with immediate 0.  (Immediate 1
;; returns the upper 128 bits, which is what vec_extract_hi_v32qi emits for
;; elements 16..31.)  Alternative 0 writes an SSE register, alternative 1
;; stores to memory.
4241 (define_insn "vec_extract_lo_v32qi"
4242 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4244 (match_operand:V32QI 1 "register_operand" "x,x")
4245 (parallel [(const_int 0) (const_int 1)
4246 (const_int 2) (const_int 3)
4247 (const_int 4) (const_int 5)
4248 (const_int 6) (const_int 7)
4249 (const_int 8) (const_int 9)
4250 (const_int 10) (const_int 11)
4251 (const_int 12) (const_int 13)
4252 (const_int 14) (const_int 15)])))]
4254 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4255 [(set_attr "type" "sselog")
4256 (set_attr "prefix_extra" "1")
4257 (set_attr "length_immediate" "1")
4258 (set_attr "memory" "none,store")
4259 (set_attr "prefix" "vex")
4260 (set_attr "mode" "V8SF")])
;; High half of a V32QI register: selects elements 16..31 and emits
;; vextractf128 with immediate 1.  Alternative 0 writes an SSE register,
;; alternative 1 stores to memory.
4262 (define_insn "vec_extract_hi_v32qi"
4263 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4265 (match_operand:V32QI 1 "register_operand" "x,x")
4266 (parallel [(const_int 16) (const_int 17)
4267 (const_int 18) (const_int 19)
4268 (const_int 20) (const_int 21)
4269 (const_int 22) (const_int 23)
4270 (const_int 24) (const_int 25)
4271 (const_int 26) (const_int 27)
4272 (const_int 28) (const_int 29)
4273 (const_int 30) (const_int 31)])))]
4275 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4276 [(set_attr "type" "sselog")
4277 (set_attr "prefix_extra" "1")
4278 (set_attr "length_immediate" "1")
4279 (set_attr "memory" "none,store")
4280 (set_attr "prefix" "vex")
4281 (set_attr "mode" "V8SF")])
;; extractps: copy the V4SF element named by immediate operand 2 (accepted by
;; const_0_to_3_operand) from SSE register operand 1 into a general register
;; or memory ("=rm").  The template uses the "%v" mnemonic prefix and the
;; "prefix" attribute is maybe_vex.
4283 (define_insn "*sse4_1_extractps"
4284 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4286 (match_operand:V4SF 1 "register_operand" "x")
4287 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4289 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4290 [(set_attr "type" "sselog")
4291 (set_attr "prefix_data16" "1")
4292 (set_attr "prefix_extra" "1")
4293 (set_attr "length_immediate" "1")
4294 (set_attr "prefix" "maybe_vex")
4295 (set_attr "mode" "V4SF")])
;; Extract the element named by operand 2 (accepted by const_0_to_3_operand)
;; from a V4SF value in offsettable memory ("o").  The split replaces the
;; insn with a scalar SFmode load from byte offset 4 * index.
4297 (define_insn_and_split "*vec_extract_v4sf_mem"
4298 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4300 (match_operand:V4SF 1 "memory_operand" "o")
4301 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4307 int i = INTVAL (operands[2]);
4309 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander that extracts one element of the SSEMODE vector register operand
;; 1 into the scalar register operand 0.  Operand 2 is a constant integer
;; passed through as INTVAL; the work is done by ix86_expand_vector_extract,
;; called with its first argument false.
4313 (define_expand "vec_extract<mode>"
4314 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4315 (match_operand:SSEMODE 1 "register_operand" "")
4316 (match_operand 2 "const_int_operand" "")]
4319 ix86_expand_vector_extract (false, operands[0], operands[1],
4320 INTVAL (operands[2]));
4324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4326 ;; Parallel double-precision floating point element swizzling
4328 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4330 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Accordingly the selection below is {1, 5, 3, 7} over V4DF operands 1 and
;; 2.  Operand 1 must be a register; operand 2 may be a register or memory.
;; NOTE(review): the RTL that combines the two operands is not visible in
;; this chunk; presumably a vec_concat of operand 1 and operand 2.
4331 (define_insn "avx_unpckhpd256"
4332 [(set (match_operand:V4DF 0 "register_operand" "=x")
4335 (match_operand:V4DF 1 "register_operand" "x")
4336 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4337 (parallel [(const_int 1) (const_int 5)
4338 (const_int 3) (const_int 7)])))]
4340 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4341 [(set_attr "type" "sselog")
4342 (set_attr "prefix" "vex")
4343 (set_attr "mode" "V4DF")])
;; Expander for the high-element interleave of V2DF operands 1 and 2 (the
;; selection starts with element 1).  If ix86_vec_interleave_v2df_operator_ok
;; (operands, 1) rejects the operand combination, operand 2 is forced into a
;; register before the pattern is emitted.
4345 (define_expand "vec_interleave_highv2df"
4346 [(set (match_operand:V2DF 0 "register_operand" "")
4349 (match_operand:V2DF 1 "nonimmediate_operand" "")
4350 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4351 (parallel [(const_int 1)
4355 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4356 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX insn for the high-element interleave of two V2DF values.
;; Alternatives, in constraint order:
;;   0: both inputs in registers               -> vunpckhpd
;;   1: op1 offsettable memory, op2 tied to it -> vmovddup of %H1
;;   2: op1 offsettable memory, op2 register   -> vmovlpd of %H1 with %2
;;   3: memory destination tied to op2, op1 register -> vmovhpd of %1 to %0
;; The order of the template lines must stay in step with the constraints.
4359 (define_insn "*avx_interleave_highv2df"
4360 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4363 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4364 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4365 (parallel [(const_int 1)
4367 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4369 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4370 vmovddup\t{%H1, %0|%0, %H1}
4371 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4372 vmovhpd\t{%1, %0|%0, %1}"
4373 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4374 (set_attr "prefix" "vex")
4375 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 insn for the high-element interleave of two V2DF values, using
;; two-operand (destructive) templates.  Alternatives, in constraint order:
;;   0: op1 tied to the output, op2 register   -> unpckhpd
;;   1: op1 offsettable memory, op2 tied to it -> movddup of %H1
;;   2: op1 offsettable memory, op2 tied to the output -> movlpd of %H1
;;   3: memory destination tied to op2, op1 register   -> movhpd of %1 to %0
4377 (define_insn "*sse3_interleave_highv2df"
4378 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4381 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4382 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4383 (parallel [(const_int 1)
4385 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4387 unpckhpd\t{%2, %0|%0, %2}
4388 movddup\t{%H1, %0|%0, %H1}
4389 movlpd\t{%H1, %0|%0, %H1}
4390 movhpd\t{%1, %0|%0, %1}"
4391 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4392 (set_attr "prefix_data16" "*,*,1,1")
4393 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 insn for the high-element interleave of two V2DF values; it has three
;; alternatives and no movddup form.  In constraint order:
;;   0: op1 tied to the output, op2 register -> unpckhpd
;;   1: op1 offsettable memory, op2 tied to the output -> movlpd of %H1
;;   2: memory destination tied to op2, op1 register   -> movhpd of %1 to %0
4395 (define_insn "*sse2_interleave_highv2df"
4396 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4399 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4400 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4401 (parallel [(const_int 1)
4403 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4405 unpckhpd\t{%2, %0|%0, %2}
4406 movlpd\t{%H1, %0|%0, %H1}
4407 movhpd\t{%1, %0|%0, %1}"
4408 [(set_attr "type" "sselog,ssemov,ssemov")
4409 (set_attr "prefix_data16" "*,1,1")
4410 (set_attr "mode" "V2DF,V1DF,V1DF")])
4412 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single visible input, operand 1 (register or memory), and
;; the selection {0, 4, 2, 6}.
;; NOTE(review): the second half of the combined vector is not visible in
;; this chunk; presumably operand 1 is paired with itself via match_dup.
4413 (define_expand "avx_movddup256"
4414 [(set (match_operand:V4DF 0 "register_operand" "")
4417 (match_operand:V4DF 1 "nonimmediate_operand" "")
4419 (parallel [(const_int 0) (const_int 4)
4420 (const_int 2) (const_int 6)])))]
;; Expander for the 256-bit low unpack: selection {0, 4, 2, 6} over V4DF
;; operands 1 (register) and 2 (register or memory).  No preparation code is
;; visible in this chunk.
4424 (define_expand "avx_unpcklpd256"
4425 [(set (match_operand:V4DF 0 "register_operand" "")
4428 (match_operand:V4DF 1 "register_operand" "")
4429 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4430 (parallel [(const_int 0) (const_int 4)
4431 (const_int 2) (const_int 6)])))]
;; Insn matching the {0, 4, 2, 6} selection over two V4DF operands.
;;   alternative 0: op2 tied to op1 (register or memory) -> vmovddup of %1
;;   alternative 1: op1 register, op2 register or memory -> vunpcklpd
;; The visible part of the condition allows operand 1 to be memory only when
;; it is rtx_equal_p to operand 2.
4435 (define_insn "*avx_unpcklpd256"
4436 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4439 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4440 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4441 (parallel [(const_int 0) (const_int 4)
4442 (const_int 2) (const_int 6)])))]
4444 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4446 vmovddup\t{%1, %0|%0, %1}
4447 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4448 [(set_attr "type" "sselog")
4449 (set_attr "prefix" "vex")
4450 (set_attr "mode" "V4DF")])
;; Expander for the low-element interleave of V2DF operands 1 and 2 (the
;; selection starts with element 0).  If ix86_vec_interleave_v2df_operator_ok
;; (operands, 0) rejects the operand combination, operand 1 is forced into a
;; register before the pattern is emitted.
4452 (define_expand "vec_interleave_lowv2df"
4453 [(set (match_operand:V2DF 0 "register_operand" "")
4456 (match_operand:V2DF 1 "nonimmediate_operand" "")
4457 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4458 (parallel [(const_int 0)
4462 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4463 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX insn for the low-element interleave of two V2DF values.
;; Alternatives, in constraint order:
;;   0: both inputs in registers        -> vunpcklpd
;;   1: op1 memory, op2 tied to it      -> vmovddup of %1
;;   2: op1 register, op2 memory        -> vmovhpd of %2 with %1
;;   3: offsettable memory destination tied to op1, op2 register
;;                                      -> vmovlpd of %2 to %H0
4466 (define_insn "*avx_interleave_lowv2df"
4467 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4470 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4471 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4472 (parallel [(const_int 0)
4474 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4476 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4477 vmovddup\t{%1, %0|%0, %1}
4478 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4479 vmovlpd\t{%2, %H0|%H0, %2}"
4480 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4481 (set_attr "prefix" "vex")
4482 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 insn for the low-element interleave of two V2DF values, using
;; two-operand (destructive) templates.  Alternatives, in constraint order:
;;   0: op1 tied to the output, op2 register -> unpcklpd
;;   1: op1 memory, op2 tied to it           -> movddup of %1
;;   2: op1 tied to the output, op2 memory   -> movhpd of %2
;;   3: offsettable memory destination tied to op1, op2 register
;;                                           -> movlpd of %2 to %H0
4484 (define_insn "*sse3_interleave_lowv2df"
4485 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4488 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4489 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4490 (parallel [(const_int 0)
4492 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4494 unpcklpd\t{%2, %0|%0, %2}
4495 movddup\t{%1, %0|%0, %1}
4496 movhpd\t{%2, %0|%0, %2}
4497 movlpd\t{%2, %H0|%H0, %2}"
4498 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4499 (set_attr "prefix_data16" "*,*,1,1")
4500 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 insn for the low-element interleave of two V2DF values; it has three
;; alternatives and no movddup form.  Operand 1 is always tied to the output.
;;   0: op2 register -> unpcklpd
;;   1: op2 memory   -> movhpd of %2
;;   2: offsettable memory destination, op2 register -> movlpd of %2 to %H0
4502 (define_insn "*sse2_interleave_lowv2df"
4503 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4506 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4507 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4508 (parallel [(const_int 0)
4510 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4512 unpcklpd\t{%2, %0|%0, %2}
4513 movhpd\t{%2, %0|%0, %2}
4514 movlpd\t{%2, %H0|%H0, %2}"
4515 [(set_attr "type" "sselog,ssemov,ssemov")
4516 (set_attr "prefix_data16" "*,1,1")
4517 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload rewrite (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination fed from register operand 1: "low" is a DFmode REG with the
;; same register number as operand 1, and it is stored twice, at byte
;; offsets 0 and 8 of the destination.
;; NOTE(review): the pattern header and part of the matched RTL are not
;; visible in this chunk.
4520 [(set (match_operand:V2DF 0 "memory_operand" "")
4523 (match_operand:V2DF 1 "register_operand" "")
4525 (parallel [(const_int 0)
4527 "TARGET_SSE3 && reload_completed"
4530 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4531 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4532 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rewrite for a selection from memory operand 1 whose two indices are
;; operand 2 (0 or 1) and operand 3 == operand 2 + 2.  The replacement is a
;; vec_duplicate of the DF found at byte offset 8 * operand 2 of the memory
;; operand.
;; NOTE(review): the pattern header and part of the matched RTL are not
;; visible in this chunk.
4537 [(set (match_operand:V2DF 0 "register_operand" "")
4540 (match_operand:V2DF 1 "memory_operand" "")
4542 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4543 (match_operand:SI 3 "const_int_operand" "")])))]
4544 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4545 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4547 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander that decodes the immediate mask in operand 3 into explicit
;; element selectors for avx_shufpd256_1.  From the visible lines:
;;   mask bit 1 -> 5 if set, else 4
;;   mask bit 2 -> 3 if set, else 2
;;   mask bit 3 -> 7 if set, else 6
;; NOTE(review): the selector derived from mask bit 0 is not visible in this
;; chunk.
4550 (define_expand "avx_shufpd256"
4551 [(match_operand:V4DF 0 "register_operand" "")
4552 (match_operand:V4DF 1 "register_operand" "")
4553 (match_operand:V4DF 2 "nonimmediate_operand" "")
4554 (match_operand:SI 3 "const_int_operand" "")]
4557 int mask = INTVAL (operands[3]);
4558 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4560 GEN_INT (mask & 2 ? 5 : 4),
4561 GEN_INT (mask & 4 ? 3 : 2),
4562 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the four element selectors as operands 3..6.  The
;; output code folds them back into the instruction immediate:
;;   op3 | (op4 - 4) << 1 | (op5 - 2) << 2 | (op6 - 6) << 3
;; and overwrites operands[3] with that value, so %3 in the template prints
;; the immediate rather than the original selector.
4566 (define_insn "avx_shufpd256_1"
4567 [(set (match_operand:V4DF 0 "register_operand" "=x")
4570 (match_operand:V4DF 1 "register_operand" "x")
4571 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4572 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4573 (match_operand 4 "const_4_to_5_operand" "")
4574 (match_operand 5 "const_2_to_3_operand" "")
4575 (match_operand 6 "const_6_to_7_operand" "")])))]
4579 mask = INTVAL (operands[3]);
4580 mask |= (INTVAL (operands[4]) - 4) << 1;
4581 mask |= (INTVAL (operands[5]) - 2) << 2;
4582 mask |= (INTVAL (operands[6]) - 6) << 3;
4583 operands[3] = GEN_INT (mask);
4585 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4587 [(set_attr "type" "sselog")
4588 (set_attr "length_immediate" "1")
4589 (set_attr "prefix" "vex")
4590 (set_attr "mode" "V4DF")])
;; Expander that decodes the immediate mask in operand 3 into explicit
;; element selectors for sse2_shufpd_v2df.  From the visible line, mask bit
;; 1 selects 3 if set and 2 otherwise.
;; NOTE(review): the selector derived from mask bit 0 is not visible in this
;; chunk.
4592 (define_expand "sse2_shufpd"
4593 [(match_operand:V2DF 0 "register_operand" "")
4594 (match_operand:V2DF 1 "register_operand" "")
4595 (match_operand:V2DF 2 "nonimmediate_operand" "")
4596 (match_operand:SI 3 "const_int_operand" "")]
4599 int mask = INTVAL (operands[3]);
4600 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4602 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander over the SSEMODE_EO modes taking three register operands; it
;; delegates to ix86_expand_vec_extract_even_odd with last argument 0.
4606 (define_expand "vec_extract_even<mode>"
4607 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4608 (match_operand:SSEMODE_EO 1 "register_operand" "")
4609 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4612 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander over the SSEMODE_EO modes taking three register operands; it
;; delegates to ix86_expand_vec_extract_even_odd with last argument 1.
4616 (define_expand "vec_extract_odd<mode>"
4617 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4618 (match_operand:SSEMODE_EO 1 "register_operand" "")
4619 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4622 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4626 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX high-element interleave of two V2DI values (selection starts with
;; element 1): three-operand vpunpckhqdq with operand 1 in a register and
;; operand 2 in a register or memory.
4627 (define_insn "*avx_interleave_highv2di"
4628 [(set (match_operand:V2DI 0 "register_operand" "=x")
4631 (match_operand:V2DI 1 "register_operand" "x")
4632 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4633 (parallel [(const_int 1)
4636 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4637 [(set_attr "type" "sselog")
4638 (set_attr "prefix" "vex")
4639 (set_attr "mode" "TI")])
;; High-element interleave of two V2DI values (selection starts with element
;; 1) using two-operand punpckhqdq: operand 1 is tied to the output, operand
;; 2 may be a register or memory.
4641 (define_insn "vec_interleave_highv2di"
4642 [(set (match_operand:V2DI 0 "register_operand" "=x")
4645 (match_operand:V2DI 1 "register_operand" "0")
4646 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4647 (parallel [(const_int 1)
4650 "punpckhqdq\t{%2, %0|%0, %2}"
4651 [(set_attr "type" "sselog")
4652 (set_attr "prefix_data16" "1")
4653 (set_attr "mode" "TI")])
;; AVX low-element interleave of two V2DI values (selection starts with
;; element 0): three-operand vpunpcklqdq with operand 1 in a register and
;; operand 2 in a register or memory.
4655 (define_insn "*avx_interleave_lowv2di"
4656 [(set (match_operand:V2DI 0 "register_operand" "=x")
4659 (match_operand:V2DI 1 "register_operand" "x")
4660 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4661 (parallel [(const_int 0)
4664 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4665 [(set_attr "type" "sselog")
4666 (set_attr "prefix" "vex")
4667 (set_attr "mode" "TI")])
;; Low-element interleave of two V2DI values (selection starts with element
;; 0) using two-operand punpcklqdq: operand 1 is tied to the output, operand
;; 2 may be a register or memory.
4669 (define_insn "vec_interleave_lowv2di"
4670 [(set (match_operand:V2DI 0 "register_operand" "=x")
4673 (match_operand:V2DI 1 "register_operand" "0")
4674 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4675 (parallel [(const_int 0)
4678 "punpcklqdq\t{%2, %0|%0, %2}"
4679 [(set_attr "type" "sselog")
4680 (set_attr "prefix_data16" "1")
4681 (set_attr "mode" "TI")])
;; vshufpd over the SSEMODE2D modes: selects one element (operand 3, 0 or 1)
;; from operand 1 and one element (operand 4, 2 or 3) from operand 2 of the
;; concatenated pair.  The output code folds the selectors into the
;; immediate op3 | (op4 - 2) << 1 and overwrites operands[3] with it before
;; the template is printed.
4683 (define_insn "*avx_shufpd_<mode>"
4684 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4685 (vec_select:SSEMODE2D
4686 (vec_concat:<ssedoublesizemode>
4687 (match_operand:SSEMODE2D 1 "register_operand" "x")
4688 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4689 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4690 (match_operand 4 "const_2_to_3_operand" "")])))]
4694 mask = INTVAL (operands[3]);
4695 mask |= (INTVAL (operands[4]) - 2) << 1;
4696 operands[3] = GEN_INT (mask);
4698 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4700 [(set_attr "type" "sselog")
4701 (set_attr "length_immediate" "1")
4702 (set_attr "prefix" "vex")
4703 (set_attr "mode" "V2DF")])
;; Two-operand shufpd over the SSEMODE2D modes; operand 1 is tied to the
;; output.  Operand 3 (0 or 1) and operand 4 (2 or 3) select from the
;; concatenated pair; the output code folds them into the immediate
;; op3 | (op4 - 2) << 1 and overwrites operands[3] with it before the
;; template is printed.
4705 (define_insn "sse2_shufpd_<mode>"
4706 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4707 (vec_select:SSEMODE2D
4708 (vec_concat:<ssedoublesizemode>
4709 (match_operand:SSEMODE2D 1 "register_operand" "0")
4710 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4711 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4712 (match_operand 4 "const_2_to_3_operand" "")])))]
4716 mask = INTVAL (operands[3]);
4717 mask |= (INTVAL (operands[4]) - 2) << 1;
4718 operands[3] = GEN_INT (mask);
4720 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4722 [(set_attr "type" "sselog")
4723 (set_attr "length_immediate" "1")
4724 (set_attr "mode" "V2DF")])
4726 ;; Avoid combining registers from different units in a single alternative,
4727 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF value into a DF destination; the
;; condition rejects memory-to-memory.  Alternative 0 stores from a register
;; with vmovhpd, alternative 1 is register-to-register via vunpckhpd.
;; NOTE(review): the templates of alternatives 2-4 (offsettable memory source
;; into "x", "*f" or "r") are not visible in this chunk.
4728 (define_insn "*avx_storehpd"
4729 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4731 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4732 (parallel [(const_int 1)])))]
4733 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4735 vmovhpd\t{%1, %0|%0, %1}
4736 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4740 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4741 (set_attr "prefix" "vex")
4742 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF value into a DF destination; the
;; condition rejects memory-to-memory.  Alternative 0 stores from a register
;; with movhpd; in alternative 1 the source is tied to the output.
;; NOTE(review): the templates of alternatives 1-4 are not visible in this
;; chunk.
4744 (define_insn "sse2_storehpd"
4745 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4747 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4748 (parallel [(const_int 1)])))]
4749 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4751 movhpd\t{%1, %0|%0, %1}
4756 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4757 (set_attr "prefix_data16" "1,*,*,*,*")
4758 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite: extracting element 1 of a V2DF value in memory into
;; a DF register becomes a plain move from the same memory re-addressed as
;; DFmode at byte offset 8.
;; NOTE(review): the pattern header is not visible in this chunk.
4761 [(set (match_operand:DF 0 "register_operand" "")
4763 (match_operand:V2DF 1 "memory_operand" "")
4764 (parallel [(const_int 1)])))]
4765 "TARGET_SSE2 && reload_completed"
4766 [(set (match_dup 0) (match_dup 1))]
4768 operands[1] = adjust_address (operands[1], DFmode, 8);
4771 ;; Avoid combining registers from different units in a single alternative,
4772 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF value into a DF destination; the
;; condition rejects memory-to-memory.  Alternative 0 stores from a register
;; with %vmovlpd.
;; NOTE(review): the templates of alternatives 1-4 are not visible in this
;; chunk.
4773 (define_insn "sse2_storelpd"
4774 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4776 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4777 (parallel [(const_int 0)])))]
4778 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4780 %vmovlpd\t{%1, %0|%0, %1}
4785 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4786 (set_attr "prefix_data16" "1,*,*,*,*")
4787 (set_attr "prefix" "maybe_vex")
4788 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite: extracting element 0 of a V2DF value into a DF
;; register becomes a plain DFmode move.  The source is rewritten either as a
;; DFmode REG with the same register number or via gen_lowpart.
;; NOTE(review): the pattern header and the test choosing between the two
;; rewrites are not visible in this chunk.
4791 [(set (match_operand:DF 0 "register_operand" "")
4793 (match_operand:V2DF 1 "nonimmediate_operand" "")
4794 (parallel [(const_int 0)])))]
4795 "TARGET_SSE2 && reload_completed"
4798 rtx op1 = operands[1];
4800 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4802 op1 = gen_lowpart (DFmode, op1);
4803 emit_move_insn (operands[0], op1);
;; Expander that combines element 0 of V2DF operand 1 with DF operand 2 (in
;; that order) into operand 0.  Its preparation statement calls
;; ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
4807 (define_expand "sse2_loadhpd_exp"
4808 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4811 (match_operand:V2DF 1 "nonimmediate_operand" "")
4812 (parallel [(const_int 0)]))
4813 (match_operand:DF 2 "nonimmediate_operand" "")))]
4815 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4817 ;; Avoid combining registers from different units in a single alternative,
4818 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX insn pairing element 0 of V2DF operand 1 with DF operand 2.
;; Alternative 0 takes operand 2 from memory (vmovhpd), alternative 1 from a
;; register (vunpcklpd).  Alternatives 2-4 have an offsettable memory
;; destination tied to operand 1.
;; NOTE(review): the templates of alternatives 2-4 are not visible in this
;; chunk.
4819 (define_insn "*avx_loadhpd"
4820 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4823 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4824 (parallel [(const_int 0)]))
4825 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4826 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4828 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4829 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4833 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4834 (set_attr "prefix" "vex")
4835 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 insn pairing element 0 of V2DF operand 1 with DF operand 2.
;; Alternatives, in constraint order:
;;   0: op1 tied to the output, op2 memory   -> movhpd
;;   1: op1 tied to the output, op2 register -> unpcklpd
;;   2: op2 tied to the output, op1 register -> shufpd with immediate 1
;;   3-5: offsettable memory destination tied to op1
;; NOTE(review): the templates of alternatives 3-5 are not visible in this
;; chunk.
4837 (define_insn "sse2_loadhpd"
4838 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4841 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4842 (parallel [(const_int 0)]))
4843 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4844 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4846 movhpd\t{%2, %0|%0, %2}
4847 unpcklpd\t{%2, %0|%0, %2}
4848 shufpd\t{$1, %1, %0|%0, %1, 1}
4852 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4853 (set_attr "prefix_data16" "1,*,*,*,*,*")
4854 (set_attr "length_immediate" "*,*,1,*,*,*")
4855 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite for a V2DF memory destination whose element 0 is kept
;; (vec_select of the same memory, index 0) and whose other element comes
;; from DF register operand 1: the insn becomes a plain DFmode store to byte
;; offset 8 of the destination.
;; NOTE(review): the pattern header is not visible in this chunk.
4858 [(set (match_operand:V2DF 0 "memory_operand" "")
4860 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4861 (match_operand:DF 1 "register_operand" "")))]
4862 "TARGET_SSE2 && reload_completed"
4863 [(set (match_dup 0) (match_dup 1))]
4865 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander that combines DF operand 2 with element 1 of V2DF operand 1 (in
;; that order) into operand 0.  Its preparation statement calls
;; ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
4868 (define_expand "sse2_loadlpd_exp"
4869 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4871 (match_operand:DF 2 "nonimmediate_operand" "")
4873 (match_operand:V2DF 1 "nonimmediate_operand" "")
4874 (parallel [(const_int 1)]))))]
4876 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4878 ;; Avoid combining registers from different units in a single alternative,
4879 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX insn pairing DF operand 2 with element 1 of V2DF operand 1.
;; Alternatives, in constraint order:
;;   0: op2 memory, op1 constraint "C"   -> vmovsd load
;;   1: op2 memory, op1 register         -> vmovlpd
;;   2: op2 register, op1 register       -> three-operand vmovsd
;;   3: op2 register, op1 offsettable memory -> vmovhpd of %H1 with %2
;;   4-6: memory destination tied to op1
;; NOTE(review): the templates of alternatives 4-6 are not visible in this
;; chunk.
4880 (define_insn "*avx_loadlpd"
4881 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4883 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4885 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4886 (parallel [(const_int 1)]))))]
4887 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4889 vmovsd\t{%2, %0|%0, %2}
4890 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4891 vmovsd\t{%2, %1, %0|%0, %1, %2}
4892 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4896 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4897 (set_attr "prefix" "vex")
4898 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 insn pairing DF operand 2 with element 1 of V2DF operand 1.
;; Alternatives, in constraint order:
;;   0: op2 memory, op1 constraint "C"            -> movsd load
;;   1: op2 memory, op1 tied to the output        -> movlpd
;;   2: op2 register, op1 tied to the output      -> movsd
;;   3: op2 tied to the output, op1 register      -> shufpd with immediate 2
;;   4: op2 tied to the output, op1 offsettable memory -> movhpd of %H1
;;   5-7: memory destination tied to op1
;; NOTE(review): the templates of alternatives 5-7 are not visible in this
;; chunk.
4900 (define_insn "sse2_loadlpd"
4901 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4903 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4905 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4906 (parallel [(const_int 1)]))))]
4907 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4909 movsd\t{%2, %0|%0, %2}
4910 movlpd\t{%2, %0|%0, %2}
4911 movsd\t{%2, %0|%0, %2}
4912 shufpd\t{$2, %2, %0|%0, %2, 2}
4913 movhpd\t{%H1, %0|%0, %H1}
4917 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4918 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4919 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4920 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite for a V2DF memory destination whose element 1 is kept
;; (vec_select of the same memory, index 1) and whose element 0 is replaced
;; by DF register operand 1.  Only the low element changes, so the insn
;; becomes a plain DFmode store to byte offset 0 of the destination.
;; Storing at offset 8 would overwrite the element that must be preserved
;; and leave the low element stale; offset 8 is correct only for the
;; mirror-image pattern that keeps element 0 and replaces element 1.
4923 [(set (match_operand:V2DF 0 "memory_operand" "")
4925 (match_operand:DF 1 "register_operand" "")
4926 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4927 "TARGET_SSE2 && reload_completed"
4928 [(set (match_dup 0) (match_dup 1))]
4930 operands[0] = adjust_address (operands[0], DFmode, 0);
4933 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Extraction of element 1 of a V2DF value when SSE is available but SSE2 is
;; not; the condition also rejects memory-to-memory.  Alternatives:
;;   0: register to memory            -> movhps
;;   1: register to register          -> movhlps
;;   2: offsettable memory to register -> movlps of %H1
4935 (define_insn "*vec_extractv2df_1_sse"
4936 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4938 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4939 (parallel [(const_int 1)])))]
4940 "!TARGET_SSE2 && TARGET_SSE
4941 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4943 movhps\t{%1, %0|%0, %1}
4944 movhlps\t{%1, %0|%0, %1}
4945 movlps\t{%H1, %0|%0, %H1}"
4946 [(set_attr "type" "ssemov")
4947 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extraction of element 0 of a V2DF value when SSE is available but SSE2 is
;; not; the condition also rejects memory-to-memory.  Alternatives:
;;   0: register to memory   -> movlps
;;   1: register to register -> movaps
;;   2: memory to register   -> movlps
4949 (define_insn "*vec_extractv2df_0_sse"
4950 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4952 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4953 (parallel [(const_int 0)])))]
4954 "!TARGET_SSE2 && TARGET_SSE
4955 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4957 movlps\t{%1, %0|%0, %1}
4958 movaps\t{%1, %0|%0, %1}
4959 movlps\t{%1, %0|%0, %1}"
4960 [(set_attr "type" "ssemov")
4961 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX movsd-style merge of V2DF operands 2 and 1 (listed in that order).
;; Alternatives, in constraint order:
;;   0: op2 register, op1 register -> three-operand vmovsd
;;   1: op2 memory, op1 register   -> three-operand vmovlpd
;;   2: memory destination tied to op1, op2 register -> vmovlpd store
;;   3: op2 register, op1 offsettable memory -> vmovhps of %H1 with %2
;;   4: offsettable memory destination tied to op2, op1 register
;;                                           -> vmovhps of %1 to %H0
;; NOTE(review): the combining RTL and the insn condition are not visible in
;; this chunk.
4963 (define_insn "*avx_movsd"
4964 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4966 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4967 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4971 vmovsd\t{%2, %1, %0|%0, %1, %2}
4972 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4973 vmovlpd\t{%2, %0|%0, %2}
4974 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4975 vmovhps\t{%1, %H0|%H0, %1}"
4976 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4977 (set_attr "prefix" "vex")
4978 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 movsd-style merge of V2DF operands 2 and 1 (listed in that order),
;; using two-operand templates.  Alternatives, in constraint order:
;;   0: op2 register, op1 tied to the output -> movsd
;;   1: op2 memory, op1 tied to the output   -> movlpd
;;   2: memory destination tied to op1, op2 register -> movlpd store
;;   3: op2 tied to the output, op1 register -> shufpd with immediate 2
;;   4: op2 tied to the output, op1 offsettable memory -> movhps of %H1
;;   5: offsettable memory destination tied to op2, op1 register
;;                                           -> movhps of %1 to %H0
;; NOTE(review): the combining RTL and the insn condition are not visible in
;; this chunk.
4980 (define_insn "sse2_movsd"
4981 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4983 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4984 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4988 movsd\t{%2, %0|%0, %2}
4989 movlpd\t{%2, %0|%0, %2}
4990 movlpd\t{%2, %0|%0, %2}
4991 shufpd\t{$2, %2, %0|%0, %2, 2}
4992 movhps\t{%H1, %0|%0, %H1}
4993 movhps\t{%1, %H0|%H0, %1}"
4994 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4995 (set_attr "prefix_data16" "*,1,1,*,*,*")
4996 (set_attr "length_immediate" "*,*,*,1,*,*")
4997 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF register from DF operand 1 (register or memory) with
;; %vmovddup.
;; NOTE(review): the RTL operator around operand 1 and the insn condition are
;; not visible in this chunk; the name suggests a vec_duplicate.
4999 (define_insn "*vec_dupv2df_sse3"
5000 [(set (match_operand:V2DF 0 "register_operand" "=x")
5002 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5004 "%vmovddup\t{%1, %0|%0, %1}"
5005 [(set_attr "type" "sselog1")
5006 (set_attr "prefix" "maybe_vex")
5007 (set_attr "mode" "DF")])
;; Builds a V2DF register from DF register operand 1, which is tied to the
;; output (constraint "0").
;; NOTE(review): the RTL operator, insn condition and output template are not
;; visible in this chunk.
5009 (define_insn "vec_dupv2df"
5010 [(set (match_operand:V2DF 0 "register_operand" "=x")
5012 (match_operand:DF 1 "register_operand" "0")))]
5015 [(set_attr "type" "sselog1")
5016 (set_attr "mode" "V2DF")])
;; Builds a V2DF register from the single visible input, DF operand 1
;; (register or memory), with %vmovddup.
;; NOTE(review): the second half of the concatenation and the insn condition
;; are not visible in this chunk; presumably operand 1 is repeated via
;; match_dup.
5018 (define_insn "*vec_concatv2df_sse3"
5019 [(set (match_operand:V2DF 0 "register_operand" "=x")
5021 (match_operand:DF 1 "nonimmediate_operand" "xm")
5024 "%vmovddup\t{%1, %0|%0, %1}"
5025 [(set_attr "type" "sselog1")
5026 (set_attr "prefix" "maybe_vex")
5027 (set_attr "mode" "DF")])
;; AVX insn building a V2DF register from DF operands 1 and 2.
;; Alternatives, in constraint order:
;;   0: both in registers                 -> vunpcklpd
;;   1: op1 register, op2 memory          -> vmovhpd
;;   2: op1 memory, op2 constraint "C"    -> vmovsd load of %1
;; NOTE(review): the insn condition is not visible in this chunk.
5029 (define_insn "*vec_concatv2df_avx"
5030 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5032 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5033 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5036 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5037 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5038 vmovsd\t{%1, %0|%0, %1}"
5039 [(set_attr "type" "ssemov")
5040 (set_attr "prefix" "vex")
5041 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX insn building a V2DF register from DF operands 1 and 2.
;; Alternatives, in constraint order (the first three use the "Y2" register
;; constraint, the last two use "x"):
;;   0: op1 tied to the output, op2 register -> unpcklpd
;;   1: op1 tied to the output, op2 memory   -> movhpd
;;   2: op1 memory, op2 constraint "C"       -> movsd load of %1
;;   3: op1 tied to the output, op2 register -> movlhps
;;   4: op1 tied to the output, op2 memory   -> movhps
;; NOTE(review): the insn condition is not visible in this chunk.
5043 (define_insn "*vec_concatv2df"
5044 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5046 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5047 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5050 unpcklpd\t{%2, %0|%0, %2}
5051 movhpd\t{%2, %0|%0, %2}
5052 movsd\t{%1, %0|%0, %1}
5053 movlhps\t{%2, %0|%0, %2}
5054 movhps\t{%2, %0|%0, %2}"
5055 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5056 (set_attr "prefix_data16" "*,1,*,*,*")
5057 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5059 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5061 ;; Parallel integral arithmetic
5063 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander over the SSEMODEI modes.  The
;; preparation statement sets operands[2] to a register holding the all-zero
;; constant of the mode.
;; NOTE(review): the RTL operator is not visible in this chunk; presumably
;; the pattern subtracts operand 1 from that zero register.
5065 (define_expand "neg<mode>2"
5066 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5069 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5071 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander over the SSEMODEI modes.  Both inputs
;; may be registers or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands).
5073 (define_expand "<plusminus_insn><mode>3"
5074 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5076 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5077 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5079 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX integer vector add/subtract over the SSEMODEI modes: three-operand
;; vp<plusminus_mnemonic><ssevecsize>.  Operand 1's constraint is "<comm>x"
;; and operand 2 may be a register or memory; the condition additionally
;; requires ix86_binary_operator_ok.
5081 (define_insn "*avx_<plusminus_insn><mode>3"
5082 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5084 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5085 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5086 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5087 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5088 [(set_attr "type" "sseiadd")
5089 (set_attr "prefix" "vex")
5090 (set_attr "mode" "TI")])
;; SSE2 integer vector add/subtract over the SSEMODEI modes: two-operand
;; p<plusminus_mnemonic><ssevecsize> with operand 1 tied to the output
;; ("<comm>0") and operand 2 in a register or memory; the condition
;; additionally requires ix86_binary_operator_ok.
5092 (define_insn "*<plusminus_insn><mode>3"
5093 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5095 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5096 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5097 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5098 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5099 [(set_attr "type" "sseiadd")
5100 (set_attr "prefix_data16" "1")
5101 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus codes over the SSEMODE12 modes (V16QI and
;; V8HI).  Both inputs may be registers or memory; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands).
5103 (define_expand "sse2_<plusminus_insn><mode>3"
5104 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5105 (sat_plusminus:SSEMODE12
5106 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5107 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5109 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX insn for the sat_plusminus codes over the SSEMODE12 modes:
;; three-operand vp<plusminus_mnemonic><ssevecsize>.  Operand 1's constraint
;; is "<comm>x" and operand 2 may be a register or memory; the condition
;; additionally requires ix86_binary_operator_ok.
5111 (define_insn "*avx_<plusminus_insn><mode>3"
5112 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5113 (sat_plusminus:SSEMODE12
5114 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5115 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5116 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5117 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5118 [(set_attr "type" "sseiadd")
5119 (set_attr "prefix" "vex")
5120 (set_attr "mode" "TI")])
;; SSE2 insn for the sat_plusminus codes over the SSEMODE12 modes:
;; two-operand p<plusminus_mnemonic><ssevecsize> with operand 1 tied to the
;; output ("<comm>0") and operand 2 in a register or memory; the condition
;; additionally requires ix86_binary_operator_ok.
5122 (define_insn "*sse2_<plusminus_insn><mode>3"
5123 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5124 (sat_plusminus:SSEMODE12
5125 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5126 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5127 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5128 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5129 [(set_attr "type" "sseiadd")
5130 (set_attr "prefix_data16" "1")
5131 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as a split while new pseudos can still be
;; created (can_create_pseudo_p).  The split body allocates six V16QI
;; temporaries, interleaves each input with itself (high and low halves),
;; multiplies the results as V8HI with mulv8hi3, and finally calls
;; ix86_expand_vec_extract_even_odd (..., 0) on the two products to form
;; operand 0.
;; NOTE(review): the first part of the insn condition and the declarations
;; of t and i are not visible in this chunk.
5133 (define_insn_and_split "mulv16qi3"
5134 [(set (match_operand:V16QI 0 "register_operand" "")
5135 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5136 (match_operand:V16QI 2 "register_operand" "")))]
5138 && can_create_pseudo_p ()"
5146 for (i = 0; i < 6; ++i)
5147 t[i] = gen_reg_rtx (V16QImode);
5149 /* Unpack data such that we've got a source byte in each low byte of
5150 each word. We don't care what goes into the high byte of each word.
5151 Rather than trying to get zero in there, most convenient is to let
5152 it be a copy of the low byte. */
5153 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5154 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5155 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5156 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5158 /* Multiply words. The end-of-line annotations here give a picture of what
5159 the output of that instruction looks like. Dot means don't care; the
5160 letters are the bytes of the result with A being the most significant. */
5161 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5162 gen_lowpart (V8HImode, t[0]),
5163 gen_lowpart (V8HImode, t[1])));
5164 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5165 gen_lowpart (V8HImode, t[2]),
5166 gen_lowpart (V8HImode, t[3])));
5168 /* Extract the even bytes and merge them back together. */
5169 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Expander for the V8HI multiply.  Both sources are accepted as
;; nonimmediate_operand and passed to ix86_fixup_binary_operands_no_copy
;; (with code MULT) before the pattern is emitted.
5173 (define_expand "mulv8hi3"
5174 [(set (match_operand:V8HI 0 "register_operand" "")
5175 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5176 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5178 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI multiply: three-operand vpmullw.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative; operand 2 may be in
;; memory.
5180 (define_insn "*avx_mulv8hi3"
5181 [(set (match_operand:V8HI 0 "register_operand" "=x")
5182 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5183 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5184 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5185 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5186 [(set_attr "type" "sseimul")
5187 (set_attr "prefix" "vex")
5188 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI multiply: two-operand pmullw with operand 1 tied
;; to the destination ("%0", commutative with operand 2).
5190 (define_insn "*mulv8hi3"
5191 [(set (match_operand:V8HI 0 "register_operand" "=x")
5192 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5193 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5194 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5195 "pmullw\t{%2, %0|%0, %2}"
5196 [(set_attr "type" "sseimul")
5197 (set_attr "prefix_data16" "1")
5198 (set_attr "mode" "TI")])
;; Expander for the signed high-part V8HI multiply (the insns that follow
;; it emit pmulhw / vpmulhw).  The two V8HI sources are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
5200 (define_expand "smulv8hi3_highpart"
5201 [(set (match_operand:V8HI 0 "register_operand" "")
5206 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5208 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5211 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the signed high-part V8HI multiply: three-operand vpmulhw.
;; The name carries the "_smul" tag so that it parallels
;; *smulv8hi3_highpart and *avx_umulv8hi3_highpart; because the name starts
;; with "*", no generator function is produced for it and the name only
;; shows up in RTL dumps.
5213 (define_insn "*avx_smulv8hi3_highpart"
5214 [(set (match_operand:V8HI 0 "register_operand" "=x")
5219 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5221 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5223 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5224 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5225 [(set_attr "type" "sseimul")
5226 (set_attr "prefix" "vex")
5227 (set_attr "mode" "TI")])
;; SSE2 form of the signed high-part V8HI multiply: two-operand pmulhw with
;; operand 1 tied to the destination and commutative with operand 2.
5229 (define_insn "*smulv8hi3_highpart"
5230 [(set (match_operand:V8HI 0 "register_operand" "=x")
5235 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5237 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5239 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5240 "pmulhw\t{%2, %0|%0, %2}"
5241 [(set_attr "type" "sseimul")
5242 (set_attr "prefix_data16" "1")
5243 (set_attr "mode" "TI")])
;; Expander for the unsigned high-part V8HI multiply (the insns that follow
;; it emit pmulhuw / vpmulhuw).  The two V8HI sources are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
5245 (define_expand "umulv8hi3_highpart"
5246 [(set (match_operand:V8HI 0 "register_operand" "")
5251 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5253 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5256 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the unsigned high-part V8HI multiply: three-operand vpmulhuw.
5258 (define_insn "*avx_umulv8hi3_highpart"
5259 [(set (match_operand:V8HI 0 "register_operand" "=x")
5264 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5266 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5268 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5269 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5270 [(set_attr "type" "sseimul")
5271 (set_attr "prefix" "vex")
5272 (set_attr "mode" "TI")])
;; SSE2 form of the unsigned high-part V8HI multiply: two-operand pmulhuw
;; with operand 1 tied to the destination and commutative with operand 2.
5274 (define_insn "*umulv8hi3_highpart"
5275 [(set (match_operand:V8HI 0 "register_operand" "=x")
5280 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5282 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5284 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5285 "pmulhuw\t{%2, %0|%0, %2}"
5286 [(set_attr "type" "sseimul")
5287 (set_attr "prefix_data16" "1")
5288 (set_attr "mode" "TI")])
;; Expander for the widening unsigned multiply V4SI x V4SI -> V2DI.  The
;; vec_select parallels pick elements 0 and 2 of each V4SI source, so only
;; the even elements take part.  The sources are passed through
;; ix86_fixup_binary_operands_no_copy with mode V4SImode.
5290 (define_expand "sse2_umulv2siv2di3"
5291 [(set (match_operand:V2DI 0 "register_operand" "")
5295 (match_operand:V4SI 1 "nonimmediate_operand" "")
5296 (parallel [(const_int 0) (const_int 2)])))
5299 (match_operand:V4SI 2 "nonimmediate_operand" "")
5300 (parallel [(const_int 0) (const_int 2)])))))]
5302 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the even-element widening unsigned multiply: three-operand
;; vpmuludq on elements 0 and 2 of each V4SI source, giving a V2DI result.
5304 (define_insn "*avx_umulv2siv2di3"
5305 [(set (match_operand:V2DI 0 "register_operand" "=x")
5309 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5310 (parallel [(const_int 0) (const_int 2)])))
5313 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5314 (parallel [(const_int 0) (const_int 2)])))))]
5315 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5316 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5317 [(set_attr "type" "sseimul")
5318 (set_attr "prefix" "vex")
5319 (set_attr "mode" "TI")])
;; SSE2 form of the even-element widening unsigned multiply: two-operand
;; pmuludq on elements 0 and 2 of each V4SI source, with operand 1 tied to
;; the V2DI destination.
5321 (define_insn "*sse2_umulv2siv2di3"
5322 [(set (match_operand:V2DI 0 "register_operand" "=x")
5326 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5327 (parallel [(const_int 0) (const_int 2)])))
5330 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5331 (parallel [(const_int 0) (const_int 2)])))))]
5332 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5333 "pmuludq\t{%2, %0|%0, %2}"
5334 [(set_attr "type" "sseimul")
5335 (set_attr "prefix_data16" "1")
5336 (set_attr "mode" "TI")])
;; Expander for the widening multiply V4SI x V4SI -> V2DI that the insns
;; following it implement with pmuldq / vpmuldq.  As with
;; sse2_umulv2siv2di3, the vec_select parallels pick elements 0 and 2 of
;; each source.
5338 (define_expand "sse4_1_mulv2siv2di3"
5339 [(set (match_operand:V2DI 0 "register_operand" "")
5343 (match_operand:V4SI 1 "nonimmediate_operand" "")
5344 (parallel [(const_int 0) (const_int 2)])))
5347 (match_operand:V4SI 2 "nonimmediate_operand" "")
5348 (parallel [(const_int 0) (const_int 2)])))))]
5350 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the even-element widening multiply: three-operand vpmuldq on
;; elements 0 and 2 of each V4SI source, giving a V2DI result.
5352 (define_insn "*avx_mulv2siv2di3"
5353 [(set (match_operand:V2DI 0 "register_operand" "=x")
5357 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5358 (parallel [(const_int 0) (const_int 2)])))
5361 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5362 (parallel [(const_int 0) (const_int 2)])))))]
5363 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5364 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5365 [(set_attr "type" "sseimul")
5366 (set_attr "prefix_extra" "1")
5367 (set_attr "prefix" "vex")
5368 (set_attr "mode" "TI")])
;; SSE4.1 form of the even-element widening multiply: two-operand pmuldq
;; with operand 1 tied to the V2DI destination.  Requires TARGET_SSE4_1.
5370 (define_insn "*sse4_1_mulv2siv2di3"
5371 [(set (match_operand:V2DI 0 "register_operand" "=x")
5375 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5376 (parallel [(const_int 0) (const_int 2)])))
5379 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5380 (parallel [(const_int 0) (const_int 2)])))))]
5381 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5382 "pmuldq\t{%2, %0|%0, %2}"
5383 [(set_attr "type" "sseimul")
5384 (set_attr "prefix_extra" "1")
5385 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V8HI x V8HI -> V4SI.  The pattern takes two V4HI
;; selections from each source: one whose index list starts at 0 and one
;; whose index list starts at 1 and ends at 7; operands 1 and 2 are reused
;; for the second selection through match_dup.  The sources are passed
;; through ix86_fixup_binary_operands_no_copy with mode V8HImode.
5387 (define_expand "sse2_pmaddwd"
5388 [(set (match_operand:V4SI 0 "register_operand" "")
5393 (match_operand:V8HI 1 "nonimmediate_operand" "")
5394 (parallel [(const_int 0)
5400 (match_operand:V8HI 2 "nonimmediate_operand" "")
5401 (parallel [(const_int 0)
5407 (vec_select:V4HI (match_dup 1)
5408 (parallel [(const_int 1)
5413 (vec_select:V4HI (match_dup 2)
5414 (parallel [(const_int 1)
5417 (const_int 7)]))))))]
5419 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of pmaddwd: three-operand vpmaddwd, V8HI x V8HI -> V4SI, using
;; the same two V4HI selections per source as the sse2_pmaddwd expander.
5421 (define_insn "*avx_pmaddwd"
5422 [(set (match_operand:V4SI 0 "register_operand" "=x")
5427 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5428 (parallel [(const_int 0)
5434 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5435 (parallel [(const_int 0)
5441 (vec_select:V4HI (match_dup 1)
5442 (parallel [(const_int 1)
5447 (vec_select:V4HI (match_dup 2)
5448 (parallel [(const_int 1)
5451 (const_int 7)]))))))]
5452 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5453 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5454 [(set_attr "type" "sseiadd")
5455 (set_attr "prefix" "vex")
5456 (set_attr "mode" "TI")])
;; SSE2 form of pmaddwd: two-operand pmaddwd with operand 1 tied to the
;; V4SI destination.  Unlike the AVX variant it also sets the "atom_unit"
;; attribute to "simul".
5458 (define_insn "*sse2_pmaddwd"
5459 [(set (match_operand:V4SI 0 "register_operand" "=x")
5464 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5465 (parallel [(const_int 0)
5471 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5472 (parallel [(const_int 0)
5478 (vec_select:V4HI (match_dup 1)
5479 (parallel [(const_int 1)
5484 (vec_select:V4HI (match_dup 2)
5485 (parallel [(const_int 1)
5488 (const_int 7)]))))))]
5489 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5490 "pmaddwd\t{%2, %0|%0, %2}"
5491 [(set_attr "type" "sseiadd")
5492 (set_attr "atom_unit" "simul")
5493 (set_attr "prefix_data16" "1")
5494 (set_attr "mode" "TI")])
;; Expander for the V4SI multiply.  All operands must be registers.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy only when
;; TARGET_SSE4_1 or TARGET_AVX is set, i.e. on the targets for which the
;; pmulld / vpmulld insns below are enabled.
;; NOTE(review): on plain SSE2 the pattern is presumably picked up by the
;; *sse2_mulv4si3 split -- confirm.
5496 (define_expand "mulv4si3"
5497 [(set (match_operand:V4SI 0 "register_operand" "")
5498 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5499 (match_operand:V4SI 2 "register_operand" "")))]
5502 if (TARGET_SSE4_1 || TARGET_AVX)
5503 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX form of the V4SI multiply: three-operand vpmulld; operand 2 may be in
;; memory.
5506 (define_insn "*avx_mulv4si3"
5507 [(set (match_operand:V4SI 0 "register_operand" "=x")
5508 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5509 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5510 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5511 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5512 [(set_attr "type" "sseimul")
5513 (set_attr "prefix_extra" "1")
5514 (set_attr "prefix" "vex")
5515 (set_attr "mode" "TI")])
;; SSE4.1 form of the V4SI multiply: two-operand pmulld with operand 1 tied
;; to the destination.  Requires TARGET_SSE4_1.
5517 (define_insn "*sse4_1_mulv4si3"
5518 [(set (match_operand:V4SI 0 "register_operand" "=x")
5519 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5520 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5521 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5522 "pmulld\t{%2, %0|%0, %2}"
5523 [(set_attr "type" "sseimul")
5524 (set_attr "prefix_extra" "1")
5525 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX (see the
;; condition).  The split builds the result from two even-element widening
;; multiplies (gen_sse2_umulv2siv2di3): one on the inputs as they are, and
;; one on copies shifted with gen_sse2_lshrv1ti3 so that elements 3 and 1
;; land in the even slots.  The two partial results are then shuffled with
;; pshufd and merged with a low interleave into operand 0.  Six V4SI
;; temporaries are allocated, hence can_create_pseudo_p ().
5527 (define_insn_and_split "*sse2_mulv4si3"
5528 [(set (match_operand:V4SI 0 "register_operand" "")
5529 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5530 (match_operand:V4SI 2 "register_operand" "")))]
5531 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5532 && can_create_pseudo_p ()"
5537 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5543 t1 = gen_reg_rtx (V4SImode);
5544 t2 = gen_reg_rtx (V4SImode);
5545 t3 = gen_reg_rtx (V4SImode);
5546 t4 = gen_reg_rtx (V4SImode);
5547 t5 = gen_reg_rtx (V4SImode);
5548 t6 = gen_reg_rtx (V4SImode);
5549 thirtytwo = GEN_INT (32);
5551 /* Multiply elements 2 and 0. */
5552 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5555 /* Shift both input vectors down one element, so that elements 3
5556 and 1 are now in the slots for elements 2 and 0. For K8, at
5557 least, this is faster than using a shuffle. */
5558 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5559 gen_lowpart (V1TImode, op1),
5561 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5562 gen_lowpart (V1TImode, op2),
5564 /* Multiply elements 3 and 1. */
5565 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5568 /* Move the results in element 2 down to element 1; we don't care
5569 what goes in elements 2 and 3. */
5570 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5571 const0_rtx, const0_rtx));
5572 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5573 const0_rtx, const0_rtx));
5575 /* Merge the parts back together. */
5576 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply synthesized from 32-bit pieces; the split allocates
;; temporaries, hence can_create_pseudo_p ().  Two sequences appear in the
;; body:
;;  - an XOP sequence: shuffle op1 (pshufd), multiply as V4SI, add adjacent
;;    products (gen_xop_phadddq), shift the sums left by 32, then combine
;;    with the low-part product via gen_xop_pmacsdql;
;;  - an SSE2 sequence: low x low via gen_sse2_umulv2siv2di3, the two
;;    high x low cross products (inputs shifted right by 32 to expose the
;;    high halves), cross products shifted left by 32, and the three parts
;;    summed with gen_addv2di3.
5580 (define_insn_and_split "mulv2di3"
5581 [(set (match_operand:V2DI 0 "register_operand" "")
5582 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5583 (match_operand:V2DI 2 "register_operand" "")))]
5585 && can_create_pseudo_p ()"
5590 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5599 /* op1: A,B,C,D, op2: E,F,G,H */
5600 op1 = gen_lowpart (V4SImode, op1);
5601 op2 = gen_lowpart (V4SImode, op2);
5603 t1 = gen_reg_rtx (V4SImode);
5604 t2 = gen_reg_rtx (V4SImode);
5605 t3 = gen_reg_rtx (V2DImode);
5606 t4 = gen_reg_rtx (V2DImode);
5609 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5615 /* t2: (B*E),(A*F),(D*G),(C*H) */
5616 emit_insn (gen_mulv4si3 (t2, t1, op2));
5618 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5619 emit_insn (gen_xop_phadddq (t3, t2));
5621 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5622 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5624 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5625 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5629 t1 = gen_reg_rtx (V2DImode);
5630 t2 = gen_reg_rtx (V2DImode);
5631 t3 = gen_reg_rtx (V2DImode);
5632 t4 = gen_reg_rtx (V2DImode);
5633 t5 = gen_reg_rtx (V2DImode);
5634 t6 = gen_reg_rtx (V2DImode);
5635 thirtytwo = GEN_INT (32);
5637 /* Multiply low parts. */
5638 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5639 gen_lowpart (V4SImode, op2)));
5641 /* Shift input vectors right 32 bits so we can multiply high parts. */
5642 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5643 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5645 /* Multiply high parts by low parts. */
5646 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5647 gen_lowpart (V4SImode, t3)));
5648 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5649 gen_lowpart (V4SImode, t2)));
5651 /* Shift them back. */
5652 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5653 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5655 /* Add the three parts together. */
5656 emit_insn (gen_addv2di3 (t6, t1, t4));
5657 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply V8HI x V8HI -> V4SI, "hi" half.  The low 16
;; bits of every product come from gen_mulv8hi3 (t1) and the high 16 bits
;; from gen_smulv8hi3_highpart (t2); gen_vec_interleave_highv8hi then
;; pairs them up into operand 0, which is accessed as V8HI through
;; gen_lowpart.
5662 (define_expand "vec_widen_smult_hi_v8hi"
5663 [(match_operand:V4SI 0 "register_operand" "")
5664 (match_operand:V8HI 1 "register_operand" "")
5665 (match_operand:V8HI 2 "register_operand" "")]
5668 rtx op1, op2, t1, t2, dest;
5672 t1 = gen_reg_rtx (V8HImode);
5673 t2 = gen_reg_rtx (V8HImode);
5674 dest = gen_lowpart (V8HImode, operands[0]);
5676 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5677 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5678 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply V8HI x V8HI -> V4SI, "lo" half.  Same scheme as
;; vec_widen_smult_hi_v8hi (low product words from gen_mulv8hi3, high
;; product words from gen_smulv8hi3_highpart), but the two vectors are
;; combined with gen_vec_interleave_lowv8hi.
5682 (define_expand "vec_widen_smult_lo_v8hi"
5683 [(match_operand:V4SI 0 "register_operand" "")
5684 (match_operand:V8HI 1 "register_operand" "")
5685 (match_operand:V8HI 2 "register_operand" "")]
5688 rtx op1, op2, t1, t2, dest;
5692 t1 = gen_reg_rtx (V8HImode);
5693 t2 = gen_reg_rtx (V8HImode);
5694 dest = gen_lowpart (V8HImode, operands[0]);
5696 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5697 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5698 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply V8HI x V8HI -> V4SI, "hi" half.  The low 16
;; bits of every product come from gen_mulv8hi3 (t1) and the high 16 bits
;; from gen_umulv8hi3_highpart (t2); gen_vec_interleave_highv8hi pairs them
;; up into operand 0 (accessed as V8HI).
5702 (define_expand "vec_widen_umult_hi_v8hi"
5703 [(match_operand:V4SI 0 "register_operand" "")
5704 (match_operand:V8HI 1 "register_operand" "")
5705 (match_operand:V8HI 2 "register_operand" "")]
5708 rtx op1, op2, t1, t2, dest;
5712 t1 = gen_reg_rtx (V8HImode);
5713 t2 = gen_reg_rtx (V8HImode);
5714 dest = gen_lowpart (V8HImode, operands[0]);
5716 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5717 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5718 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply V8HI x V8HI -> V4SI, "lo" half.  Same scheme
;; as vec_widen_umult_hi_v8hi, but the low-word and high-word product
;; vectors are combined with gen_vec_interleave_lowv8hi.
5722 (define_expand "vec_widen_umult_lo_v8hi"
5723 [(match_operand:V4SI 0 "register_operand" "")
5724 (match_operand:V8HI 1 "register_operand" "")
5725 (match_operand:V8HI 2 "register_operand" "")]
5728 rtx op1, op2, t1, t2, dest;
5732 t1 = gen_reg_rtx (V8HImode);
5733 t2 = gen_reg_rtx (V8HImode);
5734 dest = gen_lowpart (V8HImode, operands[0]);
5736 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5737 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5738 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed multiply V4SI x V4SI -> V2DI, "hi" half.  Both inputs
;; are first rearranged with gen_sse2_pshufd_1 into fresh V4SI temporaries,
;; and the result is produced by the XOP helper gen_xop_mulv2div2di3_high.
5742 (define_expand "vec_widen_smult_hi_v4si"
5743 [(match_operand:V2DI 0 "register_operand" "")
5744 (match_operand:V4SI 1 "register_operand" "")
5745 (match_operand:V4SI 2 "register_operand" "")]
5750 t1 = gen_reg_rtx (V4SImode);
5751 t2 = gen_reg_rtx (V4SImode);
5753 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5758 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5763 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed multiply V4SI x V4SI -> V2DI, "lo" half.  Both inputs
;; are first rearranged with gen_sse2_pshufd_1 into fresh V4SI temporaries,
;; and the result is produced by the XOP helper gen_xop_mulv2div2di3_low.
5767 (define_expand "vec_widen_smult_lo_v4si"
5768 [(match_operand:V2DI 0 "register_operand" "")
5769 (match_operand:V4SI 1 "register_operand" "")
5770 (match_operand:V4SI 2 "register_operand" "")]
5775 t1 = gen_reg_rtx (V4SImode);
5776 t2 = gen_reg_rtx (V4SImode);
5778 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5783 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5788 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned multiply V4SI x V4SI -> V2DI, "hi" half.  Each input is
;; high-interleaved with itself, which places the wanted elements in the
;; even slots (0 and 2) that gen_sse2_umulv2siv2di3 multiplies.
5792 (define_expand "vec_widen_umult_hi_v4si"
5793 [(match_operand:V2DI 0 "register_operand" "")
5794 (match_operand:V4SI 1 "register_operand" "")
5795 (match_operand:V4SI 2 "register_operand" "")]
5798 rtx op1, op2, t1, t2;
5802 t1 = gen_reg_rtx (V4SImode);
5803 t2 = gen_reg_rtx (V4SImode);
5805 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5806 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5807 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply V4SI x V4SI -> V2DI, "lo" half.  Each input is
;; low-interleaved with itself, which places the wanted elements in the
;; even slots (0 and 2) that gen_sse2_umulv2siv2di3 multiplies.
5811 (define_expand "vec_widen_umult_lo_v4si"
5812 [(match_operand:V2DI 0 "register_operand" "")
5813 (match_operand:V4SI 1 "register_operand" "")
5814 (match_operand:V4SI 2 "register_operand" "")]
5817 rtx op1, op2, t1, t2;
5821 t1 = gen_reg_rtx (V4SImode);
5822 t2 = gen_reg_rtx (V4SImode);
5824 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5825 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5826 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes into a fresh V4SI temporary, which
;; is then added to the V4SI accumulator with gen_addv4si3.
5830 (define_expand "sdot_prodv8hi"
5831 [(match_operand:V4SI 0 "register_operand" "")
5832 (match_operand:V8HI 1 "register_operand" "")
5833 (match_operand:V8HI 2 "register_operand" "")
5834 (match_operand:V4SI 3 "register_operand" "")]
5837 rtx t = gen_reg_rtx (V4SImode);
5838 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5839 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step on V4SI inputs with a V2DI accumulator
;; (operand 3):
;;   t1 = umul (operand 1, operand 2) + operand 3, using the even-element
;;        widening multiply gen_sse2_umulv2siv2di3;
;;   t2, t3 = the two inputs shifted with gen_sse2_lshrv1ti3 (as V1TI);
;;   t4 = umul (t2, t3);
;;   operand 0 = t1 + t4.
;; NOTE(review): the shifts presumably move the odd elements into the even
;; slots so that t4 covers them -- confirm against the shift amount.
5843 (define_expand "udot_prodv4si"
5844 [(match_operand:V2DI 0 "register_operand" "")
5845 (match_operand:V4SI 1 "register_operand" "")
5846 (match_operand:V4SI 2 "register_operand" "")
5847 (match_operand:V2DI 3 "register_operand" "")]
5852 t1 = gen_reg_rtx (V2DImode);
5853 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5854 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5856 t2 = gen_reg_rtx (V4SImode);
5857 t3 = gen_reg_rtx (V4SImode);
5858 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5859 gen_lowpart (V1TImode, operands[1]),
5861 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5862 gen_lowpart (V1TImode, operands[2]),
5865 t4 = gen_reg_rtx (V2DImode);
5866 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5868 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX form of the per-element shift on the SSEMODE24 modes (V8HI, V4SI)
;; that emits vpsra<ssevecsize> (three operands).  The SImode shift count
;; (operand 2) is a nonmemory_operand with constraint "xN"; the
;; length_immediate attribute is chosen by testing whether it is a
;; const_int_operand.
5872 (define_insn "*avx_ashr<mode>3"
5873 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5875 (match_operand:SSEMODE24 1 "register_operand" "x")
5876 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5878 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5879 [(set_attr "type" "sseishft")
5880 (set_attr "prefix" "vex")
5881 (set (attr "length_immediate")
5882 (if_then_else (match_operand 2 "const_int_operand" "")
5884 (const_string "0")))
5885 (set_attr "mode" "TI")])
;; Two-operand form of the per-element shift on V8HI/V4SI that emits
;; psra<ssevecsize>, with operand 1 tied to the destination.  The SImode
;; count (operand 2) is a nonmemory_operand with constraint "xN";
;; length_immediate is chosen by testing whether it is a const_int_operand.
5887 (define_insn "ashr<mode>3"
5888 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5890 (match_operand:SSEMODE24 1 "register_operand" "0")
5891 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5893 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5894 [(set_attr "type" "sseishft")
5895 (set_attr "prefix_data16" "1")
5896 (set (attr "length_immediate")
5897 (if_then_else (match_operand 2 "const_int_operand" "")
5899 (const_string "0")))
5900 (set_attr "mode" "TI")])
;; AVX whole-register shift on V1TI that emits vpsrldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, so the emitted immediate is one eighth of the RTL
;; operand.
5902 (define_insn "*avx_lshrv1ti3"
5903 [(set (match_operand:V1TI 0 "register_operand" "=x")
5905 (match_operand:V1TI 1 "register_operand" "x")
5906 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5909 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5910 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5912 [(set_attr "type" "sseishft")
5913 (set_attr "prefix" "vex")
5914 (set_attr "length_immediate" "1")
5915 (set_attr "mode" "TI")])
;; AVX form of the per-element logical right shift (lshiftrt) on the
;; SSEMODE248 modes (V8HI, V4SI, V2DI): three-operand vpsrl<ssevecsize>.
;; The SImode count is a nonmemory_operand with constraint "xN";
;; length_immediate is chosen by testing whether it is a const_int_operand.
5917 (define_insn "*avx_lshr<mode>3"
5918 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5919 (lshiftrt:SSEMODE248
5920 (match_operand:SSEMODE248 1 "register_operand" "x")
5921 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5923 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5924 [(set_attr "type" "sseishft")
5925 (set_attr "prefix" "vex")
5926 (set (attr "length_immediate")
5927 (if_then_else (match_operand 2 "const_int_operand" "")
5929 (const_string "0")))
5930 (set_attr "mode" "TI")])
;; SSE2 whole-register shift on V1TI that emits psrldq, with operand 1 tied
;; to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so callers of gen_sse2_lshrv1ti3 pass eight times the
;; immediate that psrldq receives.
5932 (define_insn "sse2_lshrv1ti3"
5933 [(set (match_operand:V1TI 0 "register_operand" "=x")
5935 (match_operand:V1TI 1 "register_operand" "0")
5936 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5939 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5940 return "psrldq\t{%2, %0|%0, %2}";
5942 [(set_attr "type" "sseishft")
5943 (set_attr "prefix_data16" "1")
5944 (set_attr "length_immediate" "1")
5945 (set_attr "mode" "TI")])
;; Two-operand per-element logical right shift (lshiftrt) on V8HI, V4SI and
;; V2DI: psrl<ssevecsize> with operand 1 tied to the destination.  The
;; SImode count is a nonmemory_operand with constraint "xN";
;; length_immediate is chosen by testing whether it is a const_int_operand.
5947 (define_insn "lshr<mode>3"
5948 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5949 (lshiftrt:SSEMODE248
5950 (match_operand:SSEMODE248 1 "register_operand" "0")
5951 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5953 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5954 [(set_attr "type" "sseishft")
5955 (set_attr "prefix_data16" "1")
5956 (set (attr "length_immediate")
5957 (if_then_else (match_operand 2 "const_int_operand" "")
5959 (const_string "0")))
5960 (set_attr "mode" "TI")])
;; AVX whole-register left shift (ashift) on V1TI: emits vpslldq.  Operand
;; 2 must satisfy const_0_to_255_mul_8_operand; the output code divides it
;; by 8 before printing.
5962 (define_insn "*avx_ashlv1ti3"
5963 [(set (match_operand:V1TI 0 "register_operand" "=x")
5964 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
5965 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5968 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5969 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5971 [(set_attr "type" "sseishft")
5972 (set_attr "prefix" "vex")
5973 (set_attr "length_immediate" "1")
5974 (set_attr "mode" "TI")])
;; AVX form of the per-element shift on V8HI, V4SI and V2DI that emits
;; vpsll<ssevecsize> (three operands).  The SImode count is a
;; nonmemory_operand with constraint "xN"; length_immediate is chosen by
;; testing whether it is a const_int_operand.
5976 (define_insn "*avx_ashl<mode>3"
5977 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5979 (match_operand:SSEMODE248 1 "register_operand" "x")
5980 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5982 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5983 [(set_attr "type" "sseishft")
5984 (set_attr "prefix" "vex")
5985 (set (attr "length_immediate")
5986 (if_then_else (match_operand 2 "const_int_operand" "")
5988 (const_string "0")))
5989 (set_attr "mode" "TI")])
;; SSE2 whole-register left shift (ashift) on V1TI: emits pslldq with
;; operand 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing.
5991 (define_insn "sse2_ashlv1ti3"
5992 [(set (match_operand:V1TI 0 "register_operand" "=x")
5993 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
5994 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5997 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5998 return "pslldq\t{%2, %0|%0, %2}";
6000 [(set_attr "type" "sseishft")
6001 (set_attr "prefix_data16" "1")
6002 (set_attr "length_immediate" "1")
6003 (set_attr "mode" "TI")])
;; Two-operand per-element shift on V8HI, V4SI and V2DI that emits
;; psll<ssevecsize>, with operand 1 tied to the destination.  The SImode
;; count is a nonmemory_operand with constraint "xN"; length_immediate is
;; chosen by testing whether it is a const_int_operand.
6005 (define_insn "ashl<mode>3"
6006 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6008 (match_operand:SSEMODE248 1 "register_operand" "0")
6009 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6011 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6012 [(set_attr "type" "sseishft")
6013 (set_attr "prefix_data16" "1")
6014 (set (attr "length_immediate")
6015 (if_then_else (match_operand 2 "const_int_operand" "")
6017 (const_string "0")))
6018 (set_attr "mode" "TI")])
;; Whole-vector shift expander for the integer modes in SSEMODEI.  The shift
;; count must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; replaces operands 0 and 1 with their V1TImode lowparts, so the emitted
;; pattern operates on V1TI.
6020 (define_expand "vec_shl_<mode>"
6021 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6023 (match_operand:SSEMODEI 1 "register_operand" "")
6024 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6027 operands[0] = gen_lowpart (V1TImode, operands[0]);
6028 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-vector shift expander (vec_shr) for the integer modes in SSEMODEI.
;; The shift count must satisfy const_0_to_255_mul_8_operand.  The
;; preparation code replaces operands 0 and 1 with their V1TImode lowparts,
;; so the emitted pattern operates on V1TI.
6031 (define_expand "vec_shr_<mode>"
6032 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6034 (match_operand:SSEMODEI 1 "register_operand" "")
6035 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6038 operands[0] = gen_lowpart (V1TImode, operands[0]);
6039 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX integer max/min on the SSEMODE124 modes (V16QI, V8HI, V4SI):
;; three-operand vp<maxmin_int><ssevecsize>.  The prefix_extra attribute is
;; computed from the mode and code: the symbol_ref tests whether <MODE>mode
;; differs from V8HImode for SMAX/SMIN, or from V16QImode for the other
;; codes.
6042 (define_insn "*avx_<code><mode>3"
6043 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6045 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6046 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6047 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6048 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6049 [(set_attr "type" "sseiadd")
6050 (set (attr "prefix_extra")
6052 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6055 (const_string "0")))
6056 (set_attr "prefix" "vex")
6057 (set_attr "mode" "TI")])
;; Expander for the V16QI max/min patterns named <code>v16qi3.  Both sources
;; are accepted as nonimmediate_operand and passed through
;; ix86_fixup_binary_operands_no_copy.
6059 (define_expand "<code>v16qi3"
6060 [(set (match_operand:V16QI 0 "register_operand" "")
6062 (match_operand:V16QI 1 "nonimmediate_operand" "")
6063 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6065 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI max/min: two-operand p<maxmin_int>b with operand 1 tied to
;; the destination and commutative with operand 2.
6067 (define_insn "*<code>v16qi3"
6068 [(set (match_operand:V16QI 0 "register_operand" "=x")
6070 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6071 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6072 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6073 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6074 [(set_attr "type" "sseiadd")
6075 (set_attr "prefix_data16" "1")
6076 (set_attr "mode" "TI")])
;; Expander for the V8HI max/min patterns named <code>v8hi3.  Both sources
;; are accepted as nonimmediate_operand and passed through
;; ix86_fixup_binary_operands_no_copy.
6078 (define_expand "<code>v8hi3"
6079 [(set (match_operand:V8HI 0 "register_operand" "")
6081 (match_operand:V8HI 1 "nonimmediate_operand" "")
6082 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6084 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI max/min: two-operand p<maxmin_int>w with operand 1 tied to
;; the destination and commutative with operand 2.
6086 (define_insn "*<code>v8hi3"
6087 [(set (match_operand:V8HI 0 "register_operand" "=x")
6089 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6090 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6091 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6092 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6093 [(set_attr "type" "sseiadd")
6094 (set_attr "prefix_data16" "1")
6095 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two paths appear in the body: one passes the
;; operands through ix86_fixup_binary_operands_no_copy so that the umax
;; pattern itself is emitted, and one synthesizes the result as
;;   umax (a, b) = ussub (a, b) + b
;; using gen_sse2_ussubv8hi3 followed by gen_addv8hi3.  In the synthesized
;; path operand 2 is read again after the subtraction is written, so when
;; the destination is the same rtx as operand 2 a fresh register receives
;; the intermediate result instead.
6097 (define_expand "umaxv8hi3"
6098 [(set (match_operand:V8HI 0 "register_operand" "")
6099 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6100 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6104 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6107 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6108 if (rtx_equal_p (op3, op2))
6109 op3 = gen_reg_rtx (V8HImode);
6110 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6111 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum on the SSEMODE14 modes (V16QI, V4SI).  Two paths appear in
;; the body: one passes the operands through
;; ix86_fixup_binary_operands_no_copy so that the smax pattern itself is
;; emitted, and one builds a six-entry xops array for
;; ix86_expand_int_vcond: destination, operand 1, operand 2, the comparison
;; (operand 1 GT operand 2), then the two compared operands.
;; NOTE(review): xops[1]/xops[2] are presumably the values selected when
;; the comparison is true/false -- confirm against ix86_expand_int_vcond.
6116 (define_expand "smax<mode>3"
6117 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6118 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6119 (match_operand:SSEMODE14 2 "register_operand" "")))]
6123 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6129 xops[0] = operands[0];
6130 xops[1] = operands[1];
6131 xops[2] = operands[2];
6132 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6133 xops[4] = operands[1];
6134 xops[5] = operands[2];
6135 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min on the SSEMODE14 modes (V16QI, V4SI): two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination.
;; Requires TARGET_SSE4_1.
6141 (define_insn "*sse4_1_<code><mode>3"
6142 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6144 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6145 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6146 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6147 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6148 [(set_attr "type" "sseiadd")
6149 (set_attr "prefix_extra" "1")
6150 (set_attr "mode" "TI")])
;; Signed V2DI maximum, expanded through ix86_expand_int_vcond.  The xops
;; array holds: destination, operand 1, operand 2, the comparison
;; (operand 1 GT operand 2), then the two compared operands.
;; NOTE(review): xops[1]/xops[2] are presumably the values selected when
;; the comparison is true/false -- confirm against ix86_expand_int_vcond.
6152 (define_expand "smaxv2di3"
6153 [(set (match_operand:V2DI 0 "register_operand" "")
6154 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6155 (match_operand:V2DI 2 "register_operand" "")))]
6161 xops[0] = operands[0];
6162 xops[1] = operands[1];
6163 xops[2] = operands[2];
6164 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6165 xops[4] = operands[1];
6166 xops[5] = operands[2];
6167 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum.  Two paths appear in the body: one passes the
;; operands through ix86_fixup_binary_operands_no_copy so that the umax
;; pattern itself is emitted, and one expands through
;; ix86_expand_int_vcond with an unsigned comparison (operand 1 GTU
;; operand 2) and operands 1 and 2 as the two value operands.
6172 (define_expand "umaxv4si3"
6173 [(set (match_operand:V4SI 0 "register_operand" "")
6174 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6175 (match_operand:V4SI 2 "register_operand" "")))]
6179 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6185 xops[0] = operands[0];
6186 xops[1] = operands[1];
6187 xops[2] = operands[2];
6188 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6189 xops[4] = operands[1];
6190 xops[5] = operands[2];
6191 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min on the SSEMODE24 modes (V8HI, V4SI): two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination.
;; Requires TARGET_SSE4_1.
6197 (define_insn "*sse4_1_<code><mode>3"
6198 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6200 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6201 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6202 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6203 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6204 [(set_attr "type" "sseiadd")
6205 (set_attr "prefix_extra" "1")
6206 (set_attr "mode" "TI")])
;; Unsigned V2DI maximum, expanded through ix86_expand_int_vcond with an
;; unsigned comparison (operand 1 GTU operand 2) and operands 1 and 2 as
;; the two value operands.
6208 (define_expand "umaxv2di3"
6209 [(set (match_operand:V2DI 0 "register_operand" "")
6210 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6211 (match_operand:V2DI 2 "register_operand" "")))]
6217 xops[0] = operands[0];
6218 xops[1] = operands[1];
6219 xops[2] = operands[2];
6220 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6221 xops[4] = operands[1];
6222 xops[5] = operands[2];
6223 ok = ix86_expand_int_vcond (xops);
;; Signed minimum on the SSEMODE14 modes (V16QI, V4SI).  Two paths appear in
;; the body: one passes the operands through
;; ix86_fixup_binary_operands_no_copy so that the smin pattern itself is
;; emitted, and one expands through ix86_expand_int_vcond.  The comparison
;; is the same as for smax (operand 1 GT operand 2), but the two value
;; slots are swapped: xops[1] is operand 2 and xops[2] is operand 1.
6228 (define_expand "smin<mode>3"
6229 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6230 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6231 (match_operand:SSEMODE14 2 "register_operand" "")))]
6235 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6241 xops[0] = operands[0];
6242 xops[1] = operands[2];
6243 xops[2] = operands[1];
6244 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6245 xops[4] = operands[1];
6246 xops[5] = operands[2];
6247 ok = ix86_expand_int_vcond (xops);
;; Signed V2DI minimum, expanded through ix86_expand_int_vcond.  The
;; comparison is operand 1 GT operand 2, and the value slots are swapped
;; relative to the maximum expanders: xops[1] is operand 2 and xops[2] is
;; operand 1.
6253 (define_expand "sminv2di3"
6254 [(set (match_operand:V2DI 0 "register_operand" "")
6255 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6256 (match_operand:V2DI 2 "register_operand" "")))]
6262 xops[0] = operands[0];
6263 xops[1] = operands[2];
6264 xops[2] = operands[1];
6265 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6266 xops[4] = operands[1];
6267 xops[5] = operands[2];
6268 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum on the SSEMODE24 modes (V8HI, V4SI).  Two paths appear
;; in the body: one passes the operands through
;; ix86_fixup_binary_operands_no_copy so that the umin pattern itself is
;; emitted, and one expands through ix86_expand_int_vcond with an unsigned
;; comparison (operand 1 GTU operand 2) and swapped value slots (xops[1] is
;; operand 2, xops[2] is operand 1).
6273 (define_expand "umin<mode>3"
6274 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6275 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6276 (match_operand:SSEMODE24 2 "register_operand" "")))]
6280 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6286 xops[0] = operands[0];
6287 xops[1] = operands[2];
6288 xops[2] = operands[1];
6289 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6290 xops[4] = operands[1];
6291 xops[5] = operands[2];
6292 ok = ix86_expand_int_vcond (xops);
;; Unsigned V2DI minimum, expanded through ix86_expand_int_vcond with an
;; unsigned comparison (operand 1 GTU operand 2) and swapped value slots
;; (xops[1] is operand 2, xops[2] is operand 1).
6298 (define_expand "uminv2di3"
6299 [(set (match_operand:V2DI 0 "register_operand" "")
6300 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6301 (match_operand:V2DI 2 "register_operand" "")))]
6307 xops[0] = operands[0];
6308 xops[1] = operands[2];
6309 xops[2] = operands[1];
6310 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6311 xops[4] = operands[1];
6312 xops[5] = operands[2];
6313 ok = ix86_expand_int_vcond (xops);
6318 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6320 ;; Parallel integral comparisons
6322 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the element-wise equality comparison on the SSEMODE124
;; modes (V16QI, V8HI, V4SI).  Enabled for TARGET_SSE2 only when TARGET_XOP
;; is not set.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code EQ.
6324 (define_expand "sse2_eq<mode>3"
6325 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6327 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6328 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6329 "TARGET_SSE2 && !TARGET_XOP "
6330 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX form of the element-wise equality compare for SSEMODE1248.
;; Emits the three-operand vpcmpeq<ssevecsize> with a destination that is
;; independent of both sources; the "%" on operand 1 marks operands 1 and
;; 2 as commutative.  The prefix_extra attribute is computed from whether
;; operand 0 is V2DI.
6332 (define_insn "*avx_eq<mode>3"
6333 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6335 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6336 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6337 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6338 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6339 [(set_attr "type" "ssecmp")
6340 (set (attr "prefix_extra")
6341 (if_then_else (match_operand:V2DI 0 "" "")
6343 (const_string "*")))
6344 (set_attr "prefix" "vex")
6345 (set_attr "mode" "TI")])
;; SSE2 form of the element-wise equality compare for SSEMODE124.
;; Emits the two-operand pcmpeq<ssevecsize>; operand 1 is tied to the
;; destination ("%0", commutative with operand 2), and operand 2 may be a
;; register or memory.  Not used when XOP is enabled.
6347 (define_insn "*sse2_eq<mode>3"
6348 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6350 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6351 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6352 "TARGET_SSE2 && !TARGET_XOP
6353 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6354 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6355 [(set_attr "type" "ssecmp")
6356 (set_attr "prefix_data16" "1")
6357 (set_attr "mode" "TI")])
;; Expander for the V2DI element-wise equality compare.  The preparation
;; statement canonicalises the two inputs with
;; ix86_fixup_binary_operands_no_copy (EQ, V2DImode, ...).
6359 (define_expand "sse4_1_eqv2di3"
6360 [(set (match_operand:V2DI 0 "register_operand" "")
6362 (match_operand:V2DI 1 "nonimmediate_operand" "")
6363 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6365 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 V2DI equality compare.  Emits the two-operand pcmpeqq with
;; operand 1 tied to the destination ("%0", commutative with operand 2);
;; operand 2 may be a register or memory.
6367 (define_insn "*sse4_1_eqv2di3"
6368 [(set (match_operand:V2DI 0 "register_operand" "=x")
6370 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6371 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6372 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6373 "pcmpeqq\t{%2, %0|%0, %2}"
6374 [(set_attr "type" "ssecmp")
6375 (set_attr "prefix_extra" "1")
6376 (set_attr "mode" "TI")])
;; AVX form of the element-wise greater-than compare for SSEMODE1248.
;; Emits the three-operand vpcmpgt<ssevecsize>.  Unlike the equality
;; patterns there is no "%" on operand 1, so the operands are not
;; interchangeable: operand 1 must be a register and only operand 2 may
;; be memory.  prefix_extra is computed from whether operand 0 is V2DI.
6378 (define_insn "*avx_gt<mode>3"
6379 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6381 (match_operand:SSEMODE1248 1 "register_operand" "x")
6382 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6384 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6385 [(set_attr "type" "ssecmp")
6386 (set (attr "prefix_extra")
6387 (if_then_else (match_operand:V2DI 0 "" "")
6389 (const_string "*")))
6390 (set_attr "prefix" "vex")
6391 (set_attr "mode" "TI")])
;; SSE2 form of the element-wise greater-than compare for SSEMODE124.
;; Emits the two-operand pcmpgt<ssevecsize>; operand 1 is a register tied
;; to the destination and operand 2 may be a register or memory.
;; Enabled for SSE2 when XOP is not in use.
6393 (define_insn "sse2_gt<mode>3"
6394 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6396 (match_operand:SSEMODE124 1 "register_operand" "0")
6397 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6398 "TARGET_SSE2 && !TARGET_XOP"
6399 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6400 [(set_attr "type" "ssecmp")
6401 (set_attr "prefix_data16" "1")
6402 (set_attr "mode" "TI")])
;; V2DI element-wise greater-than compare.  Emits the two-operand
;; pcmpgtq; operand 1 is a register tied to the destination and operand 2
;; may be a register or memory.
6404 (define_insn "sse4_2_gtv2di3"
6405 [(set (match_operand:V2DI 0 "register_operand" "=x")
6407 (match_operand:V2DI 1 "register_operand" "0")
6408 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6410 "pcmpgtq\t{%2, %0|%0, %2}"
6411 [(set_attr "type" "ssecmp")
6412 (set_attr "prefix_extra" "1")
6413 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODE124C8 modes.
;; Operand layout: 0 = destination, 3 = comparison operator applied to
;; operands 4 and 5, 1 = value chosen where the comparison holds,
;; 2 = value chosen where it does not.  All of the work is delegated to
;; ix86_expand_int_vcond, whose success flag is captured in `ok'.
6415 (define_expand "vcond<mode>"
6416 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6417 (if_then_else:SSEMODE124C8
6418 (match_operator 3 ""
6419 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6420 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6421 (match_operand:SSEMODE124C8 1 "general_operand" "")
6422 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6425 bool ok = ix86_expand_int_vcond (operands);
;; Companion of vcond<mode> under the vcondu name, for the same
;; SSEMODE124C8 modes.  The operand layout is identical to vcond<mode>
;; (0 = destination, 3 = comparison of 4 and 5, 1/2 = the two arms) and
;; the expansion is likewise delegated to ix86_expand_int_vcond.
6430 (define_expand "vcondu<mode>"
6431 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6432 (if_then_else:SSEMODE124C8
6433 (match_operator 3 ""
6434 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6435 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6436 (match_operand:SSEMODE124C8 1 "general_operand" "")
6437 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6440 bool ok = ix86_expand_int_vcond (operands);
6445 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6447 ;; Parallel bitwise logical operations
6449 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the bitwise complement on the SSEMODEI integer vector
;; modes, expressed as an XOR of operand 1 with an all-ones vector.
;; The preparation code builds a CONST_VECTOR holding constm1_rtx in each
;; of the mode's GET_MODE_NUNITS elements and forces it into a register,
;; which becomes operands[2].
6451 (define_expand "one_cmpl<mode>2"
6452 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6453 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6457 int i, n = GET_MODE_NUNITS (<MODE>mode);
6458 rtvec v = rtvec_alloc (n);
6460 for (i = 0; i < n; ++i)
6461 RTVEC_ELT (v, i) = constm1_rtx;
6463 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; and-not for the 256-bit integer vector modes (AVX256MODEI).
;; Operand 1 is the input wrapped in `not'; operand 2 may be a register
;; or memory.  Emitted as the three-operand vandnps, i.e. the
;; packed-single mnemonic, with the insn mode taken from <avxvecpsmode>.
6466 (define_insn "*avx_andnot<mode>3"
6467 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6469 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6470 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6472 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6473 [(set_attr "type" "sselog")
6474 (set_attr "prefix" "vex")
6475 (set_attr "mode" "<avxvecpsmode>")])
;; and-not on the SSEMODEI integer vector modes for targets that have SSE
;; but not SSE2.  Operand 1 (wrapped in `not') is tied to the
;; destination; the packed-single andnps is emitted and the insn mode is
;; V4SF.
6477 (define_insn "*sse_andnot<mode>3"
6478 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6480 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6481 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6482 "(TARGET_SSE && !TARGET_SSE2)"
6483 "andnps\t{%2, %0|%0, %2}"
6484 [(set_attr "type" "sselog")
6485 (set_attr "mode" "V4SF")])
;; and-not for the 128-bit integer vector modes (SSEMODEI), VEX-encoded.
;; Operand 1 is the input wrapped in `not'; operand 2 may be a register
;; or memory.  Emitted as the three-operand vpandn.
6487 (define_insn "*avx_andnot<mode>3"
6488 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6490 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6491 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6493 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6494 [(set_attr "type" "sselog")
6495 (set_attr "prefix" "vex")
6496 (set_attr "mode" "TI")])
;; and-not for the SSEMODEI integer vector modes using the two-operand
;; pandn.  Operand 1 (wrapped in `not') is tied to the destination;
;; operand 2 may be a register or memory.
6498 (define_insn "sse2_andnot<mode>3"
6499 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6501 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6502 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6504 "pandn\t{%2, %0|%0, %2}"
6505 [(set_attr "type" "sselog")
6506 (set_attr "prefix_data16" "1")
6507 (set_attr "mode" "TI")])
;; and-not on TFmode values held in SSE registers, using the integer
;; pandn instruction.  Operand 1 (wrapped in `not') is tied to the
;; destination; operand 2 may be a register or memory.
6509 (define_insn "*andnottf3"
6510 [(set (match_operand:TF 0 "register_operand" "=x")
6512 (not:TF (match_operand:TF 1 "register_operand" "0"))
6513 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6515 "pandn\t{%2, %0|%0, %2}"
6516 [(set_attr "type" "sselog")
6517 (set_attr "prefix_data16" "1")
6518 (set_attr "mode" "TI")])
;; Expander, iterated over a code iterator (<code>/<CODE>), for two-input
;; operations on the SSEMODEI integer vector modes.  The only preparation
;; step is to canonicalise the inputs with
;; ix86_fixup_binary_operands_no_copy.
6520 (define_expand "<code><mode>3"
6521 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6523 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6524 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6526 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Bitwise logic (any_logic iterator) on the 256-bit integer vector modes
;; (AVX256MODEI).  Emitted as the three-operand packed-single form
;; v<logic>ps, with the insn mode taken from <avxvecpsmode>.  The "%" on
;; operand 1 marks the two inputs as commutative.
6528 (define_insn "*avx_<code><mode>3"
6529 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6530 (any_logic:AVX256MODEI
6531 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6532 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6534 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6535 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6536 [(set_attr "type" "sselog")
6537 (set_attr "prefix" "vex")
6538 (set_attr "mode" "<avxvecpsmode>")])
;; Bitwise logic on the SSEMODEI integer vector modes for targets that
;; have SSE but not SSE2.  Emitted as the packed-single <logic>ps with
;; operand 1 tied to the destination ("%0", commutative with operand 2);
;; the insn mode is V4SF.
6540 (define_insn "*sse_<code><mode>3"
6541 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6543 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6544 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6545 "(TARGET_SSE && !TARGET_SSE2)
6546 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6547 "<logic>ps\t{%2, %0|%0, %2}"
6548 [(set_attr "type" "sselog")
6549 (set_attr "mode" "V4SF")])
;; Bitwise logic on the 128-bit integer vector modes (SSEMODEI),
;; VEX-encoded.  Emitted as the three-operand integer form vp<logic>;
;; the "%" on operand 1 marks the two inputs as commutative.
6551 (define_insn "*avx_<code><mode>3"
6552 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6554 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6555 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6557 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6558 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6559 [(set_attr "type" "sselog")
6560 (set_attr "prefix" "vex")
6561 (set_attr "mode" "TI")])
;; Bitwise logic on the SSEMODEI integer vector modes for SSE2.
;; Emitted as the two-operand integer form p<logic> with operand 1 tied
;; to the destination ("%0", commutative with operand 2).
6563 (define_insn "*sse2_<code><mode>3"
6564 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6566 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6567 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6568 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6569 "p<logic>\t{%2, %0|%0, %2}"
6570 [(set_attr "type" "sselog")
6571 (set_attr "prefix_data16" "1")
6572 (set_attr "mode" "TI")])
;; Expander, iterated over a code iterator (<code>/<CODE>), for two-input
;; operations on TFmode values.  The only preparation step is to
;; canonicalise the inputs with ix86_fixup_binary_operands_no_copy.
6574 (define_expand "<code>tf3"
6575 [(set (match_operand:TF 0 "register_operand" "")
6577 (match_operand:TF 1 "nonimmediate_operand" "")
6578 (match_operand:TF 2 "nonimmediate_operand" "")))]
6580 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Bitwise logic on TFmode values held in SSE registers (SSE2).
;; Emitted as the two-operand integer form p<logic> with operand 1 tied
;; to the destination ("%0", commutative with operand 2).
6582 (define_insn "*<code>tf3"
6583 [(set (match_operand:TF 0 "register_operand" "=x")
6585 (match_operand:TF 1 "nonimmediate_operand" "%0")
6586 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6587 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6588 "p<logic>\t{%2, %0|%0, %2}"
6589 [(set_attr "type" "sselog")
6590 (set_attr "prefix_data16" "1")
6591 (set_attr "mode" "TI")])
6593 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6595 ;; Parallel integral element swizzling
6597 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two V8HI registers into one V16QI register.
;; Both inputs are reinterpreted as V16QI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the 0 presumably selects the even-numbered bytes, i.e.
;; the low byte of every halfword -- confirm against the helper.
6599 (define_expand "vec_pack_trunc_v8hi"
6600 [(match_operand:V16QI 0 "register_operand" "")
6601 (match_operand:V8HI 1 "register_operand" "")
6602 (match_operand:V8HI 2 "register_operand" "")]
6605 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6606 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6607 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Truncating pack of two V4SI registers into one V8HI register.
;; Both inputs are reinterpreted as V8HI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the 0 presumably selects the even-numbered halfwords,
;; i.e. the low half of every doubleword -- confirm against the helper.
6611 (define_expand "vec_pack_trunc_v4si"
6612 [(match_operand:V8HI 0 "register_operand" "")
6613 (match_operand:V4SI 1 "register_operand" "")
6614 (match_operand:V4SI 2 "register_operand" "")]
6617 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6618 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6619 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Truncating pack of two V2DI registers into one V4SI register.
;; Both inputs are reinterpreted as V4SI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the 0 presumably selects the even-numbered doublewords,
;; i.e. the low half of every quadword -- confirm against the helper.
6623 (define_expand "vec_pack_trunc_v2di"
6624 [(match_operand:V4SI 0 "register_operand" "")
6625 (match_operand:V2DI 1 "register_operand" "")
6626 (match_operand:V2DI 2 "register_operand" "")]
6629 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6630 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6631 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; VEX-encoded pack of two V8HI inputs into one V16QI result, emitted as
;; the three-operand vpacksswb.  Operand 1 must be a register; operand 2
;; may be a register or memory.
6635 (define_insn "*avx_packsswb"
6636 [(set (match_operand:V16QI 0 "register_operand" "=x")
6639 (match_operand:V8HI 1 "register_operand" "x"))
6641 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6643 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6644 [(set_attr "type" "sselog")
6645 (set_attr "prefix" "vex")
6646 (set_attr "mode" "TI")])
;; Pack of two V8HI inputs into one V16QI result, emitted as the
;; two-operand packsswb.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
6648 (define_insn "sse2_packsswb"
6649 [(set (match_operand:V16QI 0 "register_operand" "=x")
6652 (match_operand:V8HI 1 "register_operand" "0"))
6654 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6656 "packsswb\t{%2, %0|%0, %2}"
6657 [(set_attr "type" "sselog")
6658 (set_attr "prefix_data16" "1")
6659 (set_attr "mode" "TI")])
;; VEX-encoded pack of two V4SI inputs into one V8HI result, emitted as
;; the three-operand vpackssdw.  Operand 1 must be a register; operand 2
;; may be a register or memory.
6661 (define_insn "*avx_packssdw"
6662 [(set (match_operand:V8HI 0 "register_operand" "=x")
6665 (match_operand:V4SI 1 "register_operand" "x"))
6667 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6669 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6670 [(set_attr "type" "sselog")
6671 (set_attr "prefix" "vex")
6672 (set_attr "mode" "TI")])
;; Pack of two V4SI inputs into one V8HI result, emitted as the
;; two-operand packssdw.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
6674 (define_insn "sse2_packssdw"
6675 [(set (match_operand:V8HI 0 "register_operand" "=x")
6678 (match_operand:V4SI 1 "register_operand" "0"))
6680 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6682 "packssdw\t{%2, %0|%0, %2}"
6683 [(set_attr "type" "sselog")
6684 (set_attr "prefix_data16" "1")
6685 (set_attr "mode" "TI")])
;; VEX-encoded pack of two V8HI inputs into one V16QI result, emitted as
;; the three-operand vpackuswb.  Operand 1 must be a register; operand 2
;; may be a register or memory.
6687 (define_insn "*avx_packuswb"
6688 [(set (match_operand:V16QI 0 "register_operand" "=x")
6691 (match_operand:V8HI 1 "register_operand" "x"))
6693 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6695 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6696 [(set_attr "type" "sselog")
6697 (set_attr "prefix" "vex")
6698 (set_attr "mode" "TI")])
;; Pack of two V8HI inputs into one V16QI result, emitted as the
;; two-operand packuswb.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
6700 (define_insn "sse2_packuswb"
6701 [(set (match_operand:V16QI 0 "register_operand" "=x")
6704 (match_operand:V8HI 1 "register_operand" "0"))
6706 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6708 "packuswb\t{%2, %0|%0, %2}"
6709 [(set_attr "type" "sselog")
6710 (set_attr "prefix_data16" "1")
6711 (set_attr "mode" "TI")])
;; VEX-encoded interleave of the high bytes of two V16QI inputs, emitted
;; as the three-operand vpunpckhbw.  The selection list alternates
;; indices 8..15 with 24..31 of the combined operand pair.
;; NOTE(review): indices 16..31 presumably address operand 2 -- confirm.
6713 (define_insn "*avx_interleave_highv16qi"
6714 [(set (match_operand:V16QI 0 "register_operand" "=x")
6717 (match_operand:V16QI 1 "register_operand" "x")
6718 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6719 (parallel [(const_int 8) (const_int 24)
6720 (const_int 9) (const_int 25)
6721 (const_int 10) (const_int 26)
6722 (const_int 11) (const_int 27)
6723 (const_int 12) (const_int 28)
6724 (const_int 13) (const_int 29)
6725 (const_int 14) (const_int 30)
6726 (const_int 15) (const_int 31)])))]
6728 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6729 [(set_attr "type" "sselog")
6730 (set_attr "prefix" "vex")
6731 (set_attr "mode" "TI")])
;; Interleave of the high bytes of two V16QI inputs, emitted as the
;; two-operand punpckhbw with operand 1 tied to the destination.  The
;; selection list alternates indices 8..15 with 24..31 of the combined
;; operand pair.
;; NOTE(review): indices 16..31 presumably address operand 2 -- confirm.
6733 (define_insn "vec_interleave_highv16qi"
6734 [(set (match_operand:V16QI 0 "register_operand" "=x")
6737 (match_operand:V16QI 1 "register_operand" "0")
6738 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6739 (parallel [(const_int 8) (const_int 24)
6740 (const_int 9) (const_int 25)
6741 (const_int 10) (const_int 26)
6742 (const_int 11) (const_int 27)
6743 (const_int 12) (const_int 28)
6744 (const_int 13) (const_int 29)
6745 (const_int 14) (const_int 30)
6746 (const_int 15) (const_int 31)])))]
6748 "punpckhbw\t{%2, %0|%0, %2}"
6749 [(set_attr "type" "sselog")
6750 (set_attr "prefix_data16" "1")
6751 (set_attr "mode" "TI")])
;; VEX-encoded interleave of the low bytes of two V16QI inputs, emitted
;; as the three-operand vpunpcklbw.  The selection list alternates
;; indices 0..7 with 16..23 of the combined operand pair.
;; NOTE(review): indices 16..23 presumably address operand 2 -- confirm.
6753 (define_insn "*avx_interleave_lowv16qi"
6754 [(set (match_operand:V16QI 0 "register_operand" "=x")
6757 (match_operand:V16QI 1 "register_operand" "x")
6758 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6759 (parallel [(const_int 0) (const_int 16)
6760 (const_int 1) (const_int 17)
6761 (const_int 2) (const_int 18)
6762 (const_int 3) (const_int 19)
6763 (const_int 4) (const_int 20)
6764 (const_int 5) (const_int 21)
6765 (const_int 6) (const_int 22)
6766 (const_int 7) (const_int 23)])))]
6768 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6769 [(set_attr "type" "sselog")
6770 (set_attr "prefix" "vex")
6771 (set_attr "mode" "TI")])
;; Interleave of the low bytes of two V16QI inputs, emitted as the
;; two-operand punpcklbw with operand 1 tied to the destination.  The
;; selection list alternates indices 0..7 with 16..23 of the combined
;; operand pair.
;; NOTE(review): indices 16..23 presumably address operand 2 -- confirm.
6773 (define_insn "vec_interleave_lowv16qi"
6774 [(set (match_operand:V16QI 0 "register_operand" "=x")
6777 (match_operand:V16QI 1 "register_operand" "0")
6778 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6779 (parallel [(const_int 0) (const_int 16)
6780 (const_int 1) (const_int 17)
6781 (const_int 2) (const_int 18)
6782 (const_int 3) (const_int 19)
6783 (const_int 4) (const_int 20)
6784 (const_int 5) (const_int 21)
6785 (const_int 6) (const_int 22)
6786 (const_int 7) (const_int 23)])))]
6788 "punpcklbw\t{%2, %0|%0, %2}"
6789 [(set_attr "type" "sselog")
6790 (set_attr "prefix_data16" "1")
6791 (set_attr "mode" "TI")])
;; VEX-encoded interleave of the high halfwords of two V8HI inputs,
;; emitted as the three-operand vpunpckhwd.  The selection list
;; alternates indices 4..7 with 12..15 of the combined operand pair.
;; NOTE(review): indices 8..15 presumably address operand 2 -- confirm.
6793 (define_insn "*avx_interleave_highv8hi"
6794 [(set (match_operand:V8HI 0 "register_operand" "=x")
6797 (match_operand:V8HI 1 "register_operand" "x")
6798 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6799 (parallel [(const_int 4) (const_int 12)
6800 (const_int 5) (const_int 13)
6801 (const_int 6) (const_int 14)
6802 (const_int 7) (const_int 15)])))]
6804 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6805 [(set_attr "type" "sselog")
6806 (set_attr "prefix" "vex")
6807 (set_attr "mode" "TI")])
;; Interleave of the high halfwords of two V8HI inputs, emitted as the
;; two-operand punpckhwd with operand 1 tied to the destination.  The
;; selection list alternates indices 4..7 with 12..15 of the combined
;; operand pair.
;; NOTE(review): indices 8..15 presumably address operand 2 -- confirm.
6809 (define_insn "vec_interleave_highv8hi"
6810 [(set (match_operand:V8HI 0 "register_operand" "=x")
6813 (match_operand:V8HI 1 "register_operand" "0")
6814 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6815 (parallel [(const_int 4) (const_int 12)
6816 (const_int 5) (const_int 13)
6817 (const_int 6) (const_int 14)
6818 (const_int 7) (const_int 15)])))]
6820 "punpckhwd\t{%2, %0|%0, %2}"
6821 [(set_attr "type" "sselog")
6822 (set_attr "prefix_data16" "1")
6823 (set_attr "mode" "TI")])
;; VEX-encoded interleave of the low halfwords of two V8HI inputs,
;; emitted as the three-operand vpunpcklwd.  The selection list
;; alternates indices 0..3 with 8..11 of the combined operand pair.
;; NOTE(review): indices 8..11 presumably address operand 2 -- confirm.
6825 (define_insn "*avx_interleave_lowv8hi"
6826 [(set (match_operand:V8HI 0 "register_operand" "=x")
6829 (match_operand:V8HI 1 "register_operand" "x")
6830 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6831 (parallel [(const_int 0) (const_int 8)
6832 (const_int 1) (const_int 9)
6833 (const_int 2) (const_int 10)
6834 (const_int 3) (const_int 11)])))]
6836 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6837 [(set_attr "type" "sselog")
6838 (set_attr "prefix" "vex")
6839 (set_attr "mode" "TI")])
;; Interleave of the low halfwords of two V8HI inputs, emitted as the
;; two-operand punpcklwd with operand 1 tied to the destination.  The
;; selection list alternates indices 0..3 with 8..11 of the combined
;; operand pair.
;; NOTE(review): indices 8..11 presumably address operand 2 -- confirm.
6841 (define_insn "vec_interleave_lowv8hi"
6842 [(set (match_operand:V8HI 0 "register_operand" "=x")
6845 (match_operand:V8HI 1 "register_operand" "0")
6846 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6847 (parallel [(const_int 0) (const_int 8)
6848 (const_int 1) (const_int 9)
6849 (const_int 2) (const_int 10)
6850 (const_int 3) (const_int 11)])))]
6852 "punpcklwd\t{%2, %0|%0, %2}"
6853 [(set_attr "type" "sselog")
6854 (set_attr "prefix_data16" "1")
6855 (set_attr "mode" "TI")])
;; VEX-encoded interleave of the high doublewords of two V4SI inputs,
;; emitted as the three-operand vpunpckhdq.  The selection list is
;; 2, 6, 3, 7 over the combined operand pair.
;; NOTE(review): indices 4..7 presumably address operand 2 -- confirm.
6857 (define_insn "*avx_interleave_highv4si"
6858 [(set (match_operand:V4SI 0 "register_operand" "=x")
6861 (match_operand:V4SI 1 "register_operand" "x")
6862 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6863 (parallel [(const_int 2) (const_int 6)
6864 (const_int 3) (const_int 7)])))]
6866 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6867 [(set_attr "type" "sselog")
6868 (set_attr "prefix" "vex")
6869 (set_attr "mode" "TI")])
;; Interleave of the high doublewords of two V4SI inputs, emitted as the
;; two-operand punpckhdq with operand 1 tied to the destination.  The
;; selection list is 2, 6, 3, 7 over the combined operand pair.
;; NOTE(review): indices 4..7 presumably address operand 2 -- confirm.
6871 (define_insn "vec_interleave_highv4si"
6872 [(set (match_operand:V4SI 0 "register_operand" "=x")
6875 (match_operand:V4SI 1 "register_operand" "0")
6876 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6877 (parallel [(const_int 2) (const_int 6)
6878 (const_int 3) (const_int 7)])))]
6880 "punpckhdq\t{%2, %0|%0, %2}"
6881 [(set_attr "type" "sselog")
6882 (set_attr "prefix_data16" "1")
6883 (set_attr "mode" "TI")])
;; VEX-encoded interleave of the low doublewords of two V4SI inputs,
;; emitted as the three-operand vpunpckldq.  The selection list is
;; 0, 4, 1, 5 over the combined operand pair.
;; NOTE(review): indices 4..5 presumably address operand 2 -- confirm.
6885 (define_insn "*avx_interleave_lowv4si"
6886 [(set (match_operand:V4SI 0 "register_operand" "=x")
6889 (match_operand:V4SI 1 "register_operand" "x")
6890 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6891 (parallel [(const_int 0) (const_int 4)
6892 (const_int 1) (const_int 5)])))]
6894 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6895 [(set_attr "type" "sselog")
6896 (set_attr "prefix" "vex")
6897 (set_attr "mode" "TI")])
;; Interleave of the low doublewords of two V4SI inputs, emitted as the
;; two-operand punpckldq with operand 1 tied to the destination.  The
;; selection list is 0, 4, 1, 5 over the combined operand pair.
;; NOTE(review): indices 4..5 presumably address operand 2 -- confirm.
6899 (define_insn "vec_interleave_lowv4si"
6900 [(set (match_operand:V4SI 0 "register_operand" "=x")
6903 (match_operand:V4SI 1 "register_operand" "0")
6904 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6905 (parallel [(const_int 0) (const_int 4)
6906 (const_int 1) (const_int 5)])))]
6908 "punpckldq\t{%2, %0|%0, %2}"
6909 [(set_attr "type" "sselog")
6910 (set_attr "prefix_data16" "1")
6911 (set_attr "mode" "TI")])
;; VEX-encoded element insert for the SSEMODE124 modes.
;; A scalar (operand 2, general register or memory) is duplicated and
;; merged into operand 1 under the power-of-two merge mask in operand 3.
;; The output code converts that mask to an element index with
;; exact_log2, then prints operand 2 as %2 when it is memory and as %k2
;; otherwise.  prefix_extra is computed from whether operand 0 is V8HI.
6913 (define_insn "*avx_pinsr<ssevecsize>"
6914 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6915 (vec_merge:SSEMODE124
6916 (vec_duplicate:SSEMODE124
6917 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6918 (match_operand:SSEMODE124 1 "register_operand" "x")
6919 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6922 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6923 if (MEM_P (operands[2]))
6924 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6926 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6928 [(set_attr "type" "sselog")
6929 (set (attr "prefix_extra")
6930 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6932 (const_string "1")))
6933 (set_attr "length_immediate" "1")
6934 (set_attr "prefix" "vex")
6935 (set_attr "mode" "TI")])
;; Byte insert into a V16QI register (pinsrb).
;; A QI scalar (operand 2, general register or memory) is duplicated and
;; merged into operand 1, which is tied to the destination, under the
;; power-of-two mask in operand 3 (1..32768).  The output code converts
;; the mask to an element index with exact_log2 and prints operand 2 as
;; %2 when it is memory and as %k2 otherwise.
6937 (define_insn "*sse4_1_pinsrb"
6938 [(set (match_operand:V16QI 0 "register_operand" "=x")
6940 (vec_duplicate:V16QI
6941 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6942 (match_operand:V16QI 1 "register_operand" "0")
6943 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6946 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6947 if (MEM_P (operands[2]))
6948 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6950 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6952 [(set_attr "type" "sselog")
6953 (set_attr "prefix_extra" "1")
6954 (set_attr "length_immediate" "1")
6955 (set_attr "mode" "TI")])
;; Halfword insert into a V8HI register (pinsrw).
;; An HI scalar (operand 2, general register or memory) is merged into
;; operand 1, which is tied to the destination, under the power-of-two
;; mask in operand 3 (1..128).  The output code converts the mask to an
;; element index with exact_log2 and prints operand 2 as %2 when it is
;; memory and as %k2 otherwise.
6957 (define_insn "*sse2_pinsrw"
6958 [(set (match_operand:V8HI 0 "register_operand" "=x")
6961 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6962 (match_operand:V8HI 1 "register_operand" "0")
6963 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6966 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6967 if (MEM_P (operands[2]))
6968 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6970 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6972 [(set_attr "type" "sselog")
6973 (set_attr "prefix_data16" "1")
6974 (set_attr "length_immediate" "1")
6975 (set_attr "mode" "TI")])
6977 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert into a V4SI register (pinsrd).
;; An SI scalar (operand 2, general register or memory) is merged into
;; operand 1, which is tied to the destination, under the power-of-two
;; mask in operand 3 (1..8).  The output code converts the mask to an
;; element index with exact_log2 before printing.
6978 (define_insn "*sse4_1_pinsrd"
6979 [(set (match_operand:V4SI 0 "register_operand" "=x")
6982 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6983 (match_operand:V4SI 1 "register_operand" "0")
6984 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6987 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6988 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6990 [(set_attr "type" "sselog")
6991 (set_attr "prefix_extra" "1")
6992 (set_attr "length_immediate" "1")
6993 (set_attr "mode" "TI")])
;; VEX-encoded quadword insert into a V2DI register (vpinsrq); 64-bit
;; AVX targets only.  A DI scalar (operand 2, general register or memory)
;; is merged into operand 1 under the power-of-two mask in operand 3
;; (1..2); the output code converts the mask to an element index with
;; exact_log2 before printing.
6995 (define_insn "*avx_pinsrq"
6996 [(set (match_operand:V2DI 0 "register_operand" "=x")
6999 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7000 (match_operand:V2DI 1 "register_operand" "x")
7001 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7002 "TARGET_AVX && TARGET_64BIT"
7004 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7005 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7007 [(set_attr "type" "sselog")
7008 (set_attr "prefix_extra" "1")
7009 (set_attr "length_immediate" "1")
7010 (set_attr "prefix" "vex")
7011 (set_attr "mode" "TI")])
;; Quadword insert into a V2DI register (pinsrq); 64-bit SSE4.1 targets
;; only.  A DI scalar (operand 2, general register or memory) is merged
;; into operand 1, which is tied to the destination, under the
;; power-of-two mask in operand 3 (1..2); the output code converts the
;; mask to an element index with exact_log2 before printing.
7013 (define_insn "*sse4_1_pinsrq"
7014 [(set (match_operand:V2DI 0 "register_operand" "=x")
7017 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7018 (match_operand:V2DI 1 "register_operand" "0")
7019 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7020 "TARGET_SSE4_1 && TARGET_64BIT"
7022 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7023 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7025 [(set_attr "type" "sselog")
7026 (set_attr "prefix_rex" "1")
7027 (set_attr "prefix_extra" "1")
7028 (set_attr "length_immediate" "1")
7029 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register into an SI general register.
;; Operand 2 is the constant element index (0..15).  The template uses
;; the %v prefix and the insn is marked maybe_vex.
7031 (define_insn "*sse4_1_pextrb"
7032 [(set (match_operand:SI 0 "register_operand" "=r")
7035 (match_operand:V16QI 1 "register_operand" "x")
7036 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7038 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7039 [(set_attr "type" "sselog")
7040 (set_attr "prefix_extra" "1")
7041 (set_attr "length_immediate" "1")
7042 (set_attr "prefix" "maybe_vex")
7043 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register stored directly to a QI memory
;; location.  Operand 2 is the constant element index (0..15).  The
;; template uses the %v prefix and the insn is marked maybe_vex.
7045 (define_insn "*sse4_1_pextrb_memory"
7046 [(set (match_operand:QI 0 "memory_operand" "=m")
7048 (match_operand:V16QI 1 "register_operand" "x")
7049 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7051 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7052 [(set_attr "type" "sselog")
7053 (set_attr "prefix_extra" "1")
7054 (set_attr "length_immediate" "1")
7055 (set_attr "prefix" "maybe_vex")
7056 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register into an SI general register.
;; Operand 2 is the constant element index (0..7).  The template uses the
;; %v prefix and the insn is marked maybe_vex.
7058 (define_insn "*sse2_pextrw"
7059 [(set (match_operand:SI 0 "register_operand" "=r")
7062 (match_operand:V8HI 1 "register_operand" "x")
7063 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7065 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7066 [(set_attr "type" "sselog")
7067 (set_attr "prefix_data16" "1")
7068 (set_attr "length_immediate" "1")
7069 (set_attr "prefix" "maybe_vex")
7070 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register stored directly to an HI memory
;; location.  Operand 2 is the constant element index (0..7).  The
;; template uses the %v prefix and the insn is marked maybe_vex.
7072 (define_insn "*sse4_1_pextrw_memory"
7073 [(set (match_operand:HI 0 "memory_operand" "=m")
7075 (match_operand:V8HI 1 "register_operand" "x")
7076 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7078 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7079 [(set_attr "type" "sselog")
7080 (set_attr "prefix_extra" "1")
7081 (set_attr "length_immediate" "1")
7082 (set_attr "prefix" "maybe_vex")
7083 (set_attr "mode" "TI")])
;; Doubleword extract from a V4SI register into an SI general register
;; or memory ("=rm").  Operand 2 is the constant element index (0..3).
;; The template uses the %v prefix and the insn is marked maybe_vex.
7085 (define_insn "*sse4_1_pextrd"
7086 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7088 (match_operand:V4SI 1 "register_operand" "x")
7089 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7091 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7092 [(set_attr "type" "sselog")
7093 (set_attr "prefix_extra" "1")
7094 (set_attr "length_immediate" "1")
7095 (set_attr "prefix" "maybe_vex")
7096 (set_attr "mode" "TI")])
7098 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Quadword extract from a V2DI register into a DI general register or
;; memory ("=rm"); 64-bit SSE4.1 targets only.  Operand 2 is the constant
;; element index (0..1).  The template uses the %v prefix and the insn is
;; marked maybe_vex.
7099 (define_insn "*sse4_1_pextrq"
7100 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7102 (match_operand:V2DI 1 "register_operand" "x")
7103 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7104 "TARGET_SSE4_1 && TARGET_64BIT"
7105 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7106 [(set_attr "type" "sselog")
7107 (set_attr "prefix_rex" "1")
7108 (set_attr "prefix_extra" "1")
7109 (set_attr "length_immediate" "1")
7110 (set_attr "prefix" "maybe_vex")
7111 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant
;; (operand 2).  It splits that mask into four 2-bit fields, taken at bit
;; offsets 0, 2, 4 and 6, and emits sse2_pshufd_1 with each field as a
;; separate selector operand.
7113 (define_expand "sse2_pshufd"
7114 [(match_operand:V4SI 0 "register_operand" "")
7115 (match_operand:V4SI 1 "nonimmediate_operand" "")
7116 (match_operand:SI 2 "const_int_operand" "")]
7119 int mask = INTVAL (operands[2]);
7120 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7121 GEN_INT ((mask >> 0) & 3),
7122 GEN_INT ((mask >> 2) & 3),
7123 GEN_INT ((mask >> 4) & 3),
7124 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with one selector operand (each 0..3) per result
;; element.  The output code reassembles the four selectors into a single
;; immediate, operand 2 in bits 0-1 up to operand 5 in bits 6-7, stores
;; it back in operands[2], and prints %vpshufd with that immediate.
7128 (define_insn "sse2_pshufd_1"
7129 [(set (match_operand:V4SI 0 "register_operand" "=x")
7131 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7132 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7133 (match_operand 3 "const_0_to_3_operand" "")
7134 (match_operand 4 "const_0_to_3_operand" "")
7135 (match_operand 5 "const_0_to_3_operand" "")])))]
7139 mask |= INTVAL (operands[2]) << 0;
7140 mask |= INTVAL (operands[3]) << 2;
7141 mask |= INTVAL (operands[4]) << 4;
7142 mask |= INTVAL (operands[5]) << 6;
7143 operands[2] = GEN_INT (mask);
7145 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7147 [(set_attr "type" "sselog1")
7148 (set_attr "prefix_data16" "1")
7149 (set_attr "prefix" "maybe_vex")
7150 (set_attr "length_immediate" "1")
7151 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant
;; (operand 2).  It splits that mask into four 2-bit fields, taken at bit
;; offsets 0, 2, 4 and 6, and emits sse2_pshuflw_1 with each field as a
;; separate selector operand.
7153 (define_expand "sse2_pshuflw"
7154 [(match_operand:V8HI 0 "register_operand" "")
7155 (match_operand:V8HI 1 "nonimmediate_operand" "")
7156 (match_operand:SI 2 "const_int_operand" "")]
7159 int mask = INTVAL (operands[2]);
7160 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7161 GEN_INT ((mask >> 0) & 3),
7162 GEN_INT ((mask >> 2) & 3),
7163 GEN_INT ((mask >> 4) & 3),
7164 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four selection entries are operands 2..5
;; (each 0..3).  The output code reassembles those four selectors into a
;; single immediate, operand 2 in bits 0-1 up to operand 5 in bits 6-7,
;; stores it back in operands[2], and prints %vpshuflw.  The encoding
;; attributes set prefix_rep to 1 and prefix_data16 to 0.
7168 (define_insn "sse2_pshuflw_1"
7169 [(set (match_operand:V8HI 0 "register_operand" "=x")
7171 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7172 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7173 (match_operand 3 "const_0_to_3_operand" "")
7174 (match_operand 4 "const_0_to_3_operand" "")
7175 (match_operand 5 "const_0_to_3_operand" "")
7183 mask |= INTVAL (operands[2]) << 0;
7184 mask |= INTVAL (operands[3]) << 2;
7185 mask |= INTVAL (operands[4]) << 4;
7186 mask |= INTVAL (operands[5]) << 6;
7187 operands[2] = GEN_INT (mask);
7189 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7191 [(set_attr "type" "sselog")
7192 (set_attr "prefix_data16" "0")
7193 (set_attr "prefix_rep" "1")
7194 (set_attr "prefix" "maybe_vex")
7195 (set_attr "length_immediate" "1")
7196 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant
;; (operand 2).  It splits that mask into four 2-bit fields, taken at bit
;; offsets 0, 2, 4 and 6, and adds 4 to each so that the selectors passed
;; to sse2_pshufhw_1 lie in the range 4..7.
7198 (define_expand "sse2_pshufhw"
7199 [(match_operand:V8HI 0 "register_operand" "")
7200 (match_operand:V8HI 1 "nonimmediate_operand" "")
7201 (match_operand:SI 2 "const_int_operand" "")]
7204 int mask = INTVAL (operands[2]);
7205 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7206 GEN_INT (((mask >> 0) & 3) + 4),
7207 GEN_INT (((mask >> 2) & 3) + 4),
7208 GEN_INT (((mask >> 4) & 3) + 4),
7209 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose last four selection entries are operands 2..5
;; (each 4..7); the selection list begins with the constant index 0.
;; The output code subtracts 4 from each selector, packs the results into
;; one immediate (operand 2 in bits 0-1 up to operand 5 in bits 6-7),
;; stores it back in operands[2], and prints %vpshufhw.  The encoding
;; attributes set prefix_rep to 1 and prefix_data16 to 0.
7213 (define_insn "sse2_pshufhw_1"
7214 [(set (match_operand:V8HI 0 "register_operand" "=x")
7216 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7217 (parallel [(const_int 0)
7221 (match_operand 2 "const_4_to_7_operand" "")
7222 (match_operand 3 "const_4_to_7_operand" "")
7223 (match_operand 4 "const_4_to_7_operand" "")
7224 (match_operand 5 "const_4_to_7_operand" "")])))]
7228 mask |= (INTVAL (operands[2]) - 4) << 0;
7229 mask |= (INTVAL (operands[3]) - 4) << 2;
7230 mask |= (INTVAL (operands[4]) - 4) << 4;
7231 mask |= (INTVAL (operands[5]) - 4) << 6;
7232 operands[2] = GEN_INT (mask);
7234 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7236 [(set_attr "type" "sselog")
7237 (set_attr "prefix_rep" "1")
7238 (set_attr "prefix_data16" "0")
7239 (set_attr "prefix" "maybe_vex")
7240 (set_attr "length_immediate" "1")
7241 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from an SI scalar (operand 1).
;; The preparation statement sets operands[2] to the V4SI zero vector.
;; NOTE(review): operand 2 presumably supplies the remaining elements of
;; the result -- confirm against the full pattern.
7243 (define_expand "sse2_loadd"
7244 [(set (match_operand:V4SI 0 "register_operand" "")
7247 (match_operand:SI 1 "nonimmediate_operand" ""))
7251 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX-encoded load of an SI scalar (operand 2) into a V4SI register.
;; Operand 1 is a register or zero (reg_or_0_operand).  Three
;; alternatives:
;;   0: scalar in memory           -> vmovd
;;   1: scalar in a general reg    -> vmovd
;;   2: scalar in an SSE register, operand 1 a register -> vmovss
7253 (define_insn "*avx_loadld"
7254 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7257 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7258 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7262 vmovd\t{%2, %0|%0, %2}
7263 vmovd\t{%2, %0|%0, %2}
7264 vmovss\t{%2, %1, %0|%0, %1, %2}"
7265 [(set_attr "type" "ssemov")
7266 (set_attr "prefix" "vex")
7267 (set_attr "mode" "TI,TI,V4SF")])
;; Load of an SI scalar (operand 2) into a V4SI register.  Operand 1 is a
;; register or zero (reg_or_0_operand).  Four alternatives:
;;   0: scalar in memory         -> movd
;;   1: scalar in a general reg  -> movd
;;   2: scalar in memory         -> movss
;;   3: scalar in an SSE register, operand 1 tied to the destination
;;      -> movss
7269 (define_insn "sse2_loadld"
7270 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7273 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7274 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7278 movd\t{%2, %0|%0, %2}
7279 movd\t{%2, %0|%0, %2}
7280 movss\t{%2, %0|%0, %2}
7281 movss\t{%2, %0|%0, %2}"
7282 [(set_attr "type" "ssemov")
7283 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store of element 0 of a V4SI register to an SI destination.
;; After reload the insn is split into a plain (set dest src) move when
;; inter-unit moves are allowed, the destination is memory, or the
;; destination is not a general register.  For the split, operands[1] is
;; rewritten as an SImode REG with the same register number.
7285 (define_insn_and_split "sse2_stored"
7286 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7288 (match_operand:V4SI 1 "register_operand" "x,Yi")
7289 (parallel [(const_int 0)])))]
7292 "&& reload_completed
7293 && (TARGET_INTER_UNIT_MOVES
7294 || MEM_P (operands [0])
7295 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7296 [(set (match_dup 0) (match_dup 1))]
7298 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract of element i (operand 2, 0..3) from a V4SI value in memory
;; into an SI general register.  The split replaces the vector access
;; with an ordinary SImode move from the same address adjusted by i*4
;; bytes (adjust_address).
7301 (define_insn_and_split "*vec_ext_v4si_mem"
7302 [(set (match_operand:SI 0 "register_operand" "=r")
7304 (match_operand:V4SI 1 "memory_operand" "o")
7305 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7311 int i = INTVAL (operands[2]);
7313 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of a V2DI register to a DI register or
;; memory destination.
7317 (define_expand "sse_storeq"
7318 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7320 (match_operand:V2DI 1 "register_operand" "")
7321 (parallel [(const_int 0)])))]
;; 64-bit-only store of element 0 of a V2DI operand to a DI destination;
;; memory-to-memory combinations are rejected by the insn condition.
;; Three alternatives: SSE register to memory/SSE register, SSE register
;; to general register, and offsettable memory to general register.  The
;; third is an integer move (type imov, %vmov{q}); the first two leave
;; type, prefix and mode at "*".
7325 (define_insn "*sse2_storeq_rex64"
7326 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7328 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7329 (parallel [(const_int 0)])))]
7330 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7334 %vmov{q}\t{%1, %0|%0, %1}"
7335 [(set_attr "type" "*,*,imov")
7336 (set_attr "prefix" "*,*,maybe_vex")
7337 (set_attr "mode" "*,*,DI")])
;; Store of element 0 of a V2DI register to memory or an SSE register
;; ("=mx"), followed by the split for the same operation.  The split
;; fires when inter-unit moves are allowed, the destination is memory, or
;; the destination is not a general register.  It produces a plain
;; (set dest src) move with operands[1] rewritten as a DImode REG of the
;; same register number.
7339 (define_insn "*sse2_storeq"
7340 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7342 (match_operand:V2DI 1 "register_operand" "x")
7343 (parallel [(const_int 0)])))]
7348 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7350 (match_operand:V2DI 1 "register_operand" "")
7351 (parallel [(const_int 0)])))]
7354 && (TARGET_INTER_UNIT_MOVES
7355 || MEM_P (operands [0])
7356 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7357 [(set (match_dup 0) (match_dup 1))]
7359 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; VEX-encoded extract of element 1 (the upper quadword) of a V2DI
;; operand; memory-to-memory combinations are rejected.  Four
;; alternatives:
;;   0: SSE reg -> memory       vmovhps
;;   1: SSE reg -> SSE reg      vpsrldq by 8 bytes
;;   2: memory  -> SSE reg      vmovq from the %H1 address
;;   3: memory  -> general reg  vmov{q} from the %H1 address
7362 (define_insn "*vec_extractv2di_1_rex64_avx"
7363 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7365 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7366 (parallel [(const_int 1)])))]
7369 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7371 vmovhps\t{%1, %0|%0, %1}
7372 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7373 vmovq\t{%H1, %0|%0, %H1}
7374 vmov{q}\t{%H1, %0|%0, %H1}"
7375 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7376 (set_attr "length_immediate" "*,1,*,*")
7377 (set_attr "memory" "*,none,*,*")
7378 (set_attr "prefix" "vex")
7379 (set_attr "mode" "V2SF,TI,TI,DI")])
;; *vec_extractv2di_1_rex64: 64-bit, non-VEX extraction of element 1 (the
;; upper quadword) of a V2DI into a DI destination.  Memory-to-memory is
;; rejected.
;; Alternatives:
;;   0: SSE reg -> memory          movhps
;;   1: SSE reg -> same SSE reg    psrldq by 8 bytes; the source is tied to
;;                                 the destination ("0") because the
;;                                 instruction has a single register operand
;;   2: memory  -> SSE reg         movq from the high half (%H1)
;;   3: memory  -> general reg     mov{q} from the high half (%H1)
7381 (define_insn "*vec_extractv2di_1_rex64"
7382 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7384 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7385 (parallel [(const_int 1)])))]
7386 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7388 movhps\t{%1, %0|%0, %1}
7389 psrldq\t{$8, %0|%0, 8}
7390 movq\t{%H1, %0|%0, %H1}
7391 mov{q}\t{%H1, %0|%0, %H1}"
7392 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7393 (set_attr "length_immediate" "*,1,*,*")
7394 (set_attr "atom_unit" "*,sishuf,*,*")
7395 (set_attr "memory" "*,none,*,*")
7396 (set_attr "mode" "V2SF,TI,TI,DI")])
;; *vec_extractv2di_1_avx: VEX-encoded extraction of element 1 (the upper
;; quadword) of a V2DI into memory or an SSE register; there is no
;; general-register alternative.  Memory-to-memory is rejected.
;; Alternatives: vmovhps (reg -> mem), vpsrldq by 8 (reg -> reg),
;; vmovq from the high half %H1 (mem -> reg).
7398 (define_insn "*vec_extractv2di_1_avx"
7399 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7401 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7402 (parallel [(const_int 1)])))]
7405 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7407 vmovhps\t{%1, %0|%0, %1}
7408 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7409 vmovq\t{%H1, %0|%0, %H1}"
7410 [(set_attr "type" "ssemov,sseishft,ssemov")
7411 (set_attr "length_immediate" "*,1,*")
7412 (set_attr "memory" "*,none,*")
7413 (set_attr "prefix" "vex")
7414 (set_attr "mode" "V2SF,TI,TI")])
;; *vec_extractv2di_1_sse2: SSE2 (non-VEX) extraction of element 1 (the
;; upper quadword) of a V2DI into memory or an SSE register.
;; Memory-to-memory is rejected.
;; Alternatives: movhps (reg -> mem), psrldq by 8 with the source tied to
;; the destination (reg -> same reg), movq from the high half %H1
;; (mem -> reg).
7416 (define_insn "*vec_extractv2di_1_sse2"
7417 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7419 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7420 (parallel [(const_int 1)])))]
7422 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7424 movhps\t{%1, %0|%0, %1}
7425 psrldq\t{$8, %0|%0, 8}
7426 movq\t{%H1, %0|%0, %H1}"
7427 [(set_attr "type" "ssemov,sseishft,ssemov")
7428 (set_attr "length_immediate" "*,1,*")
7429 (set_attr "atom_unit" "*,sishuf,*")
7430 (set_attr "memory" "*,none,*")
7431 (set_attr "mode" "V2SF,TI,TI")])
;; *vec_extractv2di_1_sse: SSE1-only fallback (enabled when SSE is present
;; but SSE2 is not) for extracting element 1 of a V2DI, using only
;; single-precision move instructions: movhps (reg -> mem), movhlps
;; (reg -> reg) and movlps from the high half %H1 (mem -> reg).
7433 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7434 (define_insn "*vec_extractv2di_1_sse"
7435 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7437 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7438 (parallel [(const_int 1)])))]
7439 "!TARGET_SSE2 && TARGET_SSE
7440 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7442 movhps\t{%1, %0|%0, %1}
7443 movhlps\t{%1, %0|%0, %1}
7444 movlps\t{%H1, %0|%0, %H1}"
7445 [(set_attr "type" "ssemov")
7446 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_dupv4si_avx: build a V4SI from the SI operand 1, VEX-encoded.
;; Alternative 0 takes the scalar from an SSE register and uses vpshufd
;; with selector 0; alternative 1 takes it from memory and uses
;; vbroadcastss.
;; NOTE(review): operand 1's predicate is register_operand although
;; alternative 1's constraint is "m", so the memory form can only be
;; reached when reload spills the register -- check whether
;; nonimmediate_operand was intended.
7448 (define_insn "*vec_dupv4si_avx"
7449 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7451 (match_operand:SI 1 "register_operand" "x,m")))]
7454 vpshufd\t{$0, %1, %0|%0, %1, 0}
7455 vbroadcastss\t{%1, %0|%0, %1}"
7456 [(set_attr "type" "sselog1,ssemov")
7457 (set_attr "length_immediate" "1,0")
7458 (set_attr "prefix_extra" "0,1")
7459 (set_attr "prefix" "vex")
7460 (set_attr "mode" "TI,V4SF")])
;; *vec_dupv4si: build a V4SI from the SI register operand 1.
;; Alternative 0 ("Y2" registers) uses pshufd with selector 0 (VEX form
;; when available, via the %v prefix); alternative 1 requires the source
;; to already be in the destination register and uses shufps with
;; selector 0 on that register.
7462 (define_insn "*vec_dupv4si"
7463 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7465 (match_operand:SI 1 "register_operand" " Y2,0")))]
7468 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7469 shufps\t{$0, %0, %0|%0, %0, 0}"
7470 [(set_attr "type" "sselog1")
7471 (set_attr "length_immediate" "1")
7472 (set_attr "mode" "TI,V4SF")])
;; *vec_dupv2di_avx: build a V2DI from the DI operand 1, VEX-encoded.
;; Register source: vpunpcklqdq of the source with itself.
;; Memory source: vmovddup.
7474 (define_insn "*vec_dupv2di_avx"
7475 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7477 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7480 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7481 vmovddup\t{%1, %0|%0, %1}"
7482 [(set_attr "type" "sselog1")
7483 (set_attr "prefix" "vex")
7484 (set_attr "mode" "TI,DF")])
;; *vec_dupv2di_sse3: build a V2DI from the DI operand 1.
;; Alternative 0 requires the source to be tied to the destination
;; register ("0"); alternative 1 takes the source from memory and uses
;; movddup.
7486 (define_insn "*vec_dupv2di_sse3"
7487 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7489 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7493 movddup\t{%1, %0|%0, %1}"
7494 [(set_attr "type" "sselog1")
7495 (set_attr "mode" "TI,DF")])
;; *vec_dupv2di: build a V2DI from the DI register operand 1.
;; In both alternatives the source is tied to the destination register;
;; alternative 0 ("Y2") is an integer-domain shuffle (sselog1, TI mode) and
;; alternative 1 ("x") is a single-precision move (ssemov, V4SF mode).
7497 (define_insn "*vec_dupv2di"
7498 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7500 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7505 [(set_attr "type" "sselog1,ssemov")
7506 (set_attr "mode" "TI,V4SF")])
;; *vec_concatv2si_avx: form a V2SI from two SI operands (operand 1 and
;; operand 2).
;; Alternatives:
;;   0: SSE reg + reg/mem      vpinsrd into element 1
;;   1: SSE reg + SSE reg      vpunpckldq
;;   2: reg/mem + constant "C" vmovd of operand 1 only
;;   3: MMX reg + MMX reg/mem  punpckldq (source 1 tied to destination)
;;   4: reg/mem + constant "C" movd into an MMX register
;; The two MMX alternatives (3, 4) keep the legacy encoding (prefix
;; "orig"); all others are VEX-encoded.
7508 (define_insn "*vec_concatv2si_avx"
7509 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7511 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7512 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7515 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7516 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7517 vmovd\t{%1, %0|%0, %1}
7518 punpckldq\t{%2, %0|%0, %2}
7519 movd\t{%1, %0|%0, %1}"
7520 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7521 (set_attr "prefix_extra" "1,*,*,*,*")
7522 (set_attr "length_immediate" "1,*,*,*,*")
7523 (set (attr "prefix")
7524 (if_then_else (eq_attr "alternative" "3,4")
7525 (const_string "orig")
7526 (const_string "vex")))
7527 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; *vec_concatv2si_sse4_1: non-VEX counterpart of *vec_concatv2si_avx.
;; Same five alternatives, but the two-operand SSE forms (pinsrd,
;; punpckldq) require operand 1 to be tied to the destination ("0").
;; Alternatives 0-2 operate on SSE registers, 3-4 on MMX registers.
7529 (define_insn "*vec_concatv2si_sse4_1"
7530 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7532 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7533 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7536 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7537 punpckldq\t{%2, %0|%0, %2}
7538 movd\t{%1, %0|%0, %1}
7539 punpckldq\t{%2, %0|%0, %2}
7540 movd\t{%1, %0|%0, %1}"
7541 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7542 (set_attr "prefix_extra" "1,*,*,*,*")
7543 (set_attr "length_immediate" "1,*,*,*,*")
7544 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; *vec_concatv2si_sse2: form a V2SI from two SI operands without pinsrd.
;; Operand 2 is restricted to a register or the constant "C"
;; (reg_or_0_operand), so it is never a memory operand.
;; Alternatives: punpckldq / movd on SSE registers, then punpckldq / movd
;; on MMX registers.
7546 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7547 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7548 ;; alternatives pretty much forces the MMX alternative to be chosen.
7549 (define_insn "*vec_concatv2si_sse2"
7550 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7552 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7553 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7556 punpckldq\t{%2, %0|%0, %2}
7557 movd\t{%1, %0|%0, %1}
7558 punpckldq\t{%2, %0|%0, %2}
7559 movd\t{%1, %0|%0, %1}"
7560 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7561 (set_attr "mode" "TI,TI,DI,DI")])
;; *vec_concatv2si_sse: form a V2SI from two SI operands using
;; single-precision instructions on the SSE side: unpcklps
;; (register + register) and movss (memory + constant "C").  The MMX
;; alternatives use punpckldq and movd.
7563 (define_insn "*vec_concatv2si_sse"
7564 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7566 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7567 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7570 unpcklps\t{%2, %0|%0, %2}
7571 movss\t{%1, %0|%0, %1}
7572 punpckldq\t{%2, %0|%0, %2}
7573 movd\t{%1, %0|%0, %1}"
7574 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7575 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; *vec_concatv4si_1_avx: form a V4SI from two V2SI halves, VEX-encoded.
;; Operand 2 in a register: vpunpcklqdq.  Operand 2 in memory: vmovhps.
7577 (define_insn "*vec_concatv4si_1_avx"
7578 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7580 (match_operand:V2SI 1 "register_operand" " x,x")
7581 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7584 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7585 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7586 [(set_attr "type" "sselog,ssemov")
7587 (set_attr "prefix" "vex")
7588 (set_attr "mode" "TI,V2SF")])
;; *vec_concatv4si_1: form a V4SI from two V2SI halves, non-VEX.
;; Operand 1 is always tied to the destination.  Operand 2 comes from a
;; "Y2" register (punpcklqdq), any SSE register (movlhps) or memory
;; (movhps).
7590 (define_insn "*vec_concatv4si_1"
7591 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7593 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7594 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7597 punpcklqdq\t{%2, %0|%0, %2}
7598 movlhps\t{%2, %0|%0, %2}
7599 movhps\t{%2, %0|%0, %2}"
7600 [(set_attr "type" "sselog,ssemov,ssemov")
7601 (set_attr "mode" "TI,V4SF,V2SF")])
;; *vec_concatv2di_avx: form a V2DI from two DI operands on 32-bit AVX
;; targets.
;; Alternatives:
;;   0: memory + constant "C"    vmovq of operand 1 only
;;   1: MMX reg + constant "C"   movq2dq (legacy encoding, prefix "orig")
;;   2: SSE reg + SSE reg        vpunpcklqdq
;;   3: SSE reg + memory         vmovhps
7603 (define_insn "*vec_concatv2di_avx"
7604 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7606 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7607 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7608 "!TARGET_64BIT && TARGET_AVX"
7610 vmovq\t{%1, %0|%0, %1}
7611 movq2dq\t{%1, %0|%0, %1}
7612 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7613 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7614 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7615 (set (attr "prefix")
7616 (if_then_else (eq_attr "alternative" "1")
7617 (const_string "orig")
7618 (const_string "vex")))
7619 (set_attr "mode" "TI,TI,TI,V2SF")])
;; vec_concatv2di: form a V2DI from two DI operands on 32-bit SSE targets
;; (non-VEX).
;; Alternatives:
;;   0: memory/"Y2" reg + constant "C"  movq of operand 1 only
;;   1: MMX reg + constant "C"          movq2dq
;;   2: tied reg + "Y2" reg             punpcklqdq
;;   3: tied reg + SSE reg              movlhps
;;   4: tied reg + memory               movhps
7621 (define_insn "vec_concatv2di"
7622 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7624 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7625 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7626 "!TARGET_64BIT && TARGET_SSE"
7628 movq\t{%1, %0|%0, %1}
7629 movq2dq\t{%1, %0|%0, %1}
7630 punpcklqdq\t{%2, %0|%0, %2}
7631 movlhps\t{%2, %0|%0, %2}
7632 movhps\t{%2, %0|%0, %2}"
7633 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7634 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; *vec_concatv2di_rex64_avx: form a V2DI from two DI operands on 64-bit
;; AVX targets.
;; Alternatives:
;;   0: SSE reg + general reg/memory  vpinsrq into element 1
;;   1: SSE reg/memory + "C"          vmovq of operand 1 only
;;   2: general reg + "C"             vmovq into a "Yi" register
;;   3: MMX reg + "C"                 movq2dq (legacy encoding, "orig")
;;   4: SSE reg + SSE reg             vpunpcklqdq
;;   5: SSE reg + memory              vmovhps
7636 (define_insn "*vec_concatv2di_rex64_avx"
7637 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7639 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7640 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7641 "TARGET_64BIT && TARGET_AVX"
7643 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7644 vmovq\t{%1, %0|%0, %1}
7645 vmovq\t{%1, %0|%0, %1}
7646 movq2dq\t{%1, %0|%0, %1}
7647 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7648 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7649 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7650 (set_attr "prefix_extra" "1,*,*,*,*,*")
7651 (set_attr "length_immediate" "1,*,*,*,*,*")
7652 (set (attr "prefix")
7653 (if_then_else (eq_attr "alternative" "3")
7654 (const_string "orig")
7655 (const_string "vex")))
7656 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; *vec_concatv2di_rex64_sse4_1: form a V2DI from two DI operands on
;; 64-bit SSE4.1 targets (non-VEX).
;; Alternatives:
;;   0: tied reg + general reg/memory  pinsrq into element 1
;;   1: memory/SSE reg + "C"           movq of operand 1 only
;;   2: general reg + "C"              movq into a "Yi" register
;;   3: MMX reg + "C"                  movq2dq
;;   4: tied reg + SSE reg             punpcklqdq
;;   5: tied reg + SSE reg             movlhps
;;   6: tied reg + memory              movhps
;; Alternatives 0 and 2 are marked as needing a REX prefix.
7658 (define_insn "*vec_concatv2di_rex64_sse4_1"
7659 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7661 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7662 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7663 "TARGET_64BIT && TARGET_SSE4_1"
7665 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7666 movq\t{%1, %0|%0, %1}
7667 movq\t{%1, %0|%0, %1}
7668 movq2dq\t{%1, %0|%0, %1}
7669 punpcklqdq\t{%2, %0|%0, %2}
7670 movlhps\t{%2, %0|%0, %2}
7671 movhps\t{%2, %0|%0, %2}"
7672 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7673 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7674 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7675 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7676 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; *vec_concatv2di_rex64_sse: form a V2DI from two DI operands on 64-bit
;; SSE targets without pinsrq.
;; Alternatives:
;;   0: memory/"Y2" reg + "C"   movq of operand 1 only
;;   1: general reg + "C"       movq into a "Yi" register (needs REX)
;;   2: MMX reg + "C"           movq2dq
;;   3: tied reg + "Y2" reg     punpcklqdq
;;   4: tied reg + SSE reg      movlhps
;;   5: tied reg + memory       movhps
7678 (define_insn "*vec_concatv2di_rex64_sse"
7679 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7681 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7682 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7683 "TARGET_64BIT && TARGET_SSE"
7685 movq\t{%1, %0|%0, %1}
7686 movq\t{%1, %0|%0, %1}
7687 movq2dq\t{%1, %0|%0, %1}
7688 punpcklqdq\t{%2, %0|%0, %2}
7689 movlhps\t{%2, %0|%0, %2}
7690 movhps\t{%2, %0|%0, %2}"
7691 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7692 (set_attr "prefix_rex" "*,1,*,*,*,*")
7693 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; vec_unpacku_hi_v16qi: expander taking a V16QI register (operand 1) and
;; producing a V8HI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true); across the sibling expanders the first flag tracks "u"
;; vs "s" in the name and the second tracks "hi" vs "lo".
7695 (define_expand "vec_unpacku_hi_v16qi"
7696 [(match_operand:V8HI 0 "register_operand" "")
7697 (match_operand:V16QI 1 "register_operand" "")]
7701 ix86_expand_sse4_unpack (operands, true, true);
7703 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v16qi: V16QI (operand 1) -> V8HI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, true), i.e. the "s" / "hi" combination.
7707 (define_expand "vec_unpacks_hi_v16qi"
7708 [(match_operand:V8HI 0 "register_operand" "")
7709 (match_operand:V16QI 1 "register_operand" "")]
7713 ix86_expand_sse4_unpack (operands, false, true);
7715 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v16qi: V16QI (operand 1) -> V8HI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, false), i.e. the "u" / "lo" combination.
7719 (define_expand "vec_unpacku_lo_v16qi"
7720 [(match_operand:V8HI 0 "register_operand" "")
7721 (match_operand:V16QI 1 "register_operand" "")]
7725 ix86_expand_sse4_unpack (operands, true, false);
7727 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v16qi: V16QI (operand 1) -> V8HI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, false), i.e. the "s" / "lo" combination.
7731 (define_expand "vec_unpacks_lo_v16qi"
7732 [(match_operand:V8HI 0 "register_operand" "")
7733 (match_operand:V16QI 1 "register_operand" "")]
7737 ix86_expand_sse4_unpack (operands, false, false);
7739 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v8hi: V8HI (operand 1) -> V4SI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, true), i.e. the "u" / "hi" combination.
7743 (define_expand "vec_unpacku_hi_v8hi"
7744 [(match_operand:V4SI 0 "register_operand" "")
7745 (match_operand:V8HI 1 "register_operand" "")]
7749 ix86_expand_sse4_unpack (operands, true, true);
7751 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v8hi: V8HI (operand 1) -> V4SI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, true), i.e. the "s" / "hi" combination.
7755 (define_expand "vec_unpacks_hi_v8hi"
7756 [(match_operand:V4SI 0 "register_operand" "")
7757 (match_operand:V8HI 1 "register_operand" "")]
7761 ix86_expand_sse4_unpack (operands, false, true);
7763 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v8hi: V8HI (operand 1) -> V4SI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, false), i.e. the "u" / "lo" combination.
7767 (define_expand "vec_unpacku_lo_v8hi"
7768 [(match_operand:V4SI 0 "register_operand" "")
7769 (match_operand:V8HI 1 "register_operand" "")]
7773 ix86_expand_sse4_unpack (operands, true, false);
7775 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v8hi: V8HI (operand 1) -> V4SI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, false), i.e. the "s" / "lo" combination.
7779 (define_expand "vec_unpacks_lo_v8hi"
7780 [(match_operand:V4SI 0 "register_operand" "")
7781 (match_operand:V8HI 1 "register_operand" "")]
7785 ix86_expand_sse4_unpack (operands, false, false);
7787 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v4si: V4SI (operand 1) -> V2DI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, true), i.e. the "u" / "hi" combination.
7791 (define_expand "vec_unpacku_hi_v4si"
7792 [(match_operand:V2DI 0 "register_operand" "")
7793 (match_operand:V4SI 1 "register_operand" "")]
7797 ix86_expand_sse4_unpack (operands, true, true);
7799 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v4si: V4SI (operand 1) -> V2DI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, true), i.e. the "s" / "hi" combination.
7803 (define_expand "vec_unpacks_hi_v4si"
7804 [(match_operand:V2DI 0 "register_operand" "")
7805 (match_operand:V4SI 1 "register_operand" "")]
7809 ix86_expand_sse4_unpack (operands, false, true);
7811 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v4si: V4SI (operand 1) -> V2DI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, false), i.e. the "u" / "lo" combination.
7815 (define_expand "vec_unpacku_lo_v4si"
7816 [(match_operand:V2DI 0 "register_operand" "")
7817 (match_operand:V4SI 1 "register_operand" "")]
7821 ix86_expand_sse4_unpack (operands, true, false);
7823 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v4si: V4SI (operand 1) -> V2DI (operand 0) expander.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, false), i.e. the "s" / "lo" combination.
7827 (define_expand "vec_unpacks_lo_v4si"
7828 [(match_operand:V2DI 0 "register_operand" "")
7829 (match_operand:V4SI 1 "register_operand" "")]
7833 ix86_expand_sse4_unpack (operands, false, false);
7835 ix86_expand_sse_unpack (operands, false, false);
7839 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7843 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: named expander for the V16QI "uavg" operation on
;; operands 1 and 2 (register or memory), producing a V16QI register.
;; The pattern includes a constant vector of sixteen 1s (presumably the
;; +1 rounding term of the average -- TODO confirm).
;; Before the insn is emitted, ix86_fixup_binary_operands_no_copy is
;; called for a PLUS in V16QImode to make the operands acceptable.
7845 (define_expand "sse2_uavgv16qi3"
7846 [(set (match_operand:V16QI 0 "register_operand" "")
7852 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7854 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7855 (const_vector:V16QI [(const_int 1) (const_int 1)
7856 (const_int 1) (const_int 1)
7857 (const_int 1) (const_int 1)
7858 (const_int 1) (const_int 1)
7859 (const_int 1) (const_int 1)
7860 (const_int 1) (const_int 1)
7861 (const_int 1) (const_int 1)
7862 (const_int 1) (const_int 1)]))
7865 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; *avx_uavgv16qi3: AVX form of the V16QI "uavg" pattern, output as the
;; three-operand vpavgb.  Operand 1 is marked commutative ("%") and must
;; be a register; operand 2 may be a register or memory.  Requires
;; TARGET_AVX and that ix86_binary_operator_ok accepts the operands.
7867 (define_insn "*avx_uavgv16qi3"
7868 [(set (match_operand:V16QI 0 "register_operand" "=x")
7874 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7876 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7877 (const_vector:V16QI [(const_int 1) (const_int 1)
7878 (const_int 1) (const_int 1)
7879 (const_int 1) (const_int 1)
7880 (const_int 1) (const_int 1)
7881 (const_int 1) (const_int 1)
7882 (const_int 1) (const_int 1)
7883 (const_int 1) (const_int 1)
7884 (const_int 1) (const_int 1)]))
7886 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7887 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7888 [(set_attr "type" "sseiadd")
7889 (set_attr "prefix" "vex")
7890 (set_attr "mode" "TI")])
;; *sse2_uavgv16qi3: SSE2 form of the V16QI "uavg" pattern, output as the
;; two-operand pavgb.  Operand 1 is commutative ("%") and tied to the
;; destination ("0"); operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and that ix86_binary_operator_ok accepts the operands.
7892 (define_insn "*sse2_uavgv16qi3"
7893 [(set (match_operand:V16QI 0 "register_operand" "=x")
7899 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7901 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7902 (const_vector:V16QI [(const_int 1) (const_int 1)
7903 (const_int 1) (const_int 1)
7904 (const_int 1) (const_int 1)
7905 (const_int 1) (const_int 1)
7906 (const_int 1) (const_int 1)
7907 (const_int 1) (const_int 1)
7908 (const_int 1) (const_int 1)
7909 (const_int 1) (const_int 1)]))
7911 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7912 "pavgb\t{%2, %0|%0, %2}"
7913 [(set_attr "type" "sseiadd")
7914 (set_attr "prefix_data16" "1")
7915 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: named expander for the V8HI "uavg" operation on
;; operands 1 and 2 (register or memory), producing a V8HI register.
;; The pattern includes a constant vector of eight 1s (presumably the +1
;; rounding term of the average -- TODO confirm).
;; Before the insn is emitted, ix86_fixup_binary_operands_no_copy is
;; called for a PLUS in V8HImode to make the operands acceptable.
7917 (define_expand "sse2_uavgv8hi3"
7918 [(set (match_operand:V8HI 0 "register_operand" "")
7924 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7926 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7927 (const_vector:V8HI [(const_int 1) (const_int 1)
7928 (const_int 1) (const_int 1)
7929 (const_int 1) (const_int 1)
7930 (const_int 1) (const_int 1)]))
7933 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; *avx_uavgv8hi3: AVX form of the V8HI "uavg" pattern, output as the
;; three-operand vpavgw.  Operand 1 is marked commutative ("%") and must
;; be a register; operand 2 may be a register or memory.  Requires
;; TARGET_AVX and that ix86_binary_operator_ok accepts the operands.
7935 (define_insn "*avx_uavgv8hi3"
7936 [(set (match_operand:V8HI 0 "register_operand" "=x")
7942 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7944 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7945 (const_vector:V8HI [(const_int 1) (const_int 1)
7946 (const_int 1) (const_int 1)
7947 (const_int 1) (const_int 1)
7948 (const_int 1) (const_int 1)]))
7950 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7951 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7952 [(set_attr "type" "sseiadd")
7953 (set_attr "prefix" "vex")
7954 (set_attr "mode" "TI")])
;; *sse2_uavgv8hi3: SSE2 form of the V8HI "uavg" pattern, output as the
;; two-operand pavgw.  Operand 1 is commutative ("%") and tied to the
;; destination ("0"); operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and that ix86_binary_operator_ok accepts the operands.
7956 (define_insn "*sse2_uavgv8hi3"
7957 [(set (match_operand:V8HI 0 "register_operand" "=x")
7963 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7965 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7966 (const_vector:V8HI [(const_int 1) (const_int 1)
7967 (const_int 1) (const_int 1)
7968 (const_int 1) (const_int 1)
7969 (const_int 1) (const_int 1)]))
7971 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7972 "pavgw\t{%2, %0|%0, %2}"
7973 [(set_attr "type" "sseiadd")
7974 (set_attr "prefix_data16" "1")
7975 (set_attr "mode" "TI")])
;; *avx_psadbw: AVX vpsadbw on two V16QI inputs producing a V2DI result.
;; The operation is modelled as an opaque unspec rather than as explicit
;; RTL arithmetic, for the reason given in the comment below.
7977 ;; The correct representation for this is absolutely enormous, and
7978 ;; surely not generally useful.
7979 (define_insn "*avx_psadbw"
7980 [(set (match_operand:V2DI 0 "register_operand" "=x")
7981 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7982 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7985 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7986 [(set_attr "type" "sseiadd")
7987 (set_attr "prefix" "vex")
7988 (set_attr "mode" "TI")])
;; sse2_psadbw: SSE2 psadbw on two V16QI inputs producing a V2DI result,
;; modelled as an unspec.  Operand 1 is tied to the destination because
;; the two-operand instruction overwrites its first source; operand 2 may
;; be a register or memory.
7990 (define_insn "sse2_psadbw"
7991 [(set (match_operand:V2DI 0 "register_operand" "=x")
7992 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7993 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7996 "psadbw\t{%2, %0|%0, %2}"
7997 [(set_attr "type" "sseiadd")
7998 (set_attr "atom_unit" "simul")
7999 (set_attr "prefix_data16" "1")
8000 (set_attr "mode" "TI")])
;; avx_movmskp<suffix>256: vmovmskp{s,d} from a 256-bit floating-point
;; vector register (modes of the AVX256MODEF2P iterator) into an SI
;; general register.  Enabled when AVX256_VEC_FLOAT_MODE_P holds for the
;; mode; always VEX-encoded.
8002 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
8003 [(set (match_operand:SI 0 "register_operand" "=r")
8005 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8007 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8008 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
8009 [(set_attr "type" "ssecvt")
8010 (set_attr "prefix" "vex")
8011 (set_attr "mode" "<MODE>")])
;; <sse>_movmskp<suffix>: movmskp{s,d} from a 128-bit floating-point
;; vector register (modes of the SSEMODEF2P iterator) into an SI general
;; register.  Enabled when SSE_VEC_FLOAT_MODE_P holds for the mode; the
;; %v template prefix and "maybe_vex" select the VEX form when AVX is on.
8013 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
8014 [(set (match_operand:SI 0 "register_operand" "=r")
8016 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8018 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8019 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8020 [(set_attr "type" "ssemov")
8021 (set_attr "prefix" "maybe_vex")
8022 (set_attr "mode" "<MODE>")])
;; sse2_pmovmskb: pmovmskb from a V16QI SSE register into an SI general
;; register, modelled as an unspec.  Emitted in VEX form when AVX is
;; enabled (%v prefix, "maybe_vex").
8024 (define_insn "sse2_pmovmskb"
8025 [(set (match_operand:SI 0 "register_operand" "=r")
8026 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8029 "%vpmovmskb\t{%1, %0|%0, %1}"
8030 [(set_attr "type" "ssemov")
8031 (set_attr "prefix_data16" "1")
8032 (set_attr "prefix" "maybe_vex")
8033 (set_attr "mode" "SI")])
;; sse2_maskmovdqu: named expander for the maskmovdqu store.  Operand 0 is
;; the V16QI memory destination; operands 1 and 2 are V16QI registers fed
;; into the unspec.  The insn patterns *sse2_maskmovdqu and
;; *sse2_maskmovdqu_rex64 provide the 32-bit and 64-bit forms.
8035 (define_expand "sse2_maskmovdqu"
8036 [(set (match_operand:V16QI 0 "memory_operand" "")
8037 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8038 (match_operand:V16QI 2 "register_operand" "")
;; *sse2_maskmovdqu: 32-bit form of maskmovdqu.  The destination address
;; is operand 0, an SI register restricted by constraint "D"; it does not
;; appear in the output template because the instruction uses it
;; implicitly.  The previous memory contents are listed as an input of the
;; unspec (mem (match_dup 0)), so the store is treated as a partial update
;; of that location.  Operands 1 and 2 are V16QI SSE registers.
;; length_vex is fixed at 3 because of the implicit address operand.
8044 (define_insn "*sse2_maskmovdqu"
8045 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8046 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8047 (match_operand:V16QI 2 "register_operand" "x")
8048 (mem:V16QI (match_dup 0))]
8050 "TARGET_SSE2 && !TARGET_64BIT"
8051 ;; @@@ check ordering of operands in intel/nonintel syntax
8052 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8053 [(set_attr "type" "ssemov")
8054 (set_attr "prefix_data16" "1")
8055 ;; The implicit %rdi operand confuses default length_vex computation.
8056 (set_attr "length_vex" "3")
8057 (set_attr "prefix" "maybe_vex")
8058 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64: 64-bit form of maskmovdqu; identical to
;; *sse2_maskmovdqu except that the implicit address register (operand 0,
;; constraint "D") is DImode.
;; length_vex is computed by hand: 3 + 1 when operand 2 lives in a REX SSE
;; register (REGNO >= FIRST_REX_SSE_REG), otherwise 2 + 1.
8060 (define_insn "*sse2_maskmovdqu_rex64"
8061 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8062 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8063 (match_operand:V16QI 2 "register_operand" "x")
8064 (mem:V16QI (match_dup 0))]
8066 "TARGET_SSE2 && TARGET_64BIT"
8067 ;; @@@ check ordering of operands in intel/nonintel syntax
8068 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8069 [(set_attr "type" "ssemov")
8070 (set_attr "prefix_data16" "1")
8071 ;; The implicit %rdi operand confuses default length_vex computation.
8072 (set (attr "length_vex")
8073 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8074 (set_attr "prefix" "maybe_vex")
8075 (set_attr "mode" "TI")])
;; sse_ldmxcsr: volatile unspec taking an SI memory operand; classified as
;; a memory load that touches MXCSR (atom_sse_attr "mxcsr").  Being
;; unspec_volatile, it is never deleted or reordered across other
;; volatile operations.
8077 (define_insn "sse_ldmxcsr"
8078 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8082 [(set_attr "type" "sse")
8083 (set_attr "atom_sse_attr" "mxcsr")
8084 (set_attr "prefix" "maybe_vex")
8085 (set_attr "memory" "load")])
;; sse_stmxcsr: store to an SI memory operand the value produced by the
;; volatile unspec UNSPECV_STMXCSR; classified as a memory store that
;; touches MXCSR (atom_sse_attr "mxcsr").
8087 (define_insn "sse_stmxcsr"
8088 [(set (match_operand:SI 0 "memory_operand" "=m")
8089 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8092 [(set_attr "type" "sse")
8093 (set_attr "atom_sse_attr" "mxcsr")
8094 (set_attr "prefix" "maybe_vex")
8095 (set_attr "memory" "store")])
;; sse_sfence: named expander for the store fence, available with SSE or
;; the extended 3DNow! set (TARGET_3DNOW_A).  The fence is expressed as a
;; set of a BLKmode memory to an UNSPEC_SFENCE of itself; the preparation
;; code creates that memory with a scratch address and marks it volatile.
8097 (define_expand "sse_sfence"
8099 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8100 "TARGET_SSE || TARGET_3DNOW_A"
8102 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8103 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: insn matching the RTL built by the sse_sfence expander.
;; Operand 0 has no predicate, so any BLKmode operand matches.  The
;; attributes declare no address bytes (length_address 0) and an unknown
;; memory effect.
8106 (define_insn "*sse_sfence"
8107 [(set (match_operand:BLK 0 "" "")
8108 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8109 "TARGET_SSE || TARGET_3DNOW_A"
8111 [(set_attr "type" "sse")
8112 (set_attr "length_address" "0")
8113 (set_attr "atom_sse_attr" "fence")
8114 (set_attr "memory" "unknown")])
;; sse2_clflush: volatile unspec whose single operand is an address
;; (address_operand, constraint "p") rather than a memory reference.
;; Scheduled like a fence on Atom and given an unknown memory effect.
8116 (define_insn "sse2_clflush"
8117 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8121 [(set_attr "type" "sse")
8122 (set_attr "atom_sse_attr" "fence")
8123 (set_attr "memory" "unknown")])
;; sse2_mfence: named expander for the full memory fence.  Built the same
;; way as sse_sfence: a BLKmode memory with a scratch address, marked
;; volatile, is set to an UNSPEC_MFENCE of itself.
8125 (define_expand "sse2_mfence"
8127 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8130 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8131 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: insn matching the RTL built by the sse2_mfence expander.
;; Enabled on any 64-bit target or when SSE2 is available.  Operand 0 has
;; no predicate; the attributes declare no address bytes and an unknown
;; memory effect.
8134 (define_insn "*sse2_mfence"
8135 [(set (match_operand:BLK 0 "" "")
8136 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8137 "TARGET_64BIT || TARGET_SSE2"
8139 [(set_attr "type" "sse")
8140 (set_attr "length_address" "0")
8141 (set_attr "atom_sse_attr" "fence")
8142 (set_attr "memory" "unknown")])
;; sse2_lfence: named expander for the load fence.  Built the same way as
;; sse_sfence: a BLKmode memory with a scratch address, marked volatile,
;; is set to an UNSPEC_LFENCE of itself.
8144 (define_expand "sse2_lfence"
8146 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8149 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8150 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: insn matching the RTL built by the sse2_lfence expander.
;; Unlike the sfence/mfence insns, its Atom scheduling class is "lfence"
;; rather than "fence".
8153 (define_insn "*sse2_lfence"
8154 [(set (match_operand:BLK 0 "" "")
8155 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8158 [(set_attr "type" "sse")
8159 (set_attr "length_address" "0")
8160 (set_attr "atom_sse_attr" "lfence")
8161 (set_attr "memory" "unknown")])
;; sse3_mwait: volatile unspec with two SI inputs pinned to fixed
;; registers by constraints "a" and "c".  The same SImode pattern serves
;; 64-bit targets, as the comment below explains.  Encoded length is
;; 3 bytes.
8163 (define_insn "sse3_mwait"
8164 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8165 (match_operand:SI 1 "register_operand" "c")]
8168 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8169 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8170 ;; we only need to set up 32bit registers.
8172 [(set_attr "length" "3")])
;; sse3_monitor: 32-bit form of monitor.  Three SI inputs are pinned to
;; fixed registers by constraints "a", "c" and "d" and printed explicitly
;; in the template.  Requires SSE3 on a non-64-bit target; encoded length
;; is 3 bytes.
8174 (define_insn "sse3_monitor"
8175 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8176 (match_operand:SI 1 "register_operand" "c")
8177 (match_operand:SI 2 "register_operand" "d")]
8179 "TARGET_SSE3 && !TARGET_64BIT"
8180 "monitor\t%0, %1, %2"
8181 [(set_attr "length" "3")])
;; sse3_monitor64: 64-bit form of monitor.  Operand 0 (constraint "a") is
;; DImode, while operands 1 and 2 ("c", "d") stay SImode for the reason
;; given in the comment below.  Requires SSE3 on a 64-bit target; encoded
;; length is 3 bytes.
8183 (define_insn "sse3_monitor64"
8184 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8185 (match_operand:SI 1 "register_operand" "c")
8186 (match_operand:SI 2 "register_operand" "d")]
8188 "TARGET_SSE3 && TARGET_64BIT"
8189 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8190 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8191 ;; zero extended to 64bit, we only need to set up 32bit registers.
8193 [(set_attr "length" "3")])
8195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8197 ;; SSSE3 instructions
8199 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; *avx_phaddwv8hi3: AVX vphaddw on V8HI.  The RTL spells the operation
;; out element by element: adjacent HI element pairs (0,1), (2,3), (4,5),
;; (6,7) are selected first from operand 1 and then from operand 2.
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; Output uses the three-operand VEX form.
8201 (define_insn "*avx_phaddwv8hi3"
8202 [(set (match_operand:V8HI 0 "register_operand" "=x")
8208 (match_operand:V8HI 1 "register_operand" "x")
8209 (parallel [(const_int 0)]))
8210 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8212 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8213 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8216 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8217 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8219 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8220 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8225 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8226 (parallel [(const_int 0)]))
8227 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8229 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8230 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8233 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8234 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8236 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8237 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8239 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8240 [(set_attr "type" "sseiadd")
8241 (set_attr "prefix_extra" "1")
8242 (set_attr "prefix" "vex")
8243 (set_attr "mode" "TI")])
;; ssse3_phaddwv8hi3: SSSE3 phaddw on V8HI (non-VEX).  Adjacent HI element
;; pairs (0,1), (2,3), (4,5), (6,7) are selected first from operand 1 and
;; then from operand 2.  Operand 1 is tied to the destination because the
;; two-operand instruction overwrites it; operand 2 may be a register or
;; memory.
8245 (define_insn "ssse3_phaddwv8hi3"
8246 [(set (match_operand:V8HI 0 "register_operand" "=x")
8252 (match_operand:V8HI 1 "register_operand" "0")
8253 (parallel [(const_int 0)]))
8254 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8256 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8257 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8260 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8261 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8263 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8264 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8269 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8270 (parallel [(const_int 0)]))
8271 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8273 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8274 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8277 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8278 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8280 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8281 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8283 "phaddw\t{%2, %0|%0, %2}"
8284 [(set_attr "type" "sseiadd")
8285 (set_attr "atom_unit" "complex")
8286 (set_attr "prefix_data16" "1")
8287 (set_attr "prefix_extra" "1")
8288 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: phaddw on V4HI in MMX registers (constraint "y").
;; Adjacent HI element pairs (0,1), (2,3) are selected first from operand
;; 1 and then from operand 2.  Operand 1 is tied to the destination;
;; operand 2 may be an MMX register or memory.  prefix_rex is computed at
;; output time from whether the insn mentions an extended register.
8290 (define_insn "ssse3_phaddwv4hi3"
8291 [(set (match_operand:V4HI 0 "register_operand" "=y")
8296 (match_operand:V4HI 1 "register_operand" "0")
8297 (parallel [(const_int 0)]))
8298 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8300 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8301 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8305 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8306 (parallel [(const_int 0)]))
8307 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8309 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8310 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8312 "phaddw\t{%2, %0|%0, %2}"
8313 [(set_attr "type" "sseiadd")
8314 (set_attr "atom_unit" "complex")
8315 (set_attr "prefix_extra" "1")
8316 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8317 (set_attr "mode" "DI")])
;; *avx_phadddv4si3: AVX vphaddd on V4SI.  Adjacent SI element pairs
;; (0,1), (2,3) are selected first from operand 1 and then from operand 2.
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; Output uses the three-operand VEX form.
8319 (define_insn "*avx_phadddv4si3"
8320 [(set (match_operand:V4SI 0 "register_operand" "=x")
8325 (match_operand:V4SI 1 "register_operand" "x")
8326 (parallel [(const_int 0)]))
8327 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8329 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8330 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8334 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8335 (parallel [(const_int 0)]))
8336 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8338 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8339 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8341 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8342 [(set_attr "type" "sseiadd")
8343 (set_attr "prefix_extra" "1")
8344 (set_attr "prefix" "vex")
8345 (set_attr "mode" "TI")])
;; ssse3_phadddv4si3: SSSE3 phaddd on V4SI (non-VEX).  Adjacent SI element
;; pairs (0,1), (2,3) are selected first from operand 1 and then from
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
8347 (define_insn "ssse3_phadddv4si3"
8348 [(set (match_operand:V4SI 0 "register_operand" "=x")
8353 (match_operand:V4SI 1 "register_operand" "0")
8354 (parallel [(const_int 0)]))
8355 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8357 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8358 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8362 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8363 (parallel [(const_int 0)]))
8364 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8366 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8367 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8369 "phaddd\t{%2, %0|%0, %2}"
8370 [(set_attr "type" "sseiadd")
8371 (set_attr "atom_unit" "complex")
8372 (set_attr "prefix_data16" "1")
8373 (set_attr "prefix_extra" "1")
8374 (set_attr "mode" "TI")])
;; ssse3_phadddv2si3: phaddd on V2SI in MMX registers (constraint "y").
;; The element pair (0,1) is selected from operand 1 and then from operand
;; 2.  Operand 1 is tied to the destination; operand 2 may be an MMX
;; register or memory.  prefix_rex is computed at output time from whether
;; the insn mentions an extended register.
8376 (define_insn "ssse3_phadddv2si3"
8377 [(set (match_operand:V2SI 0 "register_operand" "=y")
8381 (match_operand:V2SI 1 "register_operand" "0")
8382 (parallel [(const_int 0)]))
8383 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8386 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8387 (parallel [(const_int 0)]))
8388 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8390 "phaddd\t{%2, %0|%0, %2}"
8391 [(set_attr "type" "sseiadd")
8392 (set_attr "atom_unit" "complex")
8393 (set_attr "prefix_extra" "1")
8394 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8395 (set_attr "mode" "DI")])
;; *avx_phaddswv8hi3: AVX vphaddsw on V8HI.  Same element selection as
;; *avx_phaddwv8hi3: adjacent HI pairs (0,1), (2,3), (4,5), (6,7) taken
;; first from operand 1 and then from operand 2.  Operand 1 must be a
;; register; operand 2 may be a register or memory.
8397 (define_insn "*avx_phaddswv8hi3"
8398 [(set (match_operand:V8HI 0 "register_operand" "=x")
8404 (match_operand:V8HI 1 "register_operand" "x")
8405 (parallel [(const_int 0)]))
8406 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8408 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8409 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8412 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8413 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8415 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8416 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8421 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8422 (parallel [(const_int 0)]))
8423 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8425 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8426 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8429 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8430 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8432 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8433 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8435 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8436 [(set_attr "type" "sseiadd")
8437 (set_attr "prefix_extra" "1")
8438 (set_attr "prefix" "vex")
8439 (set_attr "mode" "TI")])
;; ssse3_phaddswv8hi3: SSSE3 phaddsw on V8HI (non-VEX).  Adjacent HI pairs
;; (0,1), (2,3), (4,5), (6,7) are selected first from operand 1 and then
;; from operand 2.  Operand 1 is tied to the destination; operand 2 may be
;; a register or memory.
8441 (define_insn "ssse3_phaddswv8hi3"
8442 [(set (match_operand:V8HI 0 "register_operand" "=x")
8448 (match_operand:V8HI 1 "register_operand" "0")
8449 (parallel [(const_int 0)]))
8450 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8452 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8453 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8456 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8457 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8459 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8460 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8465 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8466 (parallel [(const_int 0)]))
8467 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8469 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8470 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8473 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8474 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8476 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8477 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8479 "phaddsw\t{%2, %0|%0, %2}"
8480 [(set_attr "type" "sseiadd")
8481 (set_attr "atom_unit" "complex")
8482 (set_attr "prefix_data16" "1")
8483 (set_attr "prefix_extra" "1")
8484 (set_attr "mode" "TI")])
;; MMX-register ("y") variant that emits "phaddsw" on V4HI vectors.
;; Operand 1 is tied to the destination ("0"); operand 2 is register or
;; memory.  HI elements 0..3 of each source are selected in order.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8486 (define_insn "ssse3_phaddswv4hi3"
8487 [(set (match_operand:V4HI 0 "register_operand" "=y")
8492 (match_operand:V4HI 1 "register_operand" "0")
8493 (parallel [(const_int 0)]))
8494 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8496 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8497 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8501 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8502 (parallel [(const_int 0)]))
8503 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8505 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8506 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8508 "phaddsw\t{%2, %0|%0, %2}"
8509 [(set_attr "type" "sseiadd")
8510 (set_attr "atom_unit" "complex")
8511 (set_attr "prefix_extra" "1")
8512 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8513 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) pattern that emits the three-operand "vphsubw".
;; Operand 0 is the V8HI destination, operand 1 a register source and
;; operand 2 a register-or-memory source.  HI elements 0..7 of operand 1
;; and then of operand 2 are selected in order.
8515 (define_insn "*avx_phsubwv8hi3"
8516 [(set (match_operand:V8HI 0 "register_operand" "=x")
8522 (match_operand:V8HI 1 "register_operand" "x")
8523 (parallel [(const_int 0)]))
8524 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8526 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8527 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8530 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8531 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8533 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8534 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8539 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8540 (parallel [(const_int 0)]))
8541 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8543 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8544 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8547 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8548 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8550 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8551 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8553 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8554 [(set_attr "type" "sseiadd")
8555 (set_attr "prefix_extra" "1")
8556 (set_attr "prefix" "vex")
8557 (set_attr "mode" "TI")])
;; Two-operand SSE-register pattern that emits "phsubw" on V8HI vectors.
;; Operand 1 is tied to destination operand 0 (constraint "0"); operand 2
;; may be a register or memory.  HI elements 0..7 of each source are
;; selected in order.
8559 (define_insn "ssse3_phsubwv8hi3"
8560 [(set (match_operand:V8HI 0 "register_operand" "=x")
8566 (match_operand:V8HI 1 "register_operand" "0")
8567 (parallel [(const_int 0)]))
8568 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8570 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8571 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8574 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8575 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8577 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8578 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8583 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8584 (parallel [(const_int 0)]))
8585 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8587 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8588 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8591 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8592 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8594 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8595 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8597 "phsubw\t{%2, %0|%0, %2}"
8598 [(set_attr "type" "sseiadd")
8599 (set_attr "atom_unit" "complex")
8600 (set_attr "prefix_data16" "1")
8601 (set_attr "prefix_extra" "1")
8602 (set_attr "mode" "TI")])
;; MMX-register ("y") variant that emits "phsubw" on V4HI vectors.
;; Operand 1 is tied to the destination ("0"); operand 2 is register or
;; memory.  HI elements 0..3 of each source are selected in order.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8604 (define_insn "ssse3_phsubwv4hi3"
8605 [(set (match_operand:V4HI 0 "register_operand" "=y")
8610 (match_operand:V4HI 1 "register_operand" "0")
8611 (parallel [(const_int 0)]))
8612 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8614 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8615 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8619 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8620 (parallel [(const_int 0)]))
8621 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8623 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8624 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8626 "phsubw\t{%2, %0|%0, %2}"
8627 [(set_attr "type" "sseiadd")
8628 (set_attr "atom_unit" "complex")
8629 (set_attr "prefix_extra" "1")
8630 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8631 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) pattern that emits the three-operand "vphsubd".
;; Operand 0 is the V4SI destination, operand 1 a register source and
;; operand 2 a register-or-memory source.  SI elements 0..3 of operand 1
;; and then of operand 2 are selected in order.
8633 (define_insn "*avx_phsubdv4si3"
8634 [(set (match_operand:V4SI 0 "register_operand" "=x")
8639 (match_operand:V4SI 1 "register_operand" "x")
8640 (parallel [(const_int 0)]))
8641 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8643 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8644 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8648 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8649 (parallel [(const_int 0)]))
8650 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8652 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8653 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8655 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8656 [(set_attr "type" "sseiadd")
8657 (set_attr "prefix_extra" "1")
8658 (set_attr "prefix" "vex")
8659 (set_attr "mode" "TI")])
;; Two-operand SSE-register pattern that emits "phsubd" on V4SI vectors.
;; Operand 1 is tied to destination operand 0 (constraint "0"); operand 2
;; may be a register or memory.  SI elements 0..3 of each source are
;; selected in order.
8661 (define_insn "ssse3_phsubdv4si3"
8662 [(set (match_operand:V4SI 0 "register_operand" "=x")
8667 (match_operand:V4SI 1 "register_operand" "0")
8668 (parallel [(const_int 0)]))
8669 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8671 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8672 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8676 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8677 (parallel [(const_int 0)]))
8678 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8680 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8681 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8683 "phsubd\t{%2, %0|%0, %2}"
8684 [(set_attr "type" "sseiadd")
8685 (set_attr "atom_unit" "complex")
8686 (set_attr "prefix_data16" "1")
8687 (set_attr "prefix_extra" "1")
8688 (set_attr "mode" "TI")])
;; MMX-register ("y") variant that emits "phsubd" on V2SI vectors.
;; Operand 1 is tied to the destination ("0"); operand 2 is register or
;; memory.  SI elements 0 and 1 of each source are selected.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8690 (define_insn "ssse3_phsubdv2si3"
8691 [(set (match_operand:V2SI 0 "register_operand" "=y")
8695 (match_operand:V2SI 1 "register_operand" "0")
8696 (parallel [(const_int 0)]))
8697 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8700 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8701 (parallel [(const_int 0)]))
8702 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8704 "phsubd\t{%2, %0|%0, %2}"
8705 [(set_attr "type" "sseiadd")
8706 (set_attr "atom_unit" "complex")
8707 (set_attr "prefix_extra" "1")
8708 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8709 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) pattern that emits the three-operand "vphsubsw".
;; Operand 0 is the V8HI destination, operand 1 a register source and
;; operand 2 a register-or-memory source.  HI elements 0..7 of operand 1
;; and then of operand 2 are selected in order.
8711 (define_insn "*avx_phsubswv8hi3"
8712 [(set (match_operand:V8HI 0 "register_operand" "=x")
8718 (match_operand:V8HI 1 "register_operand" "x")
8719 (parallel [(const_int 0)]))
8720 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8722 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8723 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8726 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8727 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8729 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8730 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8735 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8736 (parallel [(const_int 0)]))
8737 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8739 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8740 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8743 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8744 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8746 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8747 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8749 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8750 [(set_attr "type" "sseiadd")
8751 (set_attr "prefix_extra" "1")
8752 (set_attr "prefix" "vex")
8753 (set_attr "mode" "TI")])
;; Two-operand SSE-register pattern that emits "phsubsw" on V8HI vectors.
;; Operand 1 is tied to destination operand 0 (constraint "0"); operand 2
;; may be a register or memory.  HI elements 0..7 of each source are
;; selected in order.
8755 (define_insn "ssse3_phsubswv8hi3"
8756 [(set (match_operand:V8HI 0 "register_operand" "=x")
8762 (match_operand:V8HI 1 "register_operand" "0")
8763 (parallel [(const_int 0)]))
8764 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8766 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8767 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8770 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8771 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8773 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8774 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8779 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8780 (parallel [(const_int 0)]))
8781 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8783 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8784 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8787 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8788 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8790 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8791 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8793 "phsubsw\t{%2, %0|%0, %2}"
8794 [(set_attr "type" "sseiadd")
8795 (set_attr "atom_unit" "complex")
8796 (set_attr "prefix_data16" "1")
8797 (set_attr "prefix_extra" "1")
8798 (set_attr "mode" "TI")])
;; MMX-register ("y") variant that emits "phsubsw" on V4HI vectors.
;; Operand 1 is tied to the destination ("0"); operand 2 is register or
;; memory.  HI elements 0..3 of each source are selected in order.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8800 (define_insn "ssse3_phsubswv4hi3"
8801 [(set (match_operand:V4HI 0 "register_operand" "=y")
8806 (match_operand:V4HI 1 "register_operand" "0")
8807 (parallel [(const_int 0)]))
8808 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8810 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8811 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8815 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8816 (parallel [(const_int 0)]))
8817 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8819 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8820 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8822 "phsubsw\t{%2, %0|%0, %2}"
8823 [(set_attr "type" "sseiadd")
8824 (set_attr "atom_unit" "complex")
8825 (set_attr "prefix_extra" "1")
8826 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8827 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) pattern that emits the three-operand "vpmaddubsw".
;; Both sources are V16QI (operand 1 register, operand 2 register or
;; memory) and the destination is V8HI.  Each source is selected twice:
;; once with an element list beginning at index 0 and once with a list
;; beginning at index 1 and ending at index 15.
8829 (define_insn "*avx_pmaddubsw128"
8830 [(set (match_operand:V8HI 0 "register_operand" "=x")
8835 (match_operand:V16QI 1 "register_operand" "x")
8836 (parallel [(const_int 0)
8846 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8847 (parallel [(const_int 0)
8857 (vec_select:V16QI (match_dup 1)
8858 (parallel [(const_int 1)
8867 (vec_select:V16QI (match_dup 2)
8868 (parallel [(const_int 1)
8875 (const_int 15)]))))))]
8877 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8878 [(set_attr "type" "sseiadd")
8879 (set_attr "prefix_extra" "1")
8880 (set_attr "prefix" "vex")
8881 (set_attr "mode" "TI")])
;; Two-operand SSE-register pattern that emits "pmaddubsw": V16QI sources,
;; V8HI destination.  Operand 1 is tied to the destination ("0"); operand 2
;; may be a register or memory.  Each source is selected twice: once with
;; an element list beginning at index 0 and once with a list beginning at
;; index 1 and ending at index 15.
8883 (define_insn "ssse3_pmaddubsw128"
8884 [(set (match_operand:V8HI 0 "register_operand" "=x")
8889 (match_operand:V16QI 1 "register_operand" "0")
8890 (parallel [(const_int 0)
8900 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8901 (parallel [(const_int 0)
8911 (vec_select:V16QI (match_dup 1)
8912 (parallel [(const_int 1)
8921 (vec_select:V16QI (match_dup 2)
8922 (parallel [(const_int 1)
8929 (const_int 15)]))))))]
8931 "pmaddubsw\t{%2, %0|%0, %2}"
8932 [(set_attr "type" "sseiadd")
8933 (set_attr "atom_unit" "simul")
8934 (set_attr "prefix_data16" "1")
8935 (set_attr "prefix_extra" "1")
8936 (set_attr "mode" "TI")])
;; MMX-register ("y") variant that emits "pmaddubsw": V8QI sources, V4HI
;; destination.  Operand 1 is tied to the destination ("0"); operand 2 may
;; be a register or memory.  Each source is selected twice: once with an
;; element list beginning at index 0 and once with a list beginning at
;; index 1 and ending at index 7.
8938 (define_insn "ssse3_pmaddubsw"
8939 [(set (match_operand:V4HI 0 "register_operand" "=y")
8944 (match_operand:V8QI 1 "register_operand" "0")
8945 (parallel [(const_int 0)
8951 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8952 (parallel [(const_int 0)
8958 (vec_select:V8QI (match_dup 1)
8959 (parallel [(const_int 1)
8964 (vec_select:V8QI (match_dup 2)
8965 (parallel [(const_int 1)
8968 (const_int 7)]))))))]
8970 "pmaddubsw\t{%2, %0|%0, %2}"
8971 [(set_attr "type" "sseiadd")
8972 (set_attr "atom_unit" "simul")
8973 (set_attr "prefix_extra" "1")
8974 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8975 (set_attr "mode" "DI")])
;; Named expander for the V8HI pmulhrsw operation.  Both sources are
;; accepted as nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT in V8HImode.  The template
;; includes a V8HI constant vector of eight 1s.
8977 (define_expand "ssse3_pmulhrswv8hi3"
8978 [(set (match_operand:V8HI 0 "register_operand" "")
8985 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8987 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8989 (const_vector:V8HI [(const_int 1) (const_int 1)
8990 (const_int 1) (const_int 1)
8991 (const_int 1) (const_int 1)
8992 (const_int 1) (const_int 1)]))
8995 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX (VEX-encoded) pattern that emits the three-operand "vpmulhrsw".
;; Enabled when TARGET_AVX holds and ix86_binary_operator_ok accepts the
;; operands.  The "%" in operand 1's constraint marks operands 1 and 2 as
;; commutative.  The template includes a V8HI constant vector of eight 1s.
8997 (define_insn "*avx_pmulhrswv8hi3"
8998 [(set (match_operand:V8HI 0 "register_operand" "=x")
9005 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9007 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9009 (const_vector:V8HI [(const_int 1) (const_int 1)
9010 (const_int 1) (const_int 1)
9011 (const_int 1) (const_int 1)
9012 (const_int 1) (const_int 1)]))
9014 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9015 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9016 [(set_attr "type" "sseimul")
9017 (set_attr "prefix_extra" "1")
9018 (set_attr "prefix" "vex")
9019 (set_attr "mode" "TI")])
;; Two-operand SSE-register pattern that emits "pmulhrsw" on V8HI vectors.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is commutative with operand 2 ("%") and tied to the
;; destination ("0").  The template includes a V8HI constant vector of
;; eight 1s.
9021 (define_insn "*ssse3_pmulhrswv8hi3"
9022 [(set (match_operand:V8HI 0 "register_operand" "=x")
9029 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9031 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9033 (const_vector:V8HI [(const_int 1) (const_int 1)
9034 (const_int 1) (const_int 1)
9035 (const_int 1) (const_int 1)
9036 (const_int 1) (const_int 1)]))
9038 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9039 "pmulhrsw\t{%2, %0|%0, %2}"
9040 [(set_attr "type" "sseimul")
9041 (set_attr "prefix_data16" "1")
9042 (set_attr "prefix_extra" "1")
9043 (set_attr "mode" "TI")])
;; Named expander for the V4HI pmulhrsw operation.  Both sources are
;; accepted as nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT in V4HImode.  The template
;; includes a V4HI constant vector of four 1s.
9045 (define_expand "ssse3_pmulhrswv4hi3"
9046 [(set (match_operand:V4HI 0 "register_operand" "")
9053 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9055 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9057 (const_vector:V4HI [(const_int 1) (const_int 1)
9058 (const_int 1) (const_int 1)]))
9061 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; MMX-register ("y") pattern that emits "pmulhrsw" on V4HI vectors.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is commutative with operand 2 ("%") and tied to the
;; destination ("0").  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
9063 (define_insn "*ssse3_pmulhrswv4hi3"
9064 [(set (match_operand:V4HI 0 "register_operand" "=y")
9071 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9073 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9075 (const_vector:V4HI [(const_int 1) (const_int 1)
9076 (const_int 1) (const_int 1)]))
9078 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9079 "pmulhrsw\t{%2, %0|%0, %2}"
9080 [(set_attr "type" "sseimul")
9081 (set_attr "prefix_extra" "1")
9082 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9083 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) pattern that emits the three-operand "vpshufb" on
;; V16QI vectors; the operation is represented as an unspec of operand 1
;; (register) and operand 2 (register or memory).
;; The ';' that follows the output template merely starts an empty
;; comment in machine-description syntax and has no effect.
9085 (define_insn "*avx_pshufbv16qi3"
9086 [(set (match_operand:V16QI 0 "register_operand" "=x")
9087 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9088 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9091 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9092 [(set_attr "type" "sselog1")
9093 (set_attr "prefix_extra" "1")
9094 (set_attr "prefix" "vex")
9095 (set_attr "mode" "TI")])
;; Two-operand SSE-register pattern that emits "pshufb" on V16QI vectors;
;; the operation is represented as an unspec.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.
;; The ';' that follows the output template merely starts an empty
;; comment in machine-description syntax and has no effect.
9097 (define_insn "ssse3_pshufbv16qi3"
9098 [(set (match_operand:V16QI 0 "register_operand" "=x")
9099 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9100 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9103 "pshufb\t{%2, %0|%0, %2}";
9104 [(set_attr "type" "sselog1")
9105 (set_attr "prefix_data16" "1")
9106 (set_attr "prefix_extra" "1")
9107 (set_attr "mode" "TI")])
;; MMX-register ("y") pattern that emits "pshufb" on V8QI vectors; the
;; operation is represented as an unspec.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.
;; The ';' that follows the output template merely starts an empty
;; comment in machine-description syntax and has no effect.
9109 (define_insn "ssse3_pshufbv8qi3"
9110 [(set (match_operand:V8QI 0 "register_operand" "=y")
9111 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9112 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9115 "pshufb\t{%2, %0|%0, %2}";
9116 [(set_attr "type" "sselog1")
9117 (set_attr "prefix_extra" "1")
9118 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9119 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) three-operand "vpsign" pattern, instantiated for each
;; mode of the SSEMODE124 iterator (V16QI, V8HI, V4SI).  The mnemonic
;; suffix comes from the <ssevecsize> mode attribute.
;; The ';' that follows the output template merely starts an empty
;; comment in machine-description syntax and has no effect.
9121 (define_insn "*avx_psign<mode>3"
9122 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9124 [(match_operand:SSEMODE124 1 "register_operand" "x")
9125 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9128 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9129 [(set_attr "type" "sselog1")
9130 (set_attr "prefix_extra" "1")
9131 (set_attr "prefix" "vex")
9132 (set_attr "mode" "TI")])
;; Two-operand SSE-register "psign" pattern, instantiated for each mode of
;; the SSEMODE124 iterator (V16QI, V8HI, V4SI).  Operand 1 is tied to the
;; destination ("0"); the mnemonic suffix comes from <ssevecsize>.
;; The ';' that follows the output template merely starts an empty
;; comment in machine-description syntax and has no effect.
9134 (define_insn "ssse3_psign<mode>3"
9135 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9137 [(match_operand:SSEMODE124 1 "register_operand" "0")
9138 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9141 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9142 [(set_attr "type" "sselog1")
9143 (set_attr "prefix_data16" "1")
9144 (set_attr "prefix_extra" "1")
9145 (set_attr "mode" "TI")])
;; MMX-register ("y") "psign" pattern, instantiated over the MMXMODEI
;; iterator; the template name is shared with the SSEMODE124 pattern, the
;; <mode> substitution makes the generated names distinct.  Operand 1 is
;; tied to the destination ("0"); the suffix comes from <mmxvecsize>.
;; The ';' that follows the output template merely starts an empty
;; comment in machine-description syntax and has no effect.
9147 (define_insn "ssse3_psign<mode>3"
9148 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9150 [(match_operand:MMXMODEI 1 "register_operand" "0")
9151 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9154 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9155 [(set_attr "type" "sselog1")
9156 (set_attr "prefix_extra" "1")
9157 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9158 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) pattern that emits the four-operand "vpalignr" on TI
;; values.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the output
;; code divides it by 8 before printing, so the assembler sees operand 3 / 8
;; (presumably a bit count in RTL converted to a byte count -- confirm).
9160 (define_insn "*avx_palignrti"
9161 [(set (match_operand:TI 0 "register_operand" "=x")
9162 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9163 (match_operand:TI 2 "nonimmediate_operand" "xm")
9164 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9168 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9169 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9171 [(set_attr "type" "sseishft")
9172 (set_attr "prefix_extra" "1")
9173 (set_attr "length_immediate" "1")
9174 (set_attr "prefix" "vex")
9175 (set_attr "mode" "TI")])
;; Two-operand SSE-register pattern that emits "palignr" on TI values;
;; operand 1 is tied to the destination ("0").  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably a bit count converted to a byte count -- confirm).
9177 (define_insn "ssse3_palignrti"
9178 [(set (match_operand:TI 0 "register_operand" "=x")
9179 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9180 (match_operand:TI 2 "nonimmediate_operand" "xm")
9181 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9185 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9186 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9188 [(set_attr "type" "sseishft")
9189 (set_attr "atom_unit" "sishuf")
9190 (set_attr "prefix_data16" "1")
9191 (set_attr "prefix_extra" "1")
9192 (set_attr "length_immediate" "1")
9193 (set_attr "mode" "TI")])
;; MMX-register ("y") pattern that emits "palignr" on DI values; operand 1
;; is tied to the destination ("0").  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably a bit count converted to a byte count -- confirm).
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9195 (define_insn "ssse3_palignrdi"
9196 [(set (match_operand:DI 0 "register_operand" "=y")
9197 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9198 (match_operand:DI 2 "nonimmediate_operand" "ym")
9199 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9203 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9204 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9206 [(set_attr "type" "sseishft")
9207 (set_attr "atom_unit" "sishuf")
9208 (set_attr "prefix_extra" "1")
9209 (set_attr "length_immediate" "1")
9210 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9211 (set_attr "mode" "DI")])
;; Vector absolute value (RTL "abs") for each mode of the SSEMODE124
;; iterator (V16QI, V8HI, V4SI); the source may be a register or memory.
;; Emits "pabs" with the <ssevecsize> suffix.  The "%v" at the start of the
;; template goes with the "maybe_vex" prefix attribute (presumably the
;; VEX-prefixed mnemonic is printed when AVX is enabled -- confirm).
9213 (define_insn "abs<mode>2"
9214 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9215 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9217 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9218 [(set_attr "type" "sselog1")
9219 (set_attr "prefix_data16" "1")
9220 (set_attr "prefix_extra" "1")
9221 (set_attr "prefix" "maybe_vex")
9222 (set_attr "mode" "TI")])
;; Vector absolute value (RTL "abs") on MMX registers ("y"), instantiated
;; over the MMXMODEI iterator; the source may be a register or memory.
;; Emits "pabs" with the <mmxvecsize> suffix.  prefix_rep is forced to 0.
;; The ';' that follows the output template merely starts an empty
;; comment in machine-description syntax and has no effect.
9224 (define_insn "abs<mode>2"
9225 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9226 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9228 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9229 [(set_attr "type" "sselog1")
9230 (set_attr "prefix_rep" "0")
9231 (set_attr "prefix_extra" "1")
9232 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9233 (set_attr "mode" "DI")])
9235 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9237 ;; AMD SSE4A instructions
9239 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Store of a MODEF scalar from an SSE register (operand 1) to memory
;; (operand 0), emitted as "movnts" plus the <ssemodefsuffix> suffix.
;; The insn's mode attribute follows the iterator mode.
9241 (define_insn "sse4a_movnt<mode>"
9242 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9244 [(match_operand:MODEF 1 "register_operand" "x")]
9247 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9248 [(set_attr "type" "ssemov")
9249 (set_attr "mode" "<MODE>")])
;; Store of element 0 of an SSEMODEF2P vector register (operand 1) to a
;; scalar memory location (operand 0) of mode <ssescalarmode>, emitted as
;; "movnts" plus the <ssemodesuffixf2c> suffix.
9251 (define_insn "sse4a_vmmovnt<mode>"
9252 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9253 (unspec:<ssescalarmode>
9254 [(vec_select:<ssescalarmode>
9255 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9256 (parallel [(const_int 0)]))]
9259 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9260 [(set_attr "type" "ssemov")
9261 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of "extrq": V2DI operand 1 is tied to the destination
;; ("0") and operands 2 and 3 are integer constants printed as the two
;; immediates (length_immediate is 2).
9263 (define_insn "sse4a_extrqi"
9264 [(set (match_operand:V2DI 0 "register_operand" "=x")
9265 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9266 (match_operand 2 "const_int_operand" "")
9267 (match_operand 3 "const_int_operand" "")]
9270 "extrq\t{%3, %2, %0|%0, %2, %3}"
9271 [(set_attr "type" "sse")
9272 (set_attr "prefix_data16" "1")
9273 (set_attr "length_immediate" "2")
9274 (set_attr "mode" "TI")])
;; Register form of "extrq": V2DI operand 1 is tied to the destination
;; ("0") and operand 2 is a V16QI SSE register.
9276 (define_insn "sse4a_extrq"
9277 [(set (match_operand:V2DI 0 "register_operand" "=x")
9278 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9279 (match_operand:V16QI 2 "register_operand" "x")]
9282 "extrq\t{%2, %0|%0, %2}"
9283 [(set_attr "type" "sse")
9284 (set_attr "prefix_data16" "1")
9285 (set_attr "mode" "TI")])
;; Immediate form of "insertq": V2DI operand 1 is tied to the destination
;; ("0"), operand 2 is a V2DI SSE register, and operands 3 and 4 are
;; integer constants printed as the two immediates (length_immediate is 2).
;; The attributes select a rep prefix and no data16 prefix.
9287 (define_insn "sse4a_insertqi"
9288 [(set (match_operand:V2DI 0 "register_operand" "=x")
9289 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9290 (match_operand:V2DI 2 "register_operand" "x")
9291 (match_operand 3 "const_int_operand" "")
9292 (match_operand 4 "const_int_operand" "")]
9295 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9296 [(set_attr "type" "sseins")
9297 (set_attr "prefix_data16" "0")
9298 (set_attr "prefix_rep" "1")
9299 (set_attr "length_immediate" "2")
9300 (set_attr "mode" "TI")])
;; Register form of "insertq": V2DI operand 1 is tied to the destination
;; ("0") and operand 2 is a V2DI SSE register.  The attributes select a
;; rep prefix and no data16 prefix.
9302 (define_insn "sse4a_insertq"
9303 [(set (match_operand:V2DI 0 "register_operand" "=x")
9304 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9305 (match_operand:V2DI 2 "register_operand" "x")]
9308 "insertq\t{%2, %0|%0, %2}"
9309 [(set_attr "type" "sseins")
9310 (set_attr "prefix_data16" "0")
9311 (set_attr "prefix_rep" "1")
9312 (set_attr "mode" "TI")])
9314 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9316 ;; Intel SSE4.1 instructions
9318 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX (VEX-encoded) immediate blend, emitted as "vblendp" plus the
;; <avxmodesuffixf2c> suffix, for each AVXMODEF2P mode.  Modelled as a
;; vec_merge whose first arm is operand 2 (register or memory) and whose
;; second arm is operand 1 (register); operand 3 is the immediate mask,
;; range-checked by the const_0_to_<blendbits>_operand predicate.
9320 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9321 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9322 (vec_merge:AVXMODEF2P
9323 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9324 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9325 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9327 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9328 [(set_attr "type" "ssemov")
9329 (set_attr "prefix_extra" "1")
9330 (set_attr "length_immediate" "1")
9331 (set_attr "prefix" "vex")
9332 (set_attr "mode" "<avxvecmode>")])
;; AVX (VEX-encoded) variable blend, emitted as "vblendvp" plus the
;; <avxmodesuffixf2c> suffix, for each AVXMODEF2P mode.  Takes three vector
;; sources: operand 1 (register), operand 2 (register or memory) and
;; operand 3 (register).
;; NOTE(review): length_immediate is 1 although no immediate operand
;; appears in the pattern; presumably this accounts for a trailing byte
;; that encodes operand 3 -- confirm against the instruction encoding.
9334 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9335 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9337 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9338 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9339 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9342 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9343 [(set_attr "type" "ssemov")
9344 (set_attr "prefix_extra" "1")
9345 (set_attr "length_immediate" "1")
9346 (set_attr "prefix" "vex")
9347 (set_attr "mode" "<avxvecmode>")])
;; Two-operand immediate blend, emitted as "blendp" plus the
;; <ssemodesuffixf2c> suffix, for each SSEMODEF2P mode.  Modelled as a
;; vec_merge whose first arm is operand 2 (register or memory) and whose
;; second arm is operand 1, which is tied to the destination ("0");
;; operand 3 is the immediate mask.
9349 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9350 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9351 (vec_merge:SSEMODEF2P
9352 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9353 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9354 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9356 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9357 [(set_attr "type" "ssemov")
9358 (set_attr "prefix_data16" "1")
9359 (set_attr "prefix_extra" "1")
9360 (set_attr "length_immediate" "1")
9361 (set_attr "mode" "<MODE>")])
;; Two-operand variable blend, emitted as "blendvp" plus the
;; <ssemodesuffixf2c> suffix, for each SSEMODEF2P mode.  Operands 0, 1 and
;; 2 use the *_not_xmm0_operand predicates, while selector operand 3 has
;; constraint "Yz" (presumably pinning it to xmm0 -- confirm against the
;; constraint definitions).  Operand 1 is tied to the destination ("0").
9363 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9364 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9366 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9367 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9368 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9371 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9372 [(set_attr "type" "ssemov")
9373 (set_attr "prefix_data16" "1")
9374 (set_attr "prefix_extra" "1")
9375 (set_attr "mode" "<MODE>")])
;; AVX (VEX-encoded) pattern emitted as "vdpp" plus the <avxmodesuffixf2c>
;; suffix, for each AVXMODEF2P mode.  Operands 1 and 2 are marked
;; commutative ("%"); operand 3 is an immediate in the range accepted by
;; const_0_to_255_operand.
9377 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9378 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9380 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9381 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9382 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9385 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9386 [(set_attr "type" "ssemul")
9387 (set_attr "prefix" "vex")
9388 (set_attr "prefix_extra" "1")
9389 (set_attr "length_immediate" "1")
9390 (set_attr "mode" "<avxvecmode>")])
;; Two-operand pattern emitted as "dpp" plus the <ssemodesuffixf2c> suffix,
;; for each SSEMODEF2P mode.  Operand 1 is commutative with operand 2 ("%")
;; and tied to the destination ("0"); operand 3 is an immediate in the
;; range accepted by const_0_to_255_operand.
9392 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9393 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9395 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9396 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9397 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9400 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9401 [(set_attr "type" "ssemul")
9402 (set_attr "prefix_data16" "1")
9403 (set_attr "prefix_extra" "1")
9404 (set_attr "length_immediate" "1")
9405 (set_attr "mode" "<MODE>")])
;; Load of a V2DI value from memory (operand 1) into an SSE register
;; (operand 0), emitted as "movntdqa" and represented as an unspec.  The
;; "%v" in the template goes with the "maybe_vex" prefix attribute.
9407 (define_insn "sse4_1_movntdqa"
9408 [(set (match_operand:V2DI 0 "register_operand" "=x")
9409 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9412 "%vmovntdqa\t{%1, %0|%0, %1}"
9413 [(set_attr "type" "ssemov")
9414 (set_attr "prefix_extra" "1")
9415 (set_attr "prefix" "maybe_vex")
9416 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) pattern that emits the four-operand "vmpsadbw" on
;; V16QI vectors, represented as an unspec.  Operand 1 is a register,
;; operand 2 a register or memory, and operand 3 an immediate accepted by
;; const_0_to_255_operand.
9418 (define_insn "*avx_mpsadbw"
9419 [(set (match_operand:V16QI 0 "register_operand" "=x")
9420 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9421 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9422 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9425 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9426 [(set_attr "type" "sselog1")
9427 (set_attr "prefix" "vex")
9428 (set_attr "prefix_extra" "1")
9429 (set_attr "length_immediate" "1")
9430 (set_attr "mode" "TI")])
;; Two-operand pattern that emits "mpsadbw" on V16QI vectors, represented
;; as an unspec.  Operand 1 is tied to the destination ("0"), operand 2 is
;; a register or memory, and operand 3 is an immediate accepted by
;; const_0_to_255_operand.
9432 (define_insn "sse4_1_mpsadbw"
9433 [(set (match_operand:V16QI 0 "register_operand" "=x")
9434 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9435 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9436 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9439 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9440 [(set_attr "type" "sselog1")
9441 (set_attr "prefix_extra" "1")
9442 (set_attr "length_immediate" "1")
9443 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) pattern that emits the three-operand "vpackusdw":
;; two V4SI sources (operand 1 register, operand 2 register or memory)
;; produce a V8HI destination.
9445 (define_insn "*avx_packusdw"
9446 [(set (match_operand:V8HI 0 "register_operand" "=x")
9449 (match_operand:V4SI 1 "register_operand" "x"))
9451 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9453 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9454 [(set_attr "type" "sselog")
9455 (set_attr "prefix_extra" "1")
9456 (set_attr "prefix" "vex")
9457 (set_attr "mode" "TI")])
;; Two-operand pattern that emits "packusdw": two V4SI sources produce a
;; V8HI destination.  Operand 1 is tied to the destination ("0"); operand 2
;; may be a register or memory.
9459 (define_insn "sse4_1_packusdw"
9460 [(set (match_operand:V8HI 0 "register_operand" "=x")
9463 (match_operand:V4SI 1 "register_operand" "0"))
9465 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9467 "packusdw\t{%2, %0|%0, %2}"
9468 [(set_attr "type" "sselog")
9469 (set_attr "prefix_extra" "1")
9470 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) pattern that emits the four-operand "vpblendvb" on
;; V16QI vectors, represented as an unspec of operand 1 (register),
;; operand 2 (register or memory) and operand 3 (register).
;; NOTE(review): length_immediate is 1 although no immediate operand
;; appears in the pattern; presumably this accounts for a trailing byte
;; that encodes operand 3 -- confirm against the instruction encoding.
9472 (define_insn "*avx_pblendvb"
9473 [(set (match_operand:V16QI 0 "register_operand" "=x")
9474 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9475 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9476 (match_operand:V16QI 3 "register_operand" "x")]
9479 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9480 [(set_attr "type" "ssemov")
9481 (set_attr "prefix_extra" "1")
9482 (set_attr "length_immediate" "1")
9483 (set_attr "prefix" "vex")
9484 (set_attr "mode" "TI")])
;; Two-operand pattern that emits "pblendvb" on V16QI vectors, represented
;; as an unspec.  Operands 0, 1 and 2 use the *_not_xmm0_operand
;; predicates, while selector operand 3 has constraint "Yz" (presumably
;; pinning it to xmm0 -- confirm against the constraint definitions).
;; Operand 1 is tied to the destination ("0").
9486 (define_insn "sse4_1_pblendvb"
9487 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9488 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9489 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9490 (match_operand:V16QI 3 "register_operand" "Yz")]
9493 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9494 [(set_attr "type" "ssemov")
9495 (set_attr "prefix_extra" "1")
9496 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) pattern that emits the four-operand "vpblendw" on
;; V8HI vectors.  Operand 2 (register or memory) is listed before operand 1
;; (register) in the RTL; operand 3 is an immediate accepted by
;; const_0_to_255_operand.
9498 (define_insn "*avx_pblendw"
9499 [(set (match_operand:V8HI 0 "register_operand" "=x")
9501 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9502 (match_operand:V8HI 1 "register_operand" "x")
9503 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9505 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9506 [(set_attr "type" "ssemov")
9507 (set_attr "prefix" "vex")
9508 (set_attr "prefix_extra" "1")
9509 (set_attr "length_immediate" "1")
9510 (set_attr "mode" "TI")])
;; Two-operand pattern that emits "pblendw" on V8HI vectors.  Operand 2
;; (register or memory) is listed before operand 1 in the RTL; operand 1 is
;; tied to the destination ("0"); operand 3 is an immediate accepted by
;; const_0_to_255_operand.
9512 (define_insn "sse4_1_pblendw"
9513 [(set (match_operand:V8HI 0 "register_operand" "=x")
9515 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9516 (match_operand:V8HI 1 "register_operand" "0")
9517 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9519 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9520 [(set_attr "type" "ssemov")
9521 (set_attr "prefix_extra" "1")
9522 (set_attr "length_immediate" "1")
9523 (set_attr "mode" "TI")])
;; Single-source pattern that emits "phminposuw" on a V8HI vector,
;; represented as UNSPEC_PHMINPOSUW; the source may be a register or
;; memory.  The "%v" in the template goes with the "maybe_vex" prefix
;; attribute.
9525 (define_insn "sse4_1_phminposuw"
9526 [(set (match_operand:V8HI 0 "register_operand" "=x")
9527 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9528 UNSPEC_PHMINPOSUW))]
9530 "%vphminposuw\t{%1, %0|%0, %1}"
9531 [(set_attr "type" "sselog1")
9532 (set_attr "prefix_extra" "1")
9533 (set_attr "prefix" "maybe_vex")
9534 (set_attr "mode" "TI")])
;; Register-source form that emits "pmovsxbw": takes a full V16QI register
;; (operand 1), selects elements starting at index 0, and produces V8HI.
;; The "%v" in the template goes with the "maybe_vex" prefix attribute.
9536 (define_insn "sse4_1_extendv8qiv8hi2"
9537 [(set (match_operand:V8HI 0 "register_operand" "=x")
9540 (match_operand:V16QI 1 "register_operand" "x")
9541 (parallel [(const_int 0)
9550 "%vpmovsxbw\t{%1, %0|%0, %1}"
9551 [(set_attr "type" "ssemov")
9552 (set_attr "prefix_extra" "1")
9553 (set_attr "prefix" "maybe_vex")
9554 (set_attr "mode" "TI")])
;; Variant of the "pmovsxbw" pattern whose source is a V8QI register or
;; memory operand wrapped in vec_duplicate:V16QI, so the narrow source can
;; be matched directly.  Produces V8HI.
9556 (define_insn "*sse4_1_extendv8qiv8hi2"
9557 [(set (match_operand:V8HI 0 "register_operand" "=x")
9560 (vec_duplicate:V16QI
9561 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9562 (parallel [(const_int 0)
9571 "%vpmovsxbw\t{%1, %0|%0, %1}"
9572 [(set_attr "type" "ssemov")
9573 (set_attr "prefix_extra" "1")
9574 (set_attr "prefix" "maybe_vex")
9575 (set_attr "mode" "TI")])
9577 (define_insn "sse4_1_extendv4qiv4si2"
9578 [(set (match_operand:V4SI 0 "register_operand" "=x")
9581 (match_operand:V16QI 1 "register_operand" "x")
9582 (parallel [(const_int 0)
9587 "%vpmovsxbd\t{%1, %0|%0, %1}"
9588 [(set_attr "type" "ssemov")
9589 (set_attr "prefix_extra" "1")
9590 (set_attr "prefix" "maybe_vex")
9591 (set_attr "mode" "TI")])
9593 (define_insn "*sse4_1_extendv4qiv4si2"
9594 [(set (match_operand:V4SI 0 "register_operand" "=x")
9597 (vec_duplicate:V16QI
9598 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9599 (parallel [(const_int 0)
9604 "%vpmovsxbd\t{%1, %0|%0, %1}"
9605 [(set_attr "type" "ssemov")
9606 (set_attr "prefix_extra" "1")
9607 (set_attr "prefix" "maybe_vex")
9608 (set_attr "mode" "TI")])
9610 (define_insn "sse4_1_extendv2qiv2di2"
9611 [(set (match_operand:V2DI 0 "register_operand" "=x")
9614 (match_operand:V16QI 1 "register_operand" "x")
9615 (parallel [(const_int 0)
9618 "%vpmovsxbq\t{%1, %0|%0, %1}"
9619 [(set_attr "type" "ssemov")
9620 (set_attr "prefix_extra" "1")
9621 (set_attr "prefix" "maybe_vex")
9622 (set_attr "mode" "TI")])
9624 (define_insn "*sse4_1_extendv2qiv2di2"
9625 [(set (match_operand:V2DI 0 "register_operand" "=x")
9628 (vec_duplicate:V16QI
9629 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9630 (parallel [(const_int 0)
9633 "%vpmovsxbq\t{%1, %0|%0, %1}"
9634 [(set_attr "type" "ssemov")
9635 (set_attr "prefix_extra" "1")
9636 (set_attr "prefix" "maybe_vex")
9637 (set_attr "mode" "TI")])
9639 (define_insn "sse4_1_extendv4hiv4si2"
9640 [(set (match_operand:V4SI 0 "register_operand" "=x")
9643 (match_operand:V8HI 1 "register_operand" "x")
9644 (parallel [(const_int 0)
9649 "%vpmovsxwd\t{%1, %0|%0, %1}"
9650 [(set_attr "type" "ssemov")
9651 (set_attr "prefix_extra" "1")
9652 (set_attr "prefix" "maybe_vex")
9653 (set_attr "mode" "TI")])
;; Register-or-memory source form of the halfword -> word sign extension
;; (emits pmovsxwd / vpmovsxwd).  The source operand is V4HI: four 16-bit
;; elements feed the V4SI result, which is also the mode used by the
;; zero-extend counterpart *sse4_1_zero_extendv4hiv4si2.  The operand was
;; previously declared V2HI, which covers only half of the elements that
;; are selected and extended.
9655 (define_insn "*sse4_1_extendv4hiv4si2"
9656 [(set (match_operand:V4SI 0 "register_operand" "=x")
9660 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9661 (parallel [(const_int 0)
9666 "%vpmovsxwd\t{%1, %0|%0, %1}"
9667 [(set_attr "type" "ssemov")
9668 (set_attr "prefix_extra" "1")
9669 (set_attr "prefix" "maybe_vex")
9670 (set_attr "mode" "TI")])
9672 (define_insn "sse4_1_extendv2hiv2di2"
9673 [(set (match_operand:V2DI 0 "register_operand" "=x")
9676 (match_operand:V8HI 1 "register_operand" "x")
9677 (parallel [(const_int 0)
9680 "%vpmovsxwq\t{%1, %0|%0, %1}"
9681 [(set_attr "type" "ssemov")
9682 (set_attr "prefix_extra" "1")
9683 (set_attr "prefix" "maybe_vex")
9684 (set_attr "mode" "TI")])
;; Register-or-memory source form of the halfword -> quadword sign
;; extension (emits pmovsxwq / vpmovsxwq).  The source operand is V2HI:
;; two 16-bit elements feed the V2DI result, which is also the mode used
;; by the zero-extend counterpart *sse4_1_zero_extendv2hiv2di2.  The
;; operand was previously declared V8HI, i.e. a full 16-byte vector,
;; unlike every other narrow-source extend pattern in this group.
9686 (define_insn "*sse4_1_extendv2hiv2di2"
9687 [(set (match_operand:V2DI 0 "register_operand" "=x")
9691 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9692 (parallel [(const_int 0)
9695 "%vpmovsxwq\t{%1, %0|%0, %1}"
9696 [(set_attr "type" "ssemov")
9697 (set_attr "prefix_extra" "1")
9698 (set_attr "prefix" "maybe_vex")
9699 (set_attr "mode" "TI")])
9701 (define_insn "sse4_1_extendv2siv2di2"
9702 [(set (match_operand:V2DI 0 "register_operand" "=x")
9705 (match_operand:V4SI 1 "register_operand" "x")
9706 (parallel [(const_int 0)
9709 "%vpmovsxdq\t{%1, %0|%0, %1}"
9710 [(set_attr "type" "ssemov")
9711 (set_attr "prefix_extra" "1")
9712 (set_attr "prefix" "maybe_vex")
9713 (set_attr "mode" "TI")])
9715 (define_insn "*sse4_1_extendv2siv2di2"
9716 [(set (match_operand:V2DI 0 "register_operand" "=x")
9720 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9721 (parallel [(const_int 0)
9724 "%vpmovsxdq\t{%1, %0|%0, %1}"
9725 [(set_attr "type" "ssemov")
9726 (set_attr "prefix_extra" "1")
9727 (set_attr "prefix" "maybe_vex")
9728 (set_attr "mode" "TI")])
9730 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9731 [(set (match_operand:V8HI 0 "register_operand" "=x")
9734 (match_operand:V16QI 1 "register_operand" "x")
9735 (parallel [(const_int 0)
9744 "%vpmovzxbw\t{%1, %0|%0, %1}"
9745 [(set_attr "type" "ssemov")
9746 (set_attr "prefix_extra" "1")
9747 (set_attr "prefix" "maybe_vex")
9748 (set_attr "mode" "TI")])
9750 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9751 [(set (match_operand:V8HI 0 "register_operand" "=x")
9754 (vec_duplicate:V16QI
9755 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9756 (parallel [(const_int 0)
9765 "%vpmovzxbw\t{%1, %0|%0, %1}"
9766 [(set_attr "type" "ssemov")
9767 (set_attr "prefix_extra" "1")
9768 (set_attr "prefix" "maybe_vex")
9769 (set_attr "mode" "TI")])
9771 (define_insn "sse4_1_zero_extendv4qiv4si2"
9772 [(set (match_operand:V4SI 0 "register_operand" "=x")
9775 (match_operand:V16QI 1 "register_operand" "x")
9776 (parallel [(const_int 0)
9781 "%vpmovzxbd\t{%1, %0|%0, %1}"
9782 [(set_attr "type" "ssemov")
9783 (set_attr "prefix_extra" "1")
9784 (set_attr "prefix" "maybe_vex")
9785 (set_attr "mode" "TI")])
9787 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9788 [(set (match_operand:V4SI 0 "register_operand" "=x")
9791 (vec_duplicate:V16QI
9792 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9793 (parallel [(const_int 0)
9798 "%vpmovzxbd\t{%1, %0|%0, %1}"
9799 [(set_attr "type" "ssemov")
9800 (set_attr "prefix_extra" "1")
9801 (set_attr "prefix" "maybe_vex")
9802 (set_attr "mode" "TI")])
9804 (define_insn "sse4_1_zero_extendv2qiv2di2"
9805 [(set (match_operand:V2DI 0 "register_operand" "=x")
9808 (match_operand:V16QI 1 "register_operand" "x")
9809 (parallel [(const_int 0)
9812 "%vpmovzxbq\t{%1, %0|%0, %1}"
9813 [(set_attr "type" "ssemov")
9814 (set_attr "prefix_extra" "1")
9815 (set_attr "prefix" "maybe_vex")
9816 (set_attr "mode" "TI")])
9818 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9819 [(set (match_operand:V2DI 0 "register_operand" "=x")
9822 (vec_duplicate:V16QI
9823 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9824 (parallel [(const_int 0)
9827 "%vpmovzxbq\t{%1, %0|%0, %1}"
9828 [(set_attr "type" "ssemov")
9829 (set_attr "prefix_extra" "1")
9830 (set_attr "prefix" "maybe_vex")
9831 (set_attr "mode" "TI")])
9833 (define_insn "sse4_1_zero_extendv4hiv4si2"
9834 [(set (match_operand:V4SI 0 "register_operand" "=x")
9837 (match_operand:V8HI 1 "register_operand" "x")
9838 (parallel [(const_int 0)
9843 "%vpmovzxwd\t{%1, %0|%0, %1}"
9844 [(set_attr "type" "ssemov")
9845 (set_attr "prefix_extra" "1")
9846 (set_attr "prefix" "maybe_vex")
9847 (set_attr "mode" "TI")])
9849 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9850 [(set (match_operand:V4SI 0 "register_operand" "=x")
9854 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9855 (parallel [(const_int 0)
9860 "%vpmovzxwd\t{%1, %0|%0, %1}"
9861 [(set_attr "type" "ssemov")
9862 (set_attr "prefix_extra" "1")
9863 (set_attr "prefix" "maybe_vex")
9864 (set_attr "mode" "TI")])
9866 (define_insn "sse4_1_zero_extendv2hiv2di2"
9867 [(set (match_operand:V2DI 0 "register_operand" "=x")
9870 (match_operand:V8HI 1 "register_operand" "x")
9871 (parallel [(const_int 0)
9874 "%vpmovzxwq\t{%1, %0|%0, %1}"
9875 [(set_attr "type" "ssemov")
9876 (set_attr "prefix_extra" "1")
9877 (set_attr "prefix" "maybe_vex")
9878 (set_attr "mode" "TI")])
9880 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9881 [(set (match_operand:V2DI 0 "register_operand" "=x")
9885 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9886 (parallel [(const_int 0)
9889 "%vpmovzxwq\t{%1, %0|%0, %1}"
9890 [(set_attr "type" "ssemov")
9891 (set_attr "prefix_extra" "1")
9892 (set_attr "prefix" "maybe_vex")
9893 (set_attr "mode" "TI")])
9895 (define_insn "sse4_1_zero_extendv2siv2di2"
9896 [(set (match_operand:V2DI 0 "register_operand" "=x")
9899 (match_operand:V4SI 1 "register_operand" "x")
9900 (parallel [(const_int 0)
9903 "%vpmovzxdq\t{%1, %0|%0, %1}"
9904 [(set_attr "type" "ssemov")
9905 (set_attr "prefix_extra" "1")
9906 (set_attr "prefix" "maybe_vex")
9907 (set_attr "mode" "TI")])
9909 (define_insn "*sse4_1_zero_extendv2siv2di2"
9910 [(set (match_operand:V2DI 0 "register_operand" "=x")
9914 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9915 (parallel [(const_int 0)
9918 "%vpmovzxdq\t{%1, %0|%0, %1}"
9919 [(set_attr "type" "ssemov")
9920 (set_attr "prefix_extra" "1")
9921 (set_attr "prefix" "maybe_vex")
9922 (set_attr "mode" "TI")])
9924 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9925 ;; setting FLAGS_REG.  But they are not really compare instructions.
9926 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9927 [(set (reg:CC FLAGS_REG)
9928 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9929 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9932 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9933 [(set_attr "type" "ssecomi")
9934 (set_attr "prefix_extra" "1")
9935 (set_attr "prefix" "vex")
9936 (set_attr "mode" "<MODE>")])
9938 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9939 ;; But it is not really a compare instruction.
9940 (define_insn "avx_ptest256"
9941 [(set (reg:CC FLAGS_REG)
9942 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9943 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9946 "vptest\t{%1, %0|%0, %1}"
9947 [(set_attr "type" "ssecomi")
9948 (set_attr "prefix_extra" "1")
9949 (set_attr "prefix" "vex")
9950 (set_attr "mode" "OI")])
9952 (define_insn "sse4_1_ptest"
9953 [(set (reg:CC FLAGS_REG)
9954 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9955 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9958 "%vptest\t{%1, %0|%0, %1}"
9959 [(set_attr "type" "ssecomi")
9960 (set_attr "prefix_extra" "1")
9961 (set_attr "prefix" "maybe_vex")
9962 (set_attr "mode" "TI")])
9964 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9965 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9966 (unspec:AVX256MODEF2P
9967 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9968 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9971 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9972 [(set_attr "type" "ssecvt")
9973 (set_attr "prefix_extra" "1")
9974 (set_attr "length_immediate" "1")
9975 (set_attr "prefix" "vex")
9976 (set_attr "mode" "<MODE>")])
9978 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9979 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9981 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9982 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9985 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9986 [(set_attr "type" "ssecvt")
9987 (set_attr "prefix_data16" "1")
9988 (set_attr "prefix_extra" "1")
9989 (set_attr "length_immediate" "1")
9990 (set_attr "prefix" "maybe_vex")
9991 (set_attr "mode" "<MODE>")])
9993 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9994 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9995 (vec_merge:SSEMODEF2P
9997 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9998 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10000 (match_operand:SSEMODEF2P 1 "register_operand" "x")
10003 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10004 [(set_attr "type" "ssecvt")
10005 (set_attr "prefix_extra" "1")
10006 (set_attr "length_immediate" "1")
10007 (set_attr "prefix" "vex")
10008 (set_attr "mode" "<MODE>")])
10010 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
10011 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10012 (vec_merge:SSEMODEF2P
10014 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10015 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10017 (match_operand:SSEMODEF2P 1 "register_operand" "0")
10020 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
10021 [(set_attr "type" "ssecvt")
10022 (set_attr "prefix_data16" "1")
10023 (set_attr "prefix_extra" "1")
10024 (set_attr "length_immediate" "1")
10025 (set_attr "mode" "<MODE>")])
10027 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10029 ;; Intel SSE4.2 string/text processing instructions
10031 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined PCMPESTR pattern.  It describes all three results at once:
;; SI operand 0 (constraint "c"), V16QI operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are the two V16QI sources (operands 2 and 4, which
;; must satisfy the *_not_xmm0_operand predicates), two SI registers
;; constrained to "a" and "d" (operands 3 and 5) and an immediate
;; (operand 6).
;;
;; The split code inspects the REG_UNUSED notes on curr_insn to learn
;; which of the three results are live (locals ecx, xmm0 and flags) and
;; then emits sse4_2_pcmpestri, sse4_2_pcmpestrm and/or
;; sse4_2_pcmpestr_cconly.  The flags-only insn is emitted only when the
;; flags are live and neither register result is.
;; NOTE(review): the guards on the first two emit_insn calls are
;; presumably the ecx and xmm0 locals -- confirm against the full source.
10033 (define_insn_and_split "sse4_2_pcmpestr"
10034 [(set (match_operand:SI 0 "register_operand" "=c,c")
10036 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10037 (match_operand:SI 3 "register_operand" "a,a")
10038 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10039 (match_operand:SI 5 "register_operand" "d,d")
10040 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10042 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10050 (set (reg:CC FLAGS_REG)
10059 && can_create_pseudo_p ()"
10064 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10065 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10066 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10069 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10070 operands[3], operands[4],
10071 operands[5], operands[6]));
10073 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10074 operands[3], operands[4],
10075 operands[5], operands[6]));
10076 if (flags && !(ecx || xmm0))
10077 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10078 operands[2], operands[3],
10079 operands[4], operands[5],
10083 [(set_attr "type" "sselog")
10084 (set_attr "prefix_data16" "1")
10085 (set_attr "prefix_extra" "1")
10086 (set_attr "length_immediate" "1")
10087 (set_attr "memory" "none,load")
10088 (set_attr "mode" "TI")])
10090 (define_insn "sse4_2_pcmpestri"
10091 [(set (match_operand:SI 0 "register_operand" "=c,c")
10093 [(match_operand:V16QI 1 "register_operand" "x,x")
10094 (match_operand:SI 2 "register_operand" "a,a")
10095 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10096 (match_operand:SI 4 "register_operand" "d,d")
10097 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10099 (set (reg:CC FLAGS_REG)
10108 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10109 [(set_attr "type" "sselog")
10110 (set_attr "prefix_data16" "1")
10111 (set_attr "prefix_extra" "1")
10112 (set_attr "prefix" "maybe_vex")
10113 (set_attr "length_immediate" "1")
10114 (set_attr "memory" "none,load")
10115 (set_attr "mode" "TI")])
10117 (define_insn "sse4_2_pcmpestrm"
10118 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10120 [(match_operand:V16QI 1 "register_operand" "x,x")
10121 (match_operand:SI 2 "register_operand" "a,a")
10122 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10123 (match_operand:SI 4 "register_operand" "d,d")
10124 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10126 (set (reg:CC FLAGS_REG)
10135 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10136 [(set_attr "type" "sselog")
10137 (set_attr "prefix_data16" "1")
10138 (set_attr "prefix_extra" "1")
10139 (set_attr "length_immediate" "1")
10140 (set_attr "prefix" "maybe_vex")
10141 (set_attr "memory" "none,load")
10142 (set_attr "mode" "TI")])
;; Flags-only PCMPESTR: sets FLAGS_REG and merely clobbers the register
;; result.  There are four alternatives.  In the first two the V16QI
;; scratch (operand 0) is allocated via "Yz", the SI scratch is "X", and
;; pcmpestrm is emitted; in the last two the SI scratch (operand 1) is
;; allocated via "c", the V16QI scratch is "X", and pcmpestri is emitted.
;; Within each pair operand 4 is a register, then memory, matching the
;; "memory" attribute "none,load,none,load".
10144 (define_insn "sse4_2_pcmpestr_cconly"
10145 [(set (reg:CC FLAGS_REG)
10147 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10148 (match_operand:SI 3 "register_operand" "a,a,a,a")
10149 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10150 (match_operand:SI 5 "register_operand" "d,d,d,d")
10151 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10153 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10154 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10157 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10158 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10159 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10160 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10161 [(set_attr "type" "sselog")
10162 (set_attr "prefix_data16" "1")
10163 (set_attr "prefix_extra" "1")
10164 (set_attr "length_immediate" "1")
10165 (set_attr "memory" "none,load,none,load")
10166 (set_attr "prefix" "maybe_vex")
10167 (set_attr "mode" "TI")])
;; Combined PCMPISTR pattern.  It describes all three results at once:
;; SI operand 0 (constraint "c"), V16QI operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are the two V16QI sources (operands 2 and 3, which
;; must satisfy the *_not_xmm0_operand predicates) and an immediate
;; (operand 4).
;;
;; The split code inspects the REG_UNUSED notes on curr_insn to learn
;; which of the three results are live (locals ecx, xmm0 and flags) and
;; then emits sse4_2_pcmpistri, sse4_2_pcmpistrm and/or
;; sse4_2_pcmpistr_cconly.  The flags-only insn is emitted only when the
;; flags are live and neither register result is.
;; NOTE(review): the guards on the first two emit_insn calls are
;; presumably the ecx and xmm0 locals -- confirm against the full source.
10169 (define_insn_and_split "sse4_2_pcmpistr"
10170 [(set (match_operand:SI 0 "register_operand" "=c,c")
10172 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10173 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10174 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10176 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10182 (set (reg:CC FLAGS_REG)
10189 && can_create_pseudo_p ()"
10194 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10195 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10196 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10199 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10200 operands[3], operands[4]));
10202 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10203 operands[3], operands[4]));
10204 if (flags && !(ecx || xmm0))
10205 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10206 operands[2], operands[3],
10210 [(set_attr "type" "sselog")
10211 (set_attr "prefix_data16" "1")
10212 (set_attr "prefix_extra" "1")
10213 (set_attr "length_immediate" "1")
10214 (set_attr "memory" "none,load")
10215 (set_attr "mode" "TI")])
10217 (define_insn "sse4_2_pcmpistri"
10218 [(set (match_operand:SI 0 "register_operand" "=c,c")
10220 [(match_operand:V16QI 1 "register_operand" "x,x")
10221 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10222 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10224 (set (reg:CC FLAGS_REG)
10231 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10232 [(set_attr "type" "sselog")
10233 (set_attr "prefix_data16" "1")
10234 (set_attr "prefix_extra" "1")
10235 (set_attr "length_immediate" "1")
10236 (set_attr "prefix" "maybe_vex")
10237 (set_attr "memory" "none,load")
10238 (set_attr "mode" "TI")])
10240 (define_insn "sse4_2_pcmpistrm"
10241 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10243 [(match_operand:V16QI 1 "register_operand" "x,x")
10244 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10245 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10247 (set (reg:CC FLAGS_REG)
10254 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10255 [(set_attr "type" "sselog")
10256 (set_attr "prefix_data16" "1")
10257 (set_attr "prefix_extra" "1")
10258 (set_attr "length_immediate" "1")
10259 (set_attr "prefix" "maybe_vex")
10260 (set_attr "memory" "none,load")
10261 (set_attr "mode" "TI")])
;; Flags-only PCMPISTR: sets FLAGS_REG and merely clobbers the register
;; result.  There are four alternatives.  In the first two the V16QI
;; scratch (operand 0) is allocated via "Yz", the SI scratch is "X", and
;; pcmpistrm is emitted; in the last two the SI scratch (operand 1) is
;; allocated via "c", the V16QI scratch is "X", and pcmpistri is emitted.
;; Within each pair operand 3 is a register, then memory, matching the
;; "memory" attribute "none,load,none,load".
10263 (define_insn "sse4_2_pcmpistr_cconly"
10264 [(set (reg:CC FLAGS_REG)
10266 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10267 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10268 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10270 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10271 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10274 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10275 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10276 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10277 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10278 [(set_attr "type" "sselog")
10279 (set_attr "prefix_data16" "1")
10280 (set_attr "prefix_extra" "1")
10281 (set_attr "length_immediate" "1")
10282 (set_attr "memory" "none,load,none,load")
10283 (set_attr "prefix" "maybe_vex")
10284 (set_attr "mode" "TI")])
10286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10288 ;; XOP instructions
10290 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10292 ;; XOP parallel integer multiply/add instructions.
10293 ;; Note the XOP multiply/add instructions
10294 ;; a[i] = b[i] * c[i] + d[i];
10295 ;; do not allow the value being added to be a memory operation.
;; V8HI multiply/add; emits vpmacsww.  Operands 1 and 2 are the
;; commutative ("%") multiplicands, of which only operand 2 may be
;; memory ("xm"); operand 3 is the addend.
;; NOTE(review): operand 3 is declared with nonimmediate_operand but its
;; only constraint is "x", so a memory addend passes the predicate and
;; has to be reloaded into a register.  register_operand would state the
;; "addend may not be memory" rule directly -- confirm before changing,
;; since the same shape is used by the other xop_pmac* patterns.
10296 (define_insn "xop_pmacsww"
10297 [(set (match_operand:V8HI 0 "register_operand" "=x")
10300 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10301 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10302 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10304 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10305 [(set_attr "type" "ssemuladd")
10306 (set_attr "mode" "TI")])
10308 (define_insn "xop_pmacssww"
10309 [(set (match_operand:V8HI 0 "register_operand" "=x")
10311 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10312 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10313 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10315 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10316 [(set_attr "type" "ssemuladd")
10317 (set_attr "mode" "TI")])
10319 (define_insn "xop_pmacsdd"
10320 [(set (match_operand:V4SI 0 "register_operand" "=x")
10323 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10324 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10325 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10327 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10328 [(set_attr "type" "ssemuladd")
10329 (set_attr "mode" "TI")])
10331 (define_insn "xop_pmacssdd"
10332 [(set (match_operand:V4SI 0 "register_operand" "=x")
10334 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10335 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10336 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10338 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10339 [(set_attr "type" "ssemuladd")
10340 (set_attr "mode" "TI")])
10342 (define_insn "xop_pmacssdql"
10343 [(set (match_operand:V2DI 0 "register_operand" "=x")
10348 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10349 (parallel [(const_int 1)
10352 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10353 (parallel [(const_int 1)
10355 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10357 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10358 [(set_attr "type" "ssemuladd")
10359 (set_attr "mode" "TI")])
10361 (define_insn "xop_pmacssdqh"
10362 [(set (match_operand:V2DI 0 "register_operand" "=x")
10367 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10368 (parallel [(const_int 0)
10372 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10373 (parallel [(const_int 0)
10375 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10377 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10378 [(set_attr "type" "ssemuladd")
10379 (set_attr "mode" "TI")])
10381 (define_insn "xop_pmacsdql"
10382 [(set (match_operand:V2DI 0 "register_operand" "=x")
10387 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10388 (parallel [(const_int 1)
10392 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10393 (parallel [(const_int 1)
10395 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10397 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10398 [(set_attr "type" "ssemuladd")
10399 (set_attr "mode" "TI")])
10401 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10402 ;; fake it with a multiply/add. In general, we expect the define_split to
10403 ;; occur before register allocation, so we have to handle the corner case where
10404 ;; the target is the same as operands 1/2
;; Widening multiply of the elements selected from operands 1 and 2
;; starting at index 1 (the visible selectors are 1 and 3), producing
;; V2DI.  The destination is early-clobber ("=&x"), so it is never
;; allocated to the same register as an input.  The split sets
;; operands[3] to a zero V2DI constant before building the replacement
;; pattern -- presumably the addend of the multiply/add form; the
;; replacement is only partly visible here.
;; NOTE(review): the split condition is "&& reload_completed", i.e. the
;; split runs after register allocation, which does not match the
;; "before register allocation" wording of the preceding comment.
10405 (define_insn_and_split "xop_mulv2div2di3_low"
10406 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10410 (match_operand:V4SI 1 "register_operand" "%x")
10411 (parallel [(const_int 1)
10415 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10416 (parallel [(const_int 1)
10417 (const_int 3)])))))]
10420 "&& reload_completed"
10421 [(set (match_dup 0)
10429 (parallel [(const_int 1)
10434 (parallel [(const_int 1)
10438 operands[3] = CONST0_RTX (V2DImode);
10440 [(set_attr "type" "ssemul")
10441 (set_attr "mode" "TI")])
10443 (define_insn "xop_pmacsdqh"
10444 [(set (match_operand:V2DI 0 "register_operand" "=x")
10449 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10450 (parallel [(const_int 0)
10454 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10455 (parallel [(const_int 0)
10457 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10459 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10460 [(set_attr "type" "ssemuladd")
10461 (set_attr "mode" "TI")])
10463 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10464 ;; fake it with a multiply/add. In general, we expect the define_split to
10465 ;; occur before register allocation, so we have to handle the corner case where
10466 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of the elements selected from operands 1 and 2
;; starting at index 0 (the visible selectors are 0 and 2), producing
;; V2DI.  The destination is early-clobber ("=&x"), so it is never
;; allocated to the same register as an input.  The split sets
;; operands[3] to a zero V2DI constant before building the replacement
;; pattern -- presumably the addend of the multiply/add form; the
;; replacement is only partly visible here.
;; NOTE(review): the split condition is "&& reload_completed", i.e. the
;; split runs after register allocation, which does not match the
;; "before register allocation" wording of the preceding comment.
10467 (define_insn_and_split "xop_mulv2div2di3_high"
10468 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10472 (match_operand:V4SI 1 "register_operand" "%x")
10473 (parallel [(const_int 0)
10477 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10478 (parallel [(const_int 0)
10479 (const_int 2)])))))]
10482 "&& reload_completed"
10483 [(set (match_dup 0)
10491 (parallel [(const_int 0)
10496 (parallel [(const_int 0)
10500 operands[3] = CONST0_RTX (V2DImode);
10502 [(set_attr "type" "ssemul")
10503 (set_attr "mode" "TI")])
10505 ;; XOP parallel integer multiply/add instructions for the intrinsics
10506 (define_insn "xop_pmacsswd"
10507 [(set (match_operand:V4SI 0 "register_operand" "=x")
10512 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10513 (parallel [(const_int 1)
10519 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10520 (parallel [(const_int 1)
10524 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10526 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10527 [(set_attr "type" "ssemuladd")
10528 (set_attr "mode" "TI")])
10530 (define_insn "xop_pmacswd"
10531 [(set (match_operand:V4SI 0 "register_operand" "=x")
10536 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10537 (parallel [(const_int 1)
10543 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10544 (parallel [(const_int 1)
10548 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10550 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10551 [(set_attr "type" "ssemuladd")
10552 (set_attr "mode" "TI")])
10554 (define_insn "xop_pmadcsswd"
10555 [(set (match_operand:V4SI 0 "register_operand" "=x")
10561 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10562 (parallel [(const_int 0)
10568 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10569 (parallel [(const_int 0)
10577 (parallel [(const_int 1)
10584 (parallel [(const_int 1)
10587 (const_int 7)])))))
10588 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10590 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10591 [(set_attr "type" "ssemuladd")
10592 (set_attr "mode" "TI")])
10594 (define_insn "xop_pmadcswd"
10595 [(set (match_operand:V4SI 0 "register_operand" "=x")
10601 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10602 (parallel [(const_int 0)
10608 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10609 (parallel [(const_int 0)
10617 (parallel [(const_int 1)
10624 (parallel [(const_int 1)
10627 (const_int 7)])))))
10628 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10630 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10631 [(set_attr "type" "ssemuladd")
10632 (set_attr "mode" "TI")])
10634 ;; XOP parallel XMM conditional moves
;; 128-bit conditional move over every SSEMODE vector mode; emits vpcmov.
;; Operand 3 is the if_then_else condition, operands 1 and 2 the "then"
;; and "else" values.  The two alternatives allow a memory operand in
;; either operand 2 (first alternative) or operand 3 (second
;; alternative), never both; operand 1 is always a register.
10635 (define_insn "xop_pcmov_<mode>"
10636 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10637 (if_then_else:SSEMODE
10638 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10639 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10640 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10642 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10643 [(set_attr "type" "sse4arg")])
;; 256-bit conditional move over every AVX256MODE vector mode; emits
;; vpcmov.  Operand 3 is the if_then_else condition, operands 1 and 2
;; the "then" and "else" values.  The two alternatives allow a memory
;; operand in either operand 2 (first alternative) or operand 3 (second
;; alternative), never both; operand 1 is always a register.
10645 (define_insn "xop_pcmov_<mode>256"
10646 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10647 (if_then_else:AVX256MODE
10648 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10649 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10650 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10652 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10653 [(set_attr "type" "sse4arg")])
10655 ;; XOP horizontal add/subtract instructions
10656 (define_insn "xop_phaddbw"
10657 [(set (match_operand:V8HI 0 "register_operand" "=x")
10661 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10662 (parallel [(const_int 0)
10673 (parallel [(const_int 1)
10680 (const_int 15)])))))]
10682 "vphaddbw\t{%1, %0|%0, %1}"
10683 [(set_attr "type" "sseiadd1")])
10685 (define_insn "xop_phaddbd"
10686 [(set (match_operand:V4SI 0 "register_operand" "=x")
10691 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10692 (parallel [(const_int 0)
10699 (parallel [(const_int 1)
10702 (const_int 13)]))))
10707 (parallel [(const_int 2)
10714 (parallel [(const_int 3)
10717 (const_int 15)]))))))]
10719 "vphaddbd\t{%1, %0|%0, %1}"
10720 [(set_attr "type" "sseiadd1")])
10722 (define_insn "xop_phaddbq"
10723 [(set (match_operand:V2DI 0 "register_operand" "=x")
10729 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10730 (parallel [(const_int 0)
10735 (parallel [(const_int 1)
10741 (parallel [(const_int 2)
10746 (parallel [(const_int 3)
10747 (const_int 7)])))))
10753 (parallel [(const_int 8)
10758 (parallel [(const_int 9)
10759 (const_int 13)]))))
10764 (parallel [(const_int 10)
10769 (parallel [(const_int 11)
10770 (const_int 15)])))))))]
10772 "vphaddbq\t{%1, %0|%0, %1}"
10773 [(set_attr "type" "sseiadd1")])
10775 (define_insn "xop_phaddwd"
10776 [(set (match_operand:V4SI 0 "register_operand" "=x")
10780 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10781 (parallel [(const_int 0)
10788 (parallel [(const_int 1)
10791 (const_int 7)])))))]
10793 "vphaddwd\t{%1, %0|%0, %1}"
10794 [(set_attr "type" "sseiadd1")])
10796 (define_insn "xop_phaddwq"
10797 [(set (match_operand:V2DI 0 "register_operand" "=x")
10802 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10803 (parallel [(const_int 0)
10808 (parallel [(const_int 1)
10814 (parallel [(const_int 2)
10819 (parallel [(const_int 3)
10820 (const_int 7)]))))))]
10822 "vphaddwq\t{%1, %0|%0, %1}"
10823 [(set_attr "type" "sseiadd1")])
10825 (define_insn "xop_phadddq"
10826 [(set (match_operand:V2DI 0 "register_operand" "=x")
10830 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10831 (parallel [(const_int 0)
10836 (parallel [(const_int 1)
10837 (const_int 3)])))))]
10839 "vphadddq\t{%1, %0|%0, %1}"
10840 [(set_attr "type" "sseiadd1")])
10842 (define_insn "xop_phaddubw"
10843 [(set (match_operand:V8HI 0 "register_operand" "=x")
10847 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10848 (parallel [(const_int 0)
10859 (parallel [(const_int 1)
10866 (const_int 15)])))))]
10868 "vphaddubw\t{%1, %0|%0, %1}"
10869 [(set_attr "type" "sseiadd1")])
10871 (define_insn "xop_phaddubd"
10872 [(set (match_operand:V4SI 0 "register_operand" "=x")
10877 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10878 (parallel [(const_int 0)
10885 (parallel [(const_int 1)
10888 (const_int 13)]))))
10893 (parallel [(const_int 2)
10900 (parallel [(const_int 3)
10903 (const_int 15)]))))))]
10905 "vphaddubd\t{%1, %0|%0, %1}"
10906 [(set_attr "type" "sseiadd1")])
10908 (define_insn "xop_phaddubq"
10909 [(set (match_operand:V2DI 0 "register_operand" "=x")
10915 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10916 (parallel [(const_int 0)
10921 (parallel [(const_int 1)
10927 (parallel [(const_int 2)
10932 (parallel [(const_int 3)
10933 (const_int 7)])))))
10939 (parallel [(const_int 8)
10944 (parallel [(const_int 9)
10945 (const_int 13)]))))
10950 (parallel [(const_int 10)
10955 (parallel [(const_int 11)
10956 (const_int 15)])))))))]
10958 "vphaddubq\t{%1, %0|%0, %1}"
10959 [(set_attr "type" "sseiadd1")])
10961 (define_insn "xop_phadduwd"
10962 [(set (match_operand:V4SI 0 "register_operand" "=x")
10966 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10967 (parallel [(const_int 0)
10974 (parallel [(const_int 1)
10977 (const_int 7)])))))]
10979 "vphadduwd\t{%1, %0|%0, %1}"
10980 [(set_attr "type" "sseiadd1")])
10982 (define_insn "xop_phadduwq"
10983 [(set (match_operand:V2DI 0 "register_operand" "=x")
10988 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10989 (parallel [(const_int 0)
10994 (parallel [(const_int 1)
11000 (parallel [(const_int 2)
11005 (parallel [(const_int 3)
11006 (const_int 7)]))))))]
11008 "vphadduwq\t{%1, %0|%0, %1}"
11009 [(set_attr "type" "sseiadd1")])
11011 (define_insn "xop_phaddudq"
11012 [(set (match_operand:V2DI 0 "register_operand" "=x")
11016 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11017 (parallel [(const_int 0)
11022 (parallel [(const_int 1)
11023 (const_int 3)])))))]
11025 "vphaddudq\t{%1, %0|%0, %1}"
11026 [(set_attr "type" "sseiadd1")])
11028 (define_insn "xop_phsubbw"
11029 [(set (match_operand:V8HI 0 "register_operand" "=x")
11033 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11034 (parallel [(const_int 0)
11045 (parallel [(const_int 1)
11052 (const_int 15)])))))]
11054 "vphsubbw\t{%1, %0|%0, %1}"
11055 [(set_attr "type" "sseiadd1")])
;; XOP horizontal subtract emitting vphsubwd: V8HI register-or-memory
;; source (operand 1) into a V4SI destination register (operand 0).
11057 (define_insn "xop_phsubwd"
11058 [(set (match_operand:V4SI 0 "register_operand" "=x")
11062 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11063 (parallel [(const_int 0)
11070 (parallel [(const_int 1)
11073 (const_int 7)])))))]
11075 "vphsubwd\t{%1, %0|%0, %1}"
11076 [(set_attr "type" "sseiadd1")])
;; XOP horizontal subtract emitting vphsubdq: V4SI register-or-memory
;; source (operand 1) into a V2DI destination register (operand 0).
11078 (define_insn "xop_phsubdq"
11079 [(set (match_operand:V2DI 0 "register_operand" "=x")
11083 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11084 (parallel [(const_int 0)
11089 (parallel [(const_int 1)
11090 (const_int 3)])))))]
11092 "vphsubdq\t{%1, %0|%0, %1}"
11093 [(set_attr "type" "sseiadd1")])
11095 ;; XOP permute instructions
;; vpperm on three V16QI inputs, modelled as UNSPEC_XOP_PERMUTE.
;; The two constraint alternatives let either operand 2 or operand 3 live
;; in memory; the insn condition rejects the case where both are MEMs.
11096 (define_insn "xop_pperm"
11097 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11099 [(match_operand:V16QI 1 "register_operand" "x,x")
11100 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11101 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11102 UNSPEC_XOP_PERMUTE))]
11103 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11104 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11105 [(set_attr "type" "sse4arg")
11106 (set_attr "mode" "TI")])
11108 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result with vpperm.
;; Operand 3 (V16QI) only appears in a `use'; presumably it carries the
;; byte selector -- confirm against the expander that builds it.
;; At most one of operands 2 and 3 may be a MEM.
11109 (define_insn "xop_pperm_pack_v2di_v4si"
11110 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11113 (match_operand:V2DI 1 "register_operand" "x,x"))
11115 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11116 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11117 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11118 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11119 [(set_attr "type" "sse4arg")
11120 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result with vpperm.
;; Operand 3 (V16QI) only appears in a `use'; presumably the byte selector.
;; At most one of operands 2 and 3 may be a MEM.
11122 (define_insn "xop_pperm_pack_v4si_v8hi"
11123 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11126 (match_operand:V4SI 1 "register_operand" "x,x"))
11128 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11129 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11130 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11131 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11132 [(set_attr "type" "sse4arg")
11133 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result with vpperm.
;; Operand 3 (V16QI) only appears in a `use'; presumably the byte selector.
;; At most one of operands 2 and 3 may be a MEM.
11135 (define_insn "xop_pperm_pack_v8hi_v16qi"
11136 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11139 (match_operand:V8HI 1 "register_operand" "x,x"))
11141 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11142 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11143 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11144 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11145 [(set_attr "type" "sse4arg")
11146 (set_attr "mode" "TI")])
11148 ;; XOP packed rotate instructions
;; Rotate-left expander for the SSEMODE1248 vector modes with an SImode count.
;; When operand 2 does not satisfy const_0_to_<sserotatemax>_operand, the
;; count is converted to the element mode if necessary, replicated into all
;; <ssescalarnum> elements of a fresh vector register through vec_init, and
;; the variable-count pattern xop_vrotl<mode>3 is emitted.
11149 (define_expand "rotl<mode>3"
11150 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11151 (rotate:SSEMODE1248
11152 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11153 (match_operand:SI 2 "general_operand")))]
11156 /* If we were given a scalar, convert it to parallel */
11157 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11159 rtvec vs = rtvec_alloc (<ssescalarnum>);
11160 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11161 rtx reg = gen_reg_rtx (<MODE>mode);
11162 rtx op2 = operands[2];
11165 if (GET_MODE (op2) != <ssescalarmode>mode)
11167 op2 = gen_reg_rtx (<ssescalarmode>mode);
11168 convert_move (op2, operands[2], false);
11171 for (i = 0; i < <ssescalarnum>; i++)
11172 RTVEC_ELT (vs, i) = op2;
11174 emit_insn (gen_vec_init<mode> (reg, par));
11175 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the SSEMODE1248 vector modes with an SImode count.
;; For a count that does not satisfy const_0_to_<sserotatemax>_operand, the
;; count is broadcast into a vector as in rotl<mode>3, that vector is negated
;; with neg<mode>2, and xop_vrotl<mode>3 is emitted with the negated counts.
11180 (define_expand "rotr<mode>3"
11181 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11182 (rotatert:SSEMODE1248
11183 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11184 (match_operand:SI 2 "general_operand")))]
11187 /* If we were given a scalar, convert it to parallel */
11188 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11190 rtvec vs = rtvec_alloc (<ssescalarnum>);
11191 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11192 rtx neg = gen_reg_rtx (<MODE>mode);
11193 rtx reg = gen_reg_rtx (<MODE>mode);
11194 rtx op2 = operands[2];
11197 if (GET_MODE (op2) != <ssescalarmode>mode)
11199 op2 = gen_reg_rtx (<ssescalarmode>mode);
11200 convert_move (op2, operands[2], false);
11203 for (i = 0; i < <ssescalarnum>; i++)
11204 RTVEC_ELT (vs, i) = op2;
11206 emit_insn (gen_vec_init<mode> (reg, par));
11207 emit_insn (gen_neg<mode>2 (neg, reg));
11208 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed directly as the
;; vprot<ssevecsize> count.  Operand 1 may be a register or memory.
11213 (define_insn "xop_rotl<mode>3"
11214 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11215 (rotate:SSEMODE1248
11216 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11217 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11219 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11220 [(set_attr "type" "sseishft")
11221 (set_attr "length_immediate" "1")
11222 (set_attr "mode" "TI")])
;; Rotate right by an immediate, implemented as a left rotate: the count
;; printed (operands[3]) is the element width in bits minus the requested
;; right-rotate count in operand 2.
;; The width is taken from GET_MODE_BITSIZE of the element mode.
;; <ssescalarnum> is the number of elements (it sizes the rtvec in the
;; rotl/rotr expanders), so <ssescalarnum> * 8 matches the element width
;; only for V4SI and yields 16 instead of 64 for V2DI.
11224 (define_insn "xop_rotr<mode>3"
11225 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11226 (rotatert:SSEMODE1248
11227 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11228 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11231 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11232 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11234 [(set_attr "type" "sseishft")
11235 (set_attr "length_immediate" "1")
11236 (set_attr "mode" "TI")])
;; Per-element variable rotate right: negates the count vector (operand 2)
;; into a fresh register and emits xop_vrotl<mode>3 with the negated counts.
11238 (define_expand "vrotr<mode>3"
11239 [(match_operand:SSEMODE1248 0 "register_operand" "")
11240 (match_operand:SSEMODE1248 1 "register_operand" "")
11241 (match_operand:SSEMODE1248 2 "register_operand" "")]
11244 rtx reg = gen_reg_rtx (<MODE>mode);
11245 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11246 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left: forwards all three operands unchanged
;; to xop_vrotl<mode>3.
11250 (define_expand "vrotl<mode>3"
11251 [(match_operand:SSEMODE1248 0 "register_operand" "")
11252 (match_operand:SSEMODE1248 1 "register_operand" "")
11253 (match_operand:SSEMODE1248 2 "register_operand" "")]
11256 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot<ssevecsize> with a vector of counts in operand 2.  The RTL is an
;; if_then_else choosing between a rotate of operand 1 and a rotatert by the
;; negated operand 2 (the selecting comparison is not visible in this
;; listing).  Either operand 1 or operand 2 may be a MEM, but not both.
11260 (define_insn "xop_vrotl<mode>3"
11261 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11262 (if_then_else:SSEMODE1248
11264 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11266 (rotate:SSEMODE1248
11267 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11269 (rotatert:SSEMODE1248
11271 (neg:SSEMODE1248 (match_dup 2)))))]
11272 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11273 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11274 [(set_attr "type" "sseishft")
11275 (set_attr "prefix_data16" "0")
11276 (set_attr "prefix_extra" "2")
11277 (set_attr "mode" "TI")])
11279 ;; XOP packed shift instructions.
11280 ;; FIXME: add V2DI back in
;; Per-element logical shift right for SSEMODE124: negates the count vector
;; (operand 2) and emits xop_lshl<mode>3 with the negated counts.
11281 (define_expand "vlshr<mode>3"
11282 [(match_operand:SSEMODE124 0 "register_operand" "")
11283 (match_operand:SSEMODE124 1 "register_operand" "")
11284 (match_operand:SSEMODE124 2 "register_operand" "")]
11287 rtx neg = gen_reg_rtx (<MODE>mode);
11288 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11289 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right for SSEMODE124: negates the count
;; vector (operand 2) and emits xop_ashl<mode>3 with the negated counts.
11293 (define_expand "vashr<mode>3"
11294 [(match_operand:SSEMODE124 0 "register_operand" "")
11295 (match_operand:SSEMODE124 1 "register_operand" "")
11296 (match_operand:SSEMODE124 2 "register_operand" "")]
11299 rtx neg = gen_reg_rtx (<MODE>mode);
11300 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11301 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element shift left for SSEMODE124: forwards all three operands
;; unchanged to xop_ashl<mode>3.
11305 (define_expand "vashl<mode>3"
11306 [(match_operand:SSEMODE124 0 "register_operand" "")
11307 (match_operand:SSEMODE124 1 "register_operand" "")
11308 (match_operand:SSEMODE124 2 "register_operand" "")]
11311 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; vpsha<ssevecsize> with a vector of counts in operand 2.  The RTL is an
;; if_then_else choosing between an ashift of operand 1 and an ashiftrt by
;; the negated operand 2 (the selecting comparison is not visible in this
;; listing).  Either operand 1 or operand 2 may be a MEM, but not both.
11315 (define_insn "xop_ashl<mode>3"
11316 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11317 (if_then_else:SSEMODE1248
11319 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11321 (ashift:SSEMODE1248
11322 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11324 (ashiftrt:SSEMODE1248
11326 (neg:SSEMODE1248 (match_dup 2)))))]
11327 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11328 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11329 [(set_attr "type" "sseishft")
11330 (set_attr "prefix_data16" "0")
11331 (set_attr "prefix_extra" "2")
11332 (set_attr "mode" "TI")])
;; vpshl<ssevecsize> with a vector of counts in operand 2.  The RTL is an
;; if_then_else choosing between an ashift of operand 1 and an lshiftrt by
;; the negated operand 2 (the selecting comparison is not visible in this
;; listing).  Either operand 1 or operand 2 may be a MEM, but not both.
11334 (define_insn "xop_lshl<mode>3"
11335 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11336 (if_then_else:SSEMODE1248
11338 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11340 (ashift:SSEMODE1248
11341 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11343 (lshiftrt:SSEMODE1248
11345 (neg:SSEMODE1248 (match_dup 2)))))]
11346 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11347 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11348 [(set_attr "type" "sseishft")
11349 (set_attr "prefix_data16" "0")
11350 (set_attr "prefix_extra" "2")
11351 (set_attr "mode" "TI")])
11353 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count: operand 2 is placed in all 16 slots
;; of a PARALLEL, materialised into a vector register with vec_initv16qi,
;; and passed to xop_ashlv16qi3 as the per-element count.
11354 (define_expand "ashlv16qi3"
11355 [(match_operand:V16QI 0 "register_operand" "")
11356 (match_operand:V16QI 1 "register_operand" "")
11357 (match_operand:SI 2 "nonmemory_operand" "")]
11360 rtvec vs = rtvec_alloc (16);
11361 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11362 rtx reg = gen_reg_rtx (V16QImode);
11364 for (i = 0; i < 16; i++)
11365 RTVEC_ELT (vs, i) = operands[2];
11367 emit_insn (gen_vec_initv16qi (reg, par));
11368 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Same broadcast scheme as ashlv16qi3, but the replicated count vector is
;; passed to xop_lshlv16qi3.
11372 (define_expand "lshlv16qi3"
11373 [(match_operand:V16QI 0 "register_operand" "")
11374 (match_operand:V16QI 1 "register_operand" "")
11375 (match_operand:SI 2 "nonmemory_operand" "")]
11378 rtvec vs = rtvec_alloc (16);
11379 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11380 rtx reg = gen_reg_rtx (V16QImode);
11382 for (i = 0; i < 16; i++)
11383 RTVEC_ELT (vs, i) = operands[2];
11385 emit_insn (gen_vec_initv16qi (reg, par));
11386 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count, done with xop_ashlv16qi3
;; and a negated count.  A CONST_INT count is negated at expand time before
;; being broadcast; for any other count the broadcast vector is negated with
;; negv16qi2 before use.
11390 (define_expand "ashrv16qi3"
11391 [(match_operand:V16QI 0 "register_operand" "")
11392 (match_operand:V16QI 1 "register_operand" "")
11393 (match_operand:SI 2 "nonmemory_operand" "")]
11396 rtvec vs = rtvec_alloc (16);
11397 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11398 rtx reg = gen_reg_rtx (V16QImode);
11400 rtx ele = ((CONST_INT_P (operands[2]))
11401 ? GEN_INT (- INTVAL (operands[2]))
11404 for (i = 0; i < 16; i++)
11405 RTVEC_ELT (vs, i) = ele;
11407 emit_insn (gen_vec_initv16qi (reg, par));
11409 if (!CONST_INT_P (operands[2]))
11411 rtx neg = gen_reg_rtx (V16QImode);
11412 emit_insn (gen_negv16qi2 (neg, reg));
11413 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11416 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar count, done with xop_ashlv2di3
;; and a negated count.  A CONST_INT count is negated at expand time; any
;; other count is negated with negdi2, after a convert_move to DImode when
;; its mode differs.  The negated scalar fills both vector elements.
11421 (define_expand "ashrv2di3"
11422 [(match_operand:V2DI 0 "register_operand" "")
11423 (match_operand:V2DI 1 "register_operand" "")
11424 (match_operand:DI 2 "nonmemory_operand" "")]
11427 rtvec vs = rtvec_alloc (2);
11428 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11429 rtx reg = gen_reg_rtx (V2DImode);
11432 if (CONST_INT_P (operands[2]))
11433 ele = GEN_INT (- INTVAL (operands[2]));
11434 else if (GET_MODE (operands[2]) != DImode)
11436 rtx move = gen_reg_rtx (DImode);
11437 ele = gen_reg_rtx (DImode);
11438 convert_move (move, operands[2], false);
11439 emit_insn (gen_negdi2 (ele, move));
11443 ele = gen_reg_rtx (DImode);
11444 emit_insn (gen_negdi2 (ele, operands[2]));
11447 RTVEC_ELT (vs, 0) = ele;
11448 RTVEC_ELT (vs, 1) = ele;
11449 emit_insn (gen_vec_initv2di (reg, par));
11450 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11454 ;; XOP FRCZ support
;; vfrcz<ssemodesuffixf4> for the SSEMODEF2P modes: one register-or-memory
;; source (operand 1), register destination (operand 0).
11456 (define_insn "xop_frcz<mode>2"
11457 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11459 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11462 "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11463 [(set_attr "type" "ssecvt1")
11464 (set_attr "mode" "<MODE>")])
;; vfrcz<ssemodesuffixf2s> expressed as a vec_merge: operand 2 is the
;; register-or-memory source of the operation, operand 1 is tied to the
;; destination (constraint "0") and supplies the merged-in elements.
11467 (define_insn "xop_vmfrcz<mode>2"
11468 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11469 (vec_merge:SSEMODEF2P
11471 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11473 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11476 "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11477 [(set_attr "type" "ssecvt1")
11478 (set_attr "mode" "<MODE>")])
;; vfrcz<fma4modesuffixf4> for the FMA4MODEF4 modes: one register-or-memory
;; source (operand 1), register destination (operand 0).
11480 (define_insn "xop_frcz<mode>2256"
11481 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11483 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11486 "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
11487 [(set_attr "type" "ssecvt1")
11488 (set_attr "mode" "<MODE>")])
;; Integer vector compare producing a mask: any operator accepted by
;; ix86_comparison_int_operator on operands 2 and 3.  The %Y1 output modifier
;; derives the vpcom condition suffix from the matched operator.
11490 (define_insn "xop_maskcmp<mode>3"
11491 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11492 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11493 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11494 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11496 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11497 [(set_attr "type" "sse4arg")
11498 (set_attr "prefix_data16" "0")
11499 (set_attr "prefix_rep" "0")
11500 (set_attr "prefix_extra" "2")
11501 (set_attr "length_immediate" "1")
11502 (set_attr "mode" "TI")])
;; Counterpart of xop_maskcmp<mode>3 for operators accepted by
;; ix86_comparison_uns_operator; the mnemonic carries a `u' before the
;; element-size suffix.
11504 (define_insn "xop_maskcmp_uns<mode>3"
11505 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11506 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11507 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11508 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11510 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11511 [(set_attr "type" "ssecmp")
11512 (set_attr "prefix_data16" "0")
11513 (set_attr "prefix_rep" "0")
11514 (set_attr "prefix_extra" "2")
11515 (set_attr "length_immediate" "1")
11516 (set_attr "mode" "TI")])
11518 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11519 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11520 ;; the exact instruction generated for the intrinsic.
;; Same comparison and output template as xop_maskcmp_uns<mode>3, but the
;; comparison rtx is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
11521 (define_insn "xop_maskcmp_uns2<mode>3"
11522 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11523 (unspec:SSEMODE1248
11524 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11525 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11526 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11527 UNSPEC_XOP_UNSIGNED_CMP))]
11529 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11530 [(set_attr "type" "ssecmp")
11531 (set_attr "prefix_data16" "0")
11532 (set_attr "prefix_extra" "2")
11533 (set_attr "length_immediate" "1")
11534 (set_attr "mode" "TI")])
11536 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11537 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE over operands 1 and 2; the constant operand 3 picks
;; the mnemonic: nonzero emits vpcomtrue, zero emits vpcomfalse.
11538 (define_insn "xop_pcom_tf<mode>3"
11539 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11540 (unspec:SSEMODE1248
11541 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11542 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11543 (match_operand:SI 3 "const_int_operand" "n")]
11544 UNSPEC_XOP_TRUEFALSE))]
11547 return ((INTVAL (operands[3]) != 0)
11548 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11549 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11551 [(set_attr "type" "ssecmp")
11552 (set_attr "prefix_data16" "0")
11553 (set_attr "prefix_extra" "2")
11554 (set_attr "length_immediate" "1")
11555 (set_attr "mode" "TI")])
;; vpermil2p<avxmodesuffixf2c>: two data inputs (operands 1 and 2), a
;; selector vector of mode <avxpermvecmode> (operand 3, register or memory)
;; and an immediate in the range 0..3 (operand 4).
11557 (define_insn "xop_vpermil2<mode>3"
11558 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11560 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11561 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11562 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11563 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11566 "vpermil2p<avxmodesuffixf2c>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11567 [(set_attr "type" "sse4arg")
11568 (set_attr "length_immediate" "1")
11569 (set_attr "mode" "<MODE>")])
11571 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded three-operand vaesenc, enabled when both TARGET_AES and
;; TARGET_AVX hold; operand 1 need not be tied to the destination.
11572 (define_insn "*avx_aesenc"
11573 [(set (match_operand:V2DI 0 "register_operand" "=x")
11574 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11575 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11577 "TARGET_AES && TARGET_AVX"
11578 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11579 [(set_attr "type" "sselog1")
11580 (set_attr "prefix_extra" "1")
11581 (set_attr "prefix" "vex")
11582 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination (constraint
;; "0"); operand 2 may be a register or memory.
11584 (define_insn "aesenc"
11585 [(set (match_operand:V2DI 0 "register_operand" "=x")
11586 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11587 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11590 "aesenc\t{%2, %0|%0, %2}"
11591 [(set_attr "type" "sselog1")
11592 (set_attr "prefix_extra" "1")
11593 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesenclast (UNSPEC_AESENCLAST), enabled when
;; both TARGET_AES and TARGET_AVX hold.
11595 (define_insn "*avx_aesenclast"
11596 [(set (match_operand:V2DI 0 "register_operand" "=x")
11597 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11598 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11599 UNSPEC_AESENCLAST))]
11600 "TARGET_AES && TARGET_AVX"
11601 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11602 [(set_attr "type" "sselog1")
11603 (set_attr "prefix_extra" "1")
11604 (set_attr "prefix" "vex")
11605 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
11607 (define_insn "aesenclast"
11608 [(set (match_operand:V2DI 0 "register_operand" "=x")
11609 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11610 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11611 UNSPEC_AESENCLAST))]
11613 "aesenclast\t{%2, %0|%0, %2}"
11614 [(set_attr "type" "sselog1")
11615 (set_attr "prefix_extra" "1")
11616 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesdec, enabled when both TARGET_AES and
;; TARGET_AVX hold; operand 1 need not be tied to the destination.
11618 (define_insn "*avx_aesdec"
11619 [(set (match_operand:V2DI 0 "register_operand" "=x")
11620 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11621 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11623 "TARGET_AES && TARGET_AVX"
11624 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11625 [(set_attr "type" "sselog1")
11626 (set_attr "prefix_extra" "1")
11627 (set_attr "prefix" "vex")
11628 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination (constraint
;; "0"); operand 2 may be a register or memory.
11630 (define_insn "aesdec"
11631 [(set (match_operand:V2DI 0 "register_operand" "=x")
11632 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11633 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11636 "aesdec\t{%2, %0|%0, %2}"
11637 [(set_attr "type" "sselog1")
11638 (set_attr "prefix_extra" "1")
11639 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesdeclast (UNSPEC_AESDECLAST), enabled when
;; both TARGET_AES and TARGET_AVX hold.
11641 (define_insn "*avx_aesdeclast"
11642 [(set (match_operand:V2DI 0 "register_operand" "=x")
11643 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11644 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11645 UNSPEC_AESDECLAST))]
11646 "TARGET_AES && TARGET_AVX"
11647 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11648 [(set_attr "type" "sselog1")
11649 (set_attr "prefix_extra" "1")
11650 (set_attr "prefix" "vex")
11651 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
11653 (define_insn "aesdeclast"
11654 [(set (match_operand:V2DI 0 "register_operand" "=x")
11655 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11656 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11657 UNSPEC_AESDECLAST))]
11659 "aesdeclast\t{%2, %0|%0, %2}"
11660 [(set_attr "type" "sselog1")
11661 (set_attr "prefix_extra" "1")
11662 (set_attr "mode" "TI")])
;; aesimc with a single register-or-memory source.  One pattern serves both
;; encodings: the template starts with %v and the prefix attribute is
;; maybe_vex.
11664 (define_insn "aesimc"
11665 [(set (match_operand:V2DI 0 "register_operand" "=x")
11666 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11669 "%vaesimc\t{%1, %0|%0, %1}"
11670 [(set_attr "type" "sselog1")
11671 (set_attr "prefix_extra" "1")
11672 (set_attr "prefix" "maybe_vex")
11673 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): register-or-memory source in
;; operand 1 and an immediate in the range 0..255 in operand 2.  Uses the
;; %v / maybe_vex scheme so one pattern serves both encodings.
11675 (define_insn "aeskeygenassist"
11676 [(set (match_operand:V2DI 0 "register_operand" "=x")
11677 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11678 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11679 UNSPEC_AESKEYGENASSIST))]
11681 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11682 [(set_attr "type" "sselog1")
11683 (set_attr "prefix_extra" "1")
11684 (set_attr "length_immediate" "1")
11685 (set_attr "prefix" "maybe_vex")
11686 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vpclmulqdq, enabled when both TARGET_PCLMUL and
;; TARGET_AVX hold; operand 3 is an immediate in the range 0..255.
11688 (define_insn "*vpclmulqdq"
11689 [(set (match_operand:V2DI 0 "register_operand" "=x")
11690 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11691 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11692 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11694 "TARGET_PCLMUL && TARGET_AVX"
11695 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11696 [(set_attr "type" "sselog1")
11697 (set_attr "prefix_extra" "1")
11698 (set_attr "length_immediate" "1")
11699 (set_attr "prefix" "vex")
11700 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination (constraint
;; "0"); operand 3 is an immediate in the range 0..255.
11702 (define_insn "pclmulqdq"
11703 [(set (match_operand:V2DI 0 "register_operand" "=x")
11704 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11705 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11706 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11709 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11710 [(set_attr "type" "sselog1")
11711 (set_attr "prefix_extra" "1")
11712 (set_attr "length_immediate" "1")
11713 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall: element 0 is an unspec_volatile, and
;; elements 1..nregs each SET one SSE register (viewed as V8SImode) to zero.
;; nregs is 16 when TARGET_64BIT and 8 otherwise.
11715 (define_expand "avx_vzeroall"
11716 [(match_par_dup 0 [(const_int 0)])]
11719 int nregs = TARGET_64BIT ? 16 : 8;
11722 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11724 XVECEXP (operands[0], 0, 0)
11725 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11728 for (regno = 0; regno < nregs; regno++)
11729 XVECEXP (operands[0], 0, regno + 1)
11730 = gen_rtx_SET (VOIDmode,
11731 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11732 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The output
;; template is not visible in this listing.
11735 (define_insn "*avx_vzeroall"
11736 [(match_parallel 0 "vzeroall_operation"
11737 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11740 [(set_attr "type" "sse")
11741 (set_attr "modrm" "0")
11742 (set_attr "memory" "none")
11743 (set_attr "prefix" "vex")
11744 (set_attr "mode" "OI")])
11746 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Builds the PARALLEL for vzeroupper: element 0 is the UNSPECV_VZEROUPPER
;; unspec_volatile, and elements 1..nregs each CLOBBER one SSE register
;; (viewed as V8SImode).  nregs is 16 when TARGET_64BIT and 8 otherwise.
11747 (define_expand "avx_vzeroupper"
11748 [(match_par_dup 0 [(const_int 0)])]
11751 int nregs = TARGET_64BIT ? 16 : 8;
11754 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11756 XVECEXP (operands[0], 0, 0)
11757 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11758 UNSPECV_VZEROUPPER);
11760 for (regno = 0; regno < nregs; regno++)
11761 XVECEXP (operands[0], 0, regno + 1)
11762 = gen_rtx_CLOBBER (VOIDmode,
11763 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; Matches a PARALLEL accepted by the vzeroupper_operation predicate whose
;; first element is the UNSPECV_VZEROUPPER unspec_volatile.  The output
;; template is not visible in this listing.
11766 (define_insn "*avx_vzeroupper"
11767 [(match_parallel 0 "vzeroupper_operation"
11768 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11771 [(set_attr "type" "sse")
11772 (set_attr "modrm" "0")
11773 (set_attr "memory" "none")
11774 (set_attr "prefix" "vex")
11775 (set_attr "mode" "OI")])
;; Broadcast a scalar into every element of an AVX256MODE24P vector.
;; A memory source is handled by vbroadcasts<avxmodesuffixf2c>.  A register
;; source is split after reload into a half-width vec_duplicate written to
;; the low half of the destination register (operands[2] is that register in
;; <avxhalfvecmode>), followed by a vec_concat of that half with itself.
11777 (define_insn_and_split "vec_dup<mode>"
11778 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11779 (vec_duplicate:AVX256MODE24P
11780 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11783 vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}
11785 "&& reload_completed && REG_P (operands[1])"
11786 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11787 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11789 operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));
11791 [(set_attr "type" "ssemov")
11792 (set_attr "prefix_extra" "1")
11793 (set_attr "prefix" "vex")
11794 (set_attr "mode" "V8SF")])
;; Duplicate a 128-bit half (operand 1) into both halves of a 256-bit result.
;; Three alternatives: memory source -> vbroadcastf128; source tied to the
;; destination -> vinsertf128 with immediate 1; other register source ->
;; vperm2f128 with immediate 0.
11796 (define_insn "avx_vbroadcastf128_<mode>"
11797 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11798 (vec_concat:AVX256MODE
11799 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11803 vbroadcastf128\t{%1, %0|%0, %1}
11804 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11805 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11806 [(set_attr "type" "ssemov,sselog1,sselog1")
11807 (set_attr "prefix_extra" "1")
11808 (set_attr "length_immediate" "0,1,1")
11809 (set_attr "prefix" "vex")
11810 (set_attr "mode" "V4SF,V8SF,V8SF")])
11812 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11813 ;; If it so happens that the input is in memory, use vbroadcast.
11814 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element operands[3].  In the vbroadcastss path the
;; memory address is advanced by elt * 4 bytes so the load reads the chosen
;; element.  In the vpermilps path the immediate elt * 0x55 repeats the
;; 2-bit element index in all four selector fields.
11815 (define_insn "*avx_vperm_broadcast_v4sf"
11816 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11818 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11819 (match_parallel 2 "avx_vbroadcast_operand"
11820 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11823 int elt = INTVAL (operands[3]);
11824 switch (which_alternative)
11828 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11829 return "vbroadcastss\t{%1, %0|%0, %1}";
11831 operands[2] = GEN_INT (elt * 0x55);
11832 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11834 gcc_unreachable ();
11837 [(set_attr "type" "ssemov,ssemov,sselog1")
11838 (set_attr "prefix_extra" "1")
11839 (set_attr "length_immediate" "0,0,1")
11840 (set_attr "prefix" "vex")
11841 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast of element operands[3], split after reload.
;; The split code visible here takes one of two routes; the test that picks
;; between them is not visible in this listing (presumably register versus
;; memory source -- confirm against the full file).
;; Route 1: an in-lane vpermil followed by vperm2f128 that copies the chosen
;; lane into both lanes.
;; Route 2: operand 1 is re-addressed to the selected element with
;; adjust_address_nv so the vec_duplicate replacement pattern applies.
11843 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11844 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11845 (vec_select:AVX256MODEF2P
11846 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11847 (match_parallel 2 "avx_vbroadcast_operand"
11848 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11851 "&& reload_completed"
11852 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11854 rtx op0 = operands[0], op1 = operands[1];
11855 int elt = INTVAL (operands[3]);
11861 /* Shuffle element we care about into all elements of the 128-bit lane.
11862 The other lane gets shuffled too, but we don't care. */
11863 if (<MODE>mode == V4DFmode)
11864 mask = (elt & 1 ? 15 : 0);
11866 mask = (elt & 3) * 0x55;
11867 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11869 /* Shuffle the lane we care about into both lanes of the dest. */
11870 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11871 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11875 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11876 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; vpermil expander for the AVXMODEFDP modes: decodes the immediate
;; (operand 2) into a PARALLEL of element indices.  One mask bit is used per
;; element; for V4DF, elements 2 and 3 take bits 2 and 3 and are offset by 2.
11879 (define_expand "avx_vpermil<mode>"
11880 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11881 (vec_select:AVXMODEFDP
11882 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11883 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11886 int mask = INTVAL (operands[2]);
11887 rtx perm[<ssescalarnum>];
11889 perm[0] = GEN_INT (mask & 1);
11890 perm[1] = GEN_INT ((mask >> 1) & 1);
11891 if (<MODE>mode == V4DFmode)
11893 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11894 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11898 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermil expander for the AVXMODEFSP modes: decodes the immediate
;; (operand 2) into a PARALLEL of element indices.  Two mask bits are used
;; per element; for V8SF, elements 4..7 reuse the same four 2-bit fields
;; and are offset by 4.
11901 (define_expand "avx_vpermil<mode>"
11902 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11903 (vec_select:AVXMODEFSP
11904 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11905 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11908 int mask = INTVAL (operands[2]);
11909 rtx perm[<ssescalarnum>];
11911 perm[0] = GEN_INT (mask & 3);
11912 perm[1] = GEN_INT ((mask >> 2) & 3);
11913 perm[2] = GEN_INT ((mask >> 4) & 3);
11914 perm[3] = GEN_INT ((mask >> 6) & 3);
11915 if (<MODE>mode == V8SFmode)
11917 perm[4] = GEN_INT ((mask & 3) + 4);
11918 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11919 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11920 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11924 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate-form vpermilp matched as a vec_select whose PARALLEL satisfies
;; avx_vpermilp_<mode>_operand.  At output time the immediate is recovered
;; as avx_vpermilp_parallel (...) - 1 and stored back into operands[2].
11927 (define_insn "*avx_vpermilp<mode>"
11928 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11929 (vec_select:AVXMODEF2P
11930 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11931 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11932 [(match_operand 3 "const_int_operand" "")])))]
11935 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11936 operands[2] = GEN_INT (mask);
11937 return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
11939 [(set_attr "type" "sselog")
11940 (set_attr "prefix_extra" "1")
11941 (set_attr "length_immediate" "1")
11942 (set_attr "prefix" "vex")
11943 (set_attr "mode" "<MODE>")])
;; Variable-selector vpermilp: operand 1 is the data register, operand 2 a
;; register-or-memory selector vector of mode <avxpermvecmode>.
11945 (define_insn "avx_vpermilvar<mode>3"
11946 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11948 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11949 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11952 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11953 [(set_attr "type" "sselog")
11954 (set_attr "prefix_extra" "1")
11955 (set_attr "prefix" "vex")
11956 (set_attr "mode" "<MODE>")])
;; vperm2f128 expander.  When neither bit of 0x88 is set in the immediate,
;; the operation is rebuilt as a vec_select from the vec_concat of operands
;; 1 and 2: mask bits 0-1 pick the 128-bit half for the low result lane and
;; bits 4-5 the half for the high lane.  Otherwise the UNSPEC_VPERMIL2F128
;; form in the pattern is used.
11958 (define_expand "avx_vperm2f128<mode>3"
11959 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11960 (unspec:AVX256MODE2P
11961 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11962 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11963 (match_operand:SI 3 "const_0_to_255_operand" "")]
11964 UNSPEC_VPERMIL2F128))]
11967 int mask = INTVAL (operands[3]);
11968 if ((mask & 0x88) == 0)
11970 rtx perm[<ssescalarnum>], t1, t2;
11971 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11973 base = (mask & 3) * nelt2;
11974 for (i = 0; i < nelt2; ++i)
11975 perm[i] = GEN_INT (base + i);
11977 base = ((mask >> 4) & 3) * nelt2;
11978 for (i = 0; i < nelt2; ++i)
11979 perm[i + nelt2] = GEN_INT (base + i);
11981 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11982 operands[1], operands[2]);
11983 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11984 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11985 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11991 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11992 ;; means that in order to represent this properly in rtl we'd have to
11993 ;; nest *another* vec_concat with a zero operand and do the select from
11994 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC_VPERMIL2F128 form of vperm2f128: accepts any immediate in the
;; range 0..255 and prints it unchanged.
11995 (define_insn "*avx_vperm2f128<mode>_full"
11996 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11997 (unspec:AVX256MODE2P
11998 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11999 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12000 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12001 UNSPEC_VPERMIL2F128))]
12003 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12004 [(set_attr "type" "sselog")
12005 (set_attr "prefix_extra" "1")
12006 (set_attr "length_immediate" "1")
12007 (set_attr "prefix" "vex")
12008 (set_attr "mode" "V8SF")])
;; vec_select-from-vec_concat form of vperm2f128, matched when the PARALLEL
;; satisfies avx_vperm2f128_<mode>_operand.  At output time the immediate is
;; recovered as avx_vperm2f128_parallel (...) - 1 and stored in operands[3].
12010 (define_insn "*avx_vperm2f128<mode>_nozero"
12011 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12012 (vec_select:AVX256MODE2P
12013 (vec_concat:<ssedoublesizemode>
12014 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12015 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12016 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
12017 [(match_operand 4 "const_int_operand" "")])))]
12020 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12021 operands[3] = GEN_INT (mask);
12022 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12024 [(set_attr "type" "sselog")
12025 (set_attr "prefix_extra" "1")
12026 (set_attr "length_immediate" "1")
12027 (set_attr "prefix" "vex")
12028 (set_attr "mode" "V8SF")])
;; vinsertf128 expander: switches on the 0/1 immediate in operand 3 and
;; emits either vec_set_lo_<mode> or vec_set_hi_<mode>.  The case labels are
;; not visible in this listing; presumably 0 selects lo and 1 selects hi.
12030 (define_expand "avx_vinsertf128<mode>"
12031 [(match_operand:AVX256MODE 0 "register_operand" "")
12032 (match_operand:AVX256MODE 1 "register_operand" "")
12033 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12034 (match_operand:SI 3 "const_0_to_1_operand" "")]
12037 switch (INTVAL (operands[3]))
12040 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12044 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12048 gcc_unreachable ();
;; Replace the low 128-bit half of a four-element 256-bit vector: the result
;; is operand 2 concatenated with elements 2 and 3 of operand 1.  Emitted as
;; vinsertf128 with immediate 0.
12053 (define_insn "vec_set_lo_<mode>"
12054 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12055 (vec_concat:AVX256MODE4P
12056 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12057 (vec_select:<avxhalfvecmode>
12058 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12059 (parallel [(const_int 2) (const_int 3)]))))]
12061 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12062 [(set_attr "type" "sselog")
12063 (set_attr "prefix_extra" "1")
12064 (set_attr "length_immediate" "1")
12065 (set_attr "prefix" "vex")
12066 (set_attr "mode" "V8SF")])
;; AVX256MODE4P variant of vec_set_hi.  The result is the vec_concat of
;; elements 0 and 1 selected from operand 1 followed by operand 2 (mode
;; <avxhalfvecmode>, register or memory), so operand 2 supplies elements
;; 2 and 3 of the destination.  Emitted as vinsertf128 with immediate 1.
;; The "mode" attribute is V8SF for every mode of the iterator.
12068 (define_insn "vec_set_hi_<mode>"
12069 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12070 (vec_concat:AVX256MODE4P
12071 (vec_select:<avxhalfvecmode>
12072 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12073 (parallel [(const_int 0) (const_int 1)]))
12074 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12076 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12077 [(set_attr "type" "sselog")
12078 (set_attr "prefix_extra" "1")
12079 (set_attr "length_immediate" "1")
12080 (set_attr "prefix" "vex")
12081 (set_attr "mode" "V8SF")])
;; AVX256MODE8P variant of vec_set_lo.  The result is the vec_concat of
;; operand 2 (mode <avxhalfvecmode>, register or memory) followed by
;; elements 4 through 7 selected from operand 1, so operand 2 supplies
;; elements 0 through 3 of the destination.  Emitted as vinsertf128 with
;; immediate 0.
12083 (define_insn "vec_set_lo_<mode>"
12084 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12085 (vec_concat:AVX256MODE8P
12086 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12087 (vec_select:<avxhalfvecmode>
12088 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12089 (parallel [(const_int 4) (const_int 5)
12090 (const_int 6) (const_int 7)]))))]
12092 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12093 [(set_attr "type" "sselog")
12094 (set_attr "prefix_extra" "1")
12095 (set_attr "length_immediate" "1")
12096 (set_attr "prefix" "vex")
12097 (set_attr "mode" "V8SF")])
;; AVX256MODE8P variant of vec_set_hi.  The result is the vec_concat of
;; elements 0 through 3 selected from operand 1 followed by operand 2
;; (mode <avxhalfvecmode>, register or memory), so operand 2 supplies
;; elements 4 through 7 of the destination.  Emitted as vinsertf128 with
;; immediate 1.
12099 (define_insn "vec_set_hi_<mode>"
12100 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12101 (vec_concat:AVX256MODE8P
12102 (vec_select:<avxhalfvecmode>
12103 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12104 (parallel [(const_int 0) (const_int 1)
12105 (const_int 2) (const_int 3)]))
12106 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12108 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12109 [(set_attr "type" "sselog")
12110 (set_attr "prefix_extra" "1")
12111 (set_attr "length_immediate" "1")
12112 (set_attr "prefix" "vex")
12113 (set_attr "mode" "V8SF")])
;; V16HI form of vec_set_lo, written out without a mode iterator.
;; Operand 2 is a V8HI register or memory value; it is paired with
;; elements 8 through 15 of the V16HI register operand 1 to form the
;; V16HI destination.  Emitted as vinsertf128 with immediate 0; the
;; "mode" attribute is V8SF even though the elements are integers.
12115 (define_insn "vec_set_lo_v16hi"
12116 [(set (match_operand:V16HI 0 "register_operand" "=x")
12118 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12120 (match_operand:V16HI 1 "register_operand" "x")
12121 (parallel [(const_int 8) (const_int 9)
12122 (const_int 10) (const_int 11)
12123 (const_int 12) (const_int 13)
12124 (const_int 14) (const_int 15)]))))]
12126 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12127 [(set_attr "type" "sselog")
12128 (set_attr "prefix_extra" "1")
12129 (set_attr "length_immediate" "1")
12130 (set_attr "prefix" "vex")
12131 (set_attr "mode" "V8SF")])
;; V16HI form of vec_set_hi, written out without a mode iterator.
;; Elements 0 through 7 of the V16HI register operand 1 are paired with
;; operand 2, a V8HI register or memory value, to form the V16HI
;; destination.  Emitted as vinsertf128 with immediate 1; the "mode"
;; attribute is V8SF even though the elements are integers.
12133 (define_insn "vec_set_hi_v16hi"
12134 [(set (match_operand:V16HI 0 "register_operand" "=x")
12137 (match_operand:V16HI 1 "register_operand" "x")
12138 (parallel [(const_int 0) (const_int 1)
12139 (const_int 2) (const_int 3)
12140 (const_int 4) (const_int 5)
12141 (const_int 6) (const_int 7)]))
12142 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12144 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12145 [(set_attr "type" "sselog")
12146 (set_attr "prefix_extra" "1")
12147 (set_attr "length_immediate" "1")
12148 (set_attr "prefix" "vex")
12149 (set_attr "mode" "V8SF")])
;; V32QI form of vec_set_lo, written out without a mode iterator.
;; Operand 2 is a V16QI register or memory value; it is paired with
;; elements 16 through 31 of the V32QI register operand 1 to form the
;; V32QI destination.  Emitted as vinsertf128 with immediate 0; the
;; "mode" attribute is V8SF even though the elements are integers.
12151 (define_insn "vec_set_lo_v32qi"
12152 [(set (match_operand:V32QI 0 "register_operand" "=x")
12154 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12156 (match_operand:V32QI 1 "register_operand" "x")
12157 (parallel [(const_int 16) (const_int 17)
12158 (const_int 18) (const_int 19)
12159 (const_int 20) (const_int 21)
12160 (const_int 22) (const_int 23)
12161 (const_int 24) (const_int 25)
12162 (const_int 26) (const_int 27)
12163 (const_int 28) (const_int 29)
12164 (const_int 30) (const_int 31)]))))]
12166 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12167 [(set_attr "type" "sselog")
12168 (set_attr "prefix_extra" "1")
12169 (set_attr "length_immediate" "1")
12170 (set_attr "prefix" "vex")
12171 (set_attr "mode" "V8SF")])
;; V32QI form of vec_set_hi, written out without a mode iterator.
;; Elements 0 through 15 of the V32QI register operand 1 are paired with
;; operand 2, a V16QI register or memory value, to form the V32QI
;; destination.  Emitted as vinsertf128 with immediate 1; the "mode"
;; attribute is V8SF even though the elements are integers.
12173 (define_insn "vec_set_hi_v32qi"
12174 [(set (match_operand:V32QI 0 "register_operand" "=x")
12177 (match_operand:V32QI 1 "register_operand" "x")
12178 (parallel [(const_int 0) (const_int 1)
12179 (const_int 2) (const_int 3)
12180 (const_int 4) (const_int 5)
12181 (const_int 6) (const_int 7)
12182 (const_int 8) (const_int 9)
12183 (const_int 10) (const_int 11)
12184 (const_int 12) (const_int 13)
12185 (const_int 14) (const_int 15)]))
12186 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12188 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12189 [(set_attr "type" "sselog")
12190 (set_attr "prefix_extra" "1")
12191 (set_attr "length_immediate" "1")
12192 (set_attr "prefix" "vex")
12193 (set_attr "mode" "V8SF")])
;; vmaskmovp<avxmodesuffixf2c> load form.  Operand 0 is the register
;; destination, operand 1 is a memory source and operand 2 is a register,
;; all in the same AVXMODEF2P mode.
;; NOTE(review): operand 2 is presumably the mask register, since it is
;; printed between the memory source and the destination -- confirm
;; against the instruction reference.
12195 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12196 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12198 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12199 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12203 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
12204 [(set_attr "type" "sselog1")
12205 (set_attr "prefix_extra" "1")
12206 (set_attr "prefix" "vex")
12207 (set_attr "mode" "<MODE>")])
;; vmaskmovp<avxmodesuffixf2c> store form, modelled with
;; UNSPEC_MASKSTORE.  Operand 0 is the memory destination; operands 1 and
;; 2 are registers of the same AVXMODEF2P mode.
;; NOTE(review): from the AT&T operand order "%2, %1, %0", operand 1 is
;; presumably the mask and operand 2 the data to store -- confirm against
;; the instruction reference.
12209 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12210 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12212 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12213 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12215 UNSPEC_MASKSTORE))]
12217 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12218 [(set_attr "type" "sselog1")
12219 (set_attr "prefix_extra" "1")
12220 (set_attr "prefix" "vex")
12221 (set_attr "mode" "<MODE>")])
;; Unspec that produces an AVX256MODE2P register (operand 0) from an
;; <avxhalfvecmode> operand 1.  There are two alternatives: operand 1
;; tied to operand 0 ("0"), or operand 1 in a register or memory ("xm").
;; The output code switches on which_alternative and, for the move case,
;; on get_attr_mode (insn) to pick vmovaps, vmovapd or vmovdqa, writing
;; the destination through the %x0 operand modifier.  The "length"
;; attribute is overridden for alternative 0 and left at "*" otherwise.
;; NOTE(review): alternative 0 presumably emits no instruction because
;; source and destination are the same register -- confirm.
12223 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12224 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
12225 (unspec:AVX256MODE2P
12226 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
12230 switch (which_alternative)
12235 switch (get_attr_mode (insn))
12238 return "vmovaps\t{%1, %x0|%x0, %1}";
12240 return "vmovapd\t{%1, %x0|%x0, %1}";
12242 return "vmovdqa\t{%1, %x0|%x0, %1}";
12249 gcc_unreachable ();
12251 [(set_attr "type" "ssemov")
12252 (set_attr "prefix" "vex")
12253 (set_attr "mode" "<avxvecmode>")
12254 (set (attr "length")
12255 (if_then_else (eq_attr "alternative" "0")
12257 (const_string "*")))])
;; Unspec that produces an <avxhalfvecmode> register (operand 0) from an
;; AVX256MODE2P operand 1.  There are two alternatives: operand 1 tied to
;; operand 0 ("0"), or operand 1 in a register or memory ("xm").  The
;; output code switches on which_alternative and, for the move case, on
;; get_attr_mode (insn) to pick vmovaps, vmovapd or vmovdqa, reading the
;; source through the %x1 operand modifier.  The "length" attribute is
;; overridden for alternative 0 and left at "*" otherwise.
;; NOTE(review): alternative 0 presumably emits no instruction because
;; source and destination are the same register -- confirm.
12259 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
12260 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
12261 (unspec:<avxhalfvecmode>
12262 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
12266 switch (which_alternative)
12271 switch (get_attr_mode (insn))
12274 return "vmovaps\t{%x1, %0|%0, %x1}";
12276 return "vmovapd\t{%x1, %0|%0, %x1}";
12278 return "vmovdqa\t{%x1, %0|%0, %x1}";
12285 gcc_unreachable ();
12287 [(set_attr "type" "ssemov")
12288 (set_attr "prefix" "vex")
12289 (set_attr "mode" "<avxvecmode>")
12290 (set (attr "length")
12291 (if_then_else (eq_attr "alternative" "0")
12293 (const_string "*")))])
;; vec_init expander for the 32-byte AVX256MODE vector modes.  Operand 0
;; is the destination register; operand 1 has no predicate or mode and is
;; passed through unchanged.  All the work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
12295 (define_expand "vec_init<mode>"
12296 [(match_operand:AVX256MODE 0 "register_operand" "")
12297 (match_operand 1 "" "")]
12300 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two <avxhalfvecmode> values into an AVX256MODE register.
;; Operand 1 is always a register.  Operand 2 has two alternatives:
;; register or memory ("xm"), or the "C" constraint.  Per the attribute
;; lists, the first alternative is an sselog insn with an immediate
;; (the vinsertf128 template, which inserts operand 2 with immediate 1
;; and prints operand 1 through the %t1 modifier), and the second is an
;; ssemov insn that moves operand 1 into %x0 with vmovaps, vmovapd or
;; vmovdqa chosen by get_attr_mode (insn).
;; NOTE(review): "C" is presumably the zero-constant constraint, with the
;; move relying on the upper half of the destination being cleared --
;; confirm.
12304 (define_insn "*vec_concat<mode>_avx"
12305 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12306 (vec_concat:AVX256MODE
12307 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12308 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12311 switch (which_alternative)
12314 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12316 switch (get_attr_mode (insn))
12319 return "vmovaps\t{%1, %x0|%x0, %1}";
12321 return "vmovapd\t{%1, %x0|%x0, %1}";
12323 return "vmovdqa\t{%1, %x0|%x0, %1}";
12326 gcc_unreachable ();
12329 [(set_attr "type" "sselog,ssemov")
12330 (set_attr "prefix_extra" "1,*")
12331 (set_attr "length_immediate" "1,*")
12332 (set_attr "prefix" "vex")
12333 (set_attr "mode" "<avxvecmode>")])