1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Code attribute `extsuffix': substitutes "sx" when a pattern is
;; instantiated for sign_extend and "zx" when instantiated for zero_extend.
22 ;; Instruction suffix for sign and zero extensions.
23 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
;; Mode iterators over the 128-bit vector modes.  SSEMODEI lists the four
;; integer vector modes, SSEMODE adds the two float vector modes (V4SF,
;; V2DF), and SSEMODE16 is the SSEMODE list plus V1TI.
25 ;; 16 byte integral modes handled by SSE
26 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
28 ;; All 16-byte vector modes handled by SSE
29 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
30 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
;; Mode iterators for AVX.  The AVX256* iterators list only 256-bit modes;
;; AVXMODEQI and AVXMODEDI pair the 256-bit and 128-bit vector of one
;; element type; AVXMODE is the union of the 128-bit and 256-bit lists and
;; AVXMODE16 is that union plus V1TI.
32 ;; 32 byte integral vector modes handled by AVX
33 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
35 ;; All 32-byte vector modes handled by AVX
36 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
38 ;; All QI vector modes handled by AVX
39 (define_mode_iterator AVXMODEQI [V32QI V16QI])
41 ;; All DI vector modes handled by AVX
42 (define_mode_iterator AVXMODEDI [V4DI V2DI])
44 ;; All vector modes handled by AVX
45 (define_mode_iterator AVXMODE
46 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
47 (define_mode_iterator AVXMODE16
48 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 128-bit modes.  In the SSEMODE<digits> iterators each
;; digit matches the element size in bytes of one listed mode (1 = QI,
;; 2 = HI, 4 = SI, 8 = DI).  SSEMODEF4 mixes scalar (SF, DF) and 128-bit
;; vector float modes, FMA4MODEF4 lists the 256-bit float modes, and
;; SSEMODEF2P lists the 128-bit float vector modes.
51 (define_mode_iterator SSEMODE12 [V16QI V8HI])
52 (define_mode_iterator SSEMODE24 [V8HI V4SI])
53 (define_mode_iterator SSEMODE14 [V16QI V4SI])
54 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
55 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
56 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
57 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
58 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
59 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; AVX subsets.  AVX256MODE*P iterators list 256-bit modes only (float
;; and/or integer, as spelled out in each list); AVXMODEF* iterators list
;; float vector modes across both 128-bit and 256-bit widths.
;; AVXMODEDCVTDQ2PS has the same member list as AVXMODEFSP, and
;; AVXMODEDCVTPS2DQ lists the SI vectors of both widths.
61 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
62 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
63 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
64 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
65 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
66 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
67 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
68 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
69 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
70 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
71 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
;; Each iterator pairs a float vector mode with the integer vector mode
;; that has the same element count and element width.
73 ;; Int-float size matches
74 (define_mode_iterator SSEMODE4S [V4SF V4SI])
75 (define_mode_iterator SSEMODE2D [V2DF V2DI])
;; V16QI, V8HI and V4SI are always members; V2DI is a member only when
;; the condition "TARGET_SSE4_2" holds.
77 ;; Modes handled by integer vcond pattern
78 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
79 (V2DI "TARGET_SSE4_2")])
;; Conditional mode list: each visible entry pairs a mode with the target
;; flag that must be enabled for that mode to be iterated over.
;; NOTE(review): the opening bracket of the list (and any entries on the
;; lines between the iterator name and the V2DI row) is not visible in
;; this excerpt -- confirm against the full file.
81 ;; Modes handled by vec_extract_even/odd pattern.
82 (define_mode_iterator SSEMODE_EO
85 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
86 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
87 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
;; Conditional mode list for the storent<mode> expander: scalar SF/DF
;; require TARGET_SSE4A, SI/V2DI/V2DF require TARGET_SSE2, and the 256-bit
;; float modes require TARGET_AVX.
;; NOTE(review): the embedded source numbering skips a line between the
;; TARGET_SSE2 row and the TARGET_AVX row, so an entry row may be missing
;; from this excerpt -- confirm against the full file.
89 ;; Modes handled by storent patterns.
90 (define_mode_iterator STORENT_MODE
91 [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
92 (SI "TARGET_SSE2") (V2DI "TARGET_SSE2") (V2DF "TARGET_SSE2")
94 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
;; Float vector modes with per-mode enabling conditions: V4SF needs only
;; TARGET_SSE, V2DF needs TARGET_SSE2, and both 256-bit modes need
;; TARGET_AVX.
96 ;; Modes handled by vector float patterns.
97 (define_mode_iterator VEC_FLOAT_MODE
98 [(V2DF "TARGET_SSE2") (V4SF "TARGET_SSE")
99 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
;; All six 128-bit modes listed here are enabled under TARGET_SSE; the two
;; 256-bit float modes are enabled under TARGET_AVX.
101 ;; Modes handled by vector extract patterns.
102 (define_mode_iterator VEC_EXTRACT_MODE
103 [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE")
104 (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")
105 (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
106 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
;; Used as <sse> in pattern names below (e.g. "<sse>_movu<ssemodesuffix>"):
;; single-precision modes map to "sse", double-precision modes to "sse2".
108 ;; Mapping from float mode to required SSE level
109 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
;; One-letter element-size suffix per 128-bit integer vector mode:
;; b (QI), w (HI), d (SI), q (DI).
111 ;; Mapping from integer vector mode to mnemonic suffix
112 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
;; ssemodesuffix gives the packed/scalar mnemonic suffix for a mode; note
;; that the integer modes V8SI and V4DI also map to "ps" and "pd".
;; ssescalarmodesuffix maps every listed float mode, vector or scalar, to
;; the scalar suffix ("ss" or "sd").
;; NOTE(review): the closing of the ssescalarmodesuffix list is not
;; visible in this excerpt; further entries may follow -- confirm against
;; the full file.
114 ;; Mapping of the insn mnemonic suffix
115 (define_mode_attr ssemodesuffix
116 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
117 (V8SI "ps") (V4DI "pd")])
118 (define_mode_attr ssescalarmodesuffix
119 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V4DF "sd")
;; Each value is the element width in bits minus one (7, 15, 31, 63).
122 ;; Mapping of the max integer size for xop rotate immediate constraint
123 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
;; Element (scalar) mode of each 128-bit vector mode.  Only 128-bit modes
;; are listed here; avxscalarmode is the attribute that also covers the
;; 256-bit modes.
125 ;; Mapping of vector modes back to the scalar modes
126 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
127 (V16QI "QI") (V8HI "HI")
128 (V4SI "SI") (V2DI "DI")])
;; Same element type, twice the element count: 128-bit modes map to their
;; 256-bit counterparts and 256-bit modes map to 512-bit mode names.
130 ;; Mapping of vector modes to a vector mode of double size
131 (define_mode_attr ssedoublesizemode
132 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
133 (V8HI "V16HI") (V16QI "V32QI")
134 (V4DF "V8DF") (V8SF "V16SF")
135 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
;; Element count as a decimal string, for both 128-bit and 256-bit modes.
137 ;; Number of scalar elements in each vector type
138 (define_mode_attr ssescalarnum
139 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
140 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Mode attributes used by the AVX patterns:
;;   avxvecmode     - value for the insn "mode" attribute: TI for 128-bit
;;                    integer vectors, OI for 256-bit integer vectors, and
;;                    the mode itself for float vectors.
;;   avxvecpsmode   - single-precision float vector of the same total size
;;                    as the given integer vector mode.
;;   avxhalfvecmode - vector of the same element type with half as many
;;                    elements.
;;   avxscalarmode  - element mode, for 128-bit and 256-bit vectors.
143 (define_mode_attr avxvecmode
144 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
145 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
146 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
147 (define_mode_attr avxvecpsmode
148 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
149 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
150 (define_mode_attr avxhalfvecmode
151 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
152 (V8SF "V4SF") (V4DF "V2DF")
153 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
154 (define_mode_attr avxscalarmode
155 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
156 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; More AVX mode attributes:
;;   avxcvtvecmode  - swaps between the SI and SF vector of the same shape.
;;   avxpermvecmode - integer vector with the same shape as a float vector.
;;   avxmodesuffixp - "pd"/"ps"/"si" name suffix per mode.
;;   avxmodesuffix  - "" for 128-bit modes, "256" for 256-bit modes; used
;;                    in pattern names such as "avx_movdqu<avxmodesuffix>".
;; NOTE(review): the closing of the avxmodesuffixp list is not visible in
;; this excerpt; further entries may follow -- confirm against the full
;; file.
157 (define_mode_attr avxcvtvecmode
158 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
159 (define_mode_attr avxpermvecmode
160 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
161 (define_mode_attr avxmodesuffixp
162 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
164 (define_mode_attr avxmodesuffix
165 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
166 (V8SI "256") (V8SF "256") (V4DF "256")])
;; Each value equals 2^N - 1 for an N-element vector (255 for 8 elements,
;; 15 for 4, 3 for 2), i.e. a mask with one bit per element.
168 ;; Mapping of immediate bits for blend instructions
169 (define_mode_attr blendbits
170 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Each value equals 2^(N-1) for an N-element vector (32768 for 16
;; elements, 128 for 8, 8 for 4), i.e. the bit of the highest element.
172 ;; Mapping of immediate bits for pinsr instructions
173 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
175 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
177 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for whole-vector moves in the 256-bit AVX256MODE modes.  The
;; visible preparation code passes the mode and the operand array to
;; ix86_expand_vector_move.
;; NOTE(review): the enabling condition and the delimiters/terminator of
;; the C body are not visible in this excerpt -- confirm against the full
;; file.
183 (define_expand "mov<mode>"
184 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
185 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
188 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX vector move insn for every AVXMODE16 mode.  Three alternatives:
;;   0: constant (constraint C)     -> SSE register
;;   1: SSE register or memory (xm) -> SSE register
;;   2: SSE register                -> memory
;; The visible part of the condition requires at least one operand to be a
;; register.  The output code switches on which_alternative: the constant
;; alternative returns standard_sse_constant_opcode (); the others pick
;; vmovaps, vmovapd or vmovdqa from get_attr_mode (insn), substituting
;; vmovaps when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;; Attributes: type is sselog1 for alternative 0 and ssemov otherwise,
;; prefix is vex, and mode comes from <avxvecmode>.
;; NOTE(review): the first line of the condition, the case labels and the
;; braces of the C body are not visible in this excerpt -- confirm against
;; the full file.
192 (define_insn "*avx_mov<mode>_internal"
193 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
194 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
196 && (register_operand (operands[0], <MODE>mode)
197 || register_operand (operands[1], <MODE>mode))"
199 switch (which_alternative)
202 return standard_sse_constant_opcode (insn, operands[1]);
205 switch (get_attr_mode (insn))
209 return "vmovaps\t{%1, %0|%0, %1}";
212 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
213 return "vmovaps\t{%1, %0|%0, %1}";
215 return "vmovapd\t{%1, %0|%0, %1}";
217 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
218 return "vmovaps\t{%1, %0|%0, %1}";
220 return "vmovdqa\t{%1, %0|%0, %1}";
226 [(set_attr "type" "sselog1,ssemov,ssemov")
227 (set_attr "prefix" "vex")
228 (set_attr "mode" "<avxvecmode>")])
;; Expander for whole-vector moves in the 128-bit SSEMODE16 modes.  The
;; visible preparation code passes the mode and the operand array to
;; ix86_expand_vector_move.
;; NOTE(review): the enabling condition and the delimiters/terminator of
;; the C body are not visible in this excerpt -- confirm against the full
;; file.
230 ;; All of these patterns are enabled for SSE1 as well as SSE2.
231 ;; This is essential for maintaining stable calling conventions.
233 (define_expand "mov<mode>"
234 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
235 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
238 ix86_expand_vector_move (<MODE>mode, operands);
;; Non-VEX vector move insn for every SSEMODE16 mode, with the same three
;; alternatives as the AVX variant (constant -> reg, reg/mem -> reg,
;; reg -> mem) and the same visible requirement that at least one operand
;; is a register.  The output code returns standard_sse_constant_opcode ()
;; for the constant alternative and otherwise movaps, movapd or movdqa
;; according to get_attr_mode (insn), substituting movaps when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;; The "mode" attribute is computed by the visible cond:
;;   - V4SF if optimizing the function for size, or TARGET_SSE2 is off, or
;;     this is alternative 2 and TARGET_SSE_TYPELESS_STORES is set;
;;   - otherwise V4SF for V4SFmode, V2DF for V2DFmode, and TI for the rest.
;; NOTE(review): the first line of the condition, the case labels, the
;; braces of the C body and a few lines of the attribute expression are
;; not visible in this excerpt -- confirm against the full file.
242 (define_insn "*mov<mode>_internal"
243 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
244 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
246 && (register_operand (operands[0], <MODE>mode)
247 || register_operand (operands[1], <MODE>mode))"
249 switch (which_alternative)
252 return standard_sse_constant_opcode (insn, operands[1]);
255 switch (get_attr_mode (insn))
258 return "movaps\t{%1, %0|%0, %1}";
260 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
261 return "movaps\t{%1, %0|%0, %1}";
263 return "movapd\t{%1, %0|%0, %1}";
265 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
266 return "movaps\t{%1, %0|%0, %1}";
268 return "movdqa\t{%1, %0|%0, %1}";
274 [(set_attr "type" "sselog1,ssemov,ssemov")
276 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
277 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
278 (and (eq_attr "alternative" "2")
279 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
281 (const_string "V4SF")
282 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
283 (const_string "V4SF")
284 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
285 (const_string "V2DF")
287 (const_string "TI")))])
289 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
290 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
291 ;; from memory, we'd prefer to load the memory directly into the %xmm
292 ;; register. To facilitate this happy circumstance, this pattern won't
293 ;; split until after register allocation. If the 64-bit value didn't
294 ;; come from memory, this is the best we can do. This is much better
295 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;;
;; Pattern details, as visible here:
;;   - Enabled for "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
;;     and split once reload_completed is also true.
;;   - Alternative 0 takes the DI value in general registers ("r") and
;;     needs an early-clobbered SSE scratch (operand 2, "=&x");
;;     alternative 1 takes it from memory ("m") and needs no scratch ("X").
;;   - Register case: the SImode subreg at byte 0 is loaded into operand 0
;;     and the subreg at byte 4 into the scratch, both with sse2_loadld
;;     against a zero vector, then the two are combined with
;;     vec_interleave_lowv4si.
;;   - Memory case: a single vec_concatv2di of the DI operand and
;;     const0_rtx, written through the V2DI lowpart of operand 0.
;; NOTE(review): the line after the insn name, the template string, the
;; braces of the split body and the tail of the interleave call are not
;; visible in this excerpt -- confirm against the full file.
298 (define_insn_and_split "movdi_to_sse"
300 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
301 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
302 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
303 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
305 "&& reload_completed"
308 if (register_operand (operands[1], DImode))
310 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
311 Assemble the 64-bit DImode value in an xmm register. */
312 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
313 gen_rtx_SUBREG (SImode, operands[1], 0)));
314 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
315 gen_rtx_SUBREG (SImode, operands[1], 4)));
316 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
319 else if (memory_operand (operands[1], DImode))
320 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
321 operands[1], const0_rtx));
;; Post-reload rewrite (condition "TARGET_SSE && reload_completed") of a
;; V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to its SFmode subreg at byte 0 and sets operand 2 to the
;; V4SF zero vector; the visible part of the replacement duplicates the
;; scalar with vec_duplicate:V4SF.
;; NOTE(review): the definition's opening line (presumably a define_split)
;; and most of the replacement pattern are not visible in this excerpt --
;; confirm against the full file.
327 [(set (match_operand:V4SF 0 "register_operand" "")
328 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
329 "TARGET_SSE && reload_completed"
332 (vec_duplicate:V4SF (match_dup 1))
336 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
337 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite (condition "TARGET_SSE2 && reload_completed") of a
;; V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its DFmode
;; subreg at byte 0, operand 2 becomes the DFmode zero constant, and the
;; replacement is vec_concat:V2DF of the two.
;; NOTE(review): the definition's opening line (presumably a define_split)
;; and the braces of the preparation code are not visible in this excerpt
;; -- confirm against the full file.
341 [(set (match_operand:V2DF 0 "register_operand" "")
342 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
343 "TARGET_SSE2 && reload_completed"
344 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
346 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
347 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a 256-bit register operand; the visible body calls
;; ix86_expand_push with the mode and operand 0.
;; NOTE(review): the enabling condition and the body delimiters are not
;; visible in this excerpt -- confirm against the full file.
350 (define_expand "push<mode>1"
351 [(match_operand:AVX256MODE 0 "register_operand" "")]
354 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a 128-bit register operand; the visible body calls
;; ix86_expand_push with the mode and operand 0.
;; NOTE(review): the enabling condition and the body delimiters are not
;; visible in this excerpt -- confirm against the full file.
358 (define_expand "push<mode>1"
359 [(match_operand:SSEMODE16 0 "register_operand" "")]
362 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the 256-bit modes; the visible body hands
;; the mode and operands to ix86_expand_vector_move_misalign.
;; NOTE(review): the enabling condition and the body delimiters are not
;; visible in this excerpt -- confirm against the full file.
366 (define_expand "movmisalign<mode>"
367 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
368 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
371 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the 128-bit modes; the visible body hands
;; the mode and operands to ix86_expand_vector_move_misalign.
;; NOTE(review): the enabling condition and the body delimiters are not
;; visible in this excerpt -- confirm against the full file.
375 (define_expand "movmisalign<mode>"
376 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
377 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
380 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; AVX unaligned float-vector move, emitted as vmovu<ssemodesuffix>.
;; Alternatives: register/memory -> register, and register -> memory; the
;; condition additionally rejects memory-to-memory.  Sets the "movu"
;; attribute to 1 and uses the VEX prefix.
;; NOTE(review): the RTL wrapper around operand 1 (the bracketed operand
;; list suggests an unspec) is not visible in this excerpt -- confirm.
384 (define_insn "avx_movu<ssemodesuffix><avxmodesuffix>"
385 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
387 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
389 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
390 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
391 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
392 [(set_attr "type" "ssemov")
393 (set_attr "movu" "1")
394 (set_attr "prefix" "vex")
395 (set_attr "mode" "<MODE>")])
;; Emits movq (template "%vmovq", prefix attribute maybe_vex) from a V2DI
;; register or memory source into a V2DI register.  The visible RTL
;; selects element 0 of operand 1 via (parallel [(const_int 0)]).
;; NOTE(review): the outer RTL that wraps the selected element, and the
;; insn condition, are not visible in this excerpt -- confirm.
397 (define_insn "sse2_movq128"
398 [(set (match_operand:V2DI 0 "register_operand" "=x")
401 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
402 (parallel [(const_int 0)]))
405 "%vmovq\t{%1, %0|%0, %1}"
406 [(set_attr "type" "ssemov")
407 (set_attr "prefix" "maybe_vex")
408 (set_attr "mode" "TI")])
;; Non-VEX unaligned float-vector move, emitted as movu<ssemodesuffix>.
;; Alternatives: register/memory -> register, and register -> memory; the
;; condition additionally rejects memory-to-memory.  Sets "movu" to 1.
;; NOTE(review): the RTL wrapper around operand 1 (the bracketed operand
;; list suggests an unspec) is not visible in this excerpt -- confirm.
410 (define_insn "<sse>_movu<ssemodesuffix>"
411 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
413 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
415 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
416 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
417 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
418 [(set_attr "type" "ssemov")
419 (set_attr "movu" "1")
420 (set_attr "mode" "<MODE>")])
;; AVX unaligned integer-vector move (vmovdqu) for V32QI and V16QI.
;; Enabled under TARGET_AVX when the operands are not both memory.
;; Sets "movu" to 1, uses the VEX prefix, and takes its mode attribute
;; from <avxvecmode>.
;; NOTE(review): the RTL wrapper around operand 1 (the bracketed operand
;; list suggests an unspec) is not visible in this excerpt -- confirm.
422 (define_insn "avx_movdqu<avxmodesuffix>"
423 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
425 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
427 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
428 "vmovdqu\t{%1, %0|%0, %1}"
429 [(set_attr "type" "ssemov")
430 (set_attr "movu" "1")
431 (set_attr "prefix" "vex")
432 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX unaligned V16QI move (movdqu), modelled as an unspec of
;; operand 1.  Enabled under TARGET_SSE2 when the operands are not both
;; memory.  Sets "movu" and "prefix_data16" to 1; mode attribute is TI.
;; NOTE(review): the unspec code that closes the source expression is not
;; visible in this excerpt -- confirm against the full file.
434 (define_insn "sse2_movdqu"
435 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
436 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
438 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
439 "movdqu\t{%1, %0|%0, %1}"
440 [(set_attr "type" "ssemov")
441 (set_attr "movu" "1")
442 (set_attr "prefix_data16" "1")
443 (set_attr "mode" "TI")])
;; AVX store of a float-vector register to memory, emitted as
;; vmovnt<ssemodesuffix> with the VEX prefix.  The destination must be
;; memory and the source an SSE register.
;; NOTE(review): the RTL wrapper around operand 1 (the bracketed operand
;; list suggests an unspec) is not visible in this excerpt -- confirm.
445 (define_insn "avx_movnt<mode>"
446 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
448 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
450 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
451 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
452 [(set_attr "type" "ssemov")
453 (set_attr "prefix" "vex")
454 (set_attr "mode" "<MODE>")])
;; Non-VEX store of a 128-bit float-vector register to memory, emitted as
;; movnt<ssemodesuffix>.  The destination must be memory and the source an
;; SSE register.
;; NOTE(review): the RTL wrapper around operand 1 (the bracketed operand
;; list suggests an unspec) is not visible in this excerpt -- confirm.
456 (define_insn "<sse>_movnt<mode>"
457 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
459 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
461 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
462 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
463 [(set_attr "type" "ssemov")
464 (set_attr "mode" "<MODE>")])
;; AVX store of a V4DI or V2DI register to memory, emitted as vmovntdq
;; with the VEX prefix.  Note the "type" attribute here is ssecvt, whereas
;; the other movnt patterns in this group use ssemov.
;; NOTE(review): the RTL wrapper around operand 1 and the insn condition
;; are not visible in this excerpt -- confirm against the full file.
466 (define_insn "avx_movnt<mode>"
467 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
469 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
472 "vmovntdq\t{%1, %0|%0, %1}"
473 [(set_attr "type" "ssecvt")
474 (set_attr "prefix" "vex")
475 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX store of a V2DI register to memory, emitted as movntdq and
;; modelled as an unspec of operand 1.  Sets "prefix_data16" to 1; the
;; mode attribute is TI.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt -- confirm against the full file.
477 (define_insn "sse2_movntv2di"
478 [(set (match_operand:V2DI 0 "memory_operand" "=m")
479 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
482 "movntdq\t{%1, %0|%0, %1}"
483 [(set_attr "type" "ssemov")
484 (set_attr "prefix_data16" "1")
485 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory, emitted as movnti
;; and modelled as an unspec of operand 1.  "prefix_data16" is explicitly
;; 0 and the mode attribute is V2DF even though the operands are SImode.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt -- confirm against the full file.
487 (define_insn "sse2_movntsi"
488 [(set (match_operand:SI 0 "memory_operand" "=m")
489 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
492 "movnti\t{%1, %0|%0, %1}"
493 [(set_attr "type" "ssemov")
494 (set_attr "prefix_data16" "0")
495 (set_attr "mode" "V2DF")])
;; AVX load of a V32QI or V16QI value from memory into an SSE register,
;; emitted as vlddqu with the VEX prefix.  Sets "movu" to 1; the "type"
;; attribute is ssecvt (the non-VEX sse3_lddqu pattern uses ssemov).
;; NOTE(review): the RTL wrapper around operand 1 and the insn condition
;; are not visible in this excerpt -- confirm against the full file.
497 (define_insn "avx_lddqu<avxmodesuffix>"
498 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
500 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
503 "vlddqu\t{%1, %0|%0, %1}"
504 [(set_attr "type" "ssecvt")
505 (set_attr "movu" "1")
506 (set_attr "prefix" "vex")
507 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX load of a V16QI value from memory into an SSE register,
;; emitted as lddqu and modelled as an unspec of operand 1.  Sets "movu"
;; to 1, "prefix_data16" to 0 and "prefix_rep" to 1; mode attribute is TI.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt -- confirm against the full file.
509 (define_insn "sse3_lddqu"
510 [(set (match_operand:V16QI 0 "register_operand" "=x")
511 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
514 "lddqu\t{%1, %0|%0, %1}"
515 [(set_attr "type" "ssemov")
516 (set_attr "movu" "1")
517 (set_attr "prefix_data16" "0")
518 (set_attr "prefix_rep" "1")
519 (set_attr "mode" "TI")])
521 ; Expand patterns for non-temporal stores. At the moment, only those
522 ; that directly map to insns are defined; it would be possible to
523 ; define patterns for other modes that would expand to several insns.
;
; The expander is instantiated for every mode in STORENT_MODE, whose
; per-mode conditions decide availability.  Operand 0 must be memory and
; operand 1 a register.
; NOTE(review): the RTL wrapper around operand 1 and the end of this
; definition are not visible in this excerpt -- confirm against the full
; file.
525 (define_expand "storent<mode>"
526 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
528 [(match_operand:STORENT_MODE 1 "register_operand" "")]
531 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
533 ;; Parallel floating point arithmetic
535 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the codes in the absneg iterator over every
;; VEC_FLOAT_MODE mode.  All the work is delegated to
;; ix86_expand_fp_absneg_operator, after which the expander is DONE.
;; NOTE(review): the enabling condition line is not visible in this
;; excerpt -- confirm against the full file.
537 (define_expand "<code><mode>2"
538 [(set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
539 (absneg:VEC_FLOAT_MODE
540 (match_operand:VEC_FLOAT_MODE 1 "register_operand" "")))]
542 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander for the 256-bit float modes, enabled when
;; AVX256_VEC_FLOAT_MODE_P holds.  Its preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands; there is no DONE,
;; so the RTL pattern above is what gets emitted.
544 (define_expand "<plusminus_insn><mode>3"
545 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
546 (plusminus:AVX256MODEF2P
547 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
548 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
549 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
550 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed float add/subtract for 128-bit and 256-bit modes, in the
;; three-operand form: operand 1 is a register, operand 2 a register or
;; memory, and the destination is a separate register.  <comm> supplies
;; the operand-1 constraint modifier for the current code.  Also requires
;; ix86_binary_operator_ok.
552 (define_insn "*avx_<plusminus_insn><mode>3"
553 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
554 (plusminus:AVXMODEF2P
555 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
556 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
557 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
558 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
559 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
560 [(set_attr "type" "sseadd")
561 (set_attr "prefix" "vex")
562 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for the 128-bit float modes, enabled when
;; SSE_VEC_FLOAT_MODE_P holds.  Its preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands; there is no DONE,
;; so the RTL pattern above is what gets emitted.
564 (define_expand "<plusminus_insn><mode>3"
565 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
566 (plusminus:SSEMODEF2P
567 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
568 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
569 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
570 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-VEX packed float add/subtract in the two-operand form: operand 1
;; is tied to the destination (constraint "0"), so the template names
;; only %2 and %0.  Also requires ix86_binary_operator_ok.
572 (define_insn "*<plusminus_insn><mode>3"
573 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
574 (plusminus:SSEMODEF2P
575 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
576 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
577 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
578 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
579 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
580 [(set_attr "type" "sseadd")
581 (set_attr "mode" "<MODE>")])
;; AVX add/subtract on a 128-bit float vector, expressed as a vec_merge
;; whose first arm is the full-vector plusminus and emitted with the
;; scalar suffix (<ssescalarmodesuffix>) in three-operand form.  The mode
;; attribute is the scalar mode.
;; NOTE(review): the remaining vec_merge arms (second source and mask)
;; are not visible in this excerpt -- confirm against the full file.
583 (define_insn "*avx_vm<plusminus_insn><mode>3"
584 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
585 (vec_merge:SSEMODEF2P
586 (plusminus:SSEMODEF2P
587 (match_operand:SSEMODEF2P 1 "register_operand" "x")
588 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
591 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
592 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
593 [(set_attr "type" "sseadd")
594 (set_attr "prefix" "vex")
595 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX counterpart of the vec_merge add/subtract pattern: operand 1 is
;; tied to the destination ("0") and the template is the two-operand
;; scalar-suffix form.  The mode attribute is the scalar mode.
;; NOTE(review): the remaining vec_merge arms (second source and mask)
;; are not visible in this excerpt -- confirm against the full file.
597 (define_insn "<sse>_vm<plusminus_insn><mode>3"
598 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
599 (vec_merge:SSEMODEF2P
600 (plusminus:SSEMODEF2P
601 (match_operand:SSEMODEF2P 1 "register_operand" "0")
602 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
605 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
606 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
607 [(set_attr "type" "sseadd")
608 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 256-bit float modes, enabled when
;; AVX256_VEC_FLOAT_MODE_P holds; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code MULT.
;; NOTE(review): the line opening the arithmetic RTL expression around
;; operands 1 and 2 is not visible in this excerpt -- confirm.
610 (define_expand "mul<mode>3"
611 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
613 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
614 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
615 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
616 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; AVX packed float multiply (vmul<ssemodesuffix>) in three-operand form.
;; Operand 1 carries the "%" constraint modifier, marking operands 1 and 2
;; as commutative.  Also requires ix86_binary_operator_ok for MULT.
;; NOTE(review): the line opening the arithmetic RTL expression around
;; operands 1 and 2 is not visible in this excerpt -- confirm.
618 (define_insn "*avx_mul<mode>3"
619 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
621 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
622 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
623 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
624 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
625 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
626 [(set_attr "type" "ssemul")
627 (set_attr "prefix" "vex")
628 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 128-bit float modes, enabled when
;; SSE_VEC_FLOAT_MODE_P holds; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code MULT.
;; NOTE(review): the line opening the arithmetic RTL expression around
;; operands 1 and 2 is not visible in this excerpt -- confirm.
630 (define_expand "mul<mode>3"
631 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
633 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
634 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
635 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
636 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Non-VEX packed float multiply (mul<ssemodesuffix>) in two-operand form:
;; operand 1 is tied to the destination and marked commutative ("%0").
;; Also requires ix86_binary_operator_ok for MULT.
;; NOTE(review): the line opening the arithmetic RTL expression around
;; operands 1 and 2 is not visible in this excerpt -- confirm.
638 (define_insn "*mul<mode>3"
639 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
641 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
642 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
643 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
644 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
645 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
646 [(set_attr "type" "ssemul")
647 (set_attr "mode" "<MODE>")])
;; AVX multiply expressed as a vec_merge and emitted with the scalar
;; suffix (vmul<ssescalarmodesuffix>) in three-operand form; the mode
;; attribute is the scalar mode.
;; NOTE(review): this pattern is conditioned on AVX_VEC_FLOAT_MODE_P while
;; the sibling *avx_vm add/sub and div patterns use
;; AVX128_VEC_FLOAT_MODE_P -- confirm the difference is intended.
;; NOTE(review): the arithmetic RTL opener and the remaining vec_merge
;; arms are not visible in this excerpt -- confirm against the full file.
649 (define_insn "*avx_vmmul<mode>3"
650 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
651 (vec_merge:SSEMODEF2P
653 (match_operand:SSEMODEF2P 1 "register_operand" "x")
654 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
657 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
658 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
659 [(set_attr "type" "ssemul")
660 (set_attr "prefix" "vex")
661 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX multiply expressed as a vec_merge and emitted with the scalar
;; suffix (mul<ssescalarmodesuffix>); operand 1 is tied to the
;; destination.  The mode attribute is the scalar mode.
;; NOTE(review): the arithmetic RTL opener and the remaining vec_merge
;; arms are not visible in this excerpt -- confirm against the full file.
663 (define_insn "<sse>_vmmul<mode>3"
664 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
665 (vec_merge:SSEMODEF2P
667 (match_operand:SSEMODEF2P 1 "register_operand" "0")
668 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
671 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
672 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
673 [(set_attr "type" "ssemul")
674 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  It first calls
;; ix86_fixup_binary_operands_no_copy for DIV.  Then, when TARGET_SSE_MATH
;; and TARGET_RECIP are on, the insn is not being optimized for size, and
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations all hold, it calls ix86_emit_swdivsf
;; instead of using the div pattern directly.
;; NOTE(review): the enabling condition, the braces and what follows the
;; ix86_emit_swdivsf call are not visible in this excerpt -- confirm.
676 (define_expand "divv8sf3"
677 [(set (match_operand:V8SF 0 "register_operand" "")
678 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
679 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
682 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
684 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
685 && flag_finite_math_only && !flag_trapping_math
686 && flag_unsafe_math_optimizations)
688 ix86_emit_swdivsf (operands[0], operands[1],
689 operands[2], V8SFmode);
;; V4DF division expander; the only preparation is a call to
;; ix86_fixup_binary_operands_no_copy for DIV.
;; NOTE(review): the enabling condition line is not visible in this
;; excerpt -- confirm against the full file.
694 (define_expand "divv4df3"
695 [(set (match_operand:V4DF 0 "register_operand" "")
696 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
697 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
699 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; AVX packed float divide (vdiv<ssemodesuffix>) for every AVXMODEF2P
;; mode, in three-operand form; operand 1 must be a register and
;; operand 2 may be a register or memory.
;; NOTE(review): the line opening the arithmetic RTL expression around
;; operands 1 and 2 is not visible in this excerpt -- confirm.
701 (define_insn "avx_div<mode>3"
702 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
704 (match_operand:AVXMODEF2P 1 "register_operand" "x")
705 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
706 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
707 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
708 [(set_attr "type" "ssediv")
709 (set_attr "prefix" "vex")
710 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  When TARGET_SSE_MATH and TARGET_RECIP are on,
;; the insn is being optimized for speed, and flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations all hold, it
;; calls ix86_emit_swdivsf instead of using the div pattern directly.
;; Unlike divv8sf3, no operand-fixup call is visible here, and the guard
;; is spelled optimize_insn_for_speed_p () rather than
;; !optimize_insn_for_size_p ().
;; NOTE(review): the enabling condition, the braces and what follows the
;; ix86_emit_swdivsf call are not visible in this excerpt -- confirm.
712 (define_expand "divv4sf3"
713 [(set (match_operand:V4SF 0 "register_operand" "")
714 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
715 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
718 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
719 && flag_finite_math_only && !flag_trapping_math
720 && flag_unsafe_math_optimizations)
722 ix86_emit_swdivsf (operands[0], operands[1],
723 operands[2], V4SFmode);
;; V2DF division expander; only the RTL pattern is visible.
;; NOTE(review): the enabling condition and the rest of the definition
;; are not visible in this excerpt -- confirm against the full file.
728 (define_expand "divv2df3"
729 [(set (match_operand:V2DF 0 "register_operand" "")
730 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
731 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; AVX packed float divide restricted to the 128-bit modes (SSEMODEF2P
;; under AVX128_VEC_FLOAT_MODE_P), in three-operand form.
;; NOTE(review): the line opening the arithmetic RTL expression around
;; operands 1 and 2 is not visible in this excerpt -- confirm.
734 (define_insn "*avx_div<mode>3"
735 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
737 (match_operand:SSEMODEF2P 1 "register_operand" "x")
738 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
739 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
740 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
741 [(set_attr "type" "ssediv")
742 (set_attr "prefix" "vex")
743 (set_attr "mode" "<MODE>")])
;; Non-VEX packed float divide (div<ssemodesuffix>) in two-operand form;
;; operand 1 is tied to the destination.
;; NOTE(review): the line opening the arithmetic RTL expression around
;; operands 1 and 2 is not visible in this excerpt -- confirm.
745 (define_insn "<sse>_div<mode>3"
746 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
748 (match_operand:SSEMODEF2P 1 "register_operand" "0")
749 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
750 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
751 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
752 [(set_attr "type" "ssediv")
753 (set_attr "mode" "<MODE>")])
;; AVX divide expressed as a vec_merge and emitted with the scalar suffix
;; (vdiv<ssescalarmodesuffix>) in three-operand form; the mode attribute
;; is the scalar mode.
;; NOTE(review): the arithmetic RTL opener and the remaining vec_merge
;; arms are not visible in this excerpt -- confirm against the full file.
755 (define_insn "*avx_vmdiv<mode>3"
756 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
757 (vec_merge:SSEMODEF2P
759 (match_operand:SSEMODEF2P 1 "register_operand" "x")
760 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
763 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
764 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
765 [(set_attr "type" "ssediv")
766 (set_attr "prefix" "vex")
767 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX divide expressed as a vec_merge and emitted with the scalar
;; suffix (div<ssescalarmodesuffix>); operand 1 is tied to the
;; destination.  The mode attribute is the scalar mode.
;; NOTE(review): the arithmetic RTL opener and the remaining vec_merge
;; arms are not visible in this excerpt -- confirm against the full file.
769 (define_insn "<sse>_vmdiv<mode>3"
770 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
771 (vec_merge:SSEMODEF2P
773 (match_operand:SSEMODEF2P 1 "register_operand" "0")
774 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
777 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
778 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
779 [(set_attr "type" "ssediv")
780 (set_attr "mode" "<ssescalarmode>")])
;; V8SF pattern tagged UNSPEC_RCP, emitted as vrcpps with the VEX prefix;
;; the source may be a register or memory.
;; NOTE(review): the unspec opener line and the insn condition are not
;; visible in this excerpt -- confirm against the full file.
782 (define_insn "avx_rcpv8sf2"
783 [(set (match_operand:V8SF 0 "register_operand" "=x")
785 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
787 "vrcpps\t{%1, %0|%0, %1}"
788 [(set_attr "type" "sse")
789 (set_attr "prefix" "vex")
790 (set_attr "mode" "V8SF")])
;; V4SF pattern tagged UNSPEC_RCP, emitted from the template "%vrcpps"
;; with prefix attribute maybe_vex.  Sets atom_sse_attr to "rcp".
;; NOTE(review): the unspec opener line and the insn condition are not
;; visible in this excerpt -- confirm against the full file.
792 (define_insn "sse_rcpv4sf2"
793 [(set (match_operand:V4SF 0 "register_operand" "=x")
795 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
797 "%vrcpps\t{%1, %0|%0, %1}"
798 [(set_attr "type" "sse")
799 (set_attr "atom_sse_attr" "rcp")
800 (set_attr "prefix" "maybe_vex")
801 (set_attr "mode" "V4SF")])
;; AVX vrcpss: combines an unspec of operand 1 (register or memory) with
;; register operand 2; the template passes %1, %2 and %0 as three
;; separate operands.  Mode attribute is SF.
;; NOTE(review): the enclosing RTL (likely a vec_merge), the unspec code
;; and the insn condition are not visible in this excerpt -- confirm.
803 (define_insn "*avx_vmrcpv4sf2"
804 [(set (match_operand:V4SF 0 "register_operand" "=x")
806 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
808 (match_operand:V4SF 2 "register_operand" "x")
811 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
812 [(set_attr "type" "sse")
813 (set_attr "prefix" "vex")
814 (set_attr "mode" "SF")])
;; Non-VEX rcpss: combines an unspec of operand 1 (register or memory)
;; with operand 2, which is tied to the destination ("0"), so the
;; template names only %1 and %0.  Sets atom_sse_attr to "rcp"; mode
;; attribute is SF.
;; NOTE(review): the enclosing RTL (likely a vec_merge), the unspec code
;; and the insn condition are not visible in this excerpt -- confirm.
816 (define_insn "sse_vmrcpv4sf2"
817 [(set (match_operand:V4SF 0 "register_operand" "=x")
819 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
821 (match_operand:V4SF 2 "register_operand" "0")
824 "rcpss\t{%1, %0|%0, %1}"
825 [(set_attr "type" "sse")
826 (set_attr "atom_sse_attr" "rcp")
827 (set_attr "mode" "SF")])
;; V8SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; on, the insn is not being optimized for size, and
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations all hold, it calls ix86_emit_swsqrtsf
;; with a final argument of 0.
;; NOTE(review): the enabling condition, the braces and what follows the
;; call are not visible in this excerpt -- confirm against the full file.
829 (define_expand "sqrtv8sf2"
830 [(set (match_operand:V8SF 0 "register_operand" "")
831 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
834 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
835 && flag_finite_math_only && !flag_trapping_math
836 && flag_unsafe_math_optimizations)
838 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; V8SF square root, emitted as vsqrtps with the VEX prefix; the source
;; may be a register or memory.
;; NOTE(review): the insn condition is not visible in this excerpt --
;; confirm against the full file.
843 (define_insn "avx_sqrtv8sf2"
844 [(set (match_operand:V8SF 0 "register_operand" "=x")
845 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
847 "vsqrtps\t{%1, %0|%0, %1}"
848 [(set_attr "type" "sse")
849 (set_attr "prefix" "vex")
850 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; on, the insn is being optimized for speed, and flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations all hold, it
;; calls ix86_emit_swsqrtsf with a final argument of 0.
;; NOTE(review): the enabling condition, the braces and what follows the
;; call are not visible in this excerpt -- confirm against the full file.
852 (define_expand "sqrtv4sf2"
853 [(set (match_operand:V4SF 0 "register_operand" "")
854 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
857 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
858 && flag_finite_math_only && !flag_trapping_math
859 && flag_unsafe_math_optimizations)
861 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF square root, emitted from the template "%vsqrtps" with prefix
;; attribute maybe_vex.  Sets atom_sse_attr to "sqrt".
;; NOTE(review): the insn condition is not visible in this excerpt --
;; confirm against the full file.
866 (define_insn "sse_sqrtv4sf2"
867 [(set (match_operand:V4SF 0 "register_operand" "=x")
868 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
870 "%vsqrtps\t{%1, %0|%0, %1}"
871 [(set_attr "type" "sse")
872 (set_attr "atom_sse_attr" "sqrt")
873 (set_attr "prefix" "maybe_vex")
874 (set_attr "mode" "V4SF")])
;; V4DF square root, emitted as vsqrtpd with the VEX prefix.  This is a
;; named define_insn (no leading "*" and no separate expander visible).
;; NOTE(review): the insn condition is not visible in this excerpt --
;; confirm against the full file.
876 (define_insn "sqrtv4df2"
877 [(set (match_operand:V4DF 0 "register_operand" "=x")
878 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
880 "vsqrtpd\t{%1, %0|%0, %1}"
881 [(set_attr "type" "sse")
882 (set_attr "prefix" "vex")
883 (set_attr "mode" "V4DF")])
;; V2DF square root, emitted from the template "%vsqrtpd" with prefix
;; attribute maybe_vex.
;; NOTE(review): the insn condition is not visible in this excerpt --
;; confirm against the full file.
885 (define_insn "sqrtv2df2"
886 [(set (match_operand:V2DF 0 "register_operand" "=x")
887 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
889 "%vsqrtpd\t{%1, %0|%0, %1}"
890 [(set_attr "type" "sse")
891 (set_attr "prefix" "maybe_vex")
892 (set_attr "mode" "V2DF")])
;; AVX square root expressed as a vec_merge of a computation on operand 1
;; with register operand 2, emitted as vsqrt<ssescalarmodesuffix> with
;; three operands.  The mode attribute is the scalar mode.
;; NOTE(review): this pattern is conditioned on AVX_VEC_FLOAT_MODE_P while
;; the sibling *avx_vm add/sub and div patterns use
;; AVX128_VEC_FLOAT_MODE_P -- confirm the difference is intended.
;; NOTE(review): the RTL opener around operand 1 and the vec_merge mask
;; are not visible in this excerpt -- confirm against the full file.
894 (define_insn "*avx_vmsqrt<mode>2"
895 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
896 (vec_merge:SSEMODEF2P
898 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
899 (match_operand:SSEMODEF2P 2 "register_operand" "x")
901 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
902 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
903 [(set_attr "type" "sse")
904 (set_attr "prefix" "vex")
905 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX square root expressed as a vec_merge of a computation on
;; operand 1 with operand 2, which is tied to the destination ("0");
;; emitted as sqrt<ssescalarmodesuffix>.  Sets atom_sse_attr to "sqrt";
;; the mode attribute is the scalar mode.
;; NOTE(review): the RTL opener around operand 1 and the vec_merge mask
;; are not visible in this excerpt -- confirm against the full file.
907 (define_insn "<sse>_vmsqrt<mode>2"
908 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
909 (vec_merge:SSEMODEF2P
911 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
912 (match_operand:SSEMODEF2P 2 "register_operand" "0")
914 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
915 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
916 [(set_attr "type" "sse")
917 (set_attr "atom_sse_attr" "sqrt")
918 (set_attr "mode" "<ssescalarmode>")])
;; V8SF expander for the UNSPEC_RSQRT pattern, enabled under
;; "TARGET_AVX && TARGET_SSE_MATH".  The visible body calls
;; ix86_emit_swsqrtsf with a final argument of 1 (the sqrt expanders
;; pass 0).
;; NOTE(review): the unspec opener line and the body delimiters are not
;; visible in this excerpt -- confirm against the full file.
920 (define_expand "rsqrtv8sf2"
921 [(set (match_operand:V8SF 0 "register_operand" "")
923 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
924 "TARGET_AVX && TARGET_SSE_MATH"
926 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; V8SF pattern tagged UNSPEC_RSQRT, emitted as vrsqrtps with the VEX
;; prefix; the source may be a register or memory.
;; NOTE(review): the unspec opener line and the insn condition are not
;; visible in this excerpt -- confirm against the full file.
930 (define_insn "avx_rsqrtv8sf2"
931 [(set (match_operand:V8SF 0 "register_operand" "=x")
933 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
935 "vrsqrtps\t{%1, %0|%0, %1}"
936 [(set_attr "type" "sse")
937 (set_attr "prefix" "vex")
938 (set_attr "mode" "V8SF")])
;; V4SF expander for the UNSPEC_RSQRT pattern.  The visible body calls
;; ix86_emit_swsqrtsf with a final argument of 1.
;; NOTE(review): the unspec opener line, the enabling condition and the
;; body delimiters are not visible in this excerpt -- confirm.
940 (define_expand "rsqrtv4sf2"
941 [(set (match_operand:V4SF 0 "register_operand" "")
943 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
946 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; V4SF pattern tagged UNSPEC_RSQRT, emitted from the template
;; "%vrsqrtps" with prefix attribute maybe_vex.
;; NOTE(review): the unspec opener line and the insn condition are not
;; visible in this excerpt -- confirm against the full file.
950 (define_insn "sse_rsqrtv4sf2"
951 [(set (match_operand:V4SF 0 "register_operand" "=x")
953 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
955 "%vrsqrtps\t{%1, %0|%0, %1}"
956 [(set_attr "type" "sse")
957 (set_attr "prefix" "maybe_vex")
958 (set_attr "mode" "V4SF")])
;; AVX vrsqrtss: combines an unspec of operand 1 (register or memory)
;; with register operand 2; the template passes %1, %2 and %0 as three
;; separate operands.  Mode attribute is SF.
;; NOTE(review): the enclosing RTL (likely a vec_merge), the unspec code
;; and the insn condition are not visible in this excerpt -- confirm.
960 (define_insn "*avx_vmrsqrtv4sf2"
961 [(set (match_operand:V4SF 0 "register_operand" "=x")
963 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
965 (match_operand:V4SF 2 "register_operand" "x")
968 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
969 [(set_attr "type" "sse")
970 (set_attr "prefix" "vex")
971 (set_attr "mode" "SF")])
;; Non-VEX rsqrtss: combines an unspec of operand 1 (register or memory)
;; with operand 2, which is tied to the destination ("0"), so the
;; template names only %1 and %0.  Mode attribute is SF.
;; NOTE(review): the enclosing RTL (likely a vec_merge), the unspec code
;; and the insn condition are not visible in this excerpt -- confirm.
973 (define_insn "sse_vmrsqrtv4sf2"
974 [(set (match_operand:V4SF 0 "register_operand" "=x")
976 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
978 (match_operand:V4SF 2 "register_operand" "0")
981 "rsqrtss\t{%1, %0|%0, %1}"
982 [(set_attr "type" "sse")
983 (set_attr "mode" "SF")])
985 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
986 ;; isn't really correct, as those rtl operators aren't defined when
987 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for smax/smin (the smaxmin code iterator) on the 256-bit
;; float vector modes.  When flag_finite_math_only is off, operand 1 is
;; forced into a register first; then the operands are passed through
;; ix86_fixup_binary_operands_no_copy.
989 (define_expand "<code><mode>3"
990 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
991 (smaxmin:AVX256MODEF2P
992 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
993 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
994 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
996 if (!flag_finite_math_only)
997 operands[1] = force_reg (<MODE>mode, operands[1]);
998 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander for the 128-bit float vector modes (SSEMODEF2P), gated on
;; SSE_VEC_FLOAT_MODE_P.  When flag_finite_math_only is off, operand 1 is
;; forced into a register before ix86_fixup_binary_operands_no_copy runs.
1001 (define_expand "<code><mode>3"
1002 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1004 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1005 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1006 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1008 if (!flag_finite_math_only)
1009 operands[1] = force_reg (<MODE>mode, operands[1]);
1010 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX min/max insn that is only available under flag_finite_math_only.
;; Operand 1 carries the commutative marker "%", and the operands must
;; also be accepted by ix86_binary_operator_ok.  Emitted in the
;; three-operand VEX form.
1013 (define_insn "*avx_<code><mode>3_finite"
1014 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1016 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1017 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1018 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1019 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1020 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1021 [(set_attr "type" "sseadd")
1022 (set_attr "prefix" "vex")
1023 (set_attr "mode" "<MODE>")])
;; Legacy-SSE min/max insn for flag_finite_math_only.  Operand 1 is tied
;; to the destination ("0") and marked commutative ("%"); the operands
;; must be accepted by ix86_binary_operator_ok.  Two-operand template.
1025 (define_insn "*<code><mode>3_finite"
1026 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1028 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1029 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1030 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1031 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1032 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1033 [(set_attr "type" "sseadd")
1034 (set_attr "mode" "<MODE>")])
;; AVX min/max insn used when the *_finite variant does not apply, i.e.
;; when NaNs and signed zeros must be honoured.  The hardware min/max
;; instructions are not commutative in that case, so operand 1 must NOT
;; carry the "%" commutative marker and must already be a register (the
;; expander forces it into one when !flag_finite_math_only).  This
;; mirrors the legacy-SSE "*<code><mode>3" pattern, which uses
;; register_operand "0" for operand 1.
1036 (define_insn "*avx_<code><mode>3"
1037 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1039 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1040 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1041 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1042 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1043 [(set_attr "type" "sseadd")
1044 (set_attr "prefix" "vex")
1045 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE min/max insn without the finite-math restriction.  Operand 1
;; must be a register tied to the destination ("0") and has no commutative
;; marker, so the operand order is preserved.
1047 (define_insn "*<code><mode>3"
1048 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1050 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1051 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1052 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1053 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1054 [(set_attr "type" "sseadd")
1055 (set_attr "mode" "<MODE>")])
;; AVX scalar min/max expressed as a vec_merge on the 128-bit float vector
;; modes (gated on AVX128_VEC_FLOAT_MODE_P).  Uses the scalar mnemonic
;; suffix and the three-operand VEX template; insn mode is the scalar mode.
1057 (define_insn "*avx_vm<code><mode>3"
1058 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1059 (vec_merge:SSEMODEF2P
1061 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1062 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1065 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1066 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1067 [(set_attr "type" "sse")
1068 (set_attr "prefix" "vex")
1069 (set_attr "mode" "<ssescalarmode>")])
;; Legacy-SSE scalar min/max expressed as a vec_merge.  Operand 1 is tied
;; to the destination ("0"), so the two-operand template names only
;; operands 2 and 0.  Insn mode is the scalar mode.
1071 (define_insn "<sse>_vm<code><mode>3"
1072 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1073 (vec_merge:SSEMODEF2P
1075 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1076 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1079 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1080 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1081 [(set_attr "type" "sseadd")
1082 (set_attr "mode" "<ssescalarmode>")])
1084 ;; These versions of the min/max patterns implement exactly the operations
1085 ;; min = (op1 < op2 ? op1 : op2)
1086 ;; max = (!(op1 < op2) ? op1 : op2)
1087 ;; Their operands are not commutative, and thus they may be used in the
1088 ;; presence of -0.0 and NaN.
;; AVX variant of the order-preserving minimum: operand 1 is a register,
;; operand 2 a register or memory, and neither is marked commutative.
;; Emitted as three-operand vmin with a VEX prefix.
1090 (define_insn "*avx_ieee_smin<mode>3"
1091 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1093 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1094 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1096 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1097 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1098 [(set_attr "type" "sseadd")
1099 (set_attr "prefix" "vex")
1100 (set_attr "mode" "<avxvecmode>")])
;; AVX variant of the order-preserving maximum: operand 1 is a register,
;; operand 2 a register or memory, and neither is marked commutative.
;; Emitted as three-operand vmax with a VEX prefix.
1102 (define_insn "*avx_ieee_smax<mode>3"
1103 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1105 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1106 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1108 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1109 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1110 [(set_attr "type" "sseadd")
1111 (set_attr "prefix" "vex")
1112 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE variant of the order-preserving minimum.  Operand 1 is tied
;; to the destination ("0"); the template is the two-operand min form.
1114 (define_insn "*ieee_smin<mode>3"
1115 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1117 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1118 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1120 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1121 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1122 [(set_attr "type" "sseadd")
1123 (set_attr "mode" "<MODE>")])
;; Legacy-SSE variant of the order-preserving maximum.  Operand 1 is tied
;; to the destination ("0"); the template is the two-operand max form.
1125 (define_insn "*ieee_smax<mode>3"
1126 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1128 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1129 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1131 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1132 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1133 [(set_attr "type" "sseadd")
1134 (set_attr "mode" "<MODE>")])
;; V8SF add/subtract insn emitted as vaddsubps.  Operands 1 (register) and
;; 2 (register or memory) feed both arithmetic arms of the pattern; the
;; minus arm reuses them through match_dup.
1136 (define_insn "avx_addsubv8sf3"
1137 [(set (match_operand:V8SF 0 "register_operand" "=x")
1140 (match_operand:V8SF 1 "register_operand" "x")
1141 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1142 (minus:V8SF (match_dup 1) (match_dup 2))
1145 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1146 [(set_attr "type" "sseadd")
1147 (set_attr "prefix" "vex")
1148 (set_attr "mode" "V8SF")])
;; V4DF add/subtract insn emitted as vaddsubpd.  Operands 1 (register) and
;; 2 (register or memory) feed both arithmetic arms of the pattern; the
;; minus arm reuses them through match_dup.
1150 (define_insn "avx_addsubv4df3"
1151 [(set (match_operand:V4DF 0 "register_operand" "=x")
1154 (match_operand:V4DF 1 "register_operand" "x")
1155 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1156 (minus:V4DF (match_dup 1) (match_dup 2))
1159 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1160 [(set_attr "type" "sseadd")
1161 (set_attr "prefix" "vex")
1162 (set_attr "mode" "V4DF")])
;; 128-bit V4SF add/subtract in the VEX three-operand form (vaddsubps).
;; Operand 1 is a free SSE register rather than being tied to operand 0.
1164 (define_insn "*avx_addsubv4sf3"
1165 [(set (match_operand:V4SF 0 "register_operand" "=x")
1168 (match_operand:V4SF 1 "register_operand" "x")
1169 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1170 (minus:V4SF (match_dup 1) (match_dup 2))
1173 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1174 [(set_attr "type" "sseadd")
1175 (set_attr "prefix" "vex")
1176 (set_attr "mode" "V4SF")])
;; Legacy two-operand V4SF add/subtract (addsubps).  Operand 1 is tied to
;; the destination ("0").  The insn is tagged with prefix_rep = 1.
1178 (define_insn "sse3_addsubv4sf3"
1179 [(set (match_operand:V4SF 0 "register_operand" "=x")
1182 (match_operand:V4SF 1 "register_operand" "0")
1183 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1184 (minus:V4SF (match_dup 1) (match_dup 2))
1187 "addsubps\t{%2, %0|%0, %2}"
1188 [(set_attr "type" "sseadd")
1189 (set_attr "prefix_rep" "1")
1190 (set_attr "mode" "V4SF")])
;; 128-bit V2DF add/subtract in the VEX three-operand form (vaddsubpd).
;; Operand 1 is a free SSE register rather than being tied to operand 0.
1192 (define_insn "*avx_addsubv2df3"
1193 [(set (match_operand:V2DF 0 "register_operand" "=x")
1196 (match_operand:V2DF 1 "register_operand" "x")
1197 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1198 (minus:V2DF (match_dup 1) (match_dup 2))
1201 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1202 [(set_attr "type" "sseadd")
1203 (set_attr "prefix" "vex")
1204 (set_attr "mode" "V2DF")])
;; Legacy two-operand V2DF add/subtract (addsubpd).  Operand 1 is tied to
;; the destination ("0").  The insn is tagged atom_unit = complex.
1206 (define_insn "sse3_addsubv2df3"
1207 [(set (match_operand:V2DF 0 "register_operand" "=x")
1210 (match_operand:V2DF 1 "register_operand" "0")
1211 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1212 (minus:V2DF (match_dup 1) (match_dup 2))
1215 "addsubpd\t{%2, %0|%0, %2}"
1216 [(set_attr "type" "sseadd")
1217 (set_attr "atom_unit" "complex")
1218 (set_attr "mode" "V2DF")])
;; V4DF horizontal add/subtract; the insn name and mnemonic are
;; parameterized through <plusminus_insn> / <plusminus_mnemonic>.
;; As written, the template pairs elements (0,1) and (2,3) of operand 1,
;; followed by elements (0,1) and (2,3) of operand 2.
1220 (define_insn "avx_h<plusminus_insn>v4df3"
1221 [(set (match_operand:V4DF 0 "register_operand" "=x")
1226 (match_operand:V4DF 1 "register_operand" "x")
1227 (parallel [(const_int 0)]))
1228 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1230 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1231 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1235 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1236 (parallel [(const_int 0)]))
1237 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1239 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1240 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1242 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1243 [(set_attr "type" "sseadd")
1244 (set_attr "prefix" "vex")
1245 (set_attr "mode" "V4DF")])
;; V8SF horizontal add/subtract (vh<op>ps).  As written, the template
;; pairs adjacent elements in this order: operand 1 elements 0..3,
;; operand 2 elements 0..3, operand 1 elements 4..7, operand 2
;; elements 4..7.
1247 (define_insn "avx_h<plusminus_insn>v8sf3"
1248 [(set (match_operand:V8SF 0 "register_operand" "=x")
1254 (match_operand:V8SF 1 "register_operand" "x")
1255 (parallel [(const_int 0)]))
1256 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1258 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1259 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1263 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1264 (parallel [(const_int 0)]))
1265 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1267 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1268 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1272 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1273 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1275 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1276 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1279 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1282 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1283 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1285 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1286 [(set_attr "type" "sseadd")
1287 (set_attr "prefix" "vex")
1288 (set_attr "mode" "V8SF")])
;; 128-bit V4SF horizontal add/subtract in the VEX three-operand form.
;; The template pairs elements (0,1) and (2,3) of operand 1, then the same
;; pairs of operand 2.
1290 (define_insn "*avx_h<plusminus_insn>v4sf3"
1291 [(set (match_operand:V4SF 0 "register_operand" "=x")
1296 (match_operand:V4SF 1 "register_operand" "x")
1297 (parallel [(const_int 0)]))
1298 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1300 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1301 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1305 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1306 (parallel [(const_int 0)]))
1307 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1309 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1310 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1312 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1313 [(set_attr "type" "sseadd")
1314 (set_attr "prefix" "vex")
1315 (set_attr "mode" "V4SF")])
;; Legacy two-operand V4SF horizontal add/subtract (h<op>ps).  Operand 1
;; is tied to the destination ("0").  Same element pairing as the AVX
;; 128-bit variant; tagged atom_unit = complex and prefix_rep = 1.
1317 (define_insn "sse3_h<plusminus_insn>v4sf3"
1318 [(set (match_operand:V4SF 0 "register_operand" "=x")
1323 (match_operand:V4SF 1 "register_operand" "0")
1324 (parallel [(const_int 0)]))
1325 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1327 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1328 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1332 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1333 (parallel [(const_int 0)]))
1334 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1336 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1337 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1339 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1340 [(set_attr "type" "sseadd")
1341 (set_attr "atom_unit" "complex")
1342 (set_attr "prefix_rep" "1")
1343 (set_attr "mode" "V4SF")])
;; 128-bit V2DF horizontal add/subtract in the VEX three-operand form.
;; Pairs elements (0,1) of operand 1, then elements (0,1) of operand 2.
1345 (define_insn "*avx_h<plusminus_insn>v2df3"
1346 [(set (match_operand:V2DF 0 "register_operand" "=x")
1350 (match_operand:V2DF 1 "register_operand" "x")
1351 (parallel [(const_int 0)]))
1352 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1355 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1356 (parallel [(const_int 0)]))
1357 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1359 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1360 [(set_attr "type" "sseadd")
1361 (set_attr "prefix" "vex")
1362 (set_attr "mode" "V2DF")])
;; Legacy two-operand V2DF horizontal add/subtract (h<op>pd).  Operand 1
;; is tied to the destination ("0").  Pairs elements (0,1) of operand 1,
;; then elements (0,1) of operand 2.
1364 (define_insn "sse3_h<plusminus_insn>v2df3"
1365 [(set (match_operand:V2DF 0 "register_operand" "=x")
1369 (match_operand:V2DF 1 "register_operand" "0")
1370 (parallel [(const_int 0)]))
1371 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1374 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1375 (parallel [(const_int 0)]))
1376 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1378 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1379 [(set_attr "type" "sseadd")
1380 (set_attr "mode" "V2DF")])
;; Sum-reduction expander for V8SF, built from three chained
;; avx_haddv8sf3 steps: operands[1] -> tmp -> tmp2 -> operands[0].
;; Each step must consume the previous step's result.  The second step
;; previously re-read operands[1], which left tmp dead and dropped one
;; level of the reduction; it now takes tmp as both inputs, matching the
;; chaining used by the other reduc_splus_* expanders.
1382 (define_expand "reduc_splus_v8sf"
1383 [(match_operand:V8SF 0 "register_operand" "")
1384 (match_operand:V8SF 1 "register_operand" "")]
1387 rtx tmp = gen_reg_rtx (V8SFmode);
1388 rtx tmp2 = gen_reg_rtx (V8SFmode);
1389 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1390 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1391 emit_insn (gen_avx_haddv8sf3 (operands[0], tmp2, tmp2));
;; Sum-reduction expander for V4SF.  Two strategies are visible: a pair
;; of chained sse3_haddv4sf3 insns through a fresh temporary, and a call
;; to ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the condition selecting between them is not visible in
;; this excerpt.
1395 (define_expand "reduc_splus_v4sf"
1396 [(match_operand:V4SF 0 "register_operand" "")
1397 (match_operand:V4SF 1 "register_operand" "")]
1402 rtx tmp = gen_reg_rtx (V4SFmode);
1403 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1404 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1407 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum-reduction expander for V4DF: two chained avx_haddv4df3 insns, the
;; first from operands[1] into a fresh temporary, the second from that
;; temporary into operands[0].
1411 (define_expand "reduc_splus_v4df"
1412 [(match_operand:V4DF 0 "register_operand" "")
1413 (match_operand:V4DF 1 "register_operand" "")]
1416 rtx tmp = gen_reg_rtx (V4DFmode);
1417 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1418 emit_insn (gen_avx_haddv4df3 (operands[0], tmp, tmp));
;; Sum-reduction expander for V2DF: a single sse3_haddv2df3 with
;; operands[1] supplied as both inputs.
1422 (define_expand "reduc_splus_v2df"
1423 [(match_operand:V2DF 0 "register_operand" "")
1424 (match_operand:V2DF 1 "register_operand" "")]
1427 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Max-reduction expander for V4SF: delegates to ix86_expand_reduc_v4sf,
;; passing gen_smaxv4sf3 as the combining generator.
1431 (define_expand "reduc_smax_v4sf"
1432 [(match_operand:V4SF 0 "register_operand" "")
1433 (match_operand:V4SF 1 "register_operand" "")]
1436 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Min-reduction expander for V4SF: delegates to ix86_expand_reduc_v4sf,
;; passing gen_sminv4sf3 as the combining generator.
1440 (define_expand "reduc_smin_v4sf"
1441 [(match_operand:V4SF 0 "register_operand" "")
1442 (match_operand:V4SF 1 "register_operand" "")]
1445 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1449 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1451 ;; Parallel floating point comparisons
1453 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit immediate.  Operand 3 must satisfy
;; const_0_to_31_operand and is emitted as the immediate of vcmp;
;; operands 1 and 2 are the compared values (operand 2 may be memory).
;; length_immediate = 1 accounts for the immediate byte.
1455 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1456 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1458 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1459 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1460 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1463 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1464 [(set_attr "type" "ssecmp")
1465 (set_attr "length_immediate" "1")
1466 (set_attr "prefix" "vex")
1467 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an explicit immediate, expressed as a
;; vec_merge.  Operand 3 must satisfy const_0_to_31_operand and is
;; emitted as the immediate of vcmp with the scalar mnemonic suffix.
;; The destination needs the "=x" output constraint like every other
;; insn in this family: an empty constraint on a define_insn output
;; gives the register allocator no register class, so the result is not
;; guaranteed to land in an SSE register.
1469 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1470 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1471 (vec_merge:SSEMODEF2P
1473 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1474 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1475 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1480 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1481 [(set_attr "type" "ssecmp")
1482 (set_attr "length_immediate" "1")
1483 (set_attr "prefix" "vex")
1484 (set_attr "mode" "<ssescalarmode>")])
1486 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1487 ;; may generate 256bit vector compare instructions.
;; AVX mask-producing compare.  The comparison itself is operand 3,
;; matched by avx_comparison_float_operator, and is printed into the
;; mnemonic through the %D3 output modifier.
1488 (define_insn "*avx_maskcmp<mode>3"
1489 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1490 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1491 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1492 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1493 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1494 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1495 [(set_attr "type" "ssecmp")
1496 (set_attr "prefix" "vex")
1497 (set_attr "length_immediate" "1")
1498 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE mask-producing compare over SSEMODEF4, accepted for both
;; scalar (SSE_FLOAT_MODE_P) and vector (SSE_VEC_FLOAT_MODE_P) modes.
;; Operand 1 is tied to the destination; the comparison operator
;; (operand 3, sse_comparison_operator) is printed through %D3.
;; NOTE(review): the first line of the insn condition is not visible in
;; this excerpt.
1500 (define_insn "<sse>_maskcmp<mode>3"
1501 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1502 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1503 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1504 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1506 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1507 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1508 [(set_attr "type" "ssecmp")
1509 (set_attr "length_immediate" "1")
1510 (set_attr "mode" "<MODE>")])
;; AVX scalar mask-producing compare, expressed as a vec_merge around the
;; comparison operator (operand 3, sse_comparison_operator).  Emitted as
;; three-operand vcmp with the scalar suffix; insn mode is the scalar mode.
1512 (define_insn "*avx_vmmaskcmp<mode>3"
1513 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1514 (vec_merge:SSEMODEF2P
1515 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1516 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1517 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1520 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1521 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1522 [(set_attr "type" "ssecmp")
1523 (set_attr "prefix" "vex")
1524 (set_attr "mode" "<ssescalarmode>")])
;; Legacy-SSE scalar mask-producing compare, expressed as a vec_merge.
;; Operand 1 is tied to the destination ("0"); the comparison operator is
;; printed through %D3 and the scalar suffix is used.
1526 (define_insn "<sse>_vmmaskcmp<mode>3"
1527 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1528 (vec_merge:SSEMODEF2P
1529 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1530 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1531 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1534 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1535 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1536 [(set_attr "type" "ssecmp")
1537 (set_attr "length_immediate" "1")
1538 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets the flags register in CCFP mode from element
;; 0 of operand 0 and element 0 of operand 1 (operand 1 may be memory).
;; Emitted as comis<suffix> with the %v prefix (prefix=maybe_vex).
;; prefix_rep is forced to 0 and prefix_data16 is computed from whether
;; the insn mode is DF.
1540 (define_insn "<sse>_comi"
1541 [(set (reg:CCFP FLAGS_REG)
1544 (match_operand:<ssevecmode> 0 "register_operand" "x")
1545 (parallel [(const_int 0)]))
1547 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1548 (parallel [(const_int 0)]))))]
1549 "SSE_FLOAT_MODE_P (<MODE>mode)"
1550 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1551 [(set_attr "type" "ssecomi")
1552 (set_attr "prefix" "maybe_vex")
1553 (set_attr "prefix_rep" "0")
1554 (set (attr "prefix_data16")
1555 (if_then_else (eq_attr "mode" "DF")
1557 (const_string "0")))
1558 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but the flags register is set in
;; CCFPU mode and the mnemonic is ucomis<suffix>.  Compares element 0 of
;; operand 0 with element 0 of operand 1 (operand 1 may be memory).
1560 (define_insn "<sse>_ucomi"
1561 [(set (reg:CCFPU FLAGS_REG)
1564 (match_operand:<ssevecmode> 0 "register_operand" "x")
1565 (parallel [(const_int 0)]))
1567 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1568 (parallel [(const_int 0)]))))]
1569 "SSE_FLOAT_MODE_P (<MODE>mode)"
1570 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1571 [(set_attr "type" "ssecomi")
1572 (set_attr "prefix" "maybe_vex")
1573 (set_attr "prefix_rep" "0")
1574 (set (attr "prefix_data16")
1575 (if_then_else (eq_attr "mode" "DF")
1577 (const_string "0")))
1578 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander: operand 0 receives operand 1 or
;; operand 2 according to comparison operator 3 applied to operands 4
;; and 5.  Enabled for SSE or AVX float vector modes.  All of the work is
;; delegated to ix86_expand_fp_vcond, whose boolean result is captured in
;; the local "ok".
1580 (define_expand "vcond<mode>"
1581 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1582 (if_then_else:AVXMODEF2P
1583 (match_operator 3 ""
1584 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1585 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1586 (match_operand:AVXMODEF2P 1 "general_operand" "")
1587 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1588 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1589 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1591 bool ok = ix86_expand_fp_vcond (operands);
1596 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1598 ;; Parallel floating point logical operations
1600 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX and-not on float vector modes, emitted as three-operand vandn.
;; Operand 1 is a register, operand 2 a register or memory.
1602 (define_insn "avx_andnot<mode>3"
1603 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1606 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1607 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1608 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1609 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1610 [(set_attr "type" "sselog")
1611 (set_attr "prefix" "vex")
1612 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE and-not on float vector modes, emitted as two-operand andn.
;; Operand 1 is tied to the destination ("0").
1614 (define_insn "<sse>_andnot<mode>3"
1615 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1618 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1619 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1620 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1621 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1622 [(set_attr "type" "sselog")
1623 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic code iterator on 256-bit float vector
;; modes.  Its only preparation step is to pass the operands through
;; ix86_fixup_binary_operands_no_copy.
1625 (define_expand "<code><mode>3"
1626 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1627 (any_logic:AVX256MODEF2P
1628 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1629 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1630 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1631 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX logical insn on float vector modes; operand 1 is marked
;; commutative ("%").  The output code returns the packed-single
;; spelling (v<logic>ps) when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set; the other visible return uses the mode's own suffix.
1633 (define_insn "*avx_<code><mode>3"
1634 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1635 (any_logic:AVXMODEF2P
1636 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1637 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1638 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1639 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1641 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1642 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1644 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1646 [(set_attr "type" "sselog")
1647 (set_attr "prefix" "vex")
1648 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic code iterator on 128-bit float vector
;; modes.  Its only preparation step is to pass the operands through
;; ix86_fixup_binary_operands_no_copy.
1650 (define_expand "<code><mode>3"
1651 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1652 (any_logic:SSEMODEF2P
1653 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1654 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1655 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1656 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Legacy-SSE logical insn on float vector modes; operand 1 is tied to
;; the destination and marked commutative ("%0").  The output code
;; returns the packed-single spelling (<logic>ps) when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set; the other visible return
;; uses the mode's own suffix.
1658 (define_insn "*<code><mode>3"
1659 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1660 (any_logic:SSEMODEF2P
1661 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1662 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1663 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1664 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1666 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1667 return "<logic>ps\t{%2, %0|%0, %2}";
1669 return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
1671 [(set_attr "type" "sselog")
1672 (set_attr "mode" "<MODE>")])
;; copysign expander for VEC_FLOAT_MODE.  The preparation code sets
;; operands[3] to the result of ix86_build_signbit_mask (<MODE>mode, 1, 0)
;; and allocates fresh pseudos for operands 4 and 5.  The visible
;; template combines (not operand 3) with operand 1, combines operand 3
;; with operand 2 via AND, and IORs operands 4 and 5 into operand 0.
1674 (define_expand "copysign<mode>3"
1677 (not:VEC_FLOAT_MODE (match_dup 3))
1678 (match_operand:VEC_FLOAT_MODE 1 "nonimmediate_operand" "")))
1680 (and:VEC_FLOAT_MODE (match_dup 3)
1681 (match_operand:VEC_FLOAT_MODE 2 "nonimmediate_operand" "")))
1682 (set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
1683 (ior:VEC_FLOAT_MODE (match_dup 4) (match_dup 5)))]
1686 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1688 operands[4] = gen_reg_rtx (<MODE>mode);
1689 operands[5] = gen_reg_rtx (<MODE>mode);
1692 ;; Also define scalar versions. These are used for abs, neg, and
1693 ;; conditional move. Using subregs into vector modes causes register
1694 ;; allocation lossage. These patterns do not allow memory operands
1695 ;; because the native instructions read the full 128-bits.
;; AVX and-not on scalar float modes (MODEF).  All operands must be
;; registers.  Emitted as the packed vandnp<suffix> instruction, and the
;; insn mode attribute is the corresponding vector mode.
1697 (define_insn "*avx_andnot<mode>3"
1698 [(set (match_operand:MODEF 0 "register_operand" "=x")
1701 (match_operand:MODEF 1 "register_operand" "x"))
1702 (match_operand:MODEF 2 "register_operand" "x")))]
1703 "AVX_FLOAT_MODE_P (<MODE>mode)"
1704 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1705 [(set_attr "type" "sselog")
1706 (set_attr "prefix" "vex")
1707 (set_attr "mode" "<ssevecmode>")])
;; Legacy-SSE and-not on scalar float modes (MODEF).  All operands must
;; be registers, with operand 1 tied to the destination.  Emitted as the
;; packed andnp<suffix> instruction.
1709 (define_insn "*andnot<mode>3"
1710 [(set (match_operand:MODEF 0 "register_operand" "=x")
1713 (match_operand:MODEF 1 "register_operand" "0"))
1714 (match_operand:MODEF 2 "register_operand" "x")))]
1715 "SSE_FLOAT_MODE_P (<MODE>mode)"
1716 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1717 [(set_attr "type" "sselog")
1718 (set_attr "mode" "<ssevecmode>")])
;; AVX logical insn on scalar float modes (MODEF), register operands
;; only.  The output code returns v<logic>ps when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set; the other visible return
;; uses v<logic>p<suffix>.
1720 (define_insn "*avx_<code><mode>3"
1721 [(set (match_operand:MODEF 0 "register_operand" "=x")
1723 (match_operand:MODEF 1 "register_operand" "x")
1724 (match_operand:MODEF 2 "register_operand" "x")))]
1725 "AVX_FLOAT_MODE_P (<MODE>mode)"
1727 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1728 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1730 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1732 [(set_attr "type" "sselog")
1733 (set_attr "prefix" "vex")
1734 (set_attr "mode" "<ssevecmode>")])
;; Legacy-SSE logical insn on scalar float modes (MODEF), register
;; operands only, operand 1 tied to the destination.  The output code
;; returns <logic>ps when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set;
;; the other visible return uses <logic>p<suffix>.
1736 (define_insn "*<code><mode>3"
1737 [(set (match_operand:MODEF 0 "register_operand" "=x")
1739 (match_operand:MODEF 1 "register_operand" "0")
1740 (match_operand:MODEF 2 "register_operand" "x")))]
1741 "SSE_FLOAT_MODE_P (<MODE>mode)"
1743 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1744 return "<logic>ps\t{%2, %0|%0, %2}";
1746 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1748 [(set_attr "type" "sselog")
1749 (set_attr "mode" "<ssevecmode>")])
1751 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1753 ;; FMA4 floating point multiply/accumulate instructions. This
1754 ;; includes the scalar version of the instructions as well as the vector versions.
1757 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1759 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1760 ;; combine to generate a multiply/add with two memory references. We then
1761 ;; split this insn, into loading up the destination register with one of the
1762 ;; memory operations. If we don't manage to split the insn, reload will
1763 ;; generate the appropriate moves. The reason this is needed, is that combine
1764 ;; has already folded one of the memory references into both the multiply and
1765 ;; add insns, and it can't generate a new pseudo. I.e.:
1766 ;; (set (reg1) (mem (addr1)))
1767 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1768 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; FMA4 multiply-add over FMA4MODEF4: operands 1 and 2 are the
;; multiplicands (operand 1 marked commutative), operand 3 the addend.
;; Two alternatives are provided: memory is allowed either in operand 3
;; (first alternative) or in operand 2 (second alternative).
1770 (define_insn "fma4_fmadd<mode>4256"
1771 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1774 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1775 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1776 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1777 "TARGET_FMA4 && TARGET_FUSED_MADD"
1778 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1779 [(set_attr "type" "ssemuladd")
1780 (set_attr "mode" "<MODE>")])
1782 ;; Floating multiply and subtract.
;; FMA4 multiply-subtract over FMA4MODEF4, emitted as vfmsub.  Same
;; operand layout and alternatives as the fmadd pattern: memory is
;; allowed in operand 3 or in operand 2, but not both.
1783 (define_insn "fma4_fmsub<mode>4256"
1784 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1787 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1788 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1789 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1790 "TARGET_FMA4 && TARGET_FUSED_MADD"
1791 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1792 [(set_attr "type" "ssemuladd")
1793 (set_attr "mode" "<MODE>")])
1795 ;; Floating point negative multiply and add.
1796 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4 negated multiply-add over FMA4MODEF4, emitted as vfnmadd.  In
;; the RTL, operand 3 appears first, followed by the product of operands
;; 1 and 2; the assembler operand order is still %1, %2, %3.
1797 (define_insn "fma4_fnmadd<mode>4256"
1798 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1800 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1802 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1803 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1804 "TARGET_FMA4 && TARGET_FUSED_MADD"
1805 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1806 [(set_attr "type" "ssemuladd")
1807 (set_attr "mode" "<MODE>")])
1809 ;; Floating point negative multiply and subtract.
;; FMA4 negated multiply-subtract over FMA4MODEF4, emitted as vfnmsub.
;; Operand 1 sits inside an extra wrapping expression (note the
;; additional closing parenthesis after it) before being combined with
;; operand 2 and then operand 3.
1810 (define_insn "fma4_fnmsub<mode>4256"
1811 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1815 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1816 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1817 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1818 "TARGET_FMA4 && TARGET_FUSED_MADD"
1819 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1820 [(set_attr "type" "ssemuladd")
1821 (set_attr "mode" "<MODE>")])
;; FMA4 multiply-add over the SSEMODEF4 modes, emitted as vfmadd.
;; Operands 1 and 2 are the multiplicands, operand 3 the addend; memory
;; is allowed in operand 3 or in operand 2 depending on the alternative.
1823 (define_insn "fma4_fmadd<mode>4"
1824 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1827 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1828 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1829 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1830 "TARGET_FMA4 && TARGET_FUSED_MADD"
1831 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1832 [(set_attr "type" "ssemuladd")
1833 (set_attr "mode" "<MODE>")])
1835 ;; For the scalar operations, use operand1 for the upper words that aren't
1836 ;; modified, so restrict the forms that are generated.
1837 ;; Scalar version of fmadd.
;; Scalar FMA4 multiply-add expressed as a vec_merge over SSEMODEF2P;
;; uses the scalar mnemonic suffix.  Operand layout and alternatives match
;; the packed fmadd pattern.
1838 (define_insn "fma4_vmfmadd<mode>4"
1839 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1840 (vec_merge:SSEMODEF2P
1843 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1844 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1845 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1848 "TARGET_FMA4 && TARGET_FUSED_MADD"
1849 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1850 [(set_attr "type" "ssemuladd")
1851 (set_attr "mode" "<MODE>")])
1853 ;; Floating multiply and subtract.
1854 ;; Allow two memory operands the same as fmadd.
;; FMA4 multiply-subtract over the SSEMODEF4 modes, emitted as vfmsub.
;; Memory is allowed in operand 3 or in operand 2 depending on the
;; alternative.
1855 (define_insn "fma4_fmsub<mode>4"
1856 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1859 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1860 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1861 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1862 "TARGET_FMA4 && TARGET_FUSED_MADD"
1863 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1864 [(set_attr "type" "ssemuladd")
1865 (set_attr "mode" "<MODE>")])
1867 ;; For the scalar operations, use operand1 for the upper words that aren't
1868 ;; modified, so restrict the forms that are generated.
1869 ;; Scalar version of fmsub.
;; Scalar FMA4 multiply-subtract expressed as a vec_merge over
;; SSEMODEF2P; emitted as vfmsub with the scalar mnemonic suffix.
1870 (define_insn "fma4_vmfmsub<mode>4"
1871 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1872 (vec_merge:SSEMODEF2P
1875 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1876 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1877 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1880 "TARGET_FMA4 && TARGET_FUSED_MADD"
1881 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1882 [(set_attr "type" "ssemuladd")
1883 (set_attr "mode" "<MODE>")])
1885 ;; Floating point negative multiply and add.
1886 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4 negated multiply-add over the SSEMODEF4 modes, emitted as
;; vfnmadd.  In the RTL, operand 3 appears first, followed by the product
;; of operands 1 and 2; the assembler operand order is still %1, %2, %3.
1887 (define_insn "fma4_fnmadd<mode>4"
1888 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1890 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1892 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1893 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1894 "TARGET_FMA4 && TARGET_FUSED_MADD"
1895 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1896 [(set_attr "type" "ssemuladd")
1897 (set_attr "mode" "<MODE>")])
1899 ;; For the scalar operations, use operand1 for the upper words that aren't
1900 ;; modified, so restrict the forms that are generated.
1901 ;; Scalar version of fnmadd.
;; Scalar FMA4 negated multiply-add expressed as a vec_merge over
;; SSEMODEF2P.  Operand 3 appears first in the RTL, followed by the
;; product of operands 1 and 2; emitted as vfnmadd with the scalar suffix.
1902 (define_insn "fma4_vmfnmadd<mode>4"
1903 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1904 (vec_merge:SSEMODEF2P
1906 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1908 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1909 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1912 "TARGET_FMA4 && TARGET_FUSED_MADD"
1913 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1914 [(set_attr "type" "ssemuladd")
1915 (set_attr "mode" "<MODE>")])
1917 ;; Floating point negative multiply and subtract.
1918 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; FMA4 negated multiply-subtract over the SSEMODEF4 modes, emitted as
;; vfnmsub.  Operand 1 sits inside an extra wrapping expression before
;; being combined with operand 2 and then operand 3.
1919 (define_insn "fma4_fnmsub<mode>4"
1920 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1924 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1925 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1926 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1927 "TARGET_FMA4 && TARGET_FUSED_MADD"
1928 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1929 [(set_attr "type" "ssemuladd")
1930 (set_attr "mode" "<MODE>")])
1932 ;; For the scalar operations, use operand1 for the upper words that aren't
1933 ;; modified, so restrict the forms that are generated.
1934 ;; Scalar version of fnmsub.
;; vec_merge pattern over SSEMODEF2P; emits vfnmsub<ssescalarmodesuffix>.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; NOTE(review): the embedded line numbers skip inside this pattern, so parts
;; of the RTL look missing in this copy -- confirm against upstream.
1935 (define_insn "fma4_vmfnmsub<mode>4"
1936 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1937 (vec_merge:SSEMODEF2P
1941 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1942 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1943 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1946 "TARGET_FMA4 && TARGET_FUSED_MADD"
1947 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1948 [(set_attr "type" "ssemuladd")
1949 (set_attr "mode" "<MODE>")])
;; fma4i_fmadd<mode>4256: FMA4MODEF4 pattern whose RTL is closed by
;; UNSPEC_FMA4_INTRINSIC; emits vfmadd<ssemodesuffix> with operands 1-3 as
;; sources and register operand 0 as destination.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
1951 (define_insn "fma4i_fmadd<mode>4256"
1952 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1956 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1957 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1958 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1959 UNSPEC_FMA4_INTRINSIC))]
1961 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1962 [(set_attr "type" "ssemuladd")
1963 (set_attr "mode" "<MODE>")])
;; fma4i_fmsub<mode>4256: FMA4MODEF4 pattern whose RTL is closed by
;; UNSPEC_FMA4_INTRINSIC; emits vfmsub<ssemodesuffix> on operands 1-3.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
1965 (define_insn "fma4i_fmsub<mode>4256"
1966 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1970 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1971 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1972 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1973 UNSPEC_FMA4_INTRINSIC))]
1975 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1976 [(set_attr "type" "ssemuladd")
1977 (set_attr "mode" "<MODE>")])
;; fma4i_fnmadd<mode>4256: FMA4MODEF4 pattern whose RTL is closed by
;; UNSPEC_FMA4_INTRINSIC; emits vfnmadd<ssemodesuffix>.  Operand 3 is listed
;; before operands 1 and 2 in the RTL.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
1979 (define_insn "fma4i_fnmadd<mode>4256"
1980 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1983 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1985 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1986 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1987 UNSPEC_FMA4_INTRINSIC))]
1989 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1990 [(set_attr "type" "ssemuladd")
1991 (set_attr "mode" "<MODE>")])
;; fma4i_fnmsub<mode>4256: FMA4MODEF4 pattern whose RTL is closed by
;; UNSPEC_FMA4_INTRINSIC; emits vfnmsub<ssemodesuffix> on operands 1-3.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
1993 (define_insn "fma4i_fnmsub<mode>4256"
1994 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1999 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
2000 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
2001 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2002 UNSPEC_FMA4_INTRINSIC))]
2004 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2005 [(set_attr "type" "ssemuladd")
2006 (set_attr "mode" "<MODE>")])
;; fma4i_fmadd<mode>4: SSEMODEF2P pattern whose RTL is closed by
;; UNSPEC_FMA4_INTRINSIC; emits the packed form vfmadd<ssemodesuffix>.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
2008 (define_insn "fma4i_fmadd<mode>4"
2009 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2013 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2014 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2015 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2016 UNSPEC_FMA4_INTRINSIC))]
2018 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2019 [(set_attr "type" "ssemuladd")
2020 (set_attr "mode" "<MODE>")])
;; fma4i_fmsub<mode>4: SSEMODEF2P pattern whose RTL is closed by
;; UNSPEC_FMA4_INTRINSIC; emits the packed form vfmsub<ssemodesuffix>.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
2022 (define_insn "fma4i_fmsub<mode>4"
2023 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2027 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2028 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2029 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2030 UNSPEC_FMA4_INTRINSIC))]
2032 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2033 [(set_attr "type" "ssemuladd")
2034 (set_attr "mode" "<MODE>")])
;; fma4i_fnmadd<mode>4: SSEMODEF2P pattern whose RTL is closed by
;; UNSPEC_FMA4_INTRINSIC; emits vfnmadd<ssemodesuffix>.  Operand 3 is listed
;; before operands 1 and 2 in the RTL.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
2036 (define_insn "fma4i_fnmadd<mode>4"
2037 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2040 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2042 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2043 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
2044 UNSPEC_FMA4_INTRINSIC))]
2046 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2047 [(set_attr "type" "ssemuladd")
2048 (set_attr "mode" "<MODE>")])
;; fma4i_fnmsub<mode>4: SSEMODEF2P pattern whose RTL is closed by
;; UNSPEC_FMA4_INTRINSIC; emits the packed form vfnmsub<ssemodesuffix>.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
2050 (define_insn "fma4i_fnmsub<mode>4"
2051 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2056 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2057 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2058 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2059 UNSPEC_FMA4_INTRINSIC))]
2061 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2062 [(set_attr "type" "ssemuladd")
2063 (set_attr "mode" "<MODE>")])
2065 ;; For the scalar operations, use operand1 for the upper words that aren't
2066 ;; modified, so restrict the forms that are accepted.
;; fma4i_vmfmadd<mode>4: the vec_merge is the first element of a vector
;; closed by UNSPEC_FMA4_INTRINSIC; emits vfmadd<ssescalarmodesuffix>.
;; The "mode" attribute is <ssescalarmode> here, unlike the packed fma4i
;; patterns, which use <MODE>.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
2067 (define_insn "fma4i_vmfmadd<mode>4"
2068 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2070 [(vec_merge:SSEMODEF2P
2073 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2075 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2078 UNSPEC_FMA4_INTRINSIC))]
2080 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2081 [(set_attr "type" "ssemuladd")
2082 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfmsub<mode>4: the vec_merge is the first element of a vector
;; closed by UNSPEC_FMA4_INTRINSIC; emits vfmsub<ssescalarmodesuffix>, with
;; the "mode" attribute set to <ssescalarmode>.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
2084 (define_insn "fma4i_vmfmsub<mode>4"
2085 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2087 [(vec_merge:SSEMODEF2P
2090 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2091 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2092 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2095 UNSPEC_FMA4_INTRINSIC))]
2097 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2098 [(set_attr "type" "ssemuladd")
2099 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfnmadd<mode>4: the vec_merge is the first element of a vector
;; closed by UNSPEC_FMA4_INTRINSIC; emits vfnmadd<ssescalarmodesuffix>.
;; Operand 3 is listed before operands 1 and 2 in the RTL.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
2101 (define_insn "fma4i_vmfnmadd<mode>4"
2102 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2104 [(vec_merge:SSEMODEF2P
2106 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2108 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2109 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2112 UNSPEC_FMA4_INTRINSIC))]
2114 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2115 [(set_attr "type" "ssemuladd")
2116 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfnmsub<mode>4: the vec_merge is the first element of a vector
;; closed by UNSPEC_FMA4_INTRINSIC; emits vfnmsub<ssescalarmodesuffix>, with
;; the "mode" attribute set to <ssescalarmode>.
;; NOTE(review): the embedded line numbers skip inside this pattern (no insn
;; condition is visible in this copy) -- confirm against upstream.
2118 (define_insn "fma4i_vmfnmsub<mode>4"
2119 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2121 [(vec_merge:SSEMODEF2P
2125 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2126 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2127 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2130 UNSPEC_FMA4_INTRINSIC))]
2132 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2133 [(set_attr "type" "ssemuladd")
2134 (set_attr "mode" "<ssescalarmode>")])
2136 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2138 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2140 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; fma4_fmaddsubv8sf4: V8SF pattern emitting vfmaddsubps on operands 1-3.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; NOTE(review): the embedded line numbers skip inside this pattern, so most
;; of the RTL around the operands is not visible here -- confirm upstream.
2142 (define_insn "fma4_fmaddsubv8sf4"
2143 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2147 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2148 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2149 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2156 "TARGET_FMA4 && TARGET_FUSED_MADD"
2157 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2158 [(set_attr "type" "ssemuladd")
2159 (set_attr "mode" "V8SF")])
;; fma4_fmaddsubv4df4: V4DF pattern emitting vfmaddsubpd on operands 1-3.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; NOTE(review): the embedded line numbers skip inside this pattern, so most
;; of the RTL around the operands is not visible here -- confirm upstream.
2161 (define_insn "fma4_fmaddsubv4df4"
2162 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2166 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2167 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2168 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2175 "TARGET_FMA4 && TARGET_FUSED_MADD"
2176 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2177 [(set_attr "type" "ssemuladd")
2178 (set_attr "mode" "V4DF")])
;; fma4_fmaddsubv4sf4: V4SF pattern emitting vfmaddsubps on operands 1-3.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; NOTE(review): the embedded line numbers skip inside this pattern, so most
;; of the RTL around the operands is not visible here -- confirm upstream.
2180 (define_insn "fma4_fmaddsubv4sf4"
2181 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2185 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2186 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2187 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2194 "TARGET_FMA4 && TARGET_FUSED_MADD"
2195 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2196 [(set_attr "type" "ssemuladd")
2197 (set_attr "mode" "V4SF")])
;; fma4_fmaddsubv2df4: V2DF pattern emitting vfmaddsubpd on operands 1-3.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; NOTE(review): the embedded line numbers skip inside this pattern, so most
;; of the RTL around the operands is not visible here -- confirm upstream.
2199 (define_insn "fma4_fmaddsubv2df4"
2200 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2204 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2205 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2206 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2213 "TARGET_FMA4 && TARGET_FUSED_MADD"
2214 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2215 [(set_attr "type" "ssemuladd")
2216 (set_attr "mode" "V2DF")])
;; fma4_fmsubaddv8sf4: V8SF pattern emitting vfmsubaddps on operands 1-3.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; NOTE(review): the embedded line numbers skip inside this pattern, so most
;; of the RTL around the operands is not visible here -- confirm upstream.
2218 (define_insn "fma4_fmsubaddv8sf4"
2219 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2223 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2224 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2225 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2232 "TARGET_FMA4 && TARGET_FUSED_MADD"
2233 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2234 [(set_attr "type" "ssemuladd")
2235 (set_attr "mode" "V8SF")])
;; fma4_fmsubaddv4df4: V4DF pattern emitting vfmsubaddpd on operands 1-3.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; NOTE(review): the embedded line numbers skip inside this pattern, so most
;; of the RTL around the operands is not visible here -- confirm upstream.
2237 (define_insn "fma4_fmsubaddv4df4"
2238 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2242 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2243 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2244 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2251 "TARGET_FMA4 && TARGET_FUSED_MADD"
2252 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2253 [(set_attr "type" "ssemuladd")
2254 (set_attr "mode" "V4DF")])
;; fma4_fmsubaddv4sf4: V4SF pattern emitting vfmsubaddps on operands 1-3.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; NOTE(review): the embedded line numbers skip inside this pattern, so most
;; of the RTL around the operands is not visible here -- confirm upstream.
2256 (define_insn "fma4_fmsubaddv4sf4"
2257 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2261 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2262 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2263 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2270 "TARGET_FMA4 && TARGET_FUSED_MADD"
2271 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2272 [(set_attr "type" "ssemuladd")
2273 (set_attr "mode" "V4SF")])
;; fma4_fmsubaddv2df4: V2DF pattern emitting vfmsubaddpd on operands 1-3.
;; Enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; NOTE(review): the embedded line numbers skip inside this pattern, so most
;; of the RTL around the operands is not visible here -- confirm upstream.
2275 (define_insn "fma4_fmsubaddv2df4"
2276 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2280 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2281 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2282 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2289 "TARGET_FMA4 && TARGET_FUSED_MADD"
2290 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2291 [(set_attr "type" "ssemuladd")
2292 (set_attr "mode" "V2DF")])
;; fma4i_fmaddsubv8sf4: V8SF pattern closed by UNSPEC_FMA4_INTRINSIC;
;; emits vfmaddsubps on operands 1-3.
;; NOTE(review): the embedded line numbers skip inside this pattern (most of
;; the RTL and the insn condition are not visible) -- confirm upstream.
2294 (define_insn "fma4i_fmaddsubv8sf4"
2295 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2300 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2301 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2302 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2309 UNSPEC_FMA4_INTRINSIC))]
2311 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2312 [(set_attr "type" "ssemuladd")
2313 (set_attr "mode" "V8SF")])
;; fma4i_fmaddsubv4df4: V4DF pattern closed by UNSPEC_FMA4_INTRINSIC;
;; emits vfmaddsubpd on operands 1-3.
;; NOTE(review): the embedded line numbers skip inside this pattern (most of
;; the RTL and the insn condition are not visible) -- confirm upstream.
2315 (define_insn "fma4i_fmaddsubv4df4"
2316 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2321 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2322 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2323 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2330 UNSPEC_FMA4_INTRINSIC))]
2332 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2333 [(set_attr "type" "ssemuladd")
2334 (set_attr "mode" "V4DF")])
;; fma4i_fmaddsubv4sf4: V4SF pattern closed by UNSPEC_FMA4_INTRINSIC;
;; emits vfmaddsubps on operands 1-3.
;; NOTE(review): the embedded line numbers skip inside this pattern (most of
;; the RTL and the insn condition are not visible) -- confirm upstream.
2336 (define_insn "fma4i_fmaddsubv4sf4"
2337 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2342 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2343 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2344 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2351 UNSPEC_FMA4_INTRINSIC))]
2353 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2354 [(set_attr "type" "ssemuladd")
2355 (set_attr "mode" "V4SF")])
;; fma4i_fmaddsubv2df4: V2DF pattern closed by UNSPEC_FMA4_INTRINSIC;
;; emits vfmaddsubpd on operands 1-3.
;; NOTE(review): the embedded line numbers skip inside this pattern (most of
;; the RTL and the insn condition are not visible) -- confirm upstream.
2357 (define_insn "fma4i_fmaddsubv2df4"
2358 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2363 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2364 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2365 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2372 UNSPEC_FMA4_INTRINSIC))]
2374 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2375 [(set_attr "type" "ssemuladd")
2376 (set_attr "mode" "V2DF")])
;; fma4i_fmsubaddv8sf4: V8SF pattern closed by UNSPEC_FMA4_INTRINSIC;
;; emits vfmsubaddps on operands 1-3.
;; NOTE(review): the embedded line numbers skip inside this pattern (most of
;; the RTL and the insn condition are not visible) -- confirm upstream.
2378 (define_insn "fma4i_fmsubaddv8sf4"
2379 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2384 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2385 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2386 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2393 UNSPEC_FMA4_INTRINSIC))]
2395 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2396 [(set_attr "type" "ssemuladd")
2397 (set_attr "mode" "V8SF")])
;; fma4i_fmsubaddv4df4: V4DF pattern closed by UNSPEC_FMA4_INTRINSIC;
;; emits vfmsubaddpd on operands 1-3.
;; NOTE(review): the embedded line numbers skip inside this pattern (most of
;; the RTL and the insn condition are not visible) -- confirm upstream.
2399 (define_insn "fma4i_fmsubaddv4df4"
2400 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2405 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2406 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2407 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2414 UNSPEC_FMA4_INTRINSIC))]
2416 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2417 [(set_attr "type" "ssemuladd")
2418 (set_attr "mode" "V4DF")])
;; fma4i_fmsubaddv4sf4: V4SF pattern closed by UNSPEC_FMA4_INTRINSIC;
;; emits vfmsubaddps on operands 1-3.
;; NOTE(review): the embedded line numbers skip inside this pattern (most of
;; the RTL and the insn condition are not visible) -- confirm upstream.
2420 (define_insn "fma4i_fmsubaddv4sf4"
2421 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2426 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2427 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2428 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2435 UNSPEC_FMA4_INTRINSIC))]
2437 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2438 [(set_attr "type" "ssemuladd")
2439 (set_attr "mode" "V4SF")])
;; fma4i_fmsubaddv2df4: V2DF pattern closed by UNSPEC_FMA4_INTRINSIC;
;; emits vfmsubaddpd on operands 1-3.
;; NOTE(review): the embedded line numbers skip inside this pattern (most of
;; the RTL and the insn condition are not visible) -- confirm upstream.
2441 (define_insn "fma4i_fmsubaddv2df4"
2442 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2447 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2448 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2449 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2456 UNSPEC_FMA4_INTRINSIC))]
2458 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2459 [(set_attr "type" "ssemuladd")
2460 (set_attr "mode" "V2DF")])
2462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2464 ;; Parallel single-precision floating point conversion operations
2466 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse_cvtpi2ps: converts V2SI operand 2 (constraint "ym") with float:V2SF
;; and writes V4SF operand 0; operand 1 is tied to the destination by the
;; "0" constraint.  Emits cvtpi2ps with operand 2 as source.
;; NOTE(review): the embedded line numbers skip inside this pattern (merge
;; wrapper and insn condition are not visible) -- confirm upstream.
2468 (define_insn "sse_cvtpi2ps"
2469 [(set (match_operand:V4SF 0 "register_operand" "=x")
2472 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2473 (match_operand:V4SF 1 "register_operand" "0")
2476 "cvtpi2ps\t{%2, %0|%0, %2}"
2477 [(set_attr "type" "ssecvt")
2478 (set_attr "mode" "V4SF")])
;; sse_cvtps2pi: V4SF operand 1 goes through an unspec:V4SI and elements
;; 0 and 1 are selected into V2SI operand 0 (constraint "=y").  Emits
;; cvtps2pi; the "unit" attribute is "mmx".
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; unspec code and insn condition are not visible) -- confirm upstream.
2480 (define_insn "sse_cvtps2pi"
2481 [(set (match_operand:V2SI 0 "register_operand" "=y")
2483 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2485 (parallel [(const_int 0) (const_int 1)])))]
2487 "cvtps2pi\t{%1, %0|%0, %1}"
2488 [(set_attr "type" "ssecvt")
2489 (set_attr "unit" "mmx")
2490 (set_attr "mode" "DI")])
;; sse_cvttps2pi: applies fix:V4SI to V4SF operand 1 and selects elements
;; 0 and 1 into V2SI operand 0 (constraint "=y").  Emits cvttps2pi; the
;; attributes set "unit" to "mmx" and "prefix_rep" to "0".
;; NOTE(review): the embedded line numbers skip inside this pattern (select
;; wrapper and insn condition are not visible) -- confirm upstream.
2492 (define_insn "sse_cvttps2pi"
2493 [(set (match_operand:V2SI 0 "register_operand" "=y")
2495 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2496 (parallel [(const_int 0) (const_int 1)])))]
2498 "cvttps2pi\t{%1, %0|%0, %1}"
2499 [(set_attr "type" "ssecvt")
2500 (set_attr "unit" "mmx")
2501 (set_attr "prefix_rep" "0")
2502 (set_attr "mode" "SF")])
;; *avx_cvtsi2ss: converts SI operand 2 ("rm") with float:SF; operand 1 is a
;; separate V4SF register ("x"), so the three-operand vcvtsi2ss form is
;; emitted.  The "prefix" attribute is "vex".
;; NOTE(review): the embedded line numbers skip inside this pattern (merge
;; wrapper and insn condition are not visible) -- confirm upstream.
2504 (define_insn "*avx_cvtsi2ss"
2505 [(set (match_operand:V4SF 0 "register_operand" "=x")
2508 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2509 (match_operand:V4SF 1 "register_operand" "x")
2512 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2513 [(set_attr "type" "sseicvt")
2514 (set_attr "prefix" "vex")
2515 (set_attr "mode" "SF")])
;; sse_cvtsi2ss: converts SI operand 2 with float:SF; operand 1 is tied to
;; the destination ("0,0"), so the two-operand cvtsi2ss form is emitted.
;; Alternative 0 takes operand 2 as "r", alternative 1 as "m"; the decode
;; attributes are given per alternative ("vector,double").
;; NOTE(review): the embedded line numbers skip inside this pattern (merge
;; wrapper and insn condition are not visible) -- confirm upstream.
2517 (define_insn "sse_cvtsi2ss"
2518 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2521 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2522 (match_operand:V4SF 1 "register_operand" "0,0")
2525 "cvtsi2ss\t{%2, %0|%0, %2}"
2526 [(set_attr "type" "sseicvt")
2527 (set_attr "athlon_decode" "vector,double")
2528 (set_attr "amdfam10_decode" "vector,double")
2529 (set_attr "mode" "SF")])
;; *avx_cvtsi2ssq: DI-source variant; converts operand 2 ("rm") with
;; float:SF and emits the three-operand vcvtsi2ssq.  Enabled by
;; TARGET_AVX && TARGET_64BIT; "length_vex" is set to "4".
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; merge wrapper is not visible) -- confirm against upstream.
2531 (define_insn "*avx_cvtsi2ssq"
2532 [(set (match_operand:V4SF 0 "register_operand" "=x")
2535 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2536 (match_operand:V4SF 1 "register_operand" "x")
2538 "TARGET_AVX && TARGET_64BIT"
2539 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2540 [(set_attr "type" "sseicvt")
2541 (set_attr "length_vex" "4")
2542 (set_attr "prefix" "vex")
2543 (set_attr "mode" "SF")])
;; sse_cvtsi2ssq: DI-source variant; operand 1 is tied to the destination
;; ("0,0") and the two-operand cvtsi2ssq is emitted.  Enabled by
;; TARGET_SSE && TARGET_64BIT; "prefix_rex" is set to "1".
;; NOTE(review): operand 2 uses "r,rm" here whereas sse_cvtsi2ss uses "r,m";
;; confirm whether that difference is intentional.
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; merge wrapper is not visible) -- confirm against upstream.
2545 (define_insn "sse_cvtsi2ssq"
2546 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2549 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2550 (match_operand:V4SF 1 "register_operand" "0,0")
2552 "TARGET_SSE && TARGET_64BIT"
2553 "cvtsi2ssq\t{%2, %0|%0, %2}"
2554 [(set_attr "type" "sseicvt")
2555 (set_attr "prefix_rex" "1")
2556 (set_attr "athlon_decode" "vector,double")
2557 (set_attr "amdfam10_decode" "vector,double")
2558 (set_attr "mode" "SF")])
;; sse_cvtss2si: takes element 0 of V4SF operand 1 inside an
;; UNSPEC_FIX_NOTRUNC and writes SI operand 0 ("=r,r").  The template
;; starts with %v and "prefix" is "maybe_vex".
;; NOTE(review): no "amdfam10_decode" attribute is set here although
;; sse_cvtss2si_2 sets one -- confirm whether that is intentional.
;; NOTE(review): the embedded line numbers skip inside this pattern (unspec
;; header and insn condition are not visible) -- confirm upstream.
2560 (define_insn "sse_cvtss2si"
2561 [(set (match_operand:SI 0 "register_operand" "=r,r")
2564 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2565 (parallel [(const_int 0)]))]
2566 UNSPEC_FIX_NOTRUNC))]
2568 "%vcvtss2si\t{%1, %0|%0, %1}"
2569 [(set_attr "type" "sseicvt")
2570 (set_attr "athlon_decode" "double,vector")
2571 (set_attr "prefix_rep" "1")
2572 (set_attr "prefix" "maybe_vex")
2573 (set_attr "mode" "SI")])
;; sse_cvtss2si_2: same cvtss2si template as sse_cvtss2si, but the source is
;; a plain SF operand wrapped directly in unspec:SI UNSPEC_FIX_NOTRUNC.
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2575 (define_insn "sse_cvtss2si_2"
2576 [(set (match_operand:SI 0 "register_operand" "=r,r")
2577 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2578 UNSPEC_FIX_NOTRUNC))]
2580 "%vcvtss2si\t{%1, %0|%0, %1}"
2581 [(set_attr "type" "sseicvt")
2582 (set_attr "athlon_decode" "double,vector")
2583 (set_attr "amdfam10_decode" "double,double")
2584 (set_attr "prefix_rep" "1")
2585 (set_attr "prefix" "maybe_vex")
2586 (set_attr "mode" "SI")])
;; sse_cvtss2siq: DI-destination variant; takes element 0 of V4SF operand 1
;; inside an UNSPEC_FIX_NOTRUNC.  Enabled by TARGET_SSE && TARGET_64BIT.
;; The template writes the size suffix as "{q}".
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; unspec header is not visible) -- confirm against upstream.
2588 (define_insn "sse_cvtss2siq"
2589 [(set (match_operand:DI 0 "register_operand" "=r,r")
2592 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2593 (parallel [(const_int 0)]))]
2594 UNSPEC_FIX_NOTRUNC))]
2595 "TARGET_SSE && TARGET_64BIT"
2596 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2597 [(set_attr "type" "sseicvt")
2598 (set_attr "athlon_decode" "double,vector")
2599 (set_attr "prefix_rep" "1")
2600 (set_attr "prefix" "maybe_vex")
2601 (set_attr "mode" "DI")])
;; sse_cvtss2siq_2: wraps SF operand 1 ("x,m") in unspec:DI
;; UNSPEC_FIX_NOTRUNC and writes DI operand 0 ("=r,r").  Enabled by
;; TARGET_SSE && TARGET_64BIT.  The template starts with %v, writes the size
;; suffix as "{q}", and "prefix" is "maybe_vex".
2603 (define_insn "sse_cvtss2siq_2"
2604 [(set (match_operand:DI 0 "register_operand" "=r,r")
2605 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2606 UNSPEC_FIX_NOTRUNC))]
2607 "TARGET_SSE && TARGET_64BIT"
2608 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2609 [(set_attr "type" "sseicvt")
2610 (set_attr "athlon_decode" "double,vector")
2611 (set_attr "amdfam10_decode" "double,double")
2612 (set_attr "prefix_rep" "1")
2613 (set_attr "prefix" "maybe_vex")
2614 (set_attr "mode" "DI")])
;; sse_cvttss2si: takes element 0 of V4SF operand 1 and writes SI operand 0
;; ("=r,r"); emits cvttss2si (with %v prefix handling).
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; operators around operand 1 and the condition are not visible) -- confirm.
2616 (define_insn "sse_cvttss2si"
2617 [(set (match_operand:SI 0 "register_operand" "=r,r")
2620 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2621 (parallel [(const_int 0)]))))]
2623 "%vcvttss2si\t{%1, %0|%0, %1}"
2624 [(set_attr "type" "sseicvt")
2625 (set_attr "athlon_decode" "double,vector")
2626 (set_attr "amdfam10_decode" "double,double")
2627 (set_attr "prefix_rep" "1")
2628 (set_attr "prefix" "maybe_vex")
2629 (set_attr "mode" "SI")])
;; sse_cvttss2siq: DI-destination variant of sse_cvttss2si; enabled by
;; TARGET_SSE && TARGET_64BIT.  The template writes the size suffix as "{q}".
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; operators around operand 1 are not visible) -- confirm upstream.
2631 (define_insn "sse_cvttss2siq"
2632 [(set (match_operand:DI 0 "register_operand" "=r,r")
2635 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2636 (parallel [(const_int 0)]))))]
2637 "TARGET_SSE && TARGET_64BIT"
2638 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2639 [(set_attr "type" "sseicvt")
2640 (set_attr "athlon_decode" "double,vector")
2641 (set_attr "amdfam10_decode" "double,double")
2642 (set_attr "prefix_rep" "1")
2643 (set_attr "prefix" "maybe_vex")
2644 (set_attr "mode" "DI")])
;; avx_cvtdq2ps<avxmodesuffix>: float conversion over the AVXMODEDCVTDQ2PS
;; iterator; the source operand uses the <avxcvtvecmode> mode attribute.
;; Emits vcvtdq2ps with "prefix" "vex".
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2646 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2647 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2648 (float:AVXMODEDCVTDQ2PS
2649 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2651 "vcvtdq2ps\t{%1, %0|%0, %1}"
2652 [(set_attr "type" "ssecvt")
2653 (set_attr "prefix" "vex")
2654 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvtdq2ps: float:V4SF of V4SI operand 1 ("xm") into V4SF operand 0;
;; emits cvtdq2ps.
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2656 (define_insn "sse2_cvtdq2ps"
2657 [(set (match_operand:V4SF 0 "register_operand" "=x")
2658 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2660 "cvtdq2ps\t{%1, %0|%0, %1}"
2661 [(set_attr "type" "ssecvt")
2662 (set_attr "mode" "V4SF")])
;; sse2_cvtudq2ps: expander built from four visible steps: float:V4SF of
;; operand 1, an lt:V4SF compare of operand 5 against operand 3, an and:V4SF
;; of operand 6 with operand 4, and a final plus:V4SF of operands 5 and 7
;; into operand 0.
;; The preparation code sets operand 3 to a zero V4SF vector in a register,
;; operand 4 to a register holding a V4SF constant built from the SFmode
;; value real_ldexp (dconst1, 32), and operands 5-7 to fresh V4SF registers.
;; NOTE(review): the embedded line numbers skip inside this expander, so the
;; destinations of the first three steps, the condition and the local
;; declarations of `x` and `i` are not visible -- confirm upstream.
2664 (define_expand "sse2_cvtudq2ps"
2666 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2668 (lt:V4SF (match_dup 5) (match_dup 3)))
2670 (and:V4SF (match_dup 6) (match_dup 4)))
2671 (set (match_operand:V4SF 0 "register_operand" "")
2672 (plus:V4SF (match_dup 5) (match_dup 7)))]
2675 REAL_VALUE_TYPE TWO32r;
2679 real_ldexp (&TWO32r, &dconst1, 32);
2680 x = const_double_from_real_value (TWO32r, SFmode);
2682 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2683 operands[4] = force_reg (V4SFmode,
2684 ix86_build_const_vector (V4SFmode, 1, x));
2686 for (i = 5; i < 8; i++)
2687 operands[i] = gen_reg_rtx (V4SFmode);
;; avx_cvtps2dq<avxmodesuffix>: wraps the <avxcvtvecmode> source operand in
;; an UNSPEC_FIX_NOTRUNC over the AVXMODEDCVTPS2DQ iterator; emits vcvtps2dq
;; with "prefix" "vex".
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2690 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2691 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2692 (unspec:AVXMODEDCVTPS2DQ
2693 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2694 UNSPEC_FIX_NOTRUNC))]
2696 "vcvtps2dq\t{%1, %0|%0, %1}"
2697 [(set_attr "type" "ssecvt")
2698 (set_attr "prefix" "vex")
2699 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvtps2dq: wraps V4SF operand 1 in unspec:V4SI UNSPEC_FIX_NOTRUNC and
;; writes V4SI operand 0; emits cvtps2dq with "prefix_data16" "1".
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2701 (define_insn "sse2_cvtps2dq"
2702 [(set (match_operand:V4SI 0 "register_operand" "=x")
2703 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2704 UNSPEC_FIX_NOTRUNC))]
2706 "cvtps2dq\t{%1, %0|%0, %1}"
2707 [(set_attr "type" "ssecvt")
2708 (set_attr "prefix_data16" "1")
2709 (set_attr "mode" "TI")])
;; avx_cvttps2dq<avxmodesuffix>: applies fix to the <avxcvtvecmode> source
;; operand over the AVXMODEDCVTPS2DQ iterator; emits vcvttps2dq with
;; "prefix" "vex".
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2711 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2712 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2713 (fix:AVXMODEDCVTPS2DQ
2714 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2716 "vcvttps2dq\t{%1, %0|%0, %1}"
2717 [(set_attr "type" "ssecvt")
2718 (set_attr "prefix" "vex")
2719 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvttps2dq: fix:V4SI of V4SF operand 1 into V4SI operand 0; emits
;; cvttps2dq.  Attributes set "prefix_rep" to "1" and "prefix_data16" to "0".
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2721 (define_insn "sse2_cvttps2dq"
2722 [(set (match_operand:V4SI 0 "register_operand" "=x")
2723 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2725 "cvttps2dq\t{%1, %0|%0, %1}"
2726 [(set_attr "type" "ssecvt")
2727 (set_attr "prefix_rep" "1")
2728 (set_attr "prefix_data16" "0")
2729 (set_attr "mode" "TI")])
2731 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2733 ;; Parallel double-precision floating point conversion operations
2735 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: float:V2DF of V2SI operand 1 into V2DF operand 0; emits
;; cvtpi2pd.  Alternative 0 takes the source as "y" and alternative 1 as
;; "m"; "unit" and "prefix_data16" are only set for alternative 0
;; ("mmx,*" and "1,*").
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2737 (define_insn "sse2_cvtpi2pd"
2738 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2739 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2741 "cvtpi2pd\t{%1, %0|%0, %1}"
2742 [(set_attr "type" "ssecvt")
2743 (set_attr "unit" "mmx,*")
2744 (set_attr "prefix_data16" "1,*")
2745 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: wraps V2DF operand 1 in unspec:V2SI UNSPEC_FIX_NOTRUNC and
;; writes V2SI operand 0 (constraint "=y"); emits cvtpd2pi with "unit" "mmx".
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2747 (define_insn "sse2_cvtpd2pi"
2748 [(set (match_operand:V2SI 0 "register_operand" "=y")
2749 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2750 UNSPEC_FIX_NOTRUNC))]
2752 "cvtpd2pi\t{%1, %0|%0, %1}"
2753 [(set_attr "type" "ssecvt")
2754 (set_attr "unit" "mmx")
2755 (set_attr "prefix_data16" "1")
2756 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: fix:V2SI of V2DF operand 1 into V2SI operand 0
;; (constraint "=y"); emits cvttpd2pi with "unit" "mmx".
;; NOTE(review): "mode" is "TI" here but "DI" in sse2_cvtpd2pi -- confirm
;; whether that difference is intentional.
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2758 (define_insn "sse2_cvttpd2pi"
2759 [(set (match_operand:V2SI 0 "register_operand" "=y")
2760 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2762 "cvttpd2pi\t{%1, %0|%0, %1}"
2763 [(set_attr "type" "ssecvt")
2764 (set_attr "unit" "mmx")
2765 (set_attr "prefix_data16" "1")
2766 (set_attr "mode" "TI")])
;; *avx_cvtsi2sd: converts SI operand 2 ("rm") with float:DF; operand 1 is a
;; separate V2DF register ("x"), so the three-operand vcvtsi2sd form is
;; emitted with "prefix" "vex".
;; NOTE(review): the embedded line numbers skip inside this pattern (merge
;; wrapper and insn condition are not visible) -- confirm upstream.
2768 (define_insn "*avx_cvtsi2sd"
2769 [(set (match_operand:V2DF 0 "register_operand" "=x")
2772 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2773 (match_operand:V2DF 1 "register_operand" "x")
2776 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2777 [(set_attr "type" "sseicvt")
2778 (set_attr "prefix" "vex")
2779 (set_attr "mode" "DF")])
;; sse2_cvtsi2sd: converts SI operand 2 ("r,m") with float:DF; operand 1 is
;; tied to the destination ("0,0"), so the two-operand cvtsi2sd is emitted.
;; Decode attributes are given per alternative.
;; NOTE(review): the embedded line numbers skip inside this pattern (merge
;; wrapper and insn condition are not visible) -- confirm upstream.
2781 (define_insn "sse2_cvtsi2sd"
2782 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2785 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2786 (match_operand:V2DF 1 "register_operand" "0,0")
2789 "cvtsi2sd\t{%2, %0|%0, %2}"
2790 [(set_attr "type" "sseicvt")
2791 (set_attr "mode" "DF")
2792 (set_attr "athlon_decode" "double,direct")
2793 (set_attr "amdfam10_decode" "vector,double")])
;; *avx_cvtsi2sdq: DI-source variant; converts operand 2 ("rm") with
;; float:DF and emits the three-operand vcvtsi2sdq.  Enabled by
;; TARGET_AVX && TARGET_64BIT; "length_vex" is set to "4".
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; merge wrapper is not visible) -- confirm against upstream.
2795 (define_insn "*avx_cvtsi2sdq"
2796 [(set (match_operand:V2DF 0 "register_operand" "=x")
2799 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2800 (match_operand:V2DF 1 "register_operand" "x")
2802 "TARGET_AVX && TARGET_64BIT"
2803 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2804 [(set_attr "type" "sseicvt")
2805 (set_attr "length_vex" "4")
2806 (set_attr "prefix" "vex")
2807 (set_attr "mode" "DF")])
;; sse2_cvtsi2sdq: DI-source variant; operand 1 is tied to the destination
;; ("0,0") and the two-operand cvtsi2sdq is emitted.  Enabled by
;; TARGET_SSE2 && TARGET_64BIT; "prefix_rex" is set to "1".
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; merge wrapper is not visible) -- confirm against upstream.
2809 (define_insn "sse2_cvtsi2sdq"
2810 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2813 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2814 (match_operand:V2DF 1 "register_operand" "0,0")
2816 "TARGET_SSE2 && TARGET_64BIT"
2817 "cvtsi2sdq\t{%2, %0|%0, %2}"
2818 [(set_attr "type" "sseicvt")
2819 (set_attr "prefix_rex" "1")
2820 (set_attr "mode" "DF")
2821 (set_attr "athlon_decode" "double,direct")
2822 (set_attr "amdfam10_decode" "vector,double")])
;; sse2_cvtsd2si: takes element 0 of V2DF operand 1 inside an
;; UNSPEC_FIX_NOTRUNC and writes SI operand 0 ("=r,r"); emits cvtsd2si (with
;; %v prefix handling).
;; NOTE(review): no "amdfam10_decode" attribute is set here although
;; sse2_cvtsd2si_2 sets one -- confirm whether that is intentional.
;; NOTE(review): the embedded line numbers skip inside this pattern (unspec
;; header and insn condition are not visible) -- confirm upstream.
2824 (define_insn "sse2_cvtsd2si"
2825 [(set (match_operand:SI 0 "register_operand" "=r,r")
2828 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2829 (parallel [(const_int 0)]))]
2830 UNSPEC_FIX_NOTRUNC))]
2832 "%vcvtsd2si\t{%1, %0|%0, %1}"
2833 [(set_attr "type" "sseicvt")
2834 (set_attr "athlon_decode" "double,vector")
2835 (set_attr "prefix_rep" "1")
2836 (set_attr "prefix" "maybe_vex")
2837 (set_attr "mode" "SI")])
;; sse2_cvtsd2si_2: same cvtsd2si template as sse2_cvtsd2si, but the source
;; is a plain DF operand wrapped directly in unspec:SI UNSPEC_FIX_NOTRUNC.
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2839 (define_insn "sse2_cvtsd2si_2"
2840 [(set (match_operand:SI 0 "register_operand" "=r,r")
2841 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2842 UNSPEC_FIX_NOTRUNC))]
2844 "%vcvtsd2si\t{%1, %0|%0, %1}"
2845 [(set_attr "type" "sseicvt")
2846 (set_attr "athlon_decode" "double,vector")
2847 (set_attr "amdfam10_decode" "double,double")
2848 (set_attr "prefix_rep" "1")
2849 (set_attr "prefix" "maybe_vex")
2850 (set_attr "mode" "SI")])
;; sse2_cvtsd2siq: DI-destination variant; takes element 0 of V2DF operand 1
;; inside an UNSPEC_FIX_NOTRUNC.  Enabled by TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): the template spells the suffix as a plain "q", whereas the
;; sse_cvtss2siq patterns write "{q}" -- confirm which form is intended.
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; unspec header is not visible) -- confirm against upstream.
2852 (define_insn "sse2_cvtsd2siq"
2853 [(set (match_operand:DI 0 "register_operand" "=r,r")
2856 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2857 (parallel [(const_int 0)]))]
2858 UNSPEC_FIX_NOTRUNC))]
2859 "TARGET_SSE2 && TARGET_64BIT"
2860 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2861 [(set_attr "type" "sseicvt")
2862 (set_attr "athlon_decode" "double,vector")
2863 (set_attr "prefix_rep" "1")
2864 (set_attr "prefix" "maybe_vex")
2865 (set_attr "mode" "DI")])
;; sse2_cvtsd2siq_2: wraps DF operand 1 ("x,m") in unspec:DI
;; UNSPEC_FIX_NOTRUNC and writes DI operand 0 ("=r,r").  Enabled by
;; TARGET_SSE2 && TARGET_64BIT; the template starts with %v and "prefix" is
;; "maybe_vex".
2867 (define_insn "sse2_cvtsd2siq_2"
2868 [(set (match_operand:DI 0 "register_operand" "=r,r")
2869 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2870 UNSPEC_FIX_NOTRUNC))]
2871 "TARGET_SSE2 && TARGET_64BIT"
2872 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2873 [(set_attr "type" "sseicvt")
2874 (set_attr "athlon_decode" "double,vector")
2875 (set_attr "amdfam10_decode" "double,double")
2876 (set_attr "prefix_rep" "1")
2877 (set_attr "prefix" "maybe_vex")
2878 (set_attr "mode" "DI")])
;; sse2_cvttsd2si: takes element 0 of V2DF operand 1 and writes SI operand 0
;; ("=r,r"); emits cvttsd2si (with %v prefix handling).
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; operators around operand 1 and the condition are not visible) -- confirm.
2880 (define_insn "sse2_cvttsd2si"
2881 [(set (match_operand:SI 0 "register_operand" "=r,r")
2884 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2885 (parallel [(const_int 0)]))))]
2887 "%vcvttsd2si\t{%1, %0|%0, %1}"
2888 [(set_attr "type" "sseicvt")
2889 (set_attr "prefix_rep" "1")
2890 (set_attr "prefix" "maybe_vex")
2891 (set_attr "mode" "SI")
2892 (set_attr "athlon_decode" "double,vector")
2893 (set_attr "amdfam10_decode" "double,double")])
;; sse2_cvttsd2siq: DI-destination variant of sse2_cvttsd2si; enabled by
;; TARGET_SSE2 && TARGET_64BIT and emits cvttsd2siq.
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; operators around operand 1 are not visible) -- confirm upstream.
2895 (define_insn "sse2_cvttsd2siq"
2896 [(set (match_operand:DI 0 "register_operand" "=r,r")
2899 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2900 (parallel [(const_int 0)]))))]
2901 "TARGET_SSE2 && TARGET_64BIT"
2902 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2903 [(set_attr "type" "sseicvt")
2904 (set_attr "prefix_rep" "1")
2905 (set_attr "prefix" "maybe_vex")
2906 (set_attr "mode" "DI")
2907 (set_attr "athlon_decode" "double,vector")
2908 (set_attr "amdfam10_decode" "double,double")])
;; avx_cvtdq2pd256: float:V4DF of V4SI operand 1 ("xm") into V4DF operand 0;
;; emits vcvtdq2pd with "prefix" "vex".
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2910 (define_insn "avx_cvtdq2pd256"
2911 [(set (match_operand:V4DF 0 "register_operand" "=x")
2912 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2914 "vcvtdq2pd\t{%1, %0|%0, %1}"
2915 [(set_attr "type" "ssecvt")
2916 (set_attr "prefix" "vex")
2917 (set_attr "mode" "V4DF")])
;; *avx_cvtdq2pd256_2: variant taking a V8SI source of which elements 0-3
;; are selected; the template prints the source with the %x1 modifier.
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; operators around operand 1 and the condition are not visible) -- confirm.
2919 (define_insn "*avx_cvtdq2pd256_2"
2920 [(set (match_operand:V4DF 0 "register_operand" "=x")
2923 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2924 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2926 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2927 [(set_attr "type" "ssecvt")
2928 (set_attr "prefix" "vex")
2929 (set_attr "mode" "V4DF")])
;; sse2_cvtdq2pd: selects elements 0 and 1 of V4SI operand 1 and writes V2DF
;; operand 0; emits cvtdq2pd (with %v prefix handling, "maybe_vex").
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; operators around operand 1 and the condition are not visible) -- confirm.
2931 (define_insn "sse2_cvtdq2pd"
2932 [(set (match_operand:V2DF 0 "register_operand" "=x")
2935 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2936 (parallel [(const_int 0) (const_int 1)]))))]
2938 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2939 [(set_attr "type" "ssecvt")
2940 (set_attr "prefix" "maybe_vex")
2941 (set_attr "mode" "V2DF")])
;; avx_cvtpd2dq256: wraps V4DF operand 1 in unspec:V4SI UNSPEC_FIX_NOTRUNC
;; and writes V4SI operand 0; the template writes the mnemonic suffix as
;; "{y}".
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2943 (define_insn "avx_cvtpd2dq256"
2944 [(set (match_operand:V4SI 0 "register_operand" "=x")
2945 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2946 UNSPEC_FIX_NOTRUNC))]
2948 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2949 [(set_attr "type" "ssecvt")
2950 (set_attr "prefix" "vex")
2951 (set_attr "mode" "OI")])
;; sse2_cvtpd2dq: expander that sets operand 2 to the V2SI zero constant
;; (CONST0_RTX) before emitting RTL containing an unspec:V2SI of V2DF
;; operand 1, with V4SI operand 0 as destination.
;; NOTE(review): the embedded line numbers skip inside this expander (the
;; rest of the RTL and the condition are not visible) -- confirm upstream.
2953 (define_expand "sse2_cvtpd2dq"
2954 [(set (match_operand:V4SI 0 "register_operand" "")
2956 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2960 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvtpd2dq: matches an unspec:V2SI of V2DF operand 1 together with a
;; V2SI operand 2 that must satisfy const0_operand.  The output is chosen in
;; C: "vcvtpd2dq{x}" when TARGET_AVX, otherwise "cvtpd2dq".
;; NOTE(review): the embedded line numbers skip inside this pattern (wrapper
;; operator, unspec code and condition are not visible) -- confirm upstream.
2962 (define_insn "*sse2_cvtpd2dq"
2963 [(set (match_operand:V4SI 0 "register_operand" "=x")
2965 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2967 (match_operand:V2SI 2 "const0_operand" "")))]
2969 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2970 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2971 [(set_attr "type" "ssecvt")
2972 (set_attr "prefix_rep" "1")
2973 (set_attr "prefix_data16" "0")
2974 (set_attr "prefix" "maybe_vex")
2975 (set_attr "mode" "TI")
2976 (set_attr "amdfam10_decode" "double")])
;; avx_cvttpd2dq256: fix:V4SI of V4DF operand 1 into V4SI operand 0; the
;; template writes the mnemonic suffix as "{y}".
;; NOTE(review): the embedded line numbers skip one line before the output
;; template (no insn condition is visible) -- confirm upstream.
2978 (define_insn "avx_cvttpd2dq256"
2979 [(set (match_operand:V4SI 0 "register_operand" "=x")
2980 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2982 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2983 [(set_attr "type" "ssecvt")
2984 (set_attr "prefix" "vex")
2985 (set_attr "mode" "OI")])
;; sse2_cvttpd2dq: expander that sets operand 2 to the V2SI zero constant
;; (CONST0_RTX) before emitting RTL containing fix:V2SI of V2DF operand 1,
;; with V4SI operand 0 as destination.
;; NOTE(review): the embedded line numbers skip inside this expander (the
;; rest of the RTL and the condition are not visible) -- confirm upstream.
2987 (define_expand "sse2_cvttpd2dq"
2988 [(set (match_operand:V4SI 0 "register_operand" "")
2990 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2993 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvttpd2dq: matches fix:V2SI of V2DF operand 1 together with a V2SI
;; operand 2 that must satisfy const0_operand.  The output is chosen in C:
;; "vcvttpd2dq{x}" when TARGET_AVX, otherwise "cvttpd2dq".
;; NOTE(review): the embedded line numbers skip inside this pattern (wrapper
;; operator and condition are not visible) -- confirm upstream.
2995 (define_insn "*sse2_cvttpd2dq"
2996 [(set (match_operand:V4SI 0 "register_operand" "=x")
2998 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2999 (match_operand:V2SI 2 "const0_operand" "")))]
3001 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
3002 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
3003 [(set_attr "type" "ssecvt")
3004 (set_attr "prefix" "maybe_vex")
3005 (set_attr "mode" "TI")
3006 (set_attr "amdfam10_decode" "double")])
;; *avx_cvtsd2ss: float_truncate:V2SF of V2DF operand 2 ("xm"); operand 1 is
;; a separate V4SF register ("x"), so the three-operand vcvtsd2ss form is
;; emitted with "prefix" "vex".
;; NOTE(review): the embedded line numbers skip inside this pattern (merge
;; wrapper and insn condition are not visible) -- confirm upstream.
3008 (define_insn "*avx_cvtsd2ss"
3009 [(set (match_operand:V4SF 0 "register_operand" "=x")
3012 (float_truncate:V2SF
3013 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
3014 (match_operand:V4SF 1 "register_operand" "x")
3017 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
3018 [(set_attr "type" "ssecvt")
3019 (set_attr "prefix" "vex")
3020 (set_attr "mode" "SF")])
;; sse2_cvtsd2ss: float_truncate:V2SF of V2DF operand 2 ("x,m"); operand 1
;; is tied to the destination ("0,0"), so the two-operand cvtsd2ss is
;; emitted.  Decode attributes are given per alternative.
;; NOTE(review): the embedded line numbers skip inside this pattern (merge
;; wrapper and insn condition are not visible) -- confirm upstream.
3022 (define_insn "sse2_cvtsd2ss"
3023 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3026 (float_truncate:V2SF
3027 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
3028 (match_operand:V4SF 1 "register_operand" "0,0")
3031 "cvtsd2ss\t{%2, %0|%0, %2}"
3032 [(set_attr "type" "ssecvt")
3033 (set_attr "athlon_decode" "vector,double")
3034 (set_attr "amdfam10_decode" "vector,double")
3035 (set_attr "mode" "SF")])
;; *avx_cvtss2sd: selects elements 0 and 1 of V4SF operand 2 ("xm"); operand
;; 1 is a separate V2DF register ("x"), so the three-operand vcvtss2sd form
;; is emitted with "prefix" "vex".
;; NOTE(review): the embedded line numbers skip inside this pattern (the
;; wrapping operators and condition are not visible) -- confirm upstream.
3037 (define_insn "*avx_cvtss2sd"
3038 [(set (match_operand:V2DF 0 "register_operand" "=x")
3042 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3043 (parallel [(const_int 0) (const_int 1)])))
3044 (match_operand:V2DF 1 "register_operand" "x")
3047 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3048 [(set_attr "type" "ssecvt")
3049 (set_attr "prefix" "vex")
3050 (set_attr "mode" "DF")])
;; Legacy two-operand cvtss2sd.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 (V4SF) comes from a register or memory,
;; with elements 0 and 1 selected from it.
3052 (define_insn "sse2_cvtss2sd"
3053 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3057 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3058 (parallel [(const_int 0) (const_int 1)])))
3059 (match_operand:V2DF 1 "register_operand" "0,0")
3062 "cvtss2sd\t{%2, %0|%0, %2}"
3063 [(set_attr "type" "ssecvt")
3064 (set_attr "amdfam10_decode" "vector,double")
3065 (set_attr "mode" "DF")])
;; 256-bit AVX narrowing of the V4DF operand 1 to a V4SF result via
;; float_truncate; emitted as vcvtpd2ps with the {y} suffix.
3067 (define_insn "avx_cvtpd2ps256"
3068 [(set (match_operand:V4SF 0 "register_operand" "=x")
3069 (float_truncate:V4SF
3070 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3072 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3073 [(set_attr "type" "ssecvt")
3074 (set_attr "prefix" "vex")
3075 (set_attr "mode" "V4SF")])
;; Expander for the 128-bit float_truncate of V2DF operand 1 into a V4SF
;; destination.  The preparation statement supplies operand 2 as the
;; V2SF zero constant.
3077 (define_expand "sse2_cvtpd2ps"
3078 [(set (match_operand:V4SF 0 "register_operand" "")
3080 (float_truncate:V2SF
3081 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3084 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for the expander of the same name: operand 1 (V2DF) is
;; narrowed with float_truncate:V2SF and operand 2 must satisfy
;; const0_operand.  The output code picks vcvtpd2ps{x} when TARGET_AVX is
;; set and the legacy cvtpd2ps otherwise.
3086 (define_insn "*sse2_cvtpd2ps"
3087 [(set (match_operand:V4SF 0 "register_operand" "=x")
3089 (float_truncate:V2SF
3090 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3091 (match_operand:V2SF 2 "const0_operand" "")))]
3093 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3094 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3095 [(set_attr "type" "ssecvt")
3096 (set_attr "prefix_data16" "1")
3097 (set_attr "prefix" "maybe_vex")
3098 (set_attr "mode" "V4SF")
3099 (set_attr "amdfam10_decode" "double")])
;; 256-bit AVX conversion of the V4SF operand 1 (register or memory) to
;; a V4DF result; emitted as vcvtps2pd.
3101 (define_insn "avx_cvtps2pd256"
3102 [(set (match_operand:V4DF 0 "register_operand" "=x")
3104 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3106 "vcvtps2pd\t{%1, %0|%0, %1}"
3107 [(set_attr "type" "ssecvt")
3108 (set_attr "prefix" "vex")
3109 (set_attr "mode" "V4DF")])
;; Variant of the V4SF -> V4DF conversion whose input is elements 0..3
;; selected from a V8SF operand.  Operand 1 is printed with the %x
;; modifier (presumably so the 128-bit register name is used -- confirm
;; against the operand-printing code).
3111 (define_insn "*avx_cvtps2pd256_2"
3112 [(set (match_operand:V4DF 0 "register_operand" "=x")
3115 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3116 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
3118 "vcvtps2pd\t{%x1, %0|%0, %x1}"
3119 [(set_attr "type" "ssecvt")
3120 (set_attr "prefix" "vex")
3121 (set_attr "mode" "V4DF")])
;; 128-bit conversion producing a V2DF result from elements 0 and 1 of
;; the V4SF operand 1.  The template starts with %v, and the "prefix"
;; attribute is maybe_vex, so one pattern serves both encodings.
3123 (define_insn "sse2_cvtps2pd"
3124 [(set (match_operand:V2DF 0 "register_operand" "=x")
3127 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3128 (parallel [(const_int 0) (const_int 1)]))))]
3130 "%vcvtps2pd\t{%1, %0|%0, %1}"
3131 [(set_attr "type" "ssecvt")
3132 (set_attr "prefix" "maybe_vex")
3133 (set_attr "mode" "V2DF")
3134 (set_attr "prefix_data16" "0")
3135 (set_attr "amdfam10_decode" "direct")])
;; Two-step expander producing a V2DF operand 0 from V4SF operand 1:
;; first a selection whose index list begins with 6, then a set of
;; operand 0 that selects elements 0 and 1.  Operand 2 is a fresh V4SF
;; temporary (presumably the result of the first step).
3137 (define_expand "vec_unpacks_hi_v4sf"
3142 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3143 (parallel [(const_int 6)
3147 (set (match_operand:V2DF 0 "register_operand" "")
3151 (parallel [(const_int 0) (const_int 1)]))))]
3153 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander producing a V4DF operand 0 from V8SF operand 1:
;; a selection whose index list begins with 4, followed by the set of
;; operand 0.  Operand 2 is a fresh V4SF temporary (presumably holding
;; the selected half).
3155 (define_expand "vec_unpacks_hi_v8sf"
3158 (match_operand:V8SF 1 "nonimmediate_operand" "")
3159 (parallel [(const_int 4)
3163 (set (match_operand:V4DF 0 "register_operand" "")
3168 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF operand 0 from elements 0 and 1 of the
;; V4SF operand 1.
3171 (define_expand "vec_unpacks_lo_v4sf"
3172 [(set (match_operand:V2DF 0 "register_operand" "")
3175 (match_operand:V4SF 1 "nonimmediate_operand" "")
3176 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing a V4DF operand 0 from elements 0..3 of the V8SF
;; operand 1.
3179 (define_expand "vec_unpacks_lo_v8sf"
3180 [(set (match_operand:V4DF 0 "register_operand" "")
3183 (match_operand:V8SF 1 "nonimmediate_operand" "")
3184 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
;; Expander from V8HI operand 1 to V4SF operand 0, done in two emitted
;; insns: gen_vec_unpacks_hi_v8hi into a V4SI temporary, then
;; gen_sse2_cvtdq2ps from that temporary into operand 0.
3187 (define_expand "vec_unpacks_float_hi_v8hi"
3188 [(match_operand:V4SF 0 "register_operand" "")
3189 (match_operand:V8HI 1 "register_operand" "")]
3192 rtx tmp = gen_reg_rtx (V4SImode);
3194 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3195 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0, done in two emitted
;; insns: gen_vec_unpacks_lo_v8hi into a V4SI temporary, then
;; gen_sse2_cvtdq2ps from that temporary into operand 0.
3199 (define_expand "vec_unpacks_float_lo_v8hi"
3200 [(match_operand:V4SF 0 "register_operand" "")
3201 (match_operand:V8HI 1 "register_operand" "")]
3204 rtx tmp = gen_reg_rtx (V4SImode);
3206 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3207 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0, done in two emitted
;; insns: gen_vec_unpacku_hi_v8hi into a V4SI temporary, then
;; gen_sse2_cvtdq2ps from that temporary into operand 0.
3211 (define_expand "vec_unpacku_float_hi_v8hi"
3212 [(match_operand:V4SF 0 "register_operand" "")
3213 (match_operand:V8HI 1 "register_operand" "")]
3216 rtx tmp = gen_reg_rtx (V4SImode);
3218 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3219 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0, done in two emitted
;; insns: gen_vec_unpacku_lo_v8hi into a V4SI temporary, then
;; gen_sse2_cvtdq2ps from that temporary into operand 0.
3223 (define_expand "vec_unpacku_float_lo_v8hi"
3224 [(match_operand:V4SF 0 "register_operand" "")
3225 (match_operand:V8HI 1 "register_operand" "")]
3228 rtx tmp = gen_reg_rtx (V4SImode);
3230 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3231 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander producing a V2DF operand 0 from V4SI operand 1:
;; a selection whose index list begins with 2, then a set of operand 0
;; that selects elements 0 and 1.  Operand 2 is a fresh V4SI temporary
;; (presumably the result of the first step).
3235 (define_expand "vec_unpacks_float_hi_v4si"
3238 (match_operand:V4SI 1 "nonimmediate_operand" "")
3239 (parallel [(const_int 2)
3243 (set (match_operand:V2DF 0 "register_operand" "")
3247 (parallel [(const_int 0) (const_int 1)]))))]
3249 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V2DF operand 0 from elements 0 and 1 of the
;; V4SI operand 1.
3251 (define_expand "vec_unpacks_float_lo_v4si"
3252 [(set (match_operand:V2DF 0 "register_operand" "")
3255 (match_operand:V4SI 1 "nonimmediate_operand" "")
3256 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander producing a V4DF operand 0 from V8SI operand 1:
;; a selection whose index list begins with 4, followed by the set of
;; operand 0.  Operand 2 is a fresh V4SI temporary (presumably holding
;; the selected half).
3259 (define_expand "vec_unpacks_float_hi_v8si"
3262 (match_operand:V8SI 1 "nonimmediate_operand" "")
3263 (parallel [(const_int 4)
3267 (set (match_operand:V4DF 0 "register_operand" "")
3271 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V4DF operand 0 from elements 0..3 of the V8SI
;; operand 1.
3273 (define_expand "vec_unpacks_float_lo_v8si"
3274 [(set (match_operand:V4DF 0 "register_operand" "")
3277 (match_operand:V8SI 1 "nonimmediate_operand" "")
3278 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
;; Expander producing a V2DF operand 0 from V4SI operand 1 with a
;; fix-up sequence: an lt comparison of temporary 6 against operand 3,
;; an `and' of temporary 7 with operand 4, and a final plus of
;; temporaries 6 and 8 into operand 0.
;; Preparation code: operand 3 is the V2DF zero vector forced into a
;; register; operand 4 is a V2DF constant built by
;; ix86_build_const_vector from 2^32 (real_ldexp of dconst1 by 32);
;; operand 5 is a V4SI temporary and operands 6..8 are V2DF temporaries.
;; NOTE(review): presumably this adds 2^32 to lanes that converted to a
;; negative value -- confirm against the complete pattern.
3281 (define_expand "vec_unpacku_float_hi_v4si"
3284 (match_operand:V4SI 1 "nonimmediate_operand" "")
3285 (parallel [(const_int 2)
3293 (parallel [(const_int 0) (const_int 1)]))))
3295 (lt:V2DF (match_dup 6) (match_dup 3)))
3297 (and:V2DF (match_dup 7) (match_dup 4)))
3298 (set (match_operand:V2DF 0 "register_operand" "")
3299 (plus:V2DF (match_dup 6) (match_dup 8)))]
3302 REAL_VALUE_TYPE TWO32r;
3306 real_ldexp (&TWO32r, &dconst1, 32);
3307 x = const_double_from_real_value (TWO32r, DFmode);
3309 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3310 operands[4] = force_reg (V2DFmode,
3311 ix86_build_const_vector (V2DFmode, 1, x));
3313 operands[5] = gen_reg_rtx (V4SImode);
3315 for (i = 6; i < 9; i++)
3316 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander producing a V2DF operand 0 from elements 0 and 1 of V4SI
;; operand 1 with a fix-up sequence: an lt comparison of temporary 5
;; against operand 3, an `and' of temporary 6 with operand 4, and a
;; final plus of temporaries 5 and 7 into operand 0.
;; Preparation code: operand 3 is the V2DF zero vector forced into a
;; register; operand 4 is a V2DF constant built by
;; ix86_build_const_vector from 2^32 (real_ldexp of dconst1 by 32);
;; operands 5..7 are V2DF temporaries.
;; NOTE(review): presumably this adds 2^32 to lanes that converted to a
;; negative value -- confirm against the complete pattern.
3319 (define_expand "vec_unpacku_float_lo_v4si"
3323 (match_operand:V4SI 1 "nonimmediate_operand" "")
3324 (parallel [(const_int 0) (const_int 1)]))))
3326 (lt:V2DF (match_dup 5) (match_dup 3)))
3328 (and:V2DF (match_dup 6) (match_dup 4)))
3329 (set (match_operand:V2DF 0 "register_operand" "")
3330 (plus:V2DF (match_dup 5) (match_dup 7)))]
3333 REAL_VALUE_TYPE TWO32r;
3337 real_ldexp (&TWO32r, &dconst1, 32);
3338 x = const_double_from_real_value (TWO32r, DFmode);
3340 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3341 operands[4] = force_reg (V2DFmode,
3342 ix86_build_const_vector (V2DFmode, 1, x));
3344 for (i = 5; i < 8; i++)
3345 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander that narrows the two V4DF operands 1 and 2 with
;; float_truncate:V4SF and then sets the V8SF operand 0.  Operands 3 and
;; 4 are fresh V4SF temporaries (presumably the two narrowed halves).
3348 (define_expand "vec_pack_trunc_v4df"
3350 (float_truncate:V4SF
3351 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3353 (float_truncate:V4SF
3354 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3355 (set (match_operand:V8SF 0 "register_operand" "")
3361 operands[3] = gen_reg_rtx (V4SFmode);
3362 operands[4] = gen_reg_rtx (V4SFmode);
;; Expander from two V2DF operands (1 and 2) to the V4SF operand 0.
;; Each input goes through gen_sse2_cvtpd2ps into its own V4SF
;; temporary, and gen_sse_movlhps combines the two temporaries into
;; operand 0.
3365 (define_expand "vec_pack_trunc_v2df"
3366 [(match_operand:V4SF 0 "register_operand" "")
3367 (match_operand:V2DF 1 "nonimmediate_operand" "")
3368 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3373 r1 = gen_reg_rtx (V4SFmode);
3374 r2 = gen_reg_rtx (V4SFmode);
3376 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3377 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3378 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander from two V2DF operands (1 and 2) to the V4SI operand 0.
;; Each input goes through gen_sse2_cvttpd2dq into its own V4SI
;; temporary; the temporaries and the destination are then viewed as
;; V2DI via gen_lowpart and combined with gen_vec_interleave_lowv2di.
3382 (define_expand "vec_pack_sfix_trunc_v2df"
3383 [(match_operand:V4SI 0 "register_operand" "")
3384 (match_operand:V2DF 1 "nonimmediate_operand" "")
3385 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3390 r1 = gen_reg_rtx (V4SImode);
3391 r2 = gen_reg_rtx (V4SImode);
3393 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3394 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3395 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3396 gen_lowpart (V2DImode, r1),
3397 gen_lowpart (V2DImode, r2)));
;; Expander from two V2DF operands (1 and 2) to the V4SI operand 0.
;; Same shape as vec_pack_sfix_trunc_v2df, but each input is converted
;; with gen_sse2_cvtpd2dq instead of gen_sse2_cvttpd2dq before the V2DI
;; low interleave.
3401 (define_expand "vec_pack_sfix_v2df"
3402 [(match_operand:V4SI 0 "register_operand" "")
3403 (match_operand:V2DF 1 "nonimmediate_operand" "")
3404 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3409 r1 = gen_reg_rtx (V4SImode);
3410 r2 = gen_reg_rtx (V4SImode);
3412 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3413 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3414 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3415 gen_lowpart (V2DImode, r1),
3416 gen_lowpart (V2DImode, r2)));
3420 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3422 ;; Parallel single-precision floating point element swizzling
3424 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the movhlps-style selection (index list begins with 6)
;; over V4SF operands 1 and 2.  All three operands may be memory here;
;; the preparation statement hands them to ix86_fixup_binary_operands
;; before the insn is matched.
3426 (define_expand "sse_movhlps_exp"
3427 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3430 (match_operand:V4SF 1 "nonimmediate_operand" "")
3431 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3432 (parallel [(const_int 6)
3437 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form of the movhlps selection.  Requires TARGET_AVX and rejects
;; the case where operands 1 and 2 are both memory.  Alternatives:
;;   0: all registers                        -> vmovhlps
;;   1: operand 2 in offsettable memory      -> vmovlps reading %H2
;;   2: destination in memory, operand 1 tied to it -> vmovhps store
3439 (define_insn "*avx_movhlps"
3440 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3443 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3444 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3445 (parallel [(const_int 6)
3449 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3451 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3452 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3453 vmovhps\t{%2, %0|%0, %2}"
3454 [(set_attr "type" "ssemov")
3455 (set_attr "prefix" "vex")
3456 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE form of the movhlps selection; operand 1 is tied to the
;; destination in every alternative.  Requires TARGET_SSE and rejects
;; the case where operands 1 and 2 are both memory.  Alternatives:
;;   0: register source                      -> movhlps
;;   1: operand 2 in offsettable memory      -> movlps reading %H2
;;   2: destination in memory                -> movhps store
3458 (define_insn "sse_movhlps"
3459 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3462 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3463 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3464 (parallel [(const_int 6)
3468 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3470 movhlps\t{%2, %0|%0, %2}
3471 movlps\t{%H2, %0|%0, %H2}
3472 movhps\t{%2, %0|%0, %2}"
3473 [(set_attr "type" "ssemov")
3474 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander for the movlhps-style selection (index list begins with 0)
;; over V4SF operands 1 and 2.  All three operands may be memory here;
;; the preparation statement hands them to ix86_fixup_binary_operands
;; before the insn is matched.
3476 (define_expand "sse_movlhps_exp"
3477 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3480 (match_operand:V4SF 1 "nonimmediate_operand" "")
3481 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3482 (parallel [(const_int 0)
3487 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form of the movlhps selection.  Requires TARGET_AVX and
;; ix86_binary_operator_ok on the operands.  Alternatives:
;;   0: all registers                        -> vmovlhps
;;   1: operand 2 in memory                  -> vmovhps load
;;   2: destination in offsettable memory, operand 1 tied to it
;;                                           -> vmovlps store to %H0
3489 (define_insn "*avx_movlhps"
3490 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3493 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3494 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3495 (parallel [(const_int 0)
3499 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3501 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3502 vmovhps\t{%2, %1, %0|%0, %1, %2}
3503 vmovlps\t{%2, %H0|%H0, %2}"
3504 [(set_attr "type" "ssemov")
3505 (set_attr "prefix" "vex")
3506 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE form of the movlhps selection; operand 1 is tied to the
;; destination in every alternative.  Requires TARGET_SSE and
;; ix86_binary_operator_ok on the operands.  Alternatives:
;;   0: register source                      -> movlhps
;;   1: operand 2 in memory                  -> movhps load
;;   2: destination in offsettable memory    -> movlps store to %H0
3508 (define_insn "sse_movlhps"
3509 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3512 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3513 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3514 (parallel [(const_int 0)
3518 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3520 movlhps\t{%2, %0|%0, %2}
3521 movhps\t{%2, %0|%0, %2}
3522 movlps\t{%2, %H0|%H0, %2}"
3523 [(set_attr "type" "ssemov")
3524 (set_attr "mode" "V4SF,V2SF,V2SF")])
3526 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF: selects indices 2,10,3,11 and 6,14,7,15 from
;; operands 1 and 2, i.e. elements 2-3 and 6-7 of operand 1 interleaved
;; with the same-numbered elements of operand 2 (indices 8..15).
3527 (define_insn "avx_unpckhps256"
3528 [(set (match_operand:V8SF 0 "register_operand" "=x")
3531 (match_operand:V8SF 1 "register_operand" "x")
3532 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3533 (parallel [(const_int 2) (const_int 10)
3534 (const_int 3) (const_int 11)
3535 (const_int 6) (const_int 14)
3536 (const_int 7) (const_int 15)])))]
3538 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3539 [(set_attr "type" "sselog")
3540 (set_attr "prefix" "vex")
3541 (set_attr "mode" "V8SF")])
;; AVX three-operand vunpckhps on V4SF: selects indices 2,6,3,7 from
;; operands 1 and 2 (elements 2 and 3 of each, interleaved).
3543 (define_insn "*avx_interleave_highv4sf"
3544 [(set (match_operand:V4SF 0 "register_operand" "=x")
3547 (match_operand:V4SF 1 "register_operand" "x")
3548 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3549 (parallel [(const_int 2) (const_int 6)
3550 (const_int 3) (const_int 7)])))]
3552 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3553 [(set_attr "type" "sselog")
3554 (set_attr "prefix" "vex")
3555 (set_attr "mode" "V4SF")])
;; Three-step expander for a V8SF high interleave of operands 1 and 2.
;; Two selections are made with the index lists 0,8,1,9,4,12,5,13 and
;; 2,10,3,11,6,14,7,15; operand 0 is then assembled from elements 4..7
;; of two sources.  Operands 3 and 4 are fresh V8SF temporaries
;; (presumably the two intermediate selections).
3557 (define_expand "vec_interleave_highv8sf"
3561 (match_operand:V8SF 1 "register_operand" "x")
3562 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3563 (parallel [(const_int 0) (const_int 8)
3564 (const_int 1) (const_int 9)
3565 (const_int 4) (const_int 12)
3566 (const_int 5) (const_int 13)])))
3572 (parallel [(const_int 2) (const_int 10)
3573 (const_int 3) (const_int 11)
3574 (const_int 6) (const_int 14)
3575 (const_int 7) (const_int 15)])))
3576 (set (match_operand:V8SF 0 "register_operand" "")
3580 (parallel [(const_int 4) (const_int 5)
3581 (const_int 6) (const_int 7)]))
3584 (parallel [(const_int 4) (const_int 5)
3585 (const_int 6) (const_int 7)]))))]
3588 operands[3] = gen_reg_rtx (V8SFmode);
3589 operands[4] = gen_reg_rtx (V8SFmode);
;; Legacy two-operand unpckhps on V4SF: selects indices 2,6,3,7 from
;; operands 1 and 2.  Operand 1 is tied to the destination.
3592 (define_insn "vec_interleave_highv4sf"
3593 [(set (match_operand:V4SF 0 "register_operand" "=x")
3596 (match_operand:V4SF 1 "register_operand" "0")
3597 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3598 (parallel [(const_int 2) (const_int 6)
3599 (const_int 3) (const_int 7)])))]
3601 "unpckhps\t{%2, %0|%0, %2}"
3602 [(set_attr "type" "sselog")
3603 (set_attr "mode" "V4SF")])
3605 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: selects indices 0,8,1,9 and 4,12,5,13 from
;; operands 1 and 2, i.e. elements 0-1 and 4-5 of operand 1 interleaved
;; with the same-numbered elements of operand 2 (indices 8..15).
3606 (define_insn "avx_unpcklps256"
3607 [(set (match_operand:V8SF 0 "register_operand" "=x")
3610 (match_operand:V8SF 1 "register_operand" "x")
3611 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3612 (parallel [(const_int 0) (const_int 8)
3613 (const_int 1) (const_int 9)
3614 (const_int 4) (const_int 12)
3615 (const_int 5) (const_int 13)])))]
3617 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3618 [(set_attr "type" "sselog")
3619 (set_attr "prefix" "vex")
3620 (set_attr "mode" "V8SF")])
;; AVX three-operand vunpcklps on V4SF: selects indices 0,4,1,5 from
;; operands 1 and 2 (elements 0 and 1 of each, interleaved).
3622 (define_insn "*avx_interleave_lowv4sf"
3623 [(set (match_operand:V4SF 0 "register_operand" "=x")
3626 (match_operand:V4SF 1 "register_operand" "x")
3627 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3628 (parallel [(const_int 0) (const_int 4)
3629 (const_int 1) (const_int 5)])))]
3631 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3632 [(set_attr "type" "sselog")
3633 (set_attr "prefix" "vex")
3634 (set_attr "mode" "V4SF")])
;; Three-step expander for a V8SF low interleave of operands 1 and 2.
;; Two selections are made with the index lists 0,8,1,9,4,12,5,13 and
;; 2,10,3,11,6,14,7,15; operand 0 is then assembled from elements 0..3
;; of two sources.  Operands 3 and 4 are fresh V8SF temporaries
;; (presumably the two intermediate selections).
3636 (define_expand "vec_interleave_lowv8sf"
3640 (match_operand:V8SF 1 "register_operand" "x")
3641 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3642 (parallel [(const_int 0) (const_int 8)
3643 (const_int 1) (const_int 9)
3644 (const_int 4) (const_int 12)
3645 (const_int 5) (const_int 13)])))
3651 (parallel [(const_int 2) (const_int 10)
3652 (const_int 3) (const_int 11)
3653 (const_int 6) (const_int 14)
3654 (const_int 7) (const_int 15)])))
3655 (set (match_operand:V8SF 0 "register_operand" "")
3659 (parallel [(const_int 0) (const_int 1)
3660 (const_int 2) (const_int 3)]))
3663 (parallel [(const_int 0) (const_int 1)
3664 (const_int 2) (const_int 3)]))))]
3667 operands[3] = gen_reg_rtx (V8SFmode);
3668 operands[4] = gen_reg_rtx (V8SFmode);
;; Legacy two-operand unpcklps on V4SF: selects indices 0,4,1,5 from
;; operands 1 and 2.  Operand 1 is tied to the destination.
3671 (define_insn "vec_interleave_lowv4sf"
3672 [(set (match_operand:V4SF 0 "register_operand" "=x")
3675 (match_operand:V4SF 1 "register_operand" "0")
3676 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3677 (parallel [(const_int 0) (const_int 4)
3678 (const_int 1) (const_int 5)])))]
3680 "unpcklps\t{%2, %0|%0, %2}"
3681 [(set_attr "type" "sselog")
3682 (set_attr "mode" "V4SF")])
3684 ;; These are modeled with the same vec_concat as the others so that we
3685 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: the selection 1,1,3,3,5,5,7,7 duplicates each
;; odd-indexed element of operand 1 into the pair of result slots.
3686 (define_insn "avx_movshdup256"
3687 [(set (match_operand:V8SF 0 "register_operand" "=x")
3690 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3692 (parallel [(const_int 1) (const_int 1)
3693 (const_int 3) (const_int 3)
3694 (const_int 5) (const_int 5)
3695 (const_int 7) (const_int 7)])))]
3697 "vmovshdup\t{%1, %0|%0, %1}"
3698 [(set_attr "type" "sse")
3699 (set_attr "prefix" "vex")
3700 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF (selection list begins with index 1).  The
;; %v template prefix and the maybe_vex "prefix" attribute let one
;; pattern serve both the legacy and the VEX encoding.
3702 (define_insn "sse3_movshdup"
3703 [(set (match_operand:V4SF 0 "register_operand" "=x")
3706 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3708 (parallel [(const_int 1)
3713 "%vmovshdup\t{%1, %0|%0, %1}"
3714 [(set_attr "type" "sse")
3715 (set_attr "prefix_rep" "1")
3716 (set_attr "prefix" "maybe_vex")
3717 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: the selection 0,0,2,2,4,4,6,6 duplicates each
;; even-indexed element of operand 1 into the pair of result slots.
3719 (define_insn "avx_movsldup256"
3720 [(set (match_operand:V8SF 0 "register_operand" "=x")
3723 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3725 (parallel [(const_int 0) (const_int 0)
3726 (const_int 2) (const_int 2)
3727 (const_int 4) (const_int 4)
3728 (const_int 6) (const_int 6)])))]
3730 "vmovsldup\t{%1, %0|%0, %1}"
3731 [(set_attr "type" "sse")
3732 (set_attr "prefix" "vex")
3733 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF (selection list begins with index 0).  The
;; %v template prefix and the maybe_vex "prefix" attribute let one
;; pattern serve both the legacy and the VEX encoding.
3735 (define_insn "sse3_movsldup"
3736 [(set (match_operand:V4SF 0 "register_operand" "=x")
3739 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3741 (parallel [(const_int 0)
3746 "%vmovsldup\t{%1, %0|%0, %1}"
3747 [(set_attr "type" "sse")
3748 (set_attr "prefix_rep" "1")
3749 (set_attr "prefix" "maybe_vex")
3750 (set_attr "mode" "V4SF")])
;; Expander taking an immediate shuffle mask (operand 3) and decoding it
;; into the eight explicit element selectors gen_avx_shufps256_1 takes.
;; Each 2-bit field of the mask yields one selector.  Fields 0 and 1 are
;; used as is and fields 2 and 3 get +8; the second group of four
;; repeats the same fields with a further +4 (i.e. +4, +4, +12, +12).
3752 (define_expand "avx_shufps256"
3753 [(match_operand:V8SF 0 "register_operand" "")
3754 (match_operand:V8SF 1 "register_operand" "")
3755 (match_operand:V8SF 2 "nonimmediate_operand" "")
3756 (match_operand:SI 3 "const_int_operand" "")]
3759 int mask = INTVAL (operands[3]);
3760 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3761 GEN_INT ((mask >> 0) & 3),
3762 GEN_INT ((mask >> 2) & 3),
3763 GEN_INT (((mask >> 4) & 3) + 8),
3764 GEN_INT (((mask >> 6) & 3) + 8),
3765 GEN_INT (((mask >> 0) & 3) + 4),
3766 GEN_INT (((mask >> 2) & 3) + 4),
3767 GEN_INT (((mask >> 4) & 3) + 12),
3768 GEN_INT (((mask >> 6) & 3) + 12)));
3772 ;; One bit in mask selects 2 elements.
;; vshufps on V8SF with eight explicit selectors (operands 3..10).  The
;; insn condition requires selectors 7..10 to equal selectors 3..6 plus
;; 4, so both groups of four describe the same shuffle.  The output code
;; re-encodes selectors 3..6 into the 8-bit immediate (two bits each,
;; after subtracting 8 from operands 5 and 6) and stores it in operand 3.
3773 (define_insn "avx_shufps256_1"
3774 [(set (match_operand:V8SF 0 "register_operand" "=x")
3777 (match_operand:V8SF 1 "register_operand" "x")
3778 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3779 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3780 (match_operand 4 "const_0_to_3_operand" "")
3781 (match_operand 5 "const_8_to_11_operand" "")
3782 (match_operand 6 "const_8_to_11_operand" "")
3783 (match_operand 7 "const_4_to_7_operand" "")
3784 (match_operand 8 "const_4_to_7_operand" "")
3785 (match_operand 9 "const_12_to_15_operand" "")
3786 (match_operand 10 "const_12_to_15_operand" "")])))]
3788 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3789 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3790 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3791 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3794 mask = INTVAL (operands[3]);
3795 mask |= INTVAL (operands[4]) << 2;
3796 mask |= (INTVAL (operands[5]) - 8) << 4;
3797 mask |= (INTVAL (operands[6]) - 8) << 6;
3798 operands[3] = GEN_INT (mask);
3800 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3802 [(set_attr "type" "sselog")
3803 (set_attr "length_immediate" "1")
3804 (set_attr "prefix" "vex")
3805 (set_attr "mode" "V8SF")])
;; Expander taking an immediate shuffle mask (operand 3) and decoding it
;; into the four explicit element selectors gen_sse_shufps_v4sf takes.
;; Each 2-bit field of the mask yields one selector; fields 2 and 3 get
;; +4.
3807 (define_expand "sse_shufps"
3808 [(match_operand:V4SF 0 "register_operand" "")
3809 (match_operand:V4SF 1 "register_operand" "")
3810 (match_operand:V4SF 2 "nonimmediate_operand" "")
3811 (match_operand:SI 3 "const_int_operand" "")]
3814 int mask = INTVAL (operands[3]);
3815 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3816 GEN_INT ((mask >> 0) & 3),
3817 GEN_INT ((mask >> 2) & 3),
3818 GEN_INT (((mask >> 4) & 3) + 4),
3819 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX three-operand vshufps over the SSEMODE4S iterator.  Operands
;; 3..6 are explicit selectors into the concatenation of operands 1 and
;; 2 (ranges 0..3, 0..3, 4..7, 4..7).  The output code packs them into
;; the 8-bit immediate, two bits each, after subtracting 4 from operands
;; 5 and 6, and stores the result in operand 3.
3823 (define_insn "*avx_shufps_<mode>"
3824 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3825 (vec_select:SSEMODE4S
3826 (vec_concat:<ssedoublesizemode>
3827 (match_operand:SSEMODE4S 1 "register_operand" "x")
3828 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3829 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3830 (match_operand 4 "const_0_to_3_operand" "")
3831 (match_operand 5 "const_4_to_7_operand" "")
3832 (match_operand 6 "const_4_to_7_operand" "")])))]
3836 mask |= INTVAL (operands[3]) << 0;
3837 mask |= INTVAL (operands[4]) << 2;
3838 mask |= (INTVAL (operands[5]) - 4) << 4;
3839 mask |= (INTVAL (operands[6]) - 4) << 6;
3840 operands[3] = GEN_INT (mask);
3842 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3844 [(set_attr "type" "sselog")
3845 (set_attr "length_immediate" "1")
3846 (set_attr "prefix" "vex")
3847 (set_attr "mode" "V4SF")])
;; Legacy two-operand shufps over the SSEMODE4S iterator; operand 1 is
;; tied to the destination.  Operands 3..6 are explicit selectors into
;; the concatenation of operands 1 and 2 (ranges 0..3, 0..3, 4..7,
;; 4..7).  The output code packs them into the 8-bit immediate, two bits
;; each, after subtracting 4 from operands 5 and 6, and stores the
;; result in operand 3.
3849 (define_insn "sse_shufps_<mode>"
3850 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3851 (vec_select:SSEMODE4S
3852 (vec_concat:<ssedoublesizemode>
3853 (match_operand:SSEMODE4S 1 "register_operand" "0")
3854 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3855 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3856 (match_operand 4 "const_0_to_3_operand" "")
3857 (match_operand 5 "const_4_to_7_operand" "")
3858 (match_operand 6 "const_4_to_7_operand" "")])))]
3862 mask |= INTVAL (operands[3]) << 0;
3863 mask |= INTVAL (operands[4]) << 2;
3864 mask |= (INTVAL (operands[5]) - 4) << 4;
3865 mask |= (INTVAL (operands[6]) - 4) << 6;
3866 operands[3] = GEN_INT (mask);
3868 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3870 [(set_attr "type" "sselog")
3871 (set_attr "length_immediate" "1")
3872 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of the V4SF operand 1 into the V2SF
;; operand 0.  Alternatives:
;;   0: register -> memory                   -> movhps store
;;   1: register -> register                 -> movhlps
;;   2: offsettable memory -> register       -> movlps reading %H1
;; All templates start with %v and the "prefix" attribute is maybe_vex.
3874 (define_insn "sse_storehps"
3875 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3877 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3878 (parallel [(const_int 2) (const_int 3)])))]
3881 %vmovhps\t{%1, %0|%0, %1}
3882 %vmovhlps\t{%1, %d0|%d0, %1}
3883 %vmovlps\t{%H1, %d0|%d0, %H1}"
3884 [(set_attr "type" "ssemov")
3885 (set_attr "prefix" "maybe_vex")
3886 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander combining elements 0 and 1 of the V4SF operand 1 with the
;; V2SF operand 2 into the V4SF operand 0.  The preparation statement
;; hands the operands to ix86_fixup_binary_operands.
3888 (define_expand "sse_loadhps_exp"
3889 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3892 (match_operand:V4SF 1 "nonimmediate_operand" "")
3893 (parallel [(const_int 0) (const_int 1)]))
3894 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3896 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form: elements 0 and 1 of operand 1 combined with the V2SF
;; operand 2.  Alternatives:
;;   0: operand 2 in memory                  -> vmovhps load
;;   1: operand 2 in a register              -> vmovlhps
;;   2: destination in offsettable memory, operand 1 tied to it
;;                                           -> vmovlps store to %H0
3898 (define_insn "*avx_loadhps"
3899 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3902 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3903 (parallel [(const_int 0) (const_int 1)]))
3904 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3907 vmovhps\t{%2, %1, %0|%0, %1, %2}
3908 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3909 vmovlps\t{%2, %H0|%H0, %2}"
3910 [(set_attr "type" "ssemov")
3911 (set_attr "prefix" "vex")
3912 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE form: elements 0 and 1 of operand 1 combined with the
;; V2SF operand 2; operand 1 is tied to the destination in every
;; alternative.  Alternatives:
;;   0: operand 2 in memory                  -> movhps load
;;   1: operand 2 in a register              -> movlhps
;;   2: destination in offsettable memory    -> movlps store to %H0
3914 (define_insn "sse_loadhps"
3915 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3918 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3919 (parallel [(const_int 0) (const_int 1)]))
3920 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3923 movhps\t{%2, %0|%0, %2}
3924 movlhps\t{%2, %0|%0, %2}
3925 movlps\t{%2, %H0|%H0, %2}"
3926 [(set_attr "type" "ssemov")
3927 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX form: extracts elements 0 and 1 of the V4SF operand 1 into the
;; V2SF operand 0.  Alternatives:
;;   0: register -> memory                   -> vmovlps store
;;   1: register -> register                 -> vmovaps (whole register)
;;   2: memory -> register                   -> vmovlps load
;; The "mode" attribute of alternative 1 is V4SF, matching the vmovaps
;; it emits and the movaps alternative of sse_storelps.
3929 (define_insn "*avx_storelps"
3930 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3932 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3933 (parallel [(const_int 0) (const_int 1)])))]
3936 vmovlps\t{%1, %0|%0, %1}
3937 vmovaps\t{%1, %0|%0, %1}
3938 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3939 [(set_attr "type" "ssemov")
3940 (set_attr "prefix" "vex")
3941 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE form: extracts elements 0 and 1 of the V4SF operand 1 into
;; the V2SF operand 0.  Alternatives:
;;   0: register -> memory                   -> movlps store
;;   1: register -> register                 -> movaps (whole register)
;;   2: memory -> register                   -> movlps load
3943 (define_insn "sse_storelps"
3944 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3946 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3947 (parallel [(const_int 0) (const_int 1)])))]
3950 movlps\t{%1, %0|%0, %1}
3951 movaps\t{%1, %0|%0, %1}
3952 movlps\t{%1, %0|%0, %1}"
3953 [(set_attr "type" "ssemov")
3954 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander combining the V2SF operand 2 with elements 2 and 3 of the
;; V4SF operand 1 into the V4SF operand 0.  The preparation statement
;; hands the operands to ix86_fixup_binary_operands.
3956 (define_expand "sse_loadlps_exp"
3957 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3959 (match_operand:V2SF 2 "nonimmediate_operand" "")
3961 (match_operand:V4SF 1 "nonimmediate_operand" "")
3962 (parallel [(const_int 2) (const_int 3)]))))]
3964 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form: the V2SF operand 2 combined with elements 2 and 3 of the
;; V4SF operand 1.  Alternatives:
;;   0: all registers     -> vshufps with immediate 0xe4
;;   1: operand 2 in memory                  -> vmovlps load
;;   2: destination in memory, operand 1 tied to it -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: the template has three
;; register operands plus the immediate and the insn's "prefix"
;; attribute is vex, which the legacy shufps spelling cannot express.
3966 (define_insn "*avx_loadlps"
3967 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3969 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3971 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3972 (parallel [(const_int 2) (const_int 3)]))))]
3975 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3976 vmovlps\t{%2, %1, %0|%0, %1, %2}
3977 vmovlps\t{%2, %0|%0, %2}"
3978 [(set_attr "type" "sselog,ssemov,ssemov")
3979 (set_attr "length_immediate" "1,*,*")
3980 (set_attr "prefix" "vex")
3981 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE form: the V2SF operand 2 combined with elements 2 and 3 of
;; the V4SF operand 1.  Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;                                           -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied  -> movlps load
;;   2: destination in memory, operand 1 tied -> movlps store
3983 (define_insn "sse_loadlps"
3984 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3986 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3988 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3989 (parallel [(const_int 2) (const_int 3)]))))]
3992 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3993 movlps\t{%2, %0|%0, %2}
3994 movlps\t{%2, %0|%0, %2}"
3995 [(set_attr "type" "sselog,ssemov,ssemov")
3996 (set_attr "length_immediate" "1,*,*")
3997 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX three-operand vmovss on V4SF registers: operands 1 and 2 are
;; independent register sources and neither is tied to the destination.
3999 (define_insn "*avx_movss"
4000 [(set (match_operand:V4SF 0 "register_operand" "=x")
4002 (match_operand:V4SF 2 "register_operand" "x")
4003 (match_operand:V4SF 1 "register_operand" "x")
4006 "vmovss\t{%2, %1, %0|%0, %1, %2}"
4007 [(set_attr "type" "ssemov")
4008 (set_attr "prefix" "vex")
4009 (set_attr "mode" "SF")])
;; Legacy two-operand movss on V4SF registers: operand 1 is tied to the
;; destination and operand 2 is the register source.
4011 (define_insn "sse_movss"
4012 [(set (match_operand:V4SF 0 "register_operand" "=x")
4014 (match_operand:V4SF 2 "register_operand" "x")
4015 (match_operand:V4SF 1 "register_operand" "0")
4018 "movss\t{%2, %0|%0, %2}"
4019 [(set_attr "type" "ssemov")
4020 (set_attr "mode" "SF")])
;; Expander building the V4SF operand 0 from the scalar SF operand 1.
;; The preparation code forces operand 1 into a register.  That register
;; must have SFmode, the mode operand 1 is declared with; asking
;; force_reg for a V4SFmode register would copy an SF value into a
;; register of a different mode.
4022 (define_expand "vec_dupv4sf"
4023 [(set (match_operand:V4SF 0 "register_operand" "")
4025 (match_operand:SF 1 "nonimmediate_operand" "")))]
4029 operands[1] = force_reg (SFmode, operands[1]);
;; AVX insn building a V4SF from the SF operand 1.  Alternatives:
;;   0: register source -> vshufps with immediate 0, operand 1 used as
;;      both sources
;;   1: memory source   -> vbroadcastss
4032 (define_insn "*vec_dupv4sf_avx"
4033 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4035 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
4038 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
4039 vbroadcastss\t{%1, %0|%0, %1}"
4040 [(set_attr "type" "sselog1,ssemov")
4041 (set_attr "length_immediate" "1,0")
4042 (set_attr "prefix_extra" "0,1")
4043 (set_attr "prefix" "vex")
4044 (set_attr "mode" "V4SF")])
;; Legacy SSE insn building a V4SF from the SF register operand 1, which
;; is tied to the destination; emitted as shufps with immediate 0 on the
;; destination register itself.
4046 (define_insn "*vec_dupv4sf"
4047 [(set (match_operand:V4SF 0 "register_operand" "=x")
4049 (match_operand:SF 1 "register_operand" "0")))]
4051 "shufps\t{$0, %0, %0|%0, %0, 0}"
4052 [(set_attr "type" "sselog1")
4053 (set_attr "length_immediate" "1")
4054 (set_attr "mode" "V4SF")])
;; AVX insn forming a V2SF from the two SF operands 1 and 2.
;; Alternatives:
;;   0: both in x registers                  -> vunpcklps
;;   1: operand 2 in memory                  -> vinsertps with 0x10
;;   2: operand 1 in memory, operand 2 "C"   -> vmovss load
;;   3: y-register destination               -> punpckldq
;;   4: y-register destination, operand 1 in memory, operand 2 "C"
;;                                           -> movd
;; The "prefix" attribute is orig for alternatives 3 and 4 and vex for
;; the rest.
4056 (define_insn "*vec_concatv2sf_avx"
4057 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
4059 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
4060 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
4063 vunpcklps\t{%2, %1, %0|%0, %1, %2}
4064 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
4065 vmovss\t{%1, %0|%0, %1}
4066 punpckldq\t{%2, %0|%0, %2}
4067 movd\t{%1, %0|%0, %1}"
4068 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4069 (set_attr "length_immediate" "*,1,*,*,*")
4070 (set_attr "prefix_extra" "*,1,*,*,*")
4071 (set (attr "prefix")
4072 (if_then_else (eq_attr "alternative" "3,4")
4073 (const_string "orig")
4074 (const_string "vex")))
4075 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
4077 ;; Although insertps takes register source, we prefer
4078 ;; unpcklps with register source since it is shorter.
;; Insn forming a V2SF from the two SF operands 1 and 2 with legacy
;; two-operand encodings; operand 1 is tied to the destination wherever
;; it is a register.  Alternatives:
;;   0: operand 2 in an x register           -> unpcklps
;;   1: operand 2 in memory                  -> insertps with 0x10
;;   2: operand 1 in memory, operand 2 "C"   -> movss load
;;   3: y-register destination               -> punpckldq
;;   4: y-register destination, operand 1 in memory, operand 2 "C"
;;                                           -> movd
4079 (define_insn "*vec_concatv2sf_sse4_1"
4080 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
4082 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
4083 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
4086 unpcklps\t{%2, %0|%0, %2}
4087 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
4088 movss\t{%1, %0|%0, %1}
4089 punpckldq\t{%2, %0|%0, %2}
4090 movd\t{%1, %0|%0, %1}"
4091 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4092 (set_attr "prefix_data16" "*,1,*,*,*")
4093 (set_attr "prefix_extra" "*,1,*,*,*")
4094 (set_attr "length_immediate" "*,1,*,*,*")
4095 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
4097 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4098 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4099 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Insn forming a V2SF from the two SF operands 1 and 2 without
;; insertps; operand 2 must be a register or zero (reg_or_0_operand).
;; Alternatives:
;;   0: operand 1 tied, operand 2 in an x register -> unpcklps
;;   1: operand 1 in memory, operand 2 "C"         -> movss load
;;   2: y registers, operand 1 tied                -> punpckldq
;;   3: y destination, operand 1 in memory, operand 2 "C" -> movd
4100 (define_insn "*vec_concatv2sf_sse"
4101 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
4103 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
4104 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
4107 unpcklps\t{%2, %0|%0, %2}
4108 movss\t{%1, %0|%0, %1}
4109 punpckldq\t{%2, %0|%0, %2}
4110 movd\t{%1, %0|%0, %1}"
4111 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4112 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX insn forming a V4SF from the two V2SF operands 1 and 2.
;; Alternatives:
;;   0: operand 2 in a register              -> vmovlhps
;;   1: operand 2 in memory                  -> vmovhps load
4114 (define_insn "*vec_concatv4sf_avx"
4115 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4117 (match_operand:V2SF 1 "register_operand" " x,x")
4118 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4121 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4122 vmovhps\t{%2, %1, %0|%0, %1, %2}"
4123 [(set_attr "type" "ssemov")
4124 (set_attr "prefix" "vex")
4125 (set_attr "mode" "V4SF,V2SF")])
;; Legacy SSE insn forming a V4SF from the two V2SF operands 1 and 2;
;; operand 1 is tied to the destination.  Alternatives:
;;   0: operand 2 in a register              -> movlhps
;;   1: operand 2 in memory                  -> movhps load
4127 (define_insn "*vec_concatv4sf_sse"
4128 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4130 (match_operand:V2SF 1 "register_operand" " 0,0")
4131 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4134 movlhps\t{%2, %0|%0, %2}
4135 movhps\t{%2, %0|%0, %2}"
4136 [(set_attr "type" "ssemov")
4137 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialisation expander for every mode in the SSEMODE
;; iterator.  All the work is delegated to ix86_expand_vector_init,
;; called with false as its first argument, the destination (operand 0)
;; and the initialiser (operand 1).
4139 (define_expand "vec_init<mode>"
4140 [(match_operand:SSEMODE 0 "register_operand" "")
4141 (match_operand 1 "" "")]
4144 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX insn merging the scalar operand 2 into the vector operand 1 for
;; the SSEMODE4S modes.  Operand 1 is "C" in alternatives 0-2, an x
;; register in alternatives 3-4, and tied to the memory destination in
;; alternative 5.  Templates, in alternative order: vinsertps with 0xe,
;; vmov<ssescalarmodesuffix> from memory, vmovd from a general register,
;; vmovss, vpinsrd with index 0.  No template is visible here for
;; alternative 5, whose "type" and "mode" attributes are "*".
4148 (define_insn "*vec_set<mode>_0_avx"
4149 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
4150 (vec_merge:SSEMODE4S
4151 (vec_duplicate:SSEMODE4S
4152 (match_operand:<ssescalarmode> 2
4153 "general_operand" " x,m,*r,x,*rm,x*rfF"))
4154 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
4158 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
4159 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4160 vmovd\t{%2, %0|%0, %2}
4161 vmovss\t{%2, %1, %0|%0, %1, %2}
4162 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
4164 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4165 (set_attr "prefix_extra" "*,*,*,*,1,*")
4166 (set_attr "length_immediate" "*,*,*,*,1,*")
4167 (set_attr "prefix" "vex")
4168 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Legacy-encoding insn merging the scalar operand 2 into the vector
;; operand 1 for the SSEMODE4S modes.  Operand 1 is "C" in alternatives
;; 0-2 and tied to the destination in alternatives 3-5.  Templates, in
;; alternative order: insertps with 0xe, mov<ssescalarmodesuffix> from
;; memory, movd from a general register, movss, pinsrd with index 0.
;; No template is visible here for alternative 5 (memory destination).
;; NOTE(review): the last alternative of operand 2 is "*rfF" here, while
;; the _avx and _sse2 variants of this pattern use "x*rfF" -- confirm
;; whether the missing "x" is intentional.
4170 (define_insn "*vec_set<mode>_0_sse4_1"
4171 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
4172 (vec_merge:SSEMODE4S
4173 (vec_duplicate:SSEMODE4S
4174 (match_operand:<ssescalarmode> 2
4175 "general_operand" " x,m,*r,x,*rm,*rfF"))
4176 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
4180 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
4181 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4182 movd\t{%2, %0|%0, %2}
4183 movss\t{%2, %0|%0, %2}
4184 pinsrd\t{$0, %2, %0|%0, %2, 0}
4186 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4187 (set_attr "prefix_extra" "*,*,*,*,1,*")
4188 (set_attr "length_immediate" "*,*,*,*,1,*")
4189 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Four-alternative variant without insertps/pinsrd:
;;   0: op2 in m,  op1 "C"      -> mov<ssescalarmodesuffix>
;;   1: op2 in *r, op1 "C"      -> movd
;;   2: op2 in x,  op1 tied "0" -> movss
;;   3: memory destination; its template line is not shown here.
4191 (define_insn "*vec_set<mode>_0_sse2"
4192 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
4193 (vec_merge:SSEMODE4S
4194 (vec_duplicate:SSEMODE4S
4195 (match_operand:<ssescalarmode> 2
4196 "general_operand" " m,*r,x,x*rfF"))
4197 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4201 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4202 movd\t{%2, %0|%0, %2}
4203 movss\t{%2, %0|%0, %2}
4205 [(set_attr "type" "ssemov")
4206 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Three-alternative variant that only uses movss for register
;; destinations: from memory with operand 1 "C" (alternative 0) or from a
;; register with operand 1 tied to the destination (alternative 1).
;; Alternative 2 is the memory destination; its template line is not shown
;; here.
4208 (define_insn "vec_set<mode>_0"
4209 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4210 (vec_merge:SSEMODE4S
4211 (vec_duplicate:SSEMODE4S
4212 (match_operand:<ssescalarmode> 2
4213 "general_operand" " m,x,x*rfF"))
4214 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4218 movss\t{%2, %0|%0, %2}
4219 movss\t{%2, %0|%0, %2}
4221 [(set_attr "type" "ssemov")
4222 (set_attr "mode" "SF,SF,*")])
4224 ;; A subset is vec_setv4sf.
;; Insert SF operand 2 into V4SF operand 1 with vinsertps.  Operand 3 is
;; matched as a power of two in 1..8 (const_pow2_1_to_8_operand); the
;; output code rewrites it into the instruction immediate as
;; exact_log2 (operand 3) << 4 before printing.
4225 (define_insn "*vec_setv4sf_avx"
4226 [(set (match_operand:V4SF 0 "register_operand" "=x")
4229 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4230 (match_operand:V4SF 1 "register_operand" "x")
4231 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4234 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4235 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4237 [(set_attr "type" "sselog")
4238 (set_attr "prefix_extra" "1")
4239 (set_attr "length_immediate" "1")
4240 (set_attr "prefix" "vex")
4241 (set_attr "mode" "V4SF")])
;; Two-operand insertps form of the pattern above: operand 1 is tied to the
;; destination.  Operand 3 (a power of two in 1..8) is converted to the
;; immediate exact_log2 (operand 3) << 4 by the output code.
4243 (define_insn "*vec_setv4sf_sse4_1"
4244 [(set (match_operand:V4SF 0 "register_operand" "=x")
4247 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4248 (match_operand:V4SF 1 "register_operand" "0")
4249 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4252 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4253 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4255 [(set_attr "type" "sselog")
4256 (set_attr "prefix_data16" "1")
4257 (set_attr "prefix_extra" "1")
4258 (set_attr "length_immediate" "1")
4259 (set_attr "mode" "V4SF")])
;; vinsertps expressed as an unspec over V4SF operands 2 and 1 plus an
;; immediate (operand 3, range 0..255) that is printed unchanged.  Operand
;; 2 may be a register or memory.
4261 (define_insn "*avx_insertps"
4262 [(set (match_operand:V4SF 0 "register_operand" "=x")
4263 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4264 (match_operand:V4SF 1 "register_operand" "x")
4265 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4268 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4269 [(set_attr "type" "sselog")
4270 (set_attr "prefix" "vex")
4271 (set_attr "prefix_extra" "1")
4272 (set_attr "length_immediate" "1")
4273 (set_attr "mode" "V4SF")])
;; Two-operand insertps expressed as an unspec: operand 1 is tied to the
;; destination, operand 2 must be a register, and the immediate (operand 3,
;; range 0..255) is printed unchanged.
4275 (define_insn "sse4_1_insertps"
4276 [(set (match_operand:V4SF 0 "register_operand" "=x")
4277 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4278 (match_operand:V4SF 1 "register_operand" "0")
4279 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4282 "insertps\t{%3, %2, %0|%0, %2, %3}";
4283 [(set_attr "type" "sselog")
4284 (set_attr "prefix_data16" "1")
4285 (set_attr "prefix_extra" "1")
4286 (set_attr "length_immediate" "1")
4287 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE): a vec_merge of a duplicated non-memory
;; scalar into a memory destination is emitted as a move to the
;; <ssescalarmode> slot at byte offset 0 of that memory.  The remaining
;; merge operands and the source of the move are on lines not shown here.
4290 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
4291 (vec_merge:SSEMODE4S
4292 (vec_duplicate:SSEMODE4S
4293 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4296 "TARGET_SSE && reload_completed"
4299 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Expander for every SSEMODE mode: operand 0 is the vector register,
;; operand 1 the scalar register and operand 2 a constant integer.  All
;; work is delegated to ix86_expand_vector_set, which receives the integer
;; value of operand 2 as its last argument.
4304 (define_expand "vec_set<mode>"
4305 [(match_operand:SSEMODE 0 "register_operand" "")
4306 (match_operand:<ssescalarmode> 1 "register_operand" "")
4307 (match_operand 2 "const_int_operand" "")]
4310 ix86_expand_vector_set (false, operands[0], operands[1],
4311 INTVAL (operands[2]));
;; Element 0 of a V4SF as an SF value.  Matched when at most one of the
;; two operands is memory, and split after reload into a plain SF move.
;; Operand 1 is re-expressed in SFmode either with gen_rtx_REG on the same
;; register number or with gen_lowpart; the test that chooses between the
;; two is on a line not shown here.
4315 (define_insn_and_split "*vec_extractv4sf_0"
4316 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4318 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4319 (parallel [(const_int 0)])))]
4320 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4322 "&& reload_completed"
4325 rtx op1 = operands[1];
4327 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4329 op1 = gen_lowpart (SFmode, op1);
4330 emit_move_insn (operands[0], op1);
;; Expander for every AVX256MODE mode.  It switches on the integer value
;; of operand 2 (restricted to 0 or 1) and emits either
;; vec_extract_lo_<mode> or vec_extract_hi_<mode> on operands 0 and 1.
;; The case labels themselves are on lines not shown here.
4334 (define_expand "avx_vextractf128<mode>"
4335 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4336 (match_operand:AVX256MODE 1 "register_operand" "")
4337 (match_operand:SI 2 "const_0_to_1_operand" "")]
4340 switch (INTVAL (operands[2]))
4343 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4346 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Elements 0 and 1 of an AVX256MODE4P vector, as <avxhalfvecmode>.  After
;; reload the insn is split into an ordinary move of operand 1 viewed in
;; <avxhalfvecmode>, obtained with gen_rtx_REG on the same register number
;; or with gen_lowpart (the selecting test is on a line not shown here).
4354 (define_insn_and_split "vec_extract_lo_<mode>"
4355 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4356 (vec_select:<avxhalfvecmode>
4357 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
4358 (parallel [(const_int 0) (const_int 1)])))]
4361 "&& reload_completed"
4364 rtx op1 = operands[1];
4366 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4368 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4369 emit_move_insn (operands[0], op1);
;; Elements 2 and 3 of an AVX256MODE4P register, emitted as
;; vextractf128 $0x1.  Alternative 0 writes a register, alternative 1
;; stores to memory (see the "memory" attribute).
4373 (define_insn "vec_extract_hi_<mode>"
4374 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4375 (vec_select:<avxhalfvecmode>
4376 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4377 (parallel [(const_int 2) (const_int 3)])))]
4379 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4380 [(set_attr "type" "sselog")
4381 (set_attr "prefix_extra" "1")
4382 (set_attr "length_immediate" "1")
4383 (set_attr "memory" "none,store")
4384 (set_attr "prefix" "vex")
4385 (set_attr "mode" "V8SF")])
;; Elements 0..3 of an AVX256MODE8P vector, as <avxhalfvecmode>.  After
;; reload the insn is split into an ordinary move of operand 1 viewed in
;; <avxhalfvecmode>, obtained with gen_rtx_REG on the same register number
;; or with gen_lowpart (the selecting test is on a line not shown here).
4387 (define_insn_and_split "vec_extract_lo_<mode>"
4388 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4389 (vec_select:<avxhalfvecmode>
4390 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
4391 (parallel [(const_int 0) (const_int 1)
4392 (const_int 2) (const_int 3)])))]
4395 "&& reload_completed"
4398 rtx op1 = operands[1];
4400 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4402 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4403 emit_move_insn (operands[0], op1);
;; Elements 4..7 of an AVX256MODE8P register, emitted as
;; vextractf128 $0x1.  Alternative 0 writes a register, alternative 1
;; stores to memory (see the "memory" attribute).
4407 (define_insn "vec_extract_hi_<mode>"
4408 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4409 (vec_select:<avxhalfvecmode>
4410 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4411 (parallel [(const_int 4) (const_int 5)
4412 (const_int 6) (const_int 7)])))]
4414 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4415 [(set_attr "type" "sselog")
4416 (set_attr "prefix_extra" "1")
4417 (set_attr "length_immediate" "1")
4418 (set_attr "memory" "none,store")
4419 (set_attr "prefix" "vex")
4420 (set_attr "mode" "V8SF")])
;; Elements 0..7 of a V16HI vector, as V8HI.  After reload the insn is
;; split into an ordinary move of operand 1 viewed in V8HImode, obtained
;; with gen_rtx_REG on the same register number or with gen_lowpart (the
;; selecting test is on a line not shown here).
4422 (define_insn_and_split "vec_extract_lo_v16hi"
4423 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4425 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4426 (parallel [(const_int 0) (const_int 1)
4427 (const_int 2) (const_int 3)
4428 (const_int 4) (const_int 5)
4429 (const_int 6) (const_int 7)])))]
4432 "&& reload_completed"
4435 rtx op1 = operands[1];
4437 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4439 op1 = gen_lowpart (V8HImode, op1);
4440 emit_move_insn (operands[0], op1);
;; Elements 8..15 of a V16HI register, as V8HI, emitted as
;; vextractf128 $0x1.  Alternative 0 writes a register, alternative 1
;; stores to memory (see the "memory" attribute).
4444 (define_insn "vec_extract_hi_v16hi"
4445 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4447 (match_operand:V16HI 1 "register_operand" "x,x")
4448 (parallel [(const_int 8) (const_int 9)
4449 (const_int 10) (const_int 11)
4450 (const_int 12) (const_int 13)
4451 (const_int 14) (const_int 15)])))]
4453 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4454 [(set_attr "type" "sselog")
4455 (set_attr "prefix_extra" "1")
4456 (set_attr "length_immediate" "1")
4457 (set_attr "memory" "none,store")
4458 (set_attr "prefix" "vex")
4459 (set_attr "mode" "V8SF")])
;; Elements 0..15 of a V32QI vector, as V16QI.  After reload the insn is
;; split into an ordinary move of operand 1 viewed in V16QImode, obtained
;; with gen_rtx_REG on the same register number or with gen_lowpart (the
;; selecting test is on a line not shown here).
4461 (define_insn_and_split "vec_extract_lo_v32qi"
4462 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4464 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4465 (parallel [(const_int 0) (const_int 1)
4466 (const_int 2) (const_int 3)
4467 (const_int 4) (const_int 5)
4468 (const_int 6) (const_int 7)
4469 (const_int 8) (const_int 9)
4470 (const_int 10) (const_int 11)
4471 (const_int 12) (const_int 13)
4472 (const_int 14) (const_int 15)])))]
4475 "&& reload_completed"
4478 rtx op1 = operands[1];
4480 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4482 op1 = gen_lowpart (V16QImode, op1);
4483 emit_move_insn (operands[0], op1);
;; Elements 16..31 of a V32QI register, as V16QI, emitted as
;; vextractf128 $0x1.  Alternative 0 writes a register, alternative 1
;; stores to memory (see the "memory" attribute).
4487 (define_insn "vec_extract_hi_v32qi"
4488 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4490 (match_operand:V32QI 1 "register_operand" "x,x")
4491 (parallel [(const_int 16) (const_int 17)
4492 (const_int 18) (const_int 19)
4493 (const_int 20) (const_int 21)
4494 (const_int 22) (const_int 23)
4495 (const_int 24) (const_int 25)
4496 (const_int 26) (const_int 27)
4497 (const_int 28) (const_int 29)
4498 (const_int 30) (const_int 31)])))]
4500 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4501 [(set_attr "type" "sselog")
4502 (set_attr "prefix_extra" "1")
4503 (set_attr "length_immediate" "1")
4504 (set_attr "memory" "none,store")
4505 (set_attr "prefix" "vex")
4506 (set_attr "mode" "V8SF")])
;; extractps (VEX-prefixed when applicable, via %v): element operand 2
;; (0..3) of V4SF register operand 1 goes to a general register or memory
;; destination ("rm").
4508 (define_insn "*sse4_1_extractps"
4509 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4511 (match_operand:V4SF 1 "register_operand" "x")
4512 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4514 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4515 [(set_attr "type" "sselog")
4516 (set_attr "prefix_data16" "1")
4517 (set_attr "prefix_extra" "1")
4518 (set_attr "length_immediate" "1")
4519 (set_attr "prefix" "maybe_vex")
4520 (set_attr "mode" "V4SF")])
;; Element operand 2 (0..3) of a V4SF held in offsettable memory ("o").
;; The split code replaces the extraction with an SF move from byte
;; offset 4 * index within operand 1.
4522 (define_insn_and_split "*vec_extract_v4sf_mem"
4523 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4525 (match_operand:V4SF 1 "memory_operand" "o")
4526 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4532 int i = INTVAL (operands[2]);
4534 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for every VEC_EXTRACT_MODE mode: operand 0 is the scalar
;; destination register, operand 1 the vector register and operand 2 a
;; constant integer.  All work is delegated to ix86_expand_vector_extract,
;; which receives the integer value of operand 2 as its last argument.
4538 (define_expand "vec_extract<mode>"
4539 [(match_operand:<avxscalarmode> 0 "register_operand" "")
4540 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4541 (match_operand 2 "const_int_operand" "")]
4544 ix86_expand_vector_extract (false, operands[0], operands[1],
4545 INTVAL (operands[2]));
4549 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4551 ;; Parallel double-precision floating point element swizzling
4553 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4555 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on V4DF: the selection takes indices 1, 5, 3, 7 over operands
;; 1 and 2.  Operand 2 may be a register or memory.
4556 (define_insn "avx_unpckhpd256"
4557 [(set (match_operand:V4DF 0 "register_operand" "=x")
4560 (match_operand:V4DF 1 "register_operand" "x")
4561 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4562 (parallel [(const_int 1) (const_int 5)
4563 (const_int 3) (const_int 7)])))]
4565 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4566 [(set_attr "type" "sselog")
4567 (set_attr "prefix" "vex")
4568 (set_attr "mode" "V4DF")])
;; Multi-step expansion.  Two selections are made over operands 1 and 2
;; (indices 0,4,2,6 and indices 1,5,3,7), and operand 0 is then built from
;; elements 2 and 3 of two sub-expressions.  The preparation code creates
;; fresh V4DF pseudos for operands[3] and operands[4]; presumably they
;; carry the two intermediate results (the lines that use them are not
;; shown here -- confirm against the full pattern).
4570 (define_expand "vec_interleave_highv4df"
4574 (match_operand:V4DF 1 "register_operand" "x")
4575 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4576 (parallel [(const_int 0) (const_int 4)
4577 (const_int 2) (const_int 6)])))
4583 (parallel [(const_int 1) (const_int 5)
4584 (const_int 3) (const_int 7)])))
4585 (set (match_operand:V4DF 0 "register_operand" "")
4589 (parallel [(const_int 2) (const_int 3)]))
4592 (parallel [(const_int 2) (const_int 3)]))))]
4595 operands[3] = gen_reg_rtx (V4DFmode);
4596 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander accepting register or memory for both V2DF inputs.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register.
4600 (define_expand "vec_interleave_highv2df"
4601 [(set (match_operand:V2DF 0 "register_operand" "")
4604 (match_operand:V2DF 1 "nonimmediate_operand" "")
4605 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4606 (parallel [(const_int 1)
4610 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4611 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX forms, valid when ix86_vec_interleave_v2df_operator_ok accepts the
;; operands.  Per alternative:
;;   0: both inputs in registers                 -> vunpckhpd
;;   1: op1 in offsettable memory, op2 same      -> vmovddup from %H1
;;   2: op1 in offsettable memory, op2 register  -> vmovlpd from %H1
;;   3: memory destination tied to op2, op1 reg  -> vmovhpd store
4614 (define_insn "*avx_interleave_highv2df"
4615 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4618 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4619 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4620 (parallel [(const_int 1)
4622 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4624 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4625 vmovddup\t{%H1, %0|%0, %H1}
4626 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4627 vmovhpd\t{%1, %0|%0, %1}"
4628 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4629 (set_attr "prefix" "vex")
4630 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 two-operand forms.  Per alternative:
;;   0: op1 tied to destination, op2 register   -> unpckhpd
;;   1: op1 in offsettable memory, op2 same     -> movddup from %H1
;;   2: op1 in offsettable memory, op2 tied     -> movlpd from %H1
;;   3: memory destination tied to op2, op1 reg -> movhpd store
4632 (define_insn "*sse3_interleave_highv2df"
4633 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4636 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4637 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4638 (parallel [(const_int 1)
4640 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4642 unpckhpd\t{%2, %0|%0, %2}
4643 movddup\t{%H1, %0|%0, %H1}
4644 movlpd\t{%H1, %0|%0, %H1}
4645 movhpd\t{%1, %0|%0, %1}"
4646 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4647 (set_attr "prefix_data16" "*,*,1,1")
4648 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 forms (no movddup alternative).  Per alternative:
;;   0: op1 tied to destination, op2 register   -> unpckhpd
;;   1: op1 in offsettable memory, op2 tied     -> movlpd from %H1
;;   2: memory destination tied to op2, op1 reg -> movhpd store
4650 (define_insn "*sse2_interleave_highv2df"
4651 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4654 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4655 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4656 (parallel [(const_int 1)
4658 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4660 unpckhpd\t{%2, %0|%0, %2}
4661 movlpd\t{%H1, %0|%0, %H1}
4662 movhpd\t{%1, %0|%0, %1}"
4663 [(set_attr "type" "sselog,ssemov,ssemov")
4664 (set_attr "prefix_data16" "*,1,1")
4665 (set_attr "mode" "V2DF,V1DF,V1DF")])
4667 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single explicit V4DF input (register or memory) and the
;; selection indices 0, 4, 2, 6.  The second input of the selection is on
;; a line not shown here.
4668 (define_expand "avx_movddup256"
4669 [(set (match_operand:V4DF 0 "register_operand" "")
4672 (match_operand:V4DF 1 "nonimmediate_operand" "")
4674 (parallel [(const_int 0) (const_int 4)
4675 (const_int 2) (const_int 6)])))]
;; Named expander for the V4DF selection with indices 0, 4, 2, 6 over
;; operand 1 (register) and operand 2 (register or memory).  No
;; preparation code is visible in the lines shown here.
4678 (define_expand "avx_unpcklpd256"
4679 [(set (match_operand:V4DF 0 "register_operand" "")
4682 (match_operand:V4DF 1 "register_operand" "")
4683 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4684 (parallel [(const_int 0) (const_int 4)
4685 (const_int 2) (const_int 6)])))]
;; Insn for the V4DF selection with indices 0, 4, 2, 6.  Alternative 0 has
;; operand 2 constrained to match operand 1 and emits vmovddup; alternative
;; 1 emits vunpcklpd.  The visible part of the condition permits a memory
;; operand 1 only when it is rtx_equal_p to operand 2.
4688 (define_insn "*avx_unpcklpd256"
4689 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4692 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4693 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4694 (parallel [(const_int 0) (const_int 4)
4695 (const_int 2) (const_int 6)])))]
4697 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4699 vmovddup\t{%1, %0|%0, %1}
4700 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4701 [(set_attr "type" "sselog")
4702 (set_attr "prefix" "vex")
4703 (set_attr "mode" "V4DF")])
;; Multi-step expansion.  Two selections are made over operands 1 and 2
;; (indices 0,4,2,6 and indices 1,5,3,7), and operand 0 is then built from
;; elements 0 and 1 of two sub-expressions.  The preparation code creates
;; fresh V4DF pseudos for operands[3] and operands[4]; presumably they
;; carry the two intermediate results (the lines that use them are not
;; shown here -- confirm against the full pattern).
4705 (define_expand "vec_interleave_lowv4df"
4709 (match_operand:V4DF 1 "register_operand" "x")
4710 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4711 (parallel [(const_int 0) (const_int 4)
4712 (const_int 2) (const_int 6)])))
4718 (parallel [(const_int 1) (const_int 5)
4719 (const_int 3) (const_int 7)])))
4720 (set (match_operand:V4DF 0 "register_operand" "")
4724 (parallel [(const_int 0) (const_int 1)]))
4727 (parallel [(const_int 0) (const_int 1)]))))]
4730 operands[3] = gen_reg_rtx (V4DFmode);
4731 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander accepting register or memory for both V2DF inputs.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register.
4734 (define_expand "vec_interleave_lowv2df"
4735 [(set (match_operand:V2DF 0 "register_operand" "")
4738 (match_operand:V2DF 1 "nonimmediate_operand" "")
4739 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4740 (parallel [(const_int 0)
4744 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4745 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX forms, valid when ix86_vec_interleave_v2df_operator_ok accepts the
;; operands.  Per alternative:
;;   0: both inputs in registers                 -> vunpcklpd
;;   1: op1 in memory, op2 same as op1           -> vmovddup
;;   2: op1 register, op2 in memory              -> vmovhpd
;;   3: offsettable memory destination tied to op1, op2 register
;;                                               -> vmovlpd store to %H0
4748 (define_insn "*avx_interleave_lowv2df"
4749 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4752 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4753 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4754 (parallel [(const_int 0)
4756 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4758 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4759 vmovddup\t{%1, %0|%0, %1}
4760 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4761 vmovlpd\t{%2, %H0|%H0, %2}"
4762 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4763 (set_attr "prefix" "vex")
4764 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 two-operand forms.  Per alternative:
;;   0: op1 tied to destination, op2 register -> unpcklpd
;;   1: op1 in memory, op2 same as op1        -> movddup
;;   2: op1 tied to destination, op2 memory   -> movhpd
;;   3: offsettable memory destination tied to op1, op2 register
;;                                            -> movlpd store to %H0
4766 (define_insn "*sse3_interleave_lowv2df"
4767 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4770 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4771 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4772 (parallel [(const_int 0)
4774 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4776 unpcklpd\t{%2, %0|%0, %2}
4777 movddup\t{%1, %0|%0, %1}
4778 movhpd\t{%2, %0|%0, %2}
4779 movlpd\t{%2, %H0|%H0, %2}"
4780 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4781 (set_attr "prefix_data16" "*,*,1,1")
4782 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 forms (no movddup alternative); operand 1 is tied to the
;; destination in every alternative.
;;   0: op2 register -> unpcklpd
;;   1: op2 memory   -> movhpd
;;   2: offsettable memory destination, op2 register -> movlpd store to %H0
4784 (define_insn "*sse2_interleave_lowv2df"
4785 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4788 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4789 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4790 (parallel [(const_int 0)
4792 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4794 unpcklpd\t{%2, %0|%0, %2}
4795 movhpd\t{%2, %0|%0, %2}
4796 movlpd\t{%2, %H0|%H0, %2}"
4797 [(set_attr "type" "sselog,ssemov,ssemov")
4798 (set_attr "prefix_data16" "*,1,1")
4799 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination built
;; from register operand 1: the DFmode view of that register is stored
;; twice, at byte offsets 0 and 8 of operand 0.
4802 [(set (match_operand:V2DF 0 "memory_operand" "")
4805 (match_operand:V2DF 1 "register_operand" "")
4807 (parallel [(const_int 0)
4809 "TARGET_SSE3 && reload_completed"
4812 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4813 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4814 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a selection from V2DF memory whose two indices
;; satisfy operand 3 == operand 2 + 2, with operand 2 being 0 or 1.  The
;; result is a vec_duplicate of the DF found at byte offset operand 2 * 8
;; of the memory operand.
4819 [(set (match_operand:V2DF 0 "register_operand" "")
4822 (match_operand:V2DF 1 "memory_operand" "")
4824 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4825 (match_operand:SI 3 "const_int_operand" "")])))]
4826 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4827 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4829 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking the shuffle immediate as operand 3 and emitting
;; avx_shufpd256_1 with explicit element indices.  Mask bit 1 picks 5 or
;; 4, bit 2 picks 3 or 2, bit 3 picks 7 or 6; the argument derived from
;; bit 0 is on a line not shown here.
4832 (define_expand "avx_shufpd256"
4833 [(match_operand:V4DF 0 "register_operand" "")
4834 (match_operand:V4DF 1 "register_operand" "")
4835 (match_operand:V4DF 2 "nonimmediate_operand" "")
4836 (match_operand:SI 3 "const_int_operand" "")]
4839 int mask = INTVAL (operands[3]);
4840 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4842 GEN_INT (mask & 2 ? 5 : 4),
4843 GEN_INT (mask & 4 ? 3 : 2),
4844 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the selection given as four element indices
;; (operands 3..6, restricted to 0-1, 4-5, 2-3 and 6-7).  The output code
;; folds them back into the 4-bit immediate: bit 0 = operand 3,
;; bit 1 = operand 4 - 4, bit 2 = operand 5 - 2, bit 3 = operand 6 - 6,
;; and stores it in operands[3] for printing.
4848 (define_insn "avx_shufpd256_1"
4849 [(set (match_operand:V4DF 0 "register_operand" "=x")
4852 (match_operand:V4DF 1 "register_operand" "x")
4853 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4854 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4855 (match_operand 4 "const_4_to_5_operand" "")
4856 (match_operand 5 "const_2_to_3_operand" "")
4857 (match_operand 6 "const_6_to_7_operand" "")])))]
4861 mask = INTVAL (operands[3]);
4862 mask |= (INTVAL (operands[4]) - 4) << 1;
4863 mask |= (INTVAL (operands[5]) - 2) << 2;
4864 mask |= (INTVAL (operands[6]) - 6) << 3;
4865 operands[3] = GEN_INT (mask);
4867 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4869 [(set_attr "type" "sselog")
4870 (set_attr "length_immediate" "1")
4871 (set_attr "prefix" "vex")
4872 (set_attr "mode" "V4DF")])
;; Expander taking the shuffle immediate as operand 3 and emitting
;; sse2_shufpd_v2df with explicit element indices.  Mask bit 1 picks 3 or
;; 2; the argument derived from bit 0 is on a line not shown here.
4874 (define_expand "sse2_shufpd"
4875 [(match_operand:V2DF 0 "register_operand" "")
4876 (match_operand:V2DF 1 "register_operand" "")
4877 (match_operand:V2DF 2 "nonimmediate_operand" "")
4878 (match_operand:SI 3 "const_int_operand" "")]
4881 int mask = INTVAL (operands[3]);
4882 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4884 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander for every SSEMODE_EO mode: three register operands of the same
;; mode, forwarded to ix86_expand_vec_extract_even_odd with 0 as the last
;; argument.
4888 (define_expand "vec_extract_even<mode>"
4889 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4890 (match_operand:SSEMODE_EO 1 "register_operand" "")
4891 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4894 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander for every SSEMODE_EO mode: three register operands of the same
;; mode, forwarded to ix86_expand_vec_extract_even_odd with 1 as the last
;; argument.
4898 (define_expand "vec_extract_odd<mode>"
4899 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4900 (match_operand:SSEMODE_EO 1 "register_operand" "")
4901 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4904 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4908 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; Three-operand vpunpckhqdq on V2DI: operand 1 in a register, operand 2
;; in a register or memory.  The first selection index is 1; the second is
;; on a line not shown here.
4909 (define_insn "*avx_interleave_highv2di"
4910 [(set (match_operand:V2DI 0 "register_operand" "=x")
4913 (match_operand:V2DI 1 "register_operand" "x")
4914 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4915 (parallel [(const_int 1)
4918 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4919 [(set_attr "type" "sselog")
4920 (set_attr "prefix" "vex")
4921 (set_attr "mode" "TI")])
;; Two-operand punpckhqdq on V2DI: operand 1 is tied to the destination,
;; operand 2 is a register or memory.  The first selection index is 1; the
;; second is on a line not shown here.
4923 (define_insn "vec_interleave_highv2di"
4924 [(set (match_operand:V2DI 0 "register_operand" "=x")
4927 (match_operand:V2DI 1 "register_operand" "0")
4928 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4929 (parallel [(const_int 1)
4932 "punpckhqdq\t{%2, %0|%0, %2}"
4933 [(set_attr "type" "sselog")
4934 (set_attr "prefix_data16" "1")
4935 (set_attr "mode" "TI")])
;; Three-operand vpunpcklqdq on V2DI: operand 1 in a register, operand 2
;; in a register or memory.  The first selection index is 0; the second is
;; on a line not shown here.
4937 (define_insn "*avx_interleave_lowv2di"
4938 [(set (match_operand:V2DI 0 "register_operand" "=x")
4941 (match_operand:V2DI 1 "register_operand" "x")
4942 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4943 (parallel [(const_int 0)
4946 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4947 [(set_attr "type" "sselog")
4948 (set_attr "prefix" "vex")
4949 (set_attr "mode" "TI")])
;; Two-operand punpcklqdq on V2DI: operand 1 is tied to the destination,
;; operand 2 is a register or memory.  The first selection index is 0; the
;; second is on a line not shown here.
4951 (define_insn "vec_interleave_lowv2di"
4952 [(set (match_operand:V2DI 0 "register_operand" "=x")
4955 (match_operand:V2DI 1 "register_operand" "0")
4956 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4957 (parallel [(const_int 0)
4960 "punpcklqdq\t{%2, %0|%0, %2}"
4961 [(set_attr "type" "sselog")
4962 (set_attr "prefix_data16" "1")
4963 (set_attr "mode" "TI")])
;; Three-operand vshufpd for the SSEMODE2D modes.  The selection is given
;; as two element indices over the concatenation of operands 1 and 2
;; (operand 3 in 0..1, operand 4 in 2..3); the output code folds them into
;; the immediate as operand 3 | ((operand 4 - 2) << 1).
4965 (define_insn "*avx_shufpd_<mode>"
4966 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4967 (vec_select:SSEMODE2D
4968 (vec_concat:<ssedoublesizemode>
4969 (match_operand:SSEMODE2D 1 "register_operand" "x")
4970 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4971 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4972 (match_operand 4 "const_2_to_3_operand" "")])))]
4976 mask = INTVAL (operands[3]);
4977 mask |= (INTVAL (operands[4]) - 2) << 1;
4978 operands[3] = GEN_INT (mask);
4980 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4982 [(set_attr "type" "sselog")
4983 (set_attr "length_immediate" "1")
4984 (set_attr "prefix" "vex")
4985 (set_attr "mode" "V2DF")])
;; Two-operand shufpd for the SSEMODE2D modes, operand 1 tied to the
;; destination.  The selection is given as two element indices over the
;; concatenation of operands 1 and 2 (operand 3 in 0..1, operand 4 in
;; 2..3); the output code folds them into the immediate as
;; operand 3 | ((operand 4 - 2) << 1).
4987 (define_insn "sse2_shufpd_<mode>"
4988 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4989 (vec_select:SSEMODE2D
4990 (vec_concat:<ssedoublesizemode>
4991 (match_operand:SSEMODE2D 1 "register_operand" "0")
4992 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4993 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4994 (match_operand 4 "const_2_to_3_operand" "")])))]
4998 mask = INTVAL (operands[3]);
4999 mask |= (INTVAL (operands[4]) - 2) << 1;
5000 operands[3] = GEN_INT (mask);
5002 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
5004 [(set_attr "type" "sselog")
5005 (set_attr "length_immediate" "1")
5006 (set_attr "mode" "V2DF")])
5008 ;; Avoid combining registers from different units in a single alternative,
5009 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF as DF; at most one operand may be
;; memory.  Alternative 0 stores from a register with vmovhpd, alternative
;; 1 is register-to-register via vunpckhpd %1, %1.  Alternatives 2-4 read
;; offsettable memory into x / *f / r; their template lines are not shown
;; here.
5010 (define_insn "*avx_storehpd"
5011 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
5013 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
5014 (parallel [(const_int 1)])))]
5015 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5017 vmovhpd\t{%1, %0|%0, %1}
5018 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
5022 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
5023 (set_attr "prefix" "vex")
5024 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF as DF; at most one operand may
;; be memory.  Alternative 0 stores from a register with movhpd.
;; Alternative 1 has the source tied to the destination register, and
;; alternatives 2-4 read offsettable memory; the template lines for
;; alternatives 1-4 are not shown here.
5026 (define_insn "sse2_storehpd"
5027 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
5029 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
5030 (parallel [(const_int 1)])))]
5031 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5033 movhpd\t{%1, %0|%0, %1}
5038 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
5039 (set_attr "prefix_data16" "1,*,*,*,*")
5040 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): element 1 of a V2DF in memory, read
;; into a register, becomes a plain DF move from byte offset 8 of that
;; memory.
5043 [(set (match_operand:DF 0 "register_operand" "")
5045 (match_operand:V2DF 1 "memory_operand" "")
5046 (parallel [(const_int 1)])))]
5047 "TARGET_SSE2 && reload_completed"
5048 [(set (match_dup 0) (match_dup 1))]
5049 "operands[1] = adjust_address (operands[1], DFmode, 8);")
5051 ;; Avoid combining registers from different units in a single alternative,
5052 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF as DF; at most one operand may be
;; memory.  Alternative 0 stores from a register with movlpd (VEX form
;; when applicable, via %v).  Alternatives 1-4 (register source, or memory
;; source into x / *f / r) have template lines that are not shown here.
5053 (define_insn "sse2_storelpd"
5054 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
5056 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
5057 (parallel [(const_int 0)])))]
5058 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5060 %vmovlpd\t{%1, %0|%0, %1}
5065 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
5066 (set_attr "prefix_data16" "1,*,*,*,*")
5067 (set_attr "prefix" "maybe_vex")
5068 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): element 0 of a V2DF read into a
;; register becomes a plain DF move.  Operand 1 is re-expressed in DFmode
;; with gen_rtx_REG on the same register number or with gen_lowpart; the
;; test choosing between them is on a line not shown here.
5071 [(set (match_operand:DF 0 "register_operand" "")
5073 (match_operand:V2DF 1 "nonimmediate_operand" "")
5074 (parallel [(const_int 0)])))]
5075 "TARGET_SSE2 && reload_completed"
5078 rtx op1 = operands[1];
5080 op1 = gen_rtx_REG (DFmode, REGNO (op1));
5082 op1 = gen_lowpart (DFmode, op1);
5083 emit_move_insn (operands[0], op1);
;; Expander front end for the loadhpd insns: element 0 of V2DF operand 1
;; combined with DF operand 2.  All three operands may be register or
;; memory here; the preparation statement passes them through
;; ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
5087 (define_expand "sse2_loadhpd_exp"
5088 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
5091 (match_operand:V2DF 1 "nonimmediate_operand" "")
5092 (parallel [(const_int 0)]))
5093 (match_operand:DF 2 "nonimmediate_operand" "")))]
5095 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
5097 ;; Avoid combining registers from different units in a single alternative,
5098 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form: element 0 of V2DF operand 1 combined with DF operand 2;
;; operands 1 and 2 may not both be memory.
;;   0: op1 register, op2 memory   -> vmovhpd
;;   1: op1 register, op2 register -> vunpcklpd
;;   2-4: offsettable memory destination tied to op1, op2 in x / *f / r;
;;        their template lines are not shown here.
5099 (define_insn "*avx_loadhpd"
5100 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
5103 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
5104 (parallel [(const_int 0)]))
5105 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
5106 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5108 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5109 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5113 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
5114 (set_attr "prefix" "vex")
5115 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 (non-VEX) form: element 0 of V2DF operand 1 combined with DF
;; operand 2; operands 1 and 2 may not both be memory.
;;   0: op1 tied to destination, op2 memory   -> movhpd
;;   1: op1 tied to destination, op2 register -> unpcklpd
;;   2-4: offsettable memory destination tied to op1, op2 in x / *f / r
;; There is deliberately no alternative with operand 2 tied to the
;; destination register.  shufpd always takes the low result element from
;; the destination, so "shufpd $1, %1, %0" produced { %0[1], %1[0] }
;; instead of { %1[0], %2 }, and no immediate can place element 0 of
;; operand 1 in the low lane.
5117 (define_insn "sse2_loadhpd"
5118 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
5121 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
5122 (parallel [(const_int 0)]))
5123 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
5124 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5126 movhpd\t{%2, %0|%0, %2}
5127 unpcklpd\t{%2, %0|%0, %2}
5132 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
5133 (set_attr "prefix_data16" "1,*,*,*,*")
5135 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): when the V2DF memory destination keeps
;; its own element 0 and receives register operand 1 as the other element,
;; only a DF store of operand 1 at byte offset 8 is needed.
5138 [(set (match_operand:V2DF 0 "memory_operand" "")
5140 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
5141 (match_operand:DF 1 "register_operand" "")))]
5142 "TARGET_SSE2 && reload_completed"
5143 [(set (match_dup 0) (match_dup 1))]
5144 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander front end for the loadlpd insns: DF operand 2 combined with
;; element 1 of V2DF operand 1.  All three operands may be register or
;; memory here; the preparation statement passes them through
;; ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
5146 (define_expand "sse2_loadlpd_exp"
5147 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
5149 (match_operand:DF 2 "nonimmediate_operand" "")
5151 (match_operand:V2DF 1 "nonimmediate_operand" "")
5152 (parallel [(const_int 1)]))))]
5154 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
5156 ;; Avoid combining registers from different units in a single alternative,
5157 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form: DF operand 2 combined with element 1 of V2DF operand 1;
;; operands 1 and 2 may not both be memory.
;;   0: op2 memory,   op1 "C"                 -> vmovsd load
;;   1: op2 memory,   op1 register            -> vmovlpd
;;   2: op2 register, op1 register            -> three-operand vmovsd
;;   3: op2 register, op1 offsettable memory  -> vmovhpd from %H1
;;   4-6: memory destination tied to op1, op2 in x / *f / r; their
;;        template lines are not shown here.
5158 (define_insn "*avx_loadlpd"
5159 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
5161 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
5163 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
5164 (parallel [(const_int 1)]))))]
5165 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5167 vmovsd\t{%2, %0|%0, %2}
5168 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5169 vmovsd\t{%2, %1, %0|%0, %1, %2}
5170 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5174 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
5175 (set_attr "prefix" "vex")
5176 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 (non-VEX) form: DF operand 2 combined with element 1 of V2DF
;; operand 1; operands 1 and 2 may not both be memory.
;;   0: op2 memory,   op1 "C"                  -> movsd load
;;   1: op2 memory,   op1 tied to destination  -> movlpd
;;   2: op2 register, op1 tied to destination  -> movsd
;;   3: op2 tied to destination, op1 register  -> shufpd $2
;;   4: op2 tied to destination, op1 offsettable memory -> movhpd from %H1
;;   5-7: memory destination tied to op1, op2 in x / *f / r
;; In alternative 3 the destination already holds operand 2 in its low
;; element, so the shufpd source must be operand 1: immediate 2 keeps the
;; destination's low element and takes the high element from the source.
;; Using %2 as the source would read the destination register itself and
;; never bring in operand 1.
5178 (define_insn "sse2_loadlpd"
5179 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
5181 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
5183 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
5184 (parallel [(const_int 1)]))))]
5185 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5187 movsd\t{%2, %0|%0, %2}
5188 movlpd\t{%2, %0|%0, %2}
5189 movsd\t{%2, %0|%0, %2}
5190 shufpd\t{$2, %1, %0|%0, %1, 2}
5191 movhpd\t{%H1, %0|%0, %H1}
5195 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
5196 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
5197 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
5198 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): when the V2DF memory destination keeps
;; its own element 1 and receives register operand 1 as element 0, only a
;; DF store of operand 1 is needed.  Element 0 lives at byte offset 0, so
;; the store must go there; offset 8 would overwrite the element that the
;; pattern says is preserved.
5201 [(set (match_operand:V2DF 0 "memory_operand" "")
5203 (match_operand:DF 1 "register_operand" "")
5204 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5205 "TARGET_SSE2 && reload_completed"
5206 [(set (match_dup 0) (match_dup 1))]
5207 "operands[0] = adjust_address (operands[0], DFmode, 0);")
5209 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Element 1 of a V2DF as DF for SSE without SSE2, using single-precision
;; moves; at most one operand may be memory.
;;   0: register to memory            -> movhps
;;   1: register to register          -> movhlps
;;   2: offsettable memory to register -> movlps from %H1
5211 (define_insn "*vec_extractv2df_1_sse"
5212 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5214 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
5215 (parallel [(const_int 1)])))]
5216 "!TARGET_SSE2 && TARGET_SSE
5217 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5219 movhps\t{%1, %0|%0, %1}
5220 movhlps\t{%1, %0|%0, %1}
5221 movlps\t{%H1, %0|%0, %H1}"
5222 [(set_attr "type" "ssemov")
5223 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Element 0 of a V2DF as DF for SSE without SSE2, using single-precision
;; moves; at most one operand may be memory.
;;   0: register to memory   -> movlps
;;   1: register to register -> movaps
;;   2: memory to register   -> movlps
5225 (define_insn "*vec_extractv2df_0_sse"
5226 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5228 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5229 (parallel [(const_int 0)])))]
5230 "!TARGET_SSE2 && TARGET_SSE
5231 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5233 movlps\t{%1, %0|%0, %1}
5234 movaps\t{%1, %0|%0, %1}
5235 movlps\t{%1, %0|%0, %1}"
5236 [(set_attr "type" "ssemov")
5237 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX merge of V2DF operands 2 and 1.  Per alternative:
;;   0: op2 register, op1 register            -> three-operand vmovsd
;;   1: op2 memory,   op1 register            -> three-operand vmovlpd
;;   2: memory destination tied to op1, op2 register -> vmovlpd store
;;   3: op2 register, op1 offsettable memory  -> vmovhps from %H1
;;   4: offsettable memory destination tied to op2, op1 register
;;                                            -> vmovhps store to %H0
;; The merge operator and its mask are on lines not shown here.
5239 (define_insn "*avx_movsd"
5240 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5242 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5243 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5247 vmovsd\t{%2, %1, %0|%0, %1, %2}
5248 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5249 vmovlpd\t{%2, %0|%0, %2}
5250 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5251 vmovhps\t{%1, %H0|%H0, %1}"
5252 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5253 (set_attr "prefix" "vex")
5254 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 (non-VEX) merge of V2DF operands 2 and 1; the register forms take
;; the low element from operand 2 and the high element from operand 1.
;;   0: op2 register, op1 tied to destination -> movsd
;;   1: op2 memory,   op1 tied to destination -> movlpd
;;   2: memory destination tied to op1, op2 register -> movlpd store
;;   3: op2 tied to destination, op1 register -> shufpd $2
;;   4: op2 tied to destination, op1 offsettable memory -> movhps from %H1
;;   5: offsettable memory destination tied to op2, op1 register
;;                                            -> movhps store to %H0
;; In alternative 3 the destination already holds operand 2, so the shufpd
;; source must be operand 1: immediate 2 keeps the destination's low
;; element and takes the high element from the source.  Using %2 as the
;; source would read the destination register itself and never bring in
;; operand 1.
5256 (define_insn "sse2_movsd"
5257 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5259 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5260 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5264 movsd\t{%2, %0|%0, %2}
5265 movlpd\t{%2, %0|%0, %2}
5266 movlpd\t{%2, %0|%0, %2}
5267 shufpd\t{$2, %1, %0|%0, %1, 2}
5268 movhps\t{%H1, %0|%0, %H1}
5269 movhps\t{%1, %H0|%H0, %1}"
5270 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5271 (set_attr "prefix_data16" "*,1,1,*,*,*")
5272 (set_attr "length_immediate" "*,*,*,1,*,*")
5273 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Set a V2DF register from a DF register-or-memory operand with movddup.
;; The template uses the %v prefix and the "prefix" attribute is maybe_vex.
;; NOTE(review): the insn condition is not visible in this excerpt.
5275 (define_insn "*vec_dupv2df_sse3"
5276 [(set (match_operand:V2DF 0 "register_operand" "=x")
5278 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5280 "%vmovddup\t{%1, %0|%0, %1}"
5281 [(set_attr "type" "sselog1")
5282 (set_attr "prefix" "maybe_vex")
5283 (set_attr "mode" "DF")])
;; Set a V2DF register from a DF register operand that is tied to the
;; destination (constraint "0").
;; NOTE(review): the insn condition and output template are not visible
;; in this excerpt.
5285 (define_insn "vec_dupv2df"
5286 [(set (match_operand:V2DF 0 "register_operand" "=x")
5288 (match_operand:DF 1 "register_operand" "0")))]
5291 [(set_attr "type" "sselog1")
5292 (set_attr "mode" "V2DF")])
;; Build a V2DF register from DF operand 1 (register or memory) with
;; movddup; only operand 1 appears in the output template.
;; NOTE(review): the remainder of the RTL and the insn condition are not
;; visible in this excerpt.
5294 (define_insn "*vec_concatv2df_sse3"
5295 [(set (match_operand:V2DF 0 "register_operand" "=x")
5297 (match_operand:DF 1 "nonimmediate_operand" "xm")
5300 "%vmovddup\t{%1, %0|%0, %1}"
5301 [(set_attr "type" "sselog1")
5302 (set_attr "prefix" "maybe_vex")
5303 (set_attr "mode" "DF")])
;; VEX-encoded pattern building a V2DF register from DF operands 1 and 2.
;; Alternatives: vunpcklpd (both in registers), vmovhpd (operand 2 in
;; memory), vmovsd (operand 1 in memory, operand 2 satisfying constraint C
;; and therefore not printed in the template).
;; NOTE(review): the insn condition is not visible in this excerpt.
5305 (define_insn "*vec_concatv2df_avx"
5306 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5308 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5309 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5312 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5313 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5314 vmovsd\t{%1, %0|%0, %1}"
5315 [(set_attr "type" "ssemov")
5316 (set_attr "prefix" "vex")
5317 (set_attr "mode" "DF,V1DF,DF")])
;; Non-VEX pattern building a V2DF register from DF operands 1 and 2.
;; Operand 1 is tied to the destination except in the movsd alternative,
;; where it is loaded from memory.  The first three alternatives use the
;; Y2 register constraint (unpcklpd, movhpd, movsd); the last two use x
;; (movlhps, movhps).
;; NOTE(review): the insn condition is not visible in this excerpt.
5319 (define_insn "*vec_concatv2df"
5320 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5322 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5323 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5326 unpcklpd\t{%2, %0|%0, %2}
5327 movhpd\t{%2, %0|%0, %2}
5328 movsd\t{%1, %0|%0, %1}
5329 movlhps\t{%2, %0|%0, %2}
5330 movhps\t{%2, %0|%0, %2}"
5331 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5332 (set_attr "prefix_data16" "*,1,*,*,*")
5333 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5335 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5337 ;; Parallel integral arithmetic
5339 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for negation of the 16-byte integer vector modes (SSEMODEI).
;; The preparation statement forces an all-zero vector of the mode into a
;; register and stores it in operands[2].
;; NOTE(review): presumably the result is computed as 0 - operand 1; the
;; RTL operator line is not visible in this excerpt.
5341 (define_expand "neg<mode>2"
5342 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5345 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5347 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the <plusminus_insn> operations on the 16-byte integer
;; vector modes.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
5349 (define_expand "<plusminus_insn><mode>3"
5350 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5352 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5353 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5355 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX form of the integer vector add/subtract: three-operand VEX
;; instruction vp<plusminus_mnemonic><ssevecsize>, with operand 2 allowed
;; in memory.  Requires TARGET_AVX and ix86_binary_operator_ok.
5357 (define_insn "*avx_<plusminus_insn><mode>3"
5358 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5360 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5361 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5362 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5363 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5364 [(set_attr "type" "sseiadd")
5365 (set_attr "prefix" "vex")
5366 (set_attr "mode" "TI")])
;; SSE2 form of the integer vector add/subtract: two-operand instruction
;; p<plusminus_mnemonic><ssevecsize> with operand 1 tied to the
;; destination.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
5368 (define_insn "*<plusminus_insn><mode>3"
5369 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5371 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5372 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5373 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5374 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5375 [(set_attr "type" "sseiadd")
5376 (set_attr "prefix_data16" "1")
5377 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus operations on the SSEMODE12 modes
;; (V16QI and V8HI).  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
5379 (define_expand "sse2_<plusminus_insn><mode>3"
5380 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5381 (sat_plusminus:SSEMODE12
5382 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5383 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5385 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX form of the sat_plusminus operations on V16QI/V8HI: three-operand
;; VEX instruction, operand 2 may be in memory.  Requires TARGET_AVX and
;; ix86_binary_operator_ok.
5387 (define_insn "*avx_<plusminus_insn><mode>3"
5388 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5389 (sat_plusminus:SSEMODE12
5390 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5391 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5392 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5393 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5394 [(set_attr "type" "sseiadd")
5395 (set_attr "prefix" "vex")
5396 (set_attr "mode" "TI")])
;; SSE2 form of the sat_plusminus operations on V16QI/V8HI: two-operand
;; instruction with operand 1 tied to the destination.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok.
5398 (define_insn "*sse2_<plusminus_insn><mode>3"
5399 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5400 (sat_plusminus:SSEMODE12
5401 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5402 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5403 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5404 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5405 [(set_attr "type" "sseiadd")
5406 (set_attr "prefix_data16" "1")
5407 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as a split that is only valid while new
;; pseudos can still be created.  The split interleaves each input with
;; itself (high and low halves), multiplies the results as V8HI with
;; mulv8hi3, and then gathers the even bytes of the two products into
;; operand 0 via ix86_expand_vec_extract_even_odd.
;; NOTE(review): the first line of the insn condition is not visible in
;; this excerpt.
5409 (define_insn_and_split "mulv16qi3"
5410 [(set (match_operand:V16QI 0 "register_operand" "")
5411 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5412 (match_operand:V16QI 2 "register_operand" "")))]
5414 && can_create_pseudo_p ()"
5422 for (i = 0; i < 6; ++i)
5423 t[i] = gen_reg_rtx (V16QImode);
5425 /* Unpack data such that we've got a source byte in each low byte of
5426 each word. We don't care what goes into the high byte of each word.
5427 Rather than trying to get zero in there, most convenient is to let
5428 it be a copy of the low byte. */
5429 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5430 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5431 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5432 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5434 /* Multiply words. The end-of-line annotations here give a picture of what
5435 the output of that instruction looks like. Dot means don't care; the
5436 letters are the bytes of the result with A being the most significant. */
5437 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5438 gen_lowpart (V8HImode, t[0]),
5439 gen_lowpart (V8HImode, t[1])));
5440 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5441 gen_lowpart (V8HImode, t[2]),
5442 gen_lowpart (V8HImode, t[3])));
5444 /* Extract the even bytes and merge them back together. */
5445 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Expander for V8HI multiply; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
5449 (define_expand "mulv8hi3"
5450 [(set (match_operand:V8HI 0 "register_operand" "")
5451 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5452 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5454 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX V8HI multiply: three-operand vpmullw, commutative in operands 1
;; and 2 ("%" constraint modifier), operand 2 may be in memory.
5456 (define_insn "*avx_mulv8hi3"
5457 [(set (match_operand:V8HI 0 "register_operand" "=x")
5458 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5459 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5460 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5461 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5462 [(set_attr "type" "sseimul")
5463 (set_attr "prefix" "vex")
5464 (set_attr "mode" "TI")])
;; SSE2 V8HI multiply: two-operand pmullw with operand 1 tied to the
;; destination; operand 2 may be in memory.
5466 (define_insn "*mulv8hi3"
5467 [(set (match_operand:V8HI 0 "register_operand" "=x")
5468 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5469 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5470 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5471 "pmullw\t{%2, %0|%0, %2}"
5472 [(set_attr "type" "sseimul")
5473 (set_attr "prefix_data16" "1")
5474 (set_attr "mode" "TI")])
;; Expander for the <s>mulv8hi3_highpart patterns on V8HI operands;
;; operands are passed through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): most of the RTL template and the expander condition are
;; not visible in this excerpt.
5476 (define_expand "<s>mulv8hi3_highpart"
5477 [(set (match_operand:V8HI 0 "register_operand" "")
5482 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5484 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5487 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI high-part multiply: three-operand vpmulh<u>w,
;; operand 2 may be in memory.
;; NOTE(review): most of the RTL template is not visible in this excerpt.
5489 (define_insn "*avx_<s>mulv8hi3_highpart"
5490 [(set (match_operand:V8HI 0 "register_operand" "=x")
5495 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5497 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5499 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5500 "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5501 [(set_attr "type" "sseimul")
5502 (set_attr "prefix" "vex")
5503 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI high-part multiply: two-operand pmulh<u>w with
;; operand 1 tied to the destination.
;; NOTE(review): most of the RTL template is not visible in this excerpt.
5505 (define_insn "*<s>mulv8hi3_highpart"
5506 [(set (match_operand:V8HI 0 "register_operand" "=x")
5511 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5513 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5515 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5516 "pmulh<u>w\t{%2, %0|%0, %2}"
5517 [(set_attr "type" "sseimul")
5518 (set_attr "prefix_data16" "1")
5519 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of each of two
;; V4SI operands (see the two parallel selectors).  Operands are passed
;; through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator lines and the expander condition are
;; not visible in this excerpt.
5521 (define_expand "sse2_umulv2siv2di3"
5522 [(set (match_operand:V2DI 0 "register_operand" "")
5526 (match_operand:V4SI 1 "nonimmediate_operand" "")
5527 (parallel [(const_int 0) (const_int 2)])))
5530 (match_operand:V4SI 2 "nonimmediate_operand" "")
5531 (parallel [(const_int 0) (const_int 2)])))))]
5533 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn for the V2DI product of elements 0 and 2 of two V4SI operands,
;; emitted as three-operand vpmuludq.
5535 (define_insn "*avx_umulv2siv2di3"
5536 [(set (match_operand:V2DI 0 "register_operand" "=x")
5540 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5541 (parallel [(const_int 0) (const_int 2)])))
5544 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5545 (parallel [(const_int 0) (const_int 2)])))))]
5546 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5547 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5548 [(set_attr "type" "sseimul")
5549 (set_attr "prefix" "vex")
5550 (set_attr "mode" "TI")])
;; SSE2 insn for the V2DI product of elements 0 and 2 of two V4SI
;; operands, emitted as two-operand pmuludq with operand 1 tied to the
;; destination.
5552 (define_insn "*sse2_umulv2siv2di3"
5553 [(set (match_operand:V2DI 0 "register_operand" "=x")
5557 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5558 (parallel [(const_int 0) (const_int 2)])))
5561 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5562 (parallel [(const_int 0) (const_int 2)])))))]
5563 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5564 "pmuludq\t{%2, %0|%0, %2}"
5565 [(set_attr "type" "sseimul")
5566 (set_attr "prefix_data16" "1")
5567 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of each of two
;; V4SI operands; the matching insns below emit (v)pmuldq.  Operands are
;; passed through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator lines and the expander condition are
;; not visible in this excerpt.
5569 (define_expand "sse4_1_mulv2siv2di3"
5570 [(set (match_operand:V2DI 0 "register_operand" "")
5574 (match_operand:V4SI 1 "nonimmediate_operand" "")
5575 (parallel [(const_int 0) (const_int 2)])))
5578 (match_operand:V4SI 2 "nonimmediate_operand" "")
5579 (parallel [(const_int 0) (const_int 2)])))))]
5581 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn for the V2DI product of elements 0 and 2 of two V4SI operands,
;; emitted as three-operand vpmuldq.
5583 (define_insn "*avx_mulv2siv2di3"
5584 [(set (match_operand:V2DI 0 "register_operand" "=x")
5588 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5589 (parallel [(const_int 0) (const_int 2)])))
5592 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5593 (parallel [(const_int 0) (const_int 2)])))))]
5594 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5595 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5596 [(set_attr "type" "sseimul")
5597 (set_attr "prefix_extra" "1")
5598 (set_attr "prefix" "vex")
5599 (set_attr "mode" "TI")])
;; SSE4.1 insn for the V2DI product of elements 0 and 2 of two V4SI
;; operands, emitted as two-operand pmuldq with operand 1 tied to the
;; destination.
5601 (define_insn "*sse4_1_mulv2siv2di3"
5602 [(set (match_operand:V2DI 0 "register_operand" "=x")
5606 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5607 (parallel [(const_int 0) (const_int 2)])))
5610 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5611 (parallel [(const_int 0) (const_int 2)])))))]
5612 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5613 "pmuldq\t{%2, %0|%0, %2}"
5614 [(set_attr "type" "sseimul")
5615 (set_attr "prefix_extra" "1")
5616 (set_attr "mode" "TI")])
;; Expander for the pmaddwd pattern: a V4SI result built from V4HI
;; selections of V8HI operands 1 and 2.  Each operand is selected twice,
;; once with a selector starting at element 0 and once (via match_dup)
;; with a selector starting at element 1 and ending at element 7.
;; Operands are passed through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator lines, the middle selector indices and
;; the expander condition are not visible in this excerpt.
5618 (define_expand "sse2_pmaddwd"
5619 [(set (match_operand:V4SI 0 "register_operand" "")
5624 (match_operand:V8HI 1 "nonimmediate_operand" "")
5625 (parallel [(const_int 0)
5631 (match_operand:V8HI 2 "nonimmediate_operand" "")
5632 (parallel [(const_int 0)
5638 (vec_select:V4HI (match_dup 1)
5639 (parallel [(const_int 1)
5644 (vec_select:V4HI (match_dup 2)
5645 (parallel [(const_int 1)
5648 (const_int 7)]))))))]
5650 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX insn matching the pmaddwd RTL shape (V4HI selections of two V8HI
;; operands combined into a V4SI result), emitted as three-operand
;; vpmaddwd.
;; NOTE(review): the RTL operator lines and the middle selector indices
;; are not visible in this excerpt.
5652 (define_insn "*avx_pmaddwd"
5653 [(set (match_operand:V4SI 0 "register_operand" "=x")
5658 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5659 (parallel [(const_int 0)
5665 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5666 (parallel [(const_int 0)
5672 (vec_select:V4HI (match_dup 1)
5673 (parallel [(const_int 1)
5678 (vec_select:V4HI (match_dup 2)
5679 (parallel [(const_int 1)
5682 (const_int 7)]))))))]
5683 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5684 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5685 [(set_attr "type" "sseiadd")
5686 (set_attr "prefix" "vex")
5687 (set_attr "mode" "TI")])
;; SSE2 insn matching the pmaddwd RTL shape, emitted as two-operand
;; pmaddwd with operand 1 tied to the destination.  Carries an atom_unit
;; attribute of "simul".
;; NOTE(review): the RTL operator lines and the middle selector indices
;; are not visible in this excerpt.
5689 (define_insn "*sse2_pmaddwd"
5690 [(set (match_operand:V4SI 0 "register_operand" "=x")
5695 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5696 (parallel [(const_int 0)
5702 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5703 (parallel [(const_int 0)
5709 (vec_select:V4HI (match_dup 1)
5710 (parallel [(const_int 1)
5715 (vec_select:V4HI (match_dup 2)
5716 (parallel [(const_int 1)
5719 (const_int 7)]))))))]
5720 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5721 "pmaddwd\t{%2, %0|%0, %2}"
5722 [(set_attr "type" "sseiadd")
5723 (set_attr "atom_unit" "simul")
5724 (set_attr "prefix_data16" "1")
5725 (set_attr "mode" "TI")])
;; Expander for V4SI multiply on register operands.  The operand fixup is
;; applied only when TARGET_SSE4_1 or TARGET_AVX is set; otherwise the
;; pattern is emitted unchanged.
;; NOTE(review): the expander condition is not visible in this excerpt.
5727 (define_expand "mulv4si3"
5728 [(set (match_operand:V4SI 0 "register_operand" "")
5729 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5730 (match_operand:V4SI 2 "register_operand" "")))]
5733 if (TARGET_SSE4_1 || TARGET_AVX)
5734 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX V4SI multiply: three-operand vpmulld, operand 2 may be in memory.
5737 (define_insn "*avx_mulv4si3"
5738 [(set (match_operand:V4SI 0 "register_operand" "=x")
5739 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5740 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5741 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5742 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5743 [(set_attr "type" "sseimul")
5744 (set_attr "prefix_extra" "1")
5745 (set_attr "prefix" "vex")
5746 (set_attr "mode" "TI")])
;; SSE4.1 V4SI multiply: two-operand pmulld with operand 1 tied to the
;; destination; operand 2 may be in memory.
5748 (define_insn "*sse4_1_mulv4si3"
5749 [(set (match_operand:V4SI 0 "register_operand" "=x")
5750 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5751 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5752 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5753 "pmulld\t{%2, %0|%0, %2}"
5754 [(set_attr "type" "sseimul")
5755 (set_attr "prefix_extra" "1")
5756 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or AVX, implemented as a
;; split that is only valid while new pseudos can be created.  Sequence:
;; multiply elements 2 and 0 with sse2_umulv2siv2di3, shift both inputs
;; with sse2_lshrv1ti3 so elements 3 and 1 occupy those slots, multiply
;; again, shuffle each product with sse2_pshufd_1, then interleave the
;; low halves into op0.
;; NOTE(review): several argument lines of the emitted calls (including
;; the shift amounts) are not visible in this excerpt.
5758 (define_insn_and_split "*sse2_mulv4si3"
5759 [(set (match_operand:V4SI 0 "register_operand" "")
5760 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5761 (match_operand:V4SI 2 "register_operand" "")))]
5762 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5763 && can_create_pseudo_p ()"
5768 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5774 t1 = gen_reg_rtx (V4SImode);
5775 t2 = gen_reg_rtx (V4SImode);
5776 t3 = gen_reg_rtx (V4SImode);
5777 t4 = gen_reg_rtx (V4SImode);
5778 t5 = gen_reg_rtx (V4SImode);
5779 t6 = gen_reg_rtx (V4SImode);
5780 thirtytwo = GEN_INT (32);
5782 /* Multiply elements 2 and 0. */
5783 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5786 /* Shift both input vectors down one element, so that elements 3
5787 and 1 are now in the slots for elements 2 and 0. For K8, at
5788 least, this is faster than using a shuffle. */
5789 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5790 gen_lowpart (V1TImode, op1),
5792 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5793 gen_lowpart (V1TImode, op2),
5795 /* Multiply elements 3 and 1. */
5796 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5799 /* Move the results in element 2 down to element 1; we don't care
5800 what goes in elements 2 and 3. */
5801 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5802 const0_rtx, const0_rtx));
5803 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5804 const0_rtx, const0_rtx));
5806 /* Merge the parts back together. */
5807 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, implemented as a split that is only valid while new
;; pseudos can be created.  Two code sequences are visible:
;;   - one built from XOP helpers (gen_xop_phadddq, gen_xop_pmacsdql),
;;     which forms the cross products, shifts their sum left by 32 and
;;     adds the low product;
;;   - one built from sse2_umulv2siv2di3, which sums the low product and
;;     the two cross products shifted left by 32.
;; NOTE(review): the test choosing between the sequences and the first
;; line of the insn condition are not visible in this excerpt.
5811 (define_insn_and_split "mulv2di3"
5812 [(set (match_operand:V2DI 0 "register_operand" "")
5813 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5814 (match_operand:V2DI 2 "register_operand" "")))]
5816 && can_create_pseudo_p ()"
5821 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5830 /* op1: A,B,C,D, op2: E,F,G,H */
5831 op1 = gen_lowpart (V4SImode, op1);
5832 op2 = gen_lowpart (V4SImode, op2);
5834 t1 = gen_reg_rtx (V4SImode);
5835 t2 = gen_reg_rtx (V4SImode);
5836 t3 = gen_reg_rtx (V2DImode);
5837 t4 = gen_reg_rtx (V2DImode);
5840 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5846 /* t2: (B*E),(A*F),(D*G),(C*H) */
5847 emit_insn (gen_mulv4si3 (t2, t1, op2));
5849 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5850 emit_insn (gen_xop_phadddq (t3, t2));
5852 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5853 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5855 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5856 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5860 t1 = gen_reg_rtx (V2DImode);
5861 t2 = gen_reg_rtx (V2DImode);
5862 t3 = gen_reg_rtx (V2DImode);
5863 t4 = gen_reg_rtx (V2DImode);
5864 t5 = gen_reg_rtx (V2DImode);
5865 t6 = gen_reg_rtx (V2DImode);
5866 thirtytwo = GEN_INT (32);
5868 /* Multiply low parts. */
5869 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5870 gen_lowpart (V4SImode, op2)));
5872 /* Shift input vectors right 32 bits so we can multiply high parts. */
5873 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5874 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5876 /* Multiply high parts by low parts. */
5877 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5878 gen_lowpart (V4SImode, t3)));
5879 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5880 gen_lowpart (V4SImode, t2)));
5882 /* Shift them back. */
5883 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5884 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5886 /* Add the three parts together. */
5887 emit_insn (gen_addv2di3 (t6, t1, t4));
5888 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening V8HI multiply, "hi" variant with a V4SI result.  Computes the
;; mulv8hi3 product (t1) and the smulv8hi3_highpart product (t2), then
;; interleaves their high halves into operand 0 viewed as V8HI.
;; NOTE(review): the expander condition and the op1/op2 assignments are
;; not visible in this excerpt.
5893 (define_expand "vec_widen_smult_hi_v8hi"
5894 [(match_operand:V4SI 0 "register_operand" "")
5895 (match_operand:V8HI 1 "register_operand" "")
5896 (match_operand:V8HI 2 "register_operand" "")]
5899 rtx op1, op2, t1, t2, dest;
5903 t1 = gen_reg_rtx (V8HImode);
5904 t2 = gen_reg_rtx (V8HImode);
5905 dest = gen_lowpart (V8HImode, operands[0]);
5907 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5908 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5909 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI multiply, "lo" variant with a V4SI result.  Computes the
;; mulv8hi3 product (t1) and the smulv8hi3_highpart product (t2), then
;; interleaves their low halves into operand 0 viewed as V8HI.
;; NOTE(review): the expander condition and the op1/op2 assignments are
;; not visible in this excerpt.
5913 (define_expand "vec_widen_smult_lo_v8hi"
5914 [(match_operand:V4SI 0 "register_operand" "")
5915 (match_operand:V8HI 1 "register_operand" "")
5916 (match_operand:V8HI 2 "register_operand" "")]
5919 rtx op1, op2, t1, t2, dest;
5923 t1 = gen_reg_rtx (V8HImode);
5924 t2 = gen_reg_rtx (V8HImode);
5925 dest = gen_lowpart (V8HImode, operands[0]);
5927 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5928 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5929 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V8HI multiply, "hi" variant using umulv8hi3_highpart for the
;; upper product halves.  t1 holds the mulv8hi3 product, t2 the high-part
;; product; their high halves are interleaved into operand 0 viewed as
;; V8HI.
;; NOTE(review): the expander condition and the op1/op2 assignments are
;; not visible in this excerpt.
5933 (define_expand "vec_widen_umult_hi_v8hi"
5934 [(match_operand:V4SI 0 "register_operand" "")
5935 (match_operand:V8HI 1 "register_operand" "")
5936 (match_operand:V8HI 2 "register_operand" "")]
5939 rtx op1, op2, t1, t2, dest;
5943 t1 = gen_reg_rtx (V8HImode);
5944 t2 = gen_reg_rtx (V8HImode);
5945 dest = gen_lowpart (V8HImode, operands[0]);
5947 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5948 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5949 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI multiply, "lo" variant using umulv8hi3_highpart for the
;; upper product halves.  t1 holds the mulv8hi3 product, t2 the high-part
;; product; their low halves are interleaved into operand 0 viewed as
;; V8HI.
;; NOTE(review): the expander condition and the op1/op2 assignments are
;; not visible in this excerpt.
5953 (define_expand "vec_widen_umult_lo_v8hi"
5954 [(match_operand:V4SI 0 "register_operand" "")
5955 (match_operand:V8HI 1 "register_operand" "")
5956 (match_operand:V8HI 2 "register_operand" "")]
5959 rtx op1, op2, t1, t2, dest;
5963 t1 = gen_reg_rtx (V8HImode);
5964 t2 = gen_reg_rtx (V8HImode);
5965 dest = gen_lowpart (V8HImode, operands[0]);
5967 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5968 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5969 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V4SI multiply, "hi" variant with a V2DI result.  Both inputs
;; are shuffled with sse2_pshufd_1 into temporaries, which are then passed
;; to gen_xop_mulv2div2di3_high.
;; NOTE(review): the expander condition and the shuffle selector
;; arguments are not visible in this excerpt.
5973 (define_expand "vec_widen_smult_hi_v4si"
5974 [(match_operand:V2DI 0 "register_operand" "")
5975 (match_operand:V4SI 1 "register_operand" "")
5976 (match_operand:V4SI 2 "register_operand" "")]
5981 t1 = gen_reg_rtx (V4SImode);
5982 t2 = gen_reg_rtx (V4SImode);
5984 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5989 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5994 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening V4SI multiply, "lo" variant with a V2DI result.  Both inputs
;; are shuffled with sse2_pshufd_1 into temporaries, which are then passed
;; to gen_xop_mulv2div2di3_low.
;; NOTE(review): the expander condition and the shuffle selector
;; arguments are not visible in this excerpt.
5998 (define_expand "vec_widen_smult_lo_v4si"
5999 [(match_operand:V2DI 0 "register_operand" "")
6000 (match_operand:V4SI 1 "register_operand" "")
6001 (match_operand:V4SI 2 "register_operand" "")]
6006 t1 = gen_reg_rtx (V4SImode);
6007 t2 = gen_reg_rtx (V4SImode);
6009 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
6014 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
6019 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening V4SI multiply, "hi" variant with a V2DI result.  Each input is
;; interleaved with itself (high halves) and the two results are
;; multiplied with sse2_umulv2siv2di3 directly into operand 0.
;; NOTE(review): the expander condition and the op1/op2 assignments are
;; not visible in this excerpt.
6023 (define_expand "vec_widen_umult_hi_v4si"
6024 [(match_operand:V2DI 0 "register_operand" "")
6025 (match_operand:V4SI 1 "register_operand" "")
6026 (match_operand:V4SI 2 "register_operand" "")]
6029 rtx op1, op2, t1, t2;
6033 t1 = gen_reg_rtx (V4SImode);
6034 t2 = gen_reg_rtx (V4SImode);
6036 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
6037 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
6038 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI multiply, "lo" variant with a V2DI result.  Each input is
;; interleaved with itself (low halves) and the two results are
;; multiplied with sse2_umulv2siv2di3 directly into operand 0.
;; NOTE(review): the expander condition and the op1/op2 assignments are
;; not visible in this excerpt.
6042 (define_expand "vec_widen_umult_lo_v4si"
6043 [(match_operand:V2DI 0 "register_operand" "")
6044 (match_operand:V4SI 1 "register_operand" "")
6045 (match_operand:V4SI 2 "register_operand" "")]
6048 rtx op1, op2, t1, t2;
6052 t1 = gen_reg_rtx (V4SImode);
6053 t2 = gen_reg_rtx (V4SImode);
6055 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
6056 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
6057 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product expander on V8HI inputs with a V4SI accumulator: emits
;; sse2_pmaddwd of operands 1 and 2 into a fresh temporary, then adds the
;; accumulator (operand 3) to it to form operand 0.
;; NOTE(review): the expander condition is not visible in this excerpt.
6061 (define_expand "sdot_prodv8hi"
6062 [(match_operand:V4SI 0 "register_operand" "")
6063 (match_operand:V8HI 1 "register_operand" "")
6064 (match_operand:V8HI 2 "register_operand" "")
6065 (match_operand:V4SI 3 "register_operand" "")]
6068 rtx t = gen_reg_rtx (V4SImode);
6069 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
6070 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander on V4SI inputs with a V2DI accumulator.  Steps:
;;   t1 = sse2_umulv2siv2di3 (op1, op2), then t1 += operand 3;
;;   t2, t3 = operands 1 and 2 shifted with sse2_lshrv1ti3;
;;   t4 = sse2_umulv2siv2di3 (t2, t3);
;;   operand 0 = t1 + t4.
;; NOTE(review): the expander condition and the shift-amount arguments
;; are not visible in this excerpt.
6074 (define_expand "udot_prodv4si"
6075 [(match_operand:V2DI 0 "register_operand" "")
6076 (match_operand:V4SI 1 "register_operand" "")
6077 (match_operand:V4SI 2 "register_operand" "")
6078 (match_operand:V2DI 3 "register_operand" "")]
6083 t1 = gen_reg_rtx (V2DImode);
6084 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
6085 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
6087 t2 = gen_reg_rtx (V4SImode);
6088 t3 = gen_reg_rtx (V4SImode);
6089 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
6090 gen_lowpart (V1TImode, operands[1]),
6092 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
6093 gen_lowpart (V1TImode, operands[2]),
6096 t4 = gen_reg_rtx (V2DImode);
6097 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
6099 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX shift on the SSEMODE24 modes (V8HI, V4SI), emitted as three-operand
;; vpsra<ssevecsize>.  The SImode count is either a register or a constant
;; satisfying constraint N; length_immediate is 0 when the count is not a
;; const_int.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
6103 (define_insn "*avx_ashr<mode>3"
6104 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6106 (match_operand:SSEMODE24 1 "register_operand" "x")
6107 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6109 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6110 [(set_attr "type" "sseishft")
6111 (set_attr "prefix" "vex")
6112 (set (attr "length_immediate")
6113 (if_then_else (match_operand 2 "const_int_operand" "")
6115 (const_string "0")))
6116 (set_attr "mode" "TI")])
;; Non-VEX shift on the SSEMODE24 modes (V8HI, V4SI), emitted as
;; two-operand psra<ssevecsize> with operand 1 tied to the destination.
;; length_immediate is 0 when the count is not a const_int.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
6118 (define_insn "ashr<mode>3"
6119 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6121 (match_operand:SSEMODE24 1 "register_operand" "0")
6122 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6124 "psra<ssevecsize>\t{%2, %0|%0, %2}"
6125 [(set_attr "type" "sseishft")
6126 (set_attr "prefix_data16" "1")
6127 (set (attr "length_immediate")
6128 (if_then_else (match_operand 2 "const_int_operand" "")
6130 (const_string "0")))
6131 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) shift emitted as vpsrldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before it is printed as the instruction's immediate.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
6133 (define_insn "*avx_lshrv1ti3"
6134 [(set (match_operand:V1TI 0 "register_operand" "=x")
6136 (match_operand:V1TI 1 "register_operand" "x")
6137 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6140 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6141 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
6143 [(set_attr "type" "sseishft")
6144 (set_attr "prefix" "vex")
6145 (set_attr "length_immediate" "1")
6146 (set_attr "mode" "TI")])
;; AVX logical right shift (lshiftrt) on the SSEMODE248 modes, emitted as
;; three-operand vpsrl<ssevecsize>.  length_immediate is 0 when the count
;; is not a const_int.
;; NOTE(review): the insn condition is not visible in this excerpt.
6148 (define_insn "*avx_lshr<mode>3"
6149 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6150 (lshiftrt:SSEMODE248
6151 (match_operand:SSEMODE248 1 "register_operand" "x")
6152 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6154 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6155 [(set_attr "type" "sseishft")
6156 (set_attr "prefix" "vex")
6157 (set (attr "length_immediate")
6158 (if_then_else (match_operand 2 "const_int_operand" "")
6160 (const_string "0")))
6161 (set_attr "mode" "TI")])
;; Non-VEX whole-register (V1TI) shift emitted as psrldq, with operand 1
;; tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before it
;; is printed as the instruction's immediate.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
6163 (define_insn "sse2_lshrv1ti3"
6164 [(set (match_operand:V1TI 0 "register_operand" "=x")
6166 (match_operand:V1TI 1 "register_operand" "0")
6167 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6170 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6171 return "psrldq\t{%2, %0|%0, %2}";
6173 [(set_attr "type" "sseishft")
6174 (set_attr "prefix_data16" "1")
6175 (set_attr "length_immediate" "1")
6176 (set_attr "atom_unit" "sishuf")
6177 (set_attr "mode" "TI")])
;; Non-VEX logical right shift (lshiftrt) on the SSEMODE248 modes, emitted
;; as two-operand psrl<ssevecsize> with operand 1 tied to the destination.
;; length_immediate is 0 when the count is not a const_int.
;; NOTE(review): the insn condition is not visible in this excerpt.
6179 (define_insn "lshr<mode>3"
6180 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6181 (lshiftrt:SSEMODE248
6182 (match_operand:SSEMODE248 1 "register_operand" "0")
6183 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6185 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6186 [(set_attr "type" "sseishft")
6187 (set_attr "prefix_data16" "1")
6188 (set (attr "length_immediate")
6189 (if_then_else (match_operand 2 "const_int_operand" "")
6191 (const_string "0")))
6192 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) left shift (ashift) emitted as vpslldq.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before it is printed as the instruction's immediate.
;; NOTE(review): the insn condition is not visible in this excerpt.
6194 (define_insn "*avx_ashlv1ti3"
6195 [(set (match_operand:V1TI 0 "register_operand" "=x")
6196 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
6197 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6200 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6201 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
6203 [(set_attr "type" "sseishft")
6204 (set_attr "prefix" "vex")
6205 (set_attr "length_immediate" "1")
6206 (set_attr "mode" "TI")])
;; AVX shift on the SSEMODE248 modes, emitted as three-operand
;; vpsll<ssevecsize>.  length_immediate is 0 when the count is not a
;; const_int.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
6208 (define_insn "*avx_ashl<mode>3"
6209 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6211 (match_operand:SSEMODE248 1 "register_operand" "x")
6212 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6214 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6215 [(set_attr "type" "sseishft")
6216 (set_attr "prefix" "vex")
6217 (set (attr "length_immediate")
6218 (if_then_else (match_operand 2 "const_int_operand" "")
6220 (const_string "0")))
6221 (set_attr "mode" "TI")])
;; Non-VEX whole-register (V1TI) left shift (ashift) emitted as pslldq,
;; with operand 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before it
;; is printed as the instruction's immediate.
;; NOTE(review): the insn condition is not visible in this excerpt.
6223 (define_insn "sse2_ashlv1ti3"
6224 [(set (match_operand:V1TI 0 "register_operand" "=x")
6225 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
6226 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6229 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6230 return "pslldq\t{%2, %0|%0, %2}";
6232 [(set_attr "type" "sseishft")
6233 (set_attr "prefix_data16" "1")
6234 (set_attr "length_immediate" "1")
6235 (set_attr "mode" "TI")])
;; Non-VEX shift on the SSEMODE248 modes, emitted as two-operand
;; psll<ssevecsize> with operand 1 tied to the destination.
;; length_immediate is 0 when the count is not a const_int.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
6237 (define_insn "ashl<mode>3"
6238 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6240 (match_operand:SSEMODE248 1 "register_operand" "0")
6241 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6243 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6244 [(set_attr "type" "sseishft")
6245 (set_attr "prefix_data16" "1")
6246 (set (attr "length_immediate")
6247 (if_then_else (match_operand 2 "const_int_operand" "")
6249 (const_string "0")))
6250 (set_attr "mode" "TI")])
;; Whole-vector shift expander for the 16-byte integer modes.  The shift
;; count must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-views operands 0 and 1 as V1TImode before the pattern is emitted.
;; NOTE(review): the RTL operator line and the expander condition are not
;; visible in this excerpt.
6252 (define_expand "vec_shl_<mode>"
6253 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6255 (match_operand:SSEMODEI 1 "register_operand" "")
6256 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6259 operands[0] = gen_lowpart (V1TImode, operands[0]);
6260 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-vector shift expander for the 16-byte integer modes, counterpart
;; of vec_shl_<mode>.  The shift count must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code re-views operands 0
;; and 1 as V1TImode before the pattern is emitted.
;; NOTE(review): the RTL operator line and the expander condition are not
;; visible in this excerpt.
6263 (define_expand "vec_shr_<mode>"
6264 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6266 (match_operand:SSEMODEI 1 "register_operand" "")
6267 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6270 operands[0] = gen_lowpart (V1TImode, operands[0]);
6271 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX integer min/max on the SSEMODE124 modes, emitted as three-operand
;; vp<maxmin_int><ssevecsize>.  The prefix_extra attribute tests whether
;; operand 0 is V16QI and is "1" when it is not.
;; NOTE(review): the RTL operator line (which code iterator applies) and
;; the then-arm of the prefix_extra test are not visible in this excerpt.
6274 (define_insn "*avx_<code><mode>3"
6275 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6277 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6278 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6279 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6280 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6281 [(set_attr "type" "sseiadd")
6282 (set (attr "prefix_extra")
6283 (if_then_else (match_operand:V16QI 0 "" "")
6285 (const_string "1")))
6286 (set_attr "prefix" "vex")
6287 (set_attr "mode" "TI")])
;; Expander for the <code> min/max operations on V16QI; operands are
;; passed through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line and the expander condition are not
;; visible in this excerpt.
6289 (define_expand "<code>v16qi3"
6290 [(set (match_operand:V16QI 0 "register_operand" "")
6292 (match_operand:V16QI 1 "nonimmediate_operand" "")
6293 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6295 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI min/max insn, emitted as two-operand p<maxmin_int>b with
;; operand 1 tied to the destination.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
6297 (define_insn "*<code>v16qi3"
6298 [(set (match_operand:V16QI 0 "register_operand" "=x")
6300 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6301 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6302 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6303 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6304 [(set_attr "type" "sseiadd")
6305 (set_attr "prefix_data16" "1")
6306 (set_attr "mode" "TI")])
;; AVX integer min/max on the SSEMODE124 modes, emitted as three-operand
;; vp<maxmin_int><ssevecsize>.  Unlike the earlier pattern of the same
;; name, the prefix_extra attribute here tests whether operand 0 is V8HI
;; and is "1" when it is not.
;; NOTE(review): the RTL operator line (which code iterator applies) and
;; the then-arm of the prefix_extra test are not visible in this excerpt.
6308 (define_insn "*avx_<code><mode>3"
6309 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6311 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6312 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6313 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6314 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6315 [(set_attr "type" "sseiadd")
6316 (set (attr "prefix_extra")
6317 (if_then_else (match_operand:V8HI 0 "" "")
6319 (const_string "1")))
6320 (set_attr "prefix" "vex")
6321 (set_attr "mode" "TI")])
;; Expander for the <code> min/max operations on V8HI; operands are passed
;; through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line and the expander condition are not
;; visible in this excerpt.
6323 (define_expand "<code>v8hi3"
6324 [(set (match_operand:V8HI 0 "register_operand" "")
6326 (match_operand:V8HI 1 "nonimmediate_operand" "")
6327 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6329 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI min/max insn, emitted as two-operand p<maxmin_int>w with
;; operand 1 tied to the destination.
;; NOTE(review): the RTL operator line is not visible in this excerpt.
6331 (define_insn "*<code>v8hi3"
6332 [(set (match_operand:V8HI 0 "register_operand" "=x")
6334 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6335 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6336 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6337 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6338 [(set_attr "type" "sseiadd")
6339 (set_attr "prefix_data16" "1")
6340 (set_attr "mode" "TI")])
;; Expander for unsigned V8HI maximum.  Two paths are visible: one only
;; fixes up the operands for UMAX; the other emits sse2_ussubv8hi3
;; (operand 1, operand 2) followed by addv8hi3 with operand 2.  A fresh
;; temporary is used for the intermediate when the destination is
;; rtx_equal_p to operand 2, so operand 2 is still intact for the add.
;; NOTE(review): the expander condition and the test choosing between the
;; two paths are not visible in this excerpt.
6342 (define_expand "umaxv8hi3"
6343 [(set (match_operand:V8HI 0 "register_operand" "")
6344 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6345 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6349 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6352 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6353 if (rtx_equal_p (op3, op2))
6354 op3 = gen_reg_rtx (V8HImode);
6355 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6356 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for signed maximum on the SSEMODE14 modes (V16QI, V4SI).  Two
;; paths are visible: one only fixes up the operands for SMAX; the other
;; builds a six-entry operand array (destination, operand 1, operand 2, a
;; GT comparison of operands 1 and 2, and the two compared values) and
;; hands it to ix86_expand_int_vcond.
;; NOTE(review): the expander condition and the test choosing between the
;; two paths are not visible in this excerpt.
6361 (define_expand "smax<mode>3"
6362 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6363 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6364 (match_operand:SSEMODE14 2 "register_operand" "")))]
6368 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6374 xops[0] = operands[0];
6375 xops[1] = operands[1];
6376 xops[2] = operands[2];
6377 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6378 xops[4] = operands[1];
6379 xops[5] = operands[2];
6380 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand instruction for the SSEMODE14 modes (V16QI, V4SI),
;; emitted as p<maxmin_int><ssevecsize>.  Operand 1 is commutative and tied
;; to the destination ("%0"); operand 2 may be a register or memory.
6386 (define_insn "*sse4_1_<code><mode>3"
6387 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6389 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6390 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6391 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6392 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6393 [(set_attr "type" "sseiadd")
6394 (set_attr "prefix_extra" "1")
6395 (set_attr "mode" "TI")])
;; Expander for signed maximum on V2DI.
;; Builds a vcond-style operand array (destination, true value, false
;; value, comparison, comparison operands) and passes it to
;; ix86_expand_int_vcond, encoding (op1 > op2) ? op1 : op2 with a signed GT.
6397 (define_expand "smaxv2di3"
6398 [(set (match_operand:V2DI 0 "register_operand" "")
6399 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6400 (match_operand:V2DI 2 "register_operand" "")))]
6406 xops[0] = operands[0];
6407 xops[1] = operands[1];
6408 xops[2] = operands[2];
6409 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6410 xops[4] = operands[1];
6411 xops[5] = operands[2];
6412 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned maximum on V4SI.
;; Besides the operand canonicalization call, the preparation code builds
;; a vcond-style operand array (destination, true value, false value,
;; comparison, comparison operands) for ix86_expand_int_vcond, encoding
;; (op1 > op2) ? op1 : op2 with an unsigned GTU.
6417 (define_expand "umaxv4si3"
6418 [(set (match_operand:V4SI 0 "register_operand" "")
6419 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6420 (match_operand:V4SI 2 "register_operand" "")))]
6424 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6430 xops[0] = operands[0];
6431 xops[1] = operands[1];
6432 xops[2] = operands[2];
6433 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6434 xops[4] = operands[1];
6435 xops[5] = operands[2];
6436 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand instruction for the SSEMODE24 modes (V8HI, V4SI),
;; emitted as p<maxmin_int><ssevecsize>.  Operand 1 is commutative and tied
;; to the destination ("%0"); operand 2 may be a register or memory.
6442 (define_insn "*sse4_1_<code><mode>3"
6443 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6445 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6446 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6447 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6448 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6449 [(set_attr "type" "sseiadd")
6450 (set_attr "prefix_extra" "1")
6451 (set_attr "mode" "TI")])
;; Expander for unsigned maximum on V2DI.
;; Builds a vcond-style operand array (destination, true value, false
;; value, comparison, comparison operands) and passes it to
;; ix86_expand_int_vcond, encoding (op1 > op2) ? op1 : op2 with an
;; unsigned GTU.
6453 (define_expand "umaxv2di3"
6454 [(set (match_operand:V2DI 0 "register_operand" "")
6455 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6456 (match_operand:V2DI 2 "register_operand" "")))]
6462 xops[0] = operands[0];
6463 xops[1] = operands[1];
6464 xops[2] = operands[2];
6465 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6466 xops[4] = operands[1];
6467 xops[5] = operands[2];
6468 ok = ix86_expand_int_vcond (xops);
;; Expander for signed minimum on the SSEMODE14 modes (V16QI, V4SI).
;; Besides the operand canonicalization call, the preparation code builds
;; a vcond-style operand array for ix86_expand_int_vcond.  The comparison
;; is the same signed GT (op1 > op2) used for the maximum, but the two
;; selected values are swapped: (op1 > op2) ? op2 : op1.
6473 (define_expand "smin<mode>3"
6474 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6475 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6476 (match_operand:SSEMODE14 2 "register_operand" "")))]
6480 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6486 xops[0] = operands[0];
6487 xops[1] = operands[2];
6488 xops[2] = operands[1];
6489 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6490 xops[4] = operands[1];
6491 xops[5] = operands[2];
6492 ok = ix86_expand_int_vcond (xops);
;; Expander for signed minimum on V2DI.
;; Builds a vcond-style operand array for ix86_expand_int_vcond with the
;; selected values swapped relative to the maximum:
;; (op1 > op2) ? op2 : op1, using a signed GT.
6498 (define_expand "sminv2di3"
6499 [(set (match_operand:V2DI 0 "register_operand" "")
6500 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6501 (match_operand:V2DI 2 "register_operand" "")))]
6507 xops[0] = operands[0];
6508 xops[1] = operands[2];
6509 xops[2] = operands[1];
6510 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6511 xops[4] = operands[1];
6512 xops[5] = operands[2];
6513 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned minimum on the SSEMODE24 modes (V8HI, V4SI).
;; Besides the operand canonicalization call, the preparation code builds
;; a vcond-style operand array for ix86_expand_int_vcond with the selected
;; values swapped: (op1 > op2) ? op2 : op1, using an unsigned GTU.
6518 (define_expand "umin<mode>3"
6519 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6520 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6521 (match_operand:SSEMODE24 2 "register_operand" "")))]
6525 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6531 xops[0] = operands[0];
6532 xops[1] = operands[2];
6533 xops[2] = operands[1];
6534 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6535 xops[4] = operands[1];
6536 xops[5] = operands[2];
6537 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned minimum on V2DI.
;; Builds a vcond-style operand array for ix86_expand_int_vcond with the
;; selected values swapped relative to the maximum:
;; (op1 > op2) ? op2 : op1, using an unsigned GTU.
6543 (define_expand "uminv2di3"
6544 [(set (match_operand:V2DI 0 "register_operand" "")
6545 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6546 (match_operand:V2DI 2 "register_operand" "")))]
6552 xops[0] = operands[0];
6553 xops[1] = operands[2];
6554 xops[2] = operands[1];
6555 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6556 xops[4] = operands[1];
6557 xops[5] = operands[2];
6558 ok = ix86_expand_int_vcond (xops);
6563 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6565 ;; Parallel integral comparisons
6567 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on the SSEMODE124 modes.
;; Enabled for SSE2 when XOP is not in use.  The preparation statement only
;; canonicalizes the operands for EQ via ix86_fixup_binary_operands_no_copy.
6569 (define_expand "sse2_eq<mode>3"
6570 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6572 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6573 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6574 "TARGET_SSE2 && !TARGET_XOP "
6575 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX three-operand equality compare for the SSEMODE1248 modes, emitted as
;; vpcmpeq<ssevecsize>.  Operand 1 is commutative ("%x") and need not match
;; the destination; operand 2 may be a register or memory.
;; The prefix_extra attribute is chosen by testing whether operand 0 is
;; V2DI.
6577 (define_insn "*avx_eq<mode>3"
6578 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6580 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6581 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6582 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6583 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6584 [(set_attr "type" "ssecmp")
6585 (set (attr "prefix_extra")
6586 (if_then_else (match_operand:V2DI 0 "" "")
6588 (const_string "*")))
6589 (set_attr "prefix" "vex")
6590 (set_attr "mode" "TI")])
;; SSE2 two-operand equality compare for the SSEMODE124 modes, emitted as
;; pcmpeq<ssevecsize>.  Operand 1 is commutative and tied to the
;; destination ("%0").  Not used when XOP is enabled.
6592 (define_insn "*sse2_eq<mode>3"
6593 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6595 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6596 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6597 "TARGET_SSE2 && !TARGET_XOP
6598 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6599 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6600 [(set_attr "type" "ssecmp")
6601 (set_attr "prefix_data16" "1")
6602 (set_attr "mode" "TI")])
;; Expander for element-wise equality on V2DI.  The preparation statement
;; only canonicalizes the operands for EQ via
;; ix86_fixup_binary_operands_no_copy.
6604 (define_expand "sse4_1_eqv2di3"
6605 [(set (match_operand:V2DI 0 "register_operand" "")
6607 (match_operand:V2DI 1 "nonimmediate_operand" "")
6608 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6610 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand equality compare on V2DI, emitted as pcmpeqq.
;; Operand 1 is commutative and tied to the destination ("%0"); operand 2
;; may be a register or memory.
6612 (define_insn "*sse4_1_eqv2di3"
6613 [(set (match_operand:V2DI 0 "register_operand" "=x")
6615 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6616 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6617 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6618 "pcmpeqq\t{%2, %0|%0, %2}"
6619 [(set_attr "type" "ssecmp")
6620 (set_attr "prefix_extra" "1")
6621 (set_attr "mode" "TI")])
;; AVX three-operand greater-than compare for the SSEMODE1248 modes,
;; emitted as vpcmpgt<ssevecsize>.  Unlike the equality patterns, operand 1
;; carries no "%" modifier (the operands are not interchangeable) and must
;; be a register.  The prefix_extra attribute is chosen by testing whether
;; operand 0 is V2DI.
6623 (define_insn "*avx_gt<mode>3"
6624 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6626 (match_operand:SSEMODE1248 1 "register_operand" "x")
6627 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6629 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6630 [(set_attr "type" "ssecmp")
6631 (set (attr "prefix_extra")
6632 (if_then_else (match_operand:V2DI 0 "" "")
6634 (const_string "*")))
6635 (set_attr "prefix" "vex")
6636 (set_attr "mode" "TI")])
;; SSE2 two-operand greater-than compare for the SSEMODE124 modes, emitted
;; as pcmpgt<ssevecsize>.  Operand 1 must be the destination register ("0")
;; and is not commutative.  Not used when XOP is enabled.
6638 (define_insn "sse2_gt<mode>3"
6639 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6641 (match_operand:SSEMODE124 1 "register_operand" "0")
6642 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6643 "TARGET_SSE2 && !TARGET_XOP"
6644 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6645 [(set_attr "type" "ssecmp")
6646 (set_attr "prefix_data16" "1")
6647 (set_attr "mode" "TI")])
;; Two-operand greater-than compare on V2DI, emitted as pcmpgtq.
;; Operand 1 must be the destination register ("0"); operand 2 may be a
;; register or memory.
6649 (define_insn "sse4_2_gtv2di3"
6650 [(set (match_operand:V2DI 0 "register_operand" "=x")
6652 (match_operand:V2DI 1 "register_operand" "0")
6653 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6655 "pcmpgtq\t{%2, %0|%0, %2}"
6656 [(set_attr "type" "ssecmp")
6657 (set_attr "prefix_extra" "1")
6658 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODE124C8 modes.
;; Operand layout: 0 = destination, 3 = comparison operator applied to
;; operands 4 and 5, 1 = value chosen where the comparison holds, 2 = value
;; chosen otherwise.  The whole operand array is handed to
;; ix86_expand_int_vcond, whose result is captured in `ok'.
6660 (define_expand "vcond<mode>"
6661 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6662 (if_then_else:SSEMODE124C8
6663 (match_operator 3 ""
6664 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6665 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6666 (match_operand:SSEMODE124C8 1 "general_operand" "")
6667 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6670 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select, "vcondu" variant, for the SSEMODE124C8 modes.
;; Same operand layout as vcond<mode>: 0 = destination, 3 = comparison
;; operator applied to operands 4 and 5, 1 / 2 = values chosen where the
;; comparison holds / does not hold.  Expansion goes through the same
;; ix86_expand_int_vcond helper.
6675 (define_expand "vcondu<mode>"
6676 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6677 (if_then_else:SSEMODE124C8
6678 (match_operator 3 ""
6679 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6680 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6681 (match_operand:SSEMODE124C8 1 "general_operand" "")
6682 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6685 bool ok = ix86_expand_int_vcond (operands);
6690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6692 ;; Parallel bitwise logical operations
6694 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the 16-byte integer vector modes (SSEMODEI),
;; expressed as an xor.  The preparation code builds a CONST_VECTOR with
;; every element set to constm1_rtx (one element per unit of the mode),
;; forces it into a register and stores it as operands[2].
6696 (define_expand "one_cmpl<mode>2"
6697 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6698 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6702 int i, n = GET_MODE_NUNITS (<MODE>mode);
6703 rtvec v = rtvec_alloc (n);
6705 for (i = 0; i < n; ++i)
6706 RTVEC_ELT (v, i) = constm1_rtx;
6708 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not for the 32-byte integer vector modes (AVX256MODEI): combines the
;; complement of operand 1 with operand 2.  Emitted with the
;; single-precision mnemonic vandnps in three-operand form; the insn "mode"
;; attribute is <avxvecpsmode>.
6711 (define_insn "*avx_andnot<mode>3"
6712 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6714 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6715 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6717 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6718 [(set_attr "type" "sselog")
6719 (set_attr "prefix" "vex")
6720 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the 16-byte integer vector modes when only SSE (not SSE2) is
;; available: the integer operation is emitted with the single-precision
;; mnemonic andnps and the insn "mode" attribute is V4SF.  Operand 1 (the
;; complemented input) must be the destination register.
6722 (define_insn "*sse_andnot<mode>3"
6723 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6725 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6726 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6727 "(TARGET_SSE && !TARGET_SSE2)"
6728 "andnps\t{%2, %0|%0, %2}"
6729 [(set_attr "type" "sselog")
6730 (set_attr "mode" "V4SF")])
;; And-not for the 16-byte integer vector modes in VEX three-operand form,
;; emitted as vpandn.  Operand 1 is the complemented input and may be any
;; SSE register; operand 2 may be a register or memory.
6732 (define_insn "*avx_andnot<mode>3"
6733 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6735 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6736 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6738 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6739 [(set_attr "type" "sselog")
6740 (set_attr "prefix" "vex")
6741 (set_attr "mode" "TI")])
;; And-not for the 16-byte integer vector modes in two-operand form,
;; emitted as pandn.  Operand 1 (the complemented input) must be the
;; destination register; operand 2 may be a register or memory.
6743 (define_insn "sse2_andnot<mode>3"
6744 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6746 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6747 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6749 "pandn\t{%2, %0|%0, %2}"
6750 [(set_attr "type" "sselog")
6751 (set_attr "prefix_data16" "1")
6752 (set_attr "mode" "TI")])
;; And-not on a TF-mode value held in an SSE register, emitted as pandn.
;; Operand 1 (the complemented input) must be the destination register;
;; operand 2 may be a register or memory.
6754 (define_insn "*andnottf3"
6755 [(set (match_operand:TF 0 "register_operand" "=x")
6757 (not:TF (match_operand:TF 1 "register_operand" "0"))
6758 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6760 "pandn\t{%2, %0|%0, %2}"
6761 [(set_attr "type" "sselog")
6762 (set_attr "prefix_data16" "1")
6763 (set_attr "mode" "TI")])
;; Expander for the binary operation named by the <code> iterator on the
;; 16-byte integer vector modes (SSEMODEI).  The preparation statement only
;; canonicalizes the operands via ix86_fixup_binary_operands_no_copy.
6765 (define_expand "<code><mode>3"
6766 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6768 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6769 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6771 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Bitwise logic (any_logic iterator) on the 32-byte integer vector modes
;; (AVX256MODEI).  Emitted with the single-precision mnemonic v<logic>ps in
;; three-operand form; the insn "mode" attribute is <avxvecpsmode>.
;; Operand 1 is commutative ("%x").
6773 (define_insn "*avx_<code><mode>3"
6774 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6775 (any_logic:AVX256MODEI
6776 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6777 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6779 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6780 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6781 [(set_attr "type" "sselog")
6782 (set_attr "prefix" "vex")
6783 (set_attr "mode" "<avxvecpsmode>")])
;; Bitwise logic on the 16-byte integer vector modes when only SSE (not
;; SSE2) is available: emitted with the single-precision mnemonic
;; <logic>ps, insn "mode" attribute V4SF.  Operand 1 is commutative and
;; tied to the destination ("%0").
6785 (define_insn "*sse_<code><mode>3"
6786 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6788 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6789 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6790 "(TARGET_SSE && !TARGET_SSE2)
6791 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6792 "<logic>ps\t{%2, %0|%0, %2}"
6793 [(set_attr "type" "sselog")
6794 (set_attr "mode" "V4SF")])
;; Bitwise logic on the 16-byte integer vector modes in VEX three-operand
;; form, emitted as vp<logic>.  Operand 1 is commutative ("%x") and need
;; not match the destination.
6796 (define_insn "*avx_<code><mode>3"
6797 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6799 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6800 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6802 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6803 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6804 [(set_attr "type" "sselog")
6805 (set_attr "prefix" "vex")
6806 (set_attr "mode" "TI")])
;; SSE2 bitwise logic on the 16-byte integer vector modes in two-operand
;; form, emitted as p<logic>.  Operand 1 is commutative and tied to the
;; destination ("%0"); operand 2 may be a register or memory.
6808 (define_insn "*sse2_<code><mode>3"
6809 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6811 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6812 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6813 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6814 "p<logic>\t{%2, %0|%0, %2}"
6815 [(set_attr "type" "sselog")
6816 (set_attr "prefix_data16" "1")
6817 (set_attr "mode" "TI")])
;; Expander for the binary operation named by the <code> iterator on TF
;; mode.  The preparation statement only canonicalizes the operands via
;; ix86_fixup_binary_operands_no_copy.
6819 (define_expand "<code>tf3"
6820 [(set (match_operand:TF 0 "register_operand" "")
6822 (match_operand:TF 1 "nonimmediate_operand" "")
6823 (match_operand:TF 2 "nonimmediate_operand" "")))]
6825 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 bitwise logic on a TF-mode value held in an SSE register, emitted
;; as p<logic> in two-operand form.  Operand 1 is commutative and tied to
;; the destination ("%0").
6827 (define_insn "*<code>tf3"
6828 [(set (match_operand:TF 0 "register_operand" "=x")
6830 (match_operand:TF 1 "nonimmediate_operand" "%0")
6831 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6832 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6833 "p<logic>\t{%2, %0|%0, %2}"
6834 [(set_attr "type" "sselog")
6835 (set_attr "prefix_data16" "1")
6836 (set_attr "mode" "TI")])
6838 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6840 ;; Parallel integral element swizzling
6842 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI inputs into one V16QI result.
;; Both inputs are reinterpreted as V16QI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered (low) byte of each
;; halfword, which is what makes this a truncation -- confirm against the
;; helper.
6844 (define_expand "vec_pack_trunc_v8hi"
6845 [(match_operand:V16QI 0 "register_operand" "")
6846 (match_operand:V8HI 1 "register_operand" "")
6847 (match_operand:V8HI 2 "register_operand" "")]
6850 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6851 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6852 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V4SI inputs into one V8HI result.
;; Both inputs are reinterpreted as V8HI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered (low) halfword of
;; each element -- confirm against the helper.
6856 (define_expand "vec_pack_trunc_v4si"
6857 [(match_operand:V8HI 0 "register_operand" "")
6858 (match_operand:V4SI 1 "register_operand" "")
6859 (match_operand:V4SI 2 "register_operand" "")]
6862 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6863 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6864 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V2DI inputs into one V4SI result.
;; Both inputs are reinterpreted as V4SI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered (low) word of each
;; element -- confirm against the helper.
6868 (define_expand "vec_pack_trunc_v2di"
6869 [(match_operand:V4SI 0 "register_operand" "")
6870 (match_operand:V2DI 1 "register_operand" "")
6871 (match_operand:V2DI 2 "register_operand" "")]
6874 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6875 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6876 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; VEX three-operand pack of two V8HI inputs into a V16QI result, emitted
;; as vpacksswb.  Operand 1 may be any SSE register; operand 2 may be a
;; register or memory.
6880 (define_insn "*avx_packsswb"
6881 [(set (match_operand:V16QI 0 "register_operand" "=x")
6884 (match_operand:V8HI 1 "register_operand" "x"))
6886 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6888 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6889 [(set_attr "type" "sselog")
6890 (set_attr "prefix" "vex")
6891 (set_attr "mode" "TI")])
;; Two-operand pack of two V8HI inputs into a V16QI result, emitted as
;; packsswb.  Operand 1 must be the destination register ("0"); operand 2
;; may be a register or memory.
6893 (define_insn "sse2_packsswb"
6894 [(set (match_operand:V16QI 0 "register_operand" "=x")
6897 (match_operand:V8HI 1 "register_operand" "0"))
6899 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6901 "packsswb\t{%2, %0|%0, %2}"
6902 [(set_attr "type" "sselog")
6903 (set_attr "prefix_data16" "1")
6904 (set_attr "mode" "TI")])
;; VEX three-operand pack of two V4SI inputs into a V8HI result, emitted
;; as vpackssdw.  Operand 1 may be any SSE register; operand 2 may be a
;; register or memory.
6906 (define_insn "*avx_packssdw"
6907 [(set (match_operand:V8HI 0 "register_operand" "=x")
6910 (match_operand:V4SI 1 "register_operand" "x"))
6912 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6914 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6915 [(set_attr "type" "sselog")
6916 (set_attr "prefix" "vex")
6917 (set_attr "mode" "TI")])
;; Two-operand pack of two V4SI inputs into a V8HI result, emitted as
;; packssdw.  Operand 1 must be the destination register ("0"); operand 2
;; may be a register or memory.
6919 (define_insn "sse2_packssdw"
6920 [(set (match_operand:V8HI 0 "register_operand" "=x")
6923 (match_operand:V4SI 1 "register_operand" "0"))
6925 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6927 "packssdw\t{%2, %0|%0, %2}"
6928 [(set_attr "type" "sselog")
6929 (set_attr "prefix_data16" "1")
6930 (set_attr "mode" "TI")])
;; VEX three-operand pack of two V8HI inputs into a V16QI result, emitted
;; as vpackuswb.  Operand 1 may be any SSE register; operand 2 may be a
;; register or memory.
6932 (define_insn "*avx_packuswb"
6933 [(set (match_operand:V16QI 0 "register_operand" "=x")
6936 (match_operand:V8HI 1 "register_operand" "x"))
6938 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6940 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6941 [(set_attr "type" "sselog")
6942 (set_attr "prefix" "vex")
6943 (set_attr "mode" "TI")])
;; Two-operand pack of two V8HI inputs into a V16QI result, emitted as
;; packuswb.  Operand 1 must be the destination register ("0"); operand 2
;; may be a register or memory.
6945 (define_insn "sse2_packuswb"
6946 [(set (match_operand:V16QI 0 "register_operand" "=x")
6949 (match_operand:V8HI 1 "register_operand" "0"))
6951 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6953 "packuswb\t{%2, %0|%0, %2}"
6954 [(set_attr "type" "sselog")
6955 (set_attr "prefix_data16" "1")
6956 (set_attr "mode" "TI")])
;; VEX three-operand byte interleave, emitted as vpunpckhbw.
;; The selection list alternates indices 8..15 with 24..31, pairing each
;; index i with i + 16.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the upper eight bytes of each input -- confirm.
6958 (define_insn "*avx_interleave_highv16qi"
6959 [(set (match_operand:V16QI 0 "register_operand" "=x")
6962 (match_operand:V16QI 1 "register_operand" "x")
6963 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6964 (parallel [(const_int 8) (const_int 24)
6965 (const_int 9) (const_int 25)
6966 (const_int 10) (const_int 26)
6967 (const_int 11) (const_int 27)
6968 (const_int 12) (const_int 28)
6969 (const_int 13) (const_int 29)
6970 (const_int 14) (const_int 30)
6971 (const_int 15) (const_int 31)])))]
6973 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6974 [(set_attr "type" "sselog")
6975 (set_attr "prefix" "vex")
6976 (set_attr "mode" "TI")])
;; Two-operand byte interleave, emitted as punpckhbw; operand 1 must be the
;; destination register ("0").
;; The selection list alternates indices 8..15 with 24..31, pairing each
;; index i with i + 16.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the upper eight bytes of each input -- confirm.
6978 (define_insn "vec_interleave_highv16qi"
6979 [(set (match_operand:V16QI 0 "register_operand" "=x")
6982 (match_operand:V16QI 1 "register_operand" "0")
6983 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6984 (parallel [(const_int 8) (const_int 24)
6985 (const_int 9) (const_int 25)
6986 (const_int 10) (const_int 26)
6987 (const_int 11) (const_int 27)
6988 (const_int 12) (const_int 28)
6989 (const_int 13) (const_int 29)
6990 (const_int 14) (const_int 30)
6991 (const_int 15) (const_int 31)])))]
6993 "punpckhbw\t{%2, %0|%0, %2}"
6994 [(set_attr "type" "sselog")
6995 (set_attr "prefix_data16" "1")
6996 (set_attr "mode" "TI")])
;; VEX three-operand byte interleave, emitted as vpunpcklbw.
;; The selection list alternates indices 0..7 with 16..23, pairing each
;; index i with i + 16.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the lower eight bytes of each input -- confirm.
6998 (define_insn "*avx_interleave_lowv16qi"
6999 [(set (match_operand:V16QI 0 "register_operand" "=x")
7002 (match_operand:V16QI 1 "register_operand" "x")
7003 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
7004 (parallel [(const_int 0) (const_int 16)
7005 (const_int 1) (const_int 17)
7006 (const_int 2) (const_int 18)
7007 (const_int 3) (const_int 19)
7008 (const_int 4) (const_int 20)
7009 (const_int 5) (const_int 21)
7010 (const_int 6) (const_int 22)
7011 (const_int 7) (const_int 23)])))]
7013 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
7014 [(set_attr "type" "sselog")
7015 (set_attr "prefix" "vex")
7016 (set_attr "mode" "TI")])
;; Two-operand byte interleave, emitted as punpcklbw; operand 1 must be the
;; destination register ("0").
;; The selection list alternates indices 0..7 with 16..23, pairing each
;; index i with i + 16.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the lower eight bytes of each input -- confirm.
7018 (define_insn "vec_interleave_lowv16qi"
7019 [(set (match_operand:V16QI 0 "register_operand" "=x")
7022 (match_operand:V16QI 1 "register_operand" "0")
7023 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
7024 (parallel [(const_int 0) (const_int 16)
7025 (const_int 1) (const_int 17)
7026 (const_int 2) (const_int 18)
7027 (const_int 3) (const_int 19)
7028 (const_int 4) (const_int 20)
7029 (const_int 5) (const_int 21)
7030 (const_int 6) (const_int 22)
7031 (const_int 7) (const_int 23)])))]
7033 "punpcklbw\t{%2, %0|%0, %2}"
7034 [(set_attr "type" "sselog")
7035 (set_attr "prefix_data16" "1")
7036 (set_attr "mode" "TI")])
;; VEX three-operand halfword interleave, emitted as vpunpckhwd.
;; The selection list alternates indices 4..7 with 12..15, pairing each
;; index i with i + 8.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the upper four elements of each input --
;; confirm.
7038 (define_insn "*avx_interleave_highv8hi"
7039 [(set (match_operand:V8HI 0 "register_operand" "=x")
7042 (match_operand:V8HI 1 "register_operand" "x")
7043 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7044 (parallel [(const_int 4) (const_int 12)
7045 (const_int 5) (const_int 13)
7046 (const_int 6) (const_int 14)
7047 (const_int 7) (const_int 15)])))]
7049 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
7050 [(set_attr "type" "sselog")
7051 (set_attr "prefix" "vex")
7052 (set_attr "mode" "TI")])
;; Two-operand halfword interleave, emitted as punpckhwd; operand 1 must be
;; the destination register ("0").
;; The selection list alternates indices 4..7 with 12..15, pairing each
;; index i with i + 8.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the upper four elements of each input --
;; confirm.
7054 (define_insn "vec_interleave_highv8hi"
7055 [(set (match_operand:V8HI 0 "register_operand" "=x")
7058 (match_operand:V8HI 1 "register_operand" "0")
7059 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7060 (parallel [(const_int 4) (const_int 12)
7061 (const_int 5) (const_int 13)
7062 (const_int 6) (const_int 14)
7063 (const_int 7) (const_int 15)])))]
7065 "punpckhwd\t{%2, %0|%0, %2}"
7066 [(set_attr "type" "sselog")
7067 (set_attr "prefix_data16" "1")
7068 (set_attr "mode" "TI")])
;; VEX three-operand halfword interleave, emitted as vpunpcklwd.
;; The selection list alternates indices 0..3 with 8..11, pairing each
;; index i with i + 8.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the lower four elements of each input --
;; confirm.
7070 (define_insn "*avx_interleave_lowv8hi"
7071 [(set (match_operand:V8HI 0 "register_operand" "=x")
7074 (match_operand:V8HI 1 "register_operand" "x")
7075 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7076 (parallel [(const_int 0) (const_int 8)
7077 (const_int 1) (const_int 9)
7078 (const_int 2) (const_int 10)
7079 (const_int 3) (const_int 11)])))]
7081 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
7082 [(set_attr "type" "sselog")
7083 (set_attr "prefix" "vex")
7084 (set_attr "mode" "TI")])
;; Two-operand halfword interleave, emitted as punpcklwd; operand 1 must be
;; the destination register ("0").
;; The selection list alternates indices 0..3 with 8..11, pairing each
;; index i with i + 8.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the lower four elements of each input --
;; confirm.
7086 (define_insn "vec_interleave_lowv8hi"
7087 [(set (match_operand:V8HI 0 "register_operand" "=x")
7090 (match_operand:V8HI 1 "register_operand" "0")
7091 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7092 (parallel [(const_int 0) (const_int 8)
7093 (const_int 1) (const_int 9)
7094 (const_int 2) (const_int 10)
7095 (const_int 3) (const_int 11)])))]
7097 "punpcklwd\t{%2, %0|%0, %2}"
7098 [(set_attr "type" "sselog")
7099 (set_attr "prefix_data16" "1")
7100 (set_attr "mode" "TI")])
;; VEX three-operand doubleword interleave, emitted as vpunpckhdq.
;; The selection list is 2, 6, 3, 7, pairing each index i with i + 4.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the upper two elements of each input --
;; confirm.
7102 (define_insn "*avx_interleave_highv4si"
7103 [(set (match_operand:V4SI 0 "register_operand" "=x")
7106 (match_operand:V4SI 1 "register_operand" "x")
7107 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7108 (parallel [(const_int 2) (const_int 6)
7109 (const_int 3) (const_int 7)])))]
7111 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
7112 [(set_attr "type" "sselog")
7113 (set_attr "prefix" "vex")
7114 (set_attr "mode" "TI")])
;; Two-operand doubleword interleave, emitted as punpckhdq; operand 1 must
;; be the destination register ("0").
;; The selection list is 2, 6, 3, 7, pairing each index i with i + 4.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the upper two elements of each input --
;; confirm.
7116 (define_insn "vec_interleave_highv4si"
7117 [(set (match_operand:V4SI 0 "register_operand" "=x")
7120 (match_operand:V4SI 1 "register_operand" "0")
7121 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7122 (parallel [(const_int 2) (const_int 6)
7123 (const_int 3) (const_int 7)])))]
7125 "punpckhdq\t{%2, %0|%0, %2}"
7126 [(set_attr "type" "sselog")
7127 (set_attr "prefix_data16" "1")
7128 (set_attr "mode" "TI")])
;; VEX three-operand doubleword interleave, emitted as vpunpckldq.
;; The selection list is 0, 4, 1, 5, pairing each index i with i + 4.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the lower two elements of each input --
;; confirm.
7130 (define_insn "*avx_interleave_lowv4si"
7131 [(set (match_operand:V4SI 0 "register_operand" "=x")
7134 (match_operand:V4SI 1 "register_operand" "x")
7135 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7136 (parallel [(const_int 0) (const_int 4)
7137 (const_int 1) (const_int 5)])))]
7139 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
7140 [(set_attr "type" "sselog")
7141 (set_attr "prefix" "vex")
7142 (set_attr "mode" "TI")])
;; Two-operand doubleword interleave, emitted as punpckldq; operand 1 must
;; be the destination register ("0").
;; The selection list is 0, 4, 1, 5, pairing each index i with i + 4.
;; NOTE(review): assuming the indices address operands 1 and 2 placed end
;; to end, this interleaves the lower two elements of each input --
;; confirm.
7144 (define_insn "vec_interleave_lowv4si"
7145 [(set (match_operand:V4SI 0 "register_operand" "=x")
7148 (match_operand:V4SI 1 "register_operand" "0")
7149 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7150 (parallel [(const_int 0) (const_int 4)
7151 (const_int 1) (const_int 5)])))]
7153 "punpckldq\t{%2, %0|%0, %2}"
7154 [(set_attr "type" "sselog")
7155 (set_attr "prefix_data16" "1")
7156 (set_attr "mode" "TI")])
;; VEX element insert for the SSEMODE124 modes, emitted as
;; vpinsr<ssevecsize>.  The pattern is a vec_merge of a broadcast of scalar
;; operand 2 into vector operand 1; operand 3 is the merge mask and must be
;; a single power of two (const_pow2_1_to_<pinsrbits>_operand).
;; The output code rewrites operand 3 in place from that one-hot mask to
;; the element index with exact_log2, since the template prints it as the
;; instruction's immediate.  A memory source is printed as %2; a register
;; source is printed as %k2 (its 32-bit register name).
;; The prefix_extra attribute is chosen by testing whether operand 0 is
;; V8HI.
7158 (define_insn "*avx_pinsr<ssevecsize>"
7159 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
7160 (vec_merge:SSEMODE124
7161 (vec_duplicate:SSEMODE124
7162 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
7163 (match_operand:SSEMODE124 1 "register_operand" "x")
7164 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
7167 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7168 if (MEM_P (operands[2]))
7169 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7171 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7173 [(set_attr "type" "sselog")
7174 (set (attr "prefix_extra")
7175 (if_then_else (match_operand:V8HI 0 "" "")
7177 (const_string "1")))
7178 (set_attr "length_immediate" "1")
7179 (set_attr "prefix" "vex")
7180 (set_attr "mode" "TI")])
;; Byte insert into a V16QI register, emitted as pinsrb.  Operand 2 is the
;; QI scalar (register or memory), operand 1 is the vector being updated
;; and must be the destination register, and operand 3 is a one-hot merge
;; mask (const_pow2_1_to_32768_operand).
;; The output code rewrites operand 3 in place from the mask to the element
;; index with exact_log2.  A memory source is printed as %2; a register
;; source is printed as %k2 (its 32-bit register name).
7182 (define_insn "*sse4_1_pinsrb"
7183 [(set (match_operand:V16QI 0 "register_operand" "=x")
7185 (vec_duplicate:V16QI
7186 (match_operand:QI 2 "nonimmediate_operand" "rm"))
7187 (match_operand:V16QI 1 "register_operand" "0")
7188 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
7191 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7192 if (MEM_P (operands[2]))
7193 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
7195 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
7197 [(set_attr "type" "sselog")
7198 (set_attr "prefix_extra" "1")
7199 (set_attr "length_immediate" "1")
7200 (set_attr "mode" "TI")])
;; Halfword insert into a V8HI register, emitted as pinsrw.  Operand 2 is
;; the HI scalar (register or memory), operand 1 is the vector being
;; updated and must be the destination register, and operand 3 is a one-hot
;; merge mask (const_pow2_1_to_128_operand).
;; The output code rewrites operand 3 in place from the mask to the element
;; index with exact_log2.  A memory source is printed as %2; a register
;; source is printed as %k2 (its 32-bit register name).
7202 (define_insn "*sse2_pinsrw"
7203 [(set (match_operand:V8HI 0 "register_operand" "=x")
7206 (match_operand:HI 2 "nonimmediate_operand" "rm"))
7207 (match_operand:V8HI 1 "register_operand" "0")
7208 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
7211 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7212 if (MEM_P (operands[2]))
7213 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
7215 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
7217 [(set_attr "type" "sselog")
7218 (set_attr "prefix_data16" "1")
7219 (set_attr "length_immediate" "1")
7220 (set_attr "mode" "TI")])
7222 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert into a V4SI register, emitted as pinsrd.  Operand 2 is
;; the SI scalar (register or memory), operand 1 is the vector being
;; updated and must be the destination register, and operand 3 is a one-hot
;; merge mask (const_pow2_1_to_8_operand) that the output code rewrites in
;; place to the element index with exact_log2.
7223 (define_insn "*sse4_1_pinsrd"
7224 [(set (match_operand:V4SI 0 "register_operand" "=x")
7227 (match_operand:SI 2 "nonimmediate_operand" "rm"))
7228 (match_operand:V4SI 1 "register_operand" "0")
7229 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
7232 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7233 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
7235 [(set_attr "type" "sselog")
7236 (set_attr "prefix_extra" "1")
7237 (set_attr "length_immediate" "1")
7238 (set_attr "mode" "TI")])
;; VEX quadword insert into a V2DI register, emitted as vpinsrq; 64-bit
;; targets with AVX only.  Operand 2 is the DI scalar (register or memory),
;; operand 1 is the source vector (any SSE register), and operand 3 is a
;; one-hot merge mask (const_pow2_1_to_2_operand) that the output code
;; rewrites in place to the element index with exact_log2.
7240 (define_insn "*avx_pinsrq"
7241 [(set (match_operand:V2DI 0 "register_operand" "=x")
7244 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7245 (match_operand:V2DI 1 "register_operand" "x")
7246 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7247 "TARGET_AVX && TARGET_64BIT"
7249 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7250 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7252 [(set_attr "type" "sselog")
7253 (set_attr "prefix_extra" "1")
7254 (set_attr "length_immediate" "1")
7255 (set_attr "prefix" "vex")
7256 (set_attr "mode" "TI")])
;; Quadword insert into a V2DI register, emitted as pinsrq; 64-bit targets
;; with SSE4.1 only.  Operand 2 is the DI scalar (register or memory),
;; operand 1 is the vector being updated and must be the destination
;; register, and operand 3 is a one-hot merge mask
;; (const_pow2_1_to_2_operand) that the output code rewrites in place to
;; the element index with exact_log2.
7258 (define_insn "*sse4_1_pinsrq"
7259 [(set (match_operand:V2DI 0 "register_operand" "=x")
7262 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7263 (match_operand:V2DI 1 "register_operand" "0")
7264 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7265 "TARGET_SSE4_1 && TARGET_64BIT"
7267 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7268 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7270 [(set_attr "type" "sselog")
7271 (set_attr "prefix_rex" "1")
7272 (set_attr "prefix_extra" "1")
7273 (set_attr "length_immediate" "1")
7274 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register into a general register of an SWI48
;; mode, emitted as pextrb.  Operand 2 is the element index (0..15).  The
;; destination is printed with %k (its 32-bit register name) whatever the
;; iterator mode is.
;; NOTE(review): the %v in the template pairs with the "maybe_vex" prefix
;; attribute, presumably selecting the VEX mnemonic under AVX -- confirm.
7276 (define_insn "*sse4_1_pextrb_<mode>"
7277 [(set (match_operand:SWI48 0 "register_operand" "=r")
7280 (match_operand:V16QI 1 "register_operand" "x")
7281 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7283 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7284 [(set_attr "type" "sselog")
7285 (set_attr "prefix_extra" "1")
7286 (set_attr "length_immediate" "1")
7287 (set_attr "prefix" "maybe_vex")
7288 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register straight to a QI memory location,
;; emitted as pextrb.  Operand 2 is the element index (0..15).
7290 (define_insn "*sse4_1_pextrb_memory"
7291 [(set (match_operand:QI 0 "memory_operand" "=m")
7293 (match_operand:V16QI 1 "register_operand" "x")
7294 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7296 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7297 [(set_attr "type" "sselog")
7298 (set_attr "prefix_extra" "1")
7299 (set_attr "length_immediate" "1")
7300 (set_attr "prefix" "maybe_vex")
7301 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register into a general register of an
;; SWI48 mode, emitted as pextrw.  Operand 2 is the element index (0..7).
;; The destination is printed with %k (its 32-bit register name) whatever
;; the iterator mode is.
7303 (define_insn "*sse2_pextrw_<mode>"
7304 [(set (match_operand:SWI48 0 "register_operand" "=r")
7307 (match_operand:V8HI 1 "register_operand" "x")
7308 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7310 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7311 [(set_attr "type" "sselog")
7312 (set_attr "prefix_data16" "1")
7313 (set_attr "length_immediate" "1")
7314 (set_attr "prefix" "maybe_vex")
7315 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register straight to an HI memory location,
;; emitted as pextrw.  Operand 2 is the element index (0..7).
7317 (define_insn "*sse4_1_pextrw_memory"
7318 [(set (match_operand:HI 0 "memory_operand" "=m")
7320 (match_operand:V8HI 1 "register_operand" "x")
7321 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7323 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7324 [(set_attr "type" "sselog")
7325 (set_attr "prefix_extra" "1")
7326 (set_attr "length_immediate" "1")
7327 (set_attr "prefix" "maybe_vex")
7328 (set_attr "mode" "TI")])
;; Doubleword extract from a V4SI register into an SI general register or
;; memory location, emitted as pextrd.  Operand 2 is the element index
;; (0..3).
7330 (define_insn "*sse4_1_pextrd"
7331 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7333 (match_operand:V4SI 1 "register_operand" "x")
7334 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7336 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7337 [(set_attr "type" "sselog")
7338 (set_attr "prefix_extra" "1")
7339 (set_attr "length_immediate" "1")
7340 (set_attr "prefix" "maybe_vex")
7341 (set_attr "mode" "TI")])
;; Doubleword extract from a V4SI register into a DI general register;
;; 64-bit targets with SSE4.1 only.  Emitted as pextrd with the destination
;; printed as %k0 (its 32-bit register name).  Operand 2 is the element
;; index (0..3).
7343 (define_insn "*sse4_1_pextrd_zext"
7344 [(set (match_operand:DI 0 "register_operand" "=r")
7347 (match_operand:V4SI 1 "register_operand" "x")
7348 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7349 "TARGET_64BIT && TARGET_SSE4_1"
7350 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7351 [(set_attr "type" "sselog")
7352 (set_attr "prefix_extra" "1")
7353 (set_attr "length_immediate" "1")
7354 (set_attr "prefix" "maybe_vex")
7355 (set_attr "mode" "TI")])
7357 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Quadword extract from a V2DI register into a DI general register or
;; memory location, emitted as pextrq; 64-bit targets with SSE4.1 only.
;; Operand 2 is the element index (0..1).
7358 (define_insn "*sse4_1_pextrq"
7359 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7361 (match_operand:V2DI 1 "register_operand" "x")
7362 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7363 "TARGET_SSE4_1 && TARGET_64BIT"
7364 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7365 [(set_attr "type" "sselog")
7366 (set_attr "prefix_rex" "1")
7367 (set_attr "prefix_extra" "1")
7368 (set_attr "length_immediate" "1")
7369 (set_attr "prefix" "maybe_vex")
7370 (set_attr "mode" "TI")])
;; Expander taking a V4SI destination, a V4SI source and an integer
;; shuffle mask (operand 2).  The mask is split into four 2-bit selectors
;; (bits 0-1, 2-3, 4-5 and 6-7), which are passed as separate constants to
;; gen_sse2_pshufd_1.
7372 (define_expand "sse2_pshufd"
7373 [(match_operand:V4SI 0 "register_operand" "")
7374 (match_operand:V4SI 1 "nonimmediate_operand" "")
7375 (match_operand:SI 2 "const_int_operand" "")]
7378 int mask = INTVAL (operands[2]);
7379 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7380 GEN_INT ((mask >> 0) & 3),
7381 GEN_INT ((mask >> 2) & 3),
7382 GEN_INT ((mask >> 4) & 3),
7383 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with one selector operand per result element (operands
;; 2..5, each 0..3), emitted as pshufd.
;; The output code packs the four selectors back into a single immediate,
;; two bits each with operand 2 in the lowest bits, and stores it in
;; operands[2], which is what the template prints.
7387 (define_insn "sse2_pshufd_1"
7388 [(set (match_operand:V4SI 0 "register_operand" "=x")
7390 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7391 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7392 (match_operand 3 "const_0_to_3_operand" "")
7393 (match_operand 4 "const_0_to_3_operand" "")
7394 (match_operand 5 "const_0_to_3_operand" "")])))]
7398 mask |= INTVAL (operands[2]) << 0;
7399 mask |= INTVAL (operands[3]) << 2;
7400 mask |= INTVAL (operands[4]) << 4;
7401 mask |= INTVAL (operands[5]) << 6;
7402 operands[2] = GEN_INT (mask);
7404 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7406 [(set_attr "type" "sselog1")
7407 (set_attr "prefix_data16" "1")
7408 (set_attr "prefix" "maybe_vex")
7409 (set_attr "length_immediate" "1")
7410 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and an integer
;; shuffle mask (operand 2).  The mask is split into four 2-bit selectors
;; (bits 0-1, 2-3, 4-5 and 6-7), which are passed as separate constants to
;; gen_sse2_pshuflw_1.
7412 (define_expand "sse2_pshuflw"
7413 [(match_operand:V8HI 0 "register_operand" "")
7414 (match_operand:V8HI 1 "nonimmediate_operand" "")
7415 (match_operand:SI 2 "const_int_operand" "")]
7418 int mask = INTVAL (operands[2]);
7419 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7420 GEN_INT ((mask >> 0) & 3),
7421 GEN_INT ((mask >> 2) & 3),
7422 GEN_INT ((mask >> 4) & 3),
7423 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose four variable selectors (operands 2..5) are each
;; restricted to 0..3.  The output code packs them into one immediate
;; (two bits each, operand 2 lowest) and prints %vpshuflw.  The encoding
;; attributes request a rep prefix and no data16 prefix.
7427 (define_insn "sse2_pshuflw_1"
7428 [(set (match_operand:V8HI 0 "register_operand" "=x")
7430 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7431 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7432 (match_operand 3 "const_0_to_3_operand" "")
7433 (match_operand 4 "const_0_to_3_operand" "")
7434 (match_operand 5 "const_0_to_3_operand" "")
7442 mask |= INTVAL (operands[2]) << 0;
7443 mask |= INTVAL (operands[3]) << 2;
7444 mask |= INTVAL (operands[4]) << 4;
7445 mask |= INTVAL (operands[5]) << 6;
7446 operands[2] = GEN_INT (mask);
7448 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7450 [(set_attr "type" "sselog")
7451 (set_attr "prefix_data16" "0")
7452 (set_attr "prefix_rep" "1")
7453 (set_attr "prefix" "maybe_vex")
7454 (set_attr "length_immediate" "1")
7455 (set_attr "mode" "TI")])
;; Expander for the single-immediate form of pshufhw.  Each 2-bit field of
;; the constant in operand 2 is biased by +4 before being passed to
;; gen_sse2_pshufhw_1, so the resulting selectors lie in 4..7.
7457 (define_expand "sse2_pshufhw"
7458 [(match_operand:V8HI 0 "register_operand" "")
7459 (match_operand:V8HI 1 "nonimmediate_operand" "")
7460 (match_operand:SI 2 "const_int_operand" "")]
7463 int mask = INTVAL (operands[2]);
7464 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7465 GEN_INT (((mask >> 0) & 3) + 4),
7466 GEN_INT (((mask >> 2) & 3) + 4),
7467 GEN_INT (((mask >> 4) & 3) + 4),
7468 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose selection vector starts with the constant 0 and ends
;; with four variable selectors (operands 2..5), each restricted to 4..7.
;; The output code removes the +4 bias from every selector, packs the
;; results into one immediate (two bits each) and prints %vpshufhw.
7472 (define_insn "sse2_pshufhw_1"
7473 [(set (match_operand:V8HI 0 "register_operand" "=x")
7475 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7476 (parallel [(const_int 0)
7480 (match_operand 2 "const_4_to_7_operand" "")
7481 (match_operand 3 "const_4_to_7_operand" "")
7482 (match_operand 4 "const_4_to_7_operand" "")
7483 (match_operand 5 "const_4_to_7_operand" "")])))]
7487 mask |= (INTVAL (operands[2]) - 4) << 0;
7488 mask |= (INTVAL (operands[3]) - 4) << 2;
7489 mask |= (INTVAL (operands[4]) - 4) << 4;
7490 mask |= (INTVAL (operands[5]) - 4) << 6;
7491 operands[2] = GEN_INT (mask);
7493 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7495 [(set_attr "type" "sselog")
7496 (set_attr "prefix_rep" "1")
7497 (set_attr "prefix_data16" "0")
7498 (set_attr "prefix" "maybe_vex")
7499 (set_attr "length_immediate" "1")
7500 (set_attr "mode" "TI")])
;; Expander that builds a V4SI register value from the SI operand 1.
;; The preparation statement supplies operand 2 as the all-zero V4SI
;; constant (CONST0_RTX (V4SImode)).
7502 (define_expand "sse2_loadd"
7503 [(set (match_operand:V4SI 0 "register_operand" "")
7506 (match_operand:SI 1 "nonimmediate_operand" ""))
7510 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX-encoded insertion of the SI operand 2 into a V4SI value whose other
;; input is operand 1 (a register or zero, per reg_or_0_operand).
;; Alternatives: vmovd from memory, vmovd from a general register, and a
;; three-operand vmovss when operand 2 is already in an SSE register.
7512 (define_insn "*avx_loadld"
7513 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7516 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7517 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7521 vmovd\t{%2, %0|%0, %2}
7522 vmovd\t{%2, %0|%0, %2}
7523 vmovss\t{%2, %1, %0|%0, %1, %2}"
7524 [(set_attr "type" "ssemov")
7525 (set_attr "prefix" "vex")
7526 (set_attr "mode" "TI,TI,V4SF")])
;; Non-VEX counterpart of the SI-into-V4SI load.  Four alternatives:
;; movd from memory, movd from a general register, movss from memory, and
;; movss from an SSE register with operand 1 tied to the destination
;; (constraint "0").
7528 (define_insn "sse2_loadld"
7529 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7532 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7533 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7537 movd\t{%2, %0|%0, %2}
7538 movd\t{%2, %0|%0, %2}
7539 movss\t{%2, %0|%0, %2}
7540 movss\t{%2, %0|%0, %2}"
7541 [(set_attr "type" "ssemov")
7542 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of the V4SI register operand 1 into an SI destination.
;; Once reload has completed, and provided inter-unit moves are enabled or
;; the destination is memory or a non-general register, the insn is split
;; into a plain SI move whose source is the SImode register with the same
;; register number as operand 1.
7544 (define_insn_and_split "sse2_stored"
7545 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7547 (match_operand:V4SI 1 "register_operand" "x,Yi")
7548 (parallel [(const_int 0)])))]
7551 "&& reload_completed
7552 && (TARGET_INTER_UNIT_MOVES
7553 || MEM_P (operands [0])
7554 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7555 [(set (match_dup 0) (match_dup 1))]
7556 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) from a V4SI value in offsettable
;; memory into an SI general register.  The split code emits an ordinary
;; SImode move from operand 1's address advanced by i*4 bytes.
7558 (define_insn_and_split "*vec_ext_v4si_mem"
7559 [(set (match_operand:SI 0 "register_operand" "=r")
7561 (match_operand:V4SI 1 "memory_operand" "o")
7562 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7568 int i = INTVAL (operands[2]);
7570 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of the V2DI register operand 1 into the
;; DI destination operand 0 (register or memory).
7574 (define_expand "sse_storeq"
7575 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7577 (match_operand:V2DI 1 "register_operand" "")
7578 (parallel [(const_int 0)])))]
;; 64-bit-only extraction of element 0 of a V2DI value into a DI
;; destination; memory-to-memory is rejected by the insn condition.
;; The third alternative (memory source, general-register destination)
;; prints %vmov{q}; the first two alternatives leave type, prefix and mode
;; unset ("*").
7581 (define_insn "*sse2_storeq_rex64"
7582 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7584 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7585 (parallel [(const_int 0)])))]
7586 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7590 %vmov{q}\t{%1, %0|%0, %1}"
7591 [(set_attr "type" "*,*,imov")
7592 (set_attr "prefix" "*,*,maybe_vex")
7593 (set_attr "mode" "*,*,DI")])
;; Extraction of element 0 of the V2DI register operand 1 into a DI
;; destination in memory or an SSE register.
;; The same extraction shape then appears a second time with empty
;; constraints: when inter-unit moves are enabled, or the destination is
;; memory or a non-general register, that form is replaced by a plain move
;; whose source is the DImode register with operand 1's register number.
;; NOTE(review): the second form reads like a separate split pattern --
;; confirm where its opening line sits.
7595 (define_insn "*sse2_storeq"
7596 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7598 (match_operand:V2DI 1 "register_operand" "x")
7599 (parallel [(const_int 0)])))]
7604 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7606 (match_operand:V2DI 1 "register_operand" "")
7607 (parallel [(const_int 0)])))]
7610 && (TARGET_INTER_UNIT_MOVES
7611 || MEM_P (operands [0])
7612 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7613 [(set (match_dup 0) (match_dup 1))]
7614 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; VEX forms for extracting element 1 of a V2DI value into a DI
;; destination; memory-to-memory is excluded.  Alternatives:
;;   0: vmovhps store to memory
;;   1: vpsrldq by 8 bytes, register to register
;;   2: vmovq from offsettable memory, operand printed with %H
;;   3: vmov{q} from offsettable memory into a general register
7616 (define_insn "*vec_extractv2di_1_rex64_avx"
7617 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7619 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7620 (parallel [(const_int 1)])))]
7623 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7625 vmovhps\t{%1, %0|%0, %1}
7626 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7627 vmovq\t{%H1, %0|%0, %H1}
7628 vmov{q}\t{%H1, %0|%0, %H1}"
7629 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7630 (set_attr "length_immediate" "*,1,*,*")
7631 (set_attr "memory" "*,none,*,*")
7632 (set_attr "prefix" "vex")
7633 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Non-VEX, 64-bit forms for extracting element 1 of a V2DI value into a
;; DI destination; memory-to-memory is excluded.  Alternative 1 ties the
;; source to the destination ("0") so psrldq can shift in place; the two
;; memory-source alternatives print operand 1 with %H.
7635 (define_insn "*vec_extractv2di_1_rex64"
7636 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7638 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7639 (parallel [(const_int 1)])))]
7640 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7642 movhps\t{%1, %0|%0, %1}
7643 psrldq\t{$8, %0|%0, 8}
7644 movq\t{%H1, %0|%0, %H1}
7645 mov{q}\t{%H1, %0|%0, %H1}"
7646 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7647 (set_attr "length_immediate" "*,1,*,*")
7648 (set_attr "memory" "*,none,*,*")
7649 (set_attr "mode" "V2SF,TI,TI,DI")])
;; VEX forms for extracting element 1 of a V2DI value when the destination
;; is memory or an SSE register (no general-register alternative).
;; Uses vmovhps, vpsrldq by 8 bytes, or vmovq from %H1 respectively.
7651 (define_insn "*vec_extractv2di_1_avx"
7652 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7654 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7655 (parallel [(const_int 1)])))]
7658 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7660 vmovhps\t{%1, %0|%0, %1}
7661 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7662 vmovq\t{%H1, %0|%0, %H1}"
7663 [(set_attr "type" "ssemov,sseishft1,ssemov")
7664 (set_attr "length_immediate" "*,1,*")
7665 (set_attr "memory" "*,none,*")
7666 (set_attr "prefix" "vex")
7667 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 forms for extracting element 1 of a V2DI value into memory or an
;; SSE register; requires TARGET_SSE2 and excludes memory-to-memory.
;; The register-to-register alternative ties source and destination and
;; shifts in place with psrldq.
7669 (define_insn "*vec_extractv2di_1_sse2"
7670 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7672 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7673 (parallel [(const_int 1)])))]
7675 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7677 movhps\t{%1, %0|%0, %1}
7678 psrldq\t{$8, %0|%0, 8}
7679 movq\t{%H1, %0|%0, %H1}"
7680 [(set_attr "type" "ssemov,sseishft1,ssemov")
7681 (set_attr "length_immediate" "*,1,*")
7682 (set_attr "memory" "*,none,*")
7683 (set_attr "mode" "V2SF,TI,TI")])
7685 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 fallback for extracting element 1 of a V2DI value:
;; movhps to memory, movhlps between registers, or movlps from %H1.
;; Enabled only when TARGET_SSE is set and TARGET_SSE2 is not.
7686 (define_insn "*vec_extractv2di_1_sse"
7687 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7689 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7690 (parallel [(const_int 1)])))]
7691 "!TARGET_SSE2 && TARGET_SSE
7692 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7694 movhps\t{%1, %0|%0, %1}
7695 movhlps\t{%1, %0|%0, %1}
7696 movlps\t{%H1, %0|%0, %H1}"
7697 [(set_attr "type" "ssemov")
7698 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX broadcast of the SI operand 1 into a V4SI register: vpshufd with
;; immediate 0 when the source is in an SSE register, vbroadcastss when it
;; is in memory.
;; NOTE(review): operand 1's predicate is register_operand although the
;; second alternative's constraint is "m" -- confirm this is intended.
7700 (define_insn "*vec_dupv4si_avx"
7701 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7703 (match_operand:SI 1 "register_operand" "x,m")))]
7706 vpshufd\t{$0, %1, %0|%0, %1, 0}
7707 vbroadcastss\t{%1, %0|%0, %1}"
7708 [(set_attr "type" "sselog1,ssemov")
7709 (set_attr "length_immediate" "1,0")
7710 (set_attr "prefix_extra" "0,1")
7711 (set_attr "prefix" "vex")
7712 (set_attr "mode" "TI,V4SF")])
;; Broadcast of the SI register operand 1 into a V4SI register.
;; Alternative 0 prints %vpshufd with immediate 0; alternative 1 ties the
;; source to the destination and prints shufps with immediate 0 on %0.
7714 (define_insn "*vec_dupv4si"
7715 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7717 (match_operand:SI 1 "register_operand" " Y2,0")))]
7720 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7721 shufps\t{$0, %0, %0|%0, %0, 0}"
7722 [(set_attr "type" "sselog1")
7723 (set_attr "length_immediate" "1")
7724 (set_attr "mode" "TI,V4SF")])
;; VEX broadcast of the DI operand 1 into a V2DI register: vpunpcklqdq of
;; the source register with itself, or vmovddup from memory.
7726 (define_insn "*vec_dupv2di_avx"
7727 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7729 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7732 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7733 vmovddup\t{%1, %0|%0, %1}"
7734 [(set_attr "type" "sselog1")
7735 (set_attr "prefix" "vex")
7736 (set_attr "mode" "TI,DF")])
;; Broadcast of the DI operand 1 into a V2DI register, with two
;; alternatives (modes TI and DF): source tied to the destination, or
;; source in memory.  The template line shown below is movddup.
7738 (define_insn "*vec_dupv2di_sse3"
7739 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7741 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7745 movddup\t{%1, %0|%0, %1}"
7746 [(set_attr "type" "sselog1")
7747 (set_attr "mode" "TI,DF")])
;; Broadcast of the DI register operand 1 into a V2DI register.  Both
;; alternatives tie the source to the destination ("0"); they are typed
;; sselog1 (mode TI) and ssemov (mode V4SF) respectively.
7749 (define_insn "*vec_dupv2di"
7750 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7752 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7757 [(set_attr "type" "sselog1,ssemov")
7758 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from the two SI operands 1 and 2.  Alternatives 0-2
;; target SSE registers with VEX encodings (vpinsrd with immediate 1,
;; vpunpckldq, vmovd); alternatives 3-4 target MMX registers (punpckldq,
;; movd) and are therefore given prefix "orig" instead of "vex".
7760 (define_insn "*vec_concatv2si_avx"
7761 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7763 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7764 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7767 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7768 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7769 vmovd\t{%1, %0|%0, %1}
7770 punpckldq\t{%2, %0|%0, %2}
7771 movd\t{%1, %0|%0, %1}"
7772 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7773 (set_attr "prefix_extra" "1,*,*,*,*")
7774 (set_attr "length_immediate" "1,*,*,*,*")
7775 (set (attr "prefix")
7776 (if_then_else (eq_attr "alternative" "3,4")
7777 (const_string "orig")
7778 (const_string "vex")))
7779 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Non-VEX construction of a V2SI value from two SI operands.  The SSE
;; alternatives use pinsrd with immediate 1, punpckldq (both with operand
;; 1 tied to the destination) or movd; the last two alternatives are the
;; MMX-register forms (punpckldq, movd).
7781 (define_insn "*vec_concatv2si_sse4_1"
7782 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7784 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7785 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7788 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7789 punpckldq\t{%2, %0|%0, %2}
7790 movd\t{%1, %0|%0, %1}
7791 punpckldq\t{%2, %0|%0, %2}
7792 movd\t{%1, %0|%0, %1}"
7793 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7794 (set_attr "prefix_extra" "1,*,*,*,*")
7795 (set_attr "length_immediate" "1,*,*,*,*")
7796 (set_attr "mode" "TI,TI,TI,DI,DI")])
7798 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7799 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7800 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Four alternatives: punpckldq / movd into an SSE register, then
;; punpckldq / movd into an MMX register.  Operand 2 is limited to a
;; register or zero (reg_or_0_operand), as the note above explains.
7801 (define_insn "*vec_concatv2si_sse2"
7802 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7804 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7805 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7808 punpckldq\t{%2, %0|%0, %2}
7809 movd\t{%1, %0|%0, %1}
7810 punpckldq\t{%2, %0|%0, %2}
7811 movd\t{%1, %0|%0, %1}"
7812 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7813 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI construction using single-precision instructions for the SSE
;; register alternatives (unpcklps, movss from memory) and punpckldq /
;; movd for the MMX register alternatives.
7815 (define_insn "*vec_concatv2si_sse"
7816 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7818 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7819 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7822 unpcklps\t{%2, %0|%0, %2}
7823 movss\t{%1, %0|%0, %1}
7824 punpckldq\t{%2, %0|%0, %2}
7825 movd\t{%1, %0|%0, %1}"
7826 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7827 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; VEX construction of a V4SI register from two V2SI operands:
;; vpunpcklqdq when operand 2 is a register, vmovhps when it is in memory.
7829 (define_insn "*vec_concatv4si_1_avx"
7830 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7832 (match_operand:V2SI 1 "register_operand" " x,x")
7833 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7836 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7837 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7838 [(set_attr "type" "sselog,ssemov")
7839 (set_attr "prefix" "vex")
7840 (set_attr "mode" "TI,V2SF")])
;; Non-VEX construction of a V4SI register from two V2SI operands, with
;; operand 1 tied to the destination in every alternative: punpcklqdq,
;; movlhps (register operand 2) or movhps (memory operand 2).
7842 (define_insn "*vec_concatv4si_1"
7843 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7845 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7846 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7849 punpcklqdq\t{%2, %0|%0, %2}
7850 movlhps\t{%2, %0|%0, %2}
7851 movhps\t{%2, %0|%0, %2}"
7852 [(set_attr "type" "sselog,ssemov,ssemov")
7853 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX construction of a V2DI register from two DI operands.
;; Alternatives: vmovq from memory, movq2dq from an MMX register,
;; vpunpcklqdq of two SSE registers, vmovhps with operand 2 in memory.
;; Only the movq2dq alternative (1) keeps the legacy "orig" prefix.
7855 (define_insn "*vec_concatv2di_avx"
7856 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7858 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7859 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7860 "!TARGET_64BIT && TARGET_AVX"
7862 vmovq\t{%1, %0|%0, %1}
7863 movq2dq\t{%1, %0|%0, %1}
7864 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7865 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7866 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7867 (set (attr "prefix")
7868 (if_then_else (eq_attr "alternative" "1")
7869 (const_string "orig")
7870 (const_string "vex")))
7871 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit SSE construction of a V2DI register from two DI operands.
;; Alternatives: movq, movq2dq from an MMX register, then punpcklqdq,
;; movlhps and movhps with operand 1 tied to the destination.
7873 (define_insn "vec_concatv2di"
7874 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7876 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7877 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7878 "!TARGET_64BIT && TARGET_SSE"
7880 movq\t{%1, %0|%0, %1}
7881 movq2dq\t{%1, %0|%0, %1}
7882 punpcklqdq\t{%2, %0|%0, %2}
7883 movlhps\t{%2, %0|%0, %2}
7884 movhps\t{%2, %0|%0, %2}"
7885 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7886 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX construction of a V2DI register from two DI operands.
;; Alternatives: vpinsrq with immediate 1, vmovq from an SSE register or
;; memory, vmovq from a general register, movq2dq from an MMX register,
;; vpunpcklqdq, and vmovhps.  Only the movq2dq alternative (3) keeps the
;; legacy "orig" prefix.
7888 (define_insn "*vec_concatv2di_rex64_avx"
7889 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7891 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7892 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7893 "TARGET_64BIT && TARGET_AVX"
7895 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7896 vmovq\t{%1, %0|%0, %1}
7897 vmovq\t{%1, %0|%0, %1}
7898 movq2dq\t{%1, %0|%0, %1}
7899 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7900 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7901 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7902 (set_attr "prefix_extra" "1,*,*,*,*,*")
7903 (set_attr "length_immediate" "1,*,*,*,*,*")
7904 (set (attr "prefix")
7905 (if_then_else (eq_attr "alternative" "3")
7906 (const_string "orig")
7907 (const_string "vex")))
7908 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 construction of a V2DI register from two DI operands.
;; Alternatives: pinsrq with immediate 1, two movq forms, movq2dq,
;; punpcklqdq, movlhps and movhps.  prefix_rex is forced to 1 for the
;; pinsrq alternative (0) and for the general-register movq (2).
7910 (define_insn "*vec_concatv2di_rex64_sse4_1"
7911 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7913 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7914 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7915 "TARGET_64BIT && TARGET_SSE4_1"
7917 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7918 movq\t{%1, %0|%0, %1}
7919 movq\t{%1, %0|%0, %1}
7920 movq2dq\t{%1, %0|%0, %1}
7921 punpcklqdq\t{%2, %0|%0, %2}
7922 movlhps\t{%2, %0|%0, %2}
7923 movhps\t{%2, %0|%0, %2}"
7924 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7925 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7926 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7927 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7928 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit SSE construction of a V2DI register from two DI operands without
;; SSE4.1 insertion: two movq forms, movq2dq, punpcklqdq, movlhps, movhps.
;; prefix_rex is forced to 1 only for the general-register movq (1).
7930 (define_insn "*vec_concatv2di_rex64_sse"
7931 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7933 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7934 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7935 "TARGET_64BIT && TARGET_SSE"
7937 movq\t{%1, %0|%0, %1}
7938 movq\t{%1, %0|%0, %1}
7939 movq2dq\t{%1, %0|%0, %1}
7940 punpcklqdq\t{%2, %0|%0, %2}
7941 movlhps\t{%2, %0|%0, %2}
7942 movhps\t{%2, %0|%0, %2}"
7943 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7944 (set_attr "prefix_rex" "*,1,*,*,*,*")
7945 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander vec_unpacku_hi_v16qi: V16QI register in (operand 1), V8HI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (true, true) -- presumably unsigned / high half; confirm against
;; the helpers.
7947 (define_expand "vec_unpacku_hi_v16qi"
7948 [(match_operand:V8HI 0 "register_operand" "")
7949 (match_operand:V16QI 1 "register_operand" "")]
7953 ix86_expand_sse4_unpack (operands, true, true);
7955 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v16qi: V16QI register in (operand 1), V8HI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (false, true) -- presumably signed / high half; confirm against
;; the helpers.
7959 (define_expand "vec_unpacks_hi_v16qi"
7960 [(match_operand:V8HI 0 "register_operand" "")
7961 (match_operand:V16QI 1 "register_operand" "")]
7965 ix86_expand_sse4_unpack (operands, false, true);
7967 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v16qi: V16QI register in (operand 1), V8HI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (true, false) -- presumably unsigned / low half; confirm against
;; the helpers.
7971 (define_expand "vec_unpacku_lo_v16qi"
7972 [(match_operand:V8HI 0 "register_operand" "")
7973 (match_operand:V16QI 1 "register_operand" "")]
7977 ix86_expand_sse4_unpack (operands, true, false);
7979 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v16qi: V16QI register in (operand 1), V8HI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (false, false) -- presumably signed / low half; confirm against
;; the helpers.
7983 (define_expand "vec_unpacks_lo_v16qi"
7984 [(match_operand:V8HI 0 "register_operand" "")
7985 (match_operand:V16QI 1 "register_operand" "")]
7989 ix86_expand_sse4_unpack (operands, false, false);
7991 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v8hi: V8HI register in (operand 1), V4SI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (true, true) -- presumably unsigned / high half; confirm against
;; the helpers.
7995 (define_expand "vec_unpacku_hi_v8hi"
7996 [(match_operand:V4SI 0 "register_operand" "")
7997 (match_operand:V8HI 1 "register_operand" "")]
8001 ix86_expand_sse4_unpack (operands, true, true);
8003 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v8hi: V8HI register in (operand 1), V4SI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (false, true) -- presumably signed / high half; confirm against
;; the helpers.
8007 (define_expand "vec_unpacks_hi_v8hi"
8008 [(match_operand:V4SI 0 "register_operand" "")
8009 (match_operand:V8HI 1 "register_operand" "")]
8013 ix86_expand_sse4_unpack (operands, false, true);
8015 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v8hi: V8HI register in (operand 1), V4SI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (true, false) -- presumably unsigned / low half; confirm against
;; the helpers.
8019 (define_expand "vec_unpacku_lo_v8hi"
8020 [(match_operand:V4SI 0 "register_operand" "")
8021 (match_operand:V8HI 1 "register_operand" "")]
8025 ix86_expand_sse4_unpack (operands, true, false);
8027 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v8hi: V8HI register in (operand 1), V4SI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (false, false) -- presumably signed / low half; confirm against
;; the helpers.
8031 (define_expand "vec_unpacks_lo_v8hi"
8032 [(match_operand:V4SI 0 "register_operand" "")
8033 (match_operand:V8HI 1 "register_operand" "")]
8037 ix86_expand_sse4_unpack (operands, false, false);
8039 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v4si: V4SI register in (operand 1), V2DI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (true, true) -- presumably unsigned / high half; confirm against
;; the helpers.
8043 (define_expand "vec_unpacku_hi_v4si"
8044 [(match_operand:V2DI 0 "register_operand" "")
8045 (match_operand:V4SI 1 "register_operand" "")]
8049 ix86_expand_sse4_unpack (operands, true, true);
8051 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v4si: V4SI register in (operand 1), V2DI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (false, true) -- presumably signed / high half; confirm against
;; the helpers.
8055 (define_expand "vec_unpacks_hi_v4si"
8056 [(match_operand:V2DI 0 "register_operand" "")
8057 (match_operand:V4SI 1 "register_operand" "")]
8061 ix86_expand_sse4_unpack (operands, false, true);
8063 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v4si: V4SI register in (operand 1), V2DI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (true, false) -- presumably unsigned / low half; confirm against
;; the helpers.
8067 (define_expand "vec_unpacku_lo_v4si"
8068 [(match_operand:V2DI 0 "register_operand" "")
8069 (match_operand:V4SI 1 "register_operand" "")]
8073 ix86_expand_sse4_unpack (operands, true, false);
8075 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v4si: V4SI register in (operand 1), V2DI
;; register out (operand 0).  Expansion is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack, both called with the
;; flags (false, false) -- presumably signed / low half; confirm against
;; the helpers.
8079 (define_expand "vec_unpacks_lo_v4si"
8080 [(match_operand:V2DI 0 "register_operand" "")
8081 (match_operand:V4SI 1 "register_operand" "")]
8085 ix86_expand_sse4_unpack (operands, false, false);
8087 ix86_expand_sse_unpack (operands, false, false);
8091 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8095 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI unsigned-average pattern.  The RTL combines
;; operands 1 and 2 with a constant vector of sixteen 1s; the preparation
;; statement canonicalises the operands through
;; ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands).
8097 (define_expand "sse2_uavgv16qi3"
8098 [(set (match_operand:V16QI 0 "register_operand" "")
8104 (match_operand:V16QI 1 "nonimmediate_operand" ""))
8106 (match_operand:V16QI 2 "nonimmediate_operand" "")))
8107 (const_vector:V16QI [(const_int 1) (const_int 1)
8108 (const_int 1) (const_int 1)
8109 (const_int 1) (const_int 1)
8110 (const_int 1) (const_int 1)
8111 (const_int 1) (const_int 1)
8112 (const_int 1) (const_int 1)
8113 (const_int 1) (const_int 1)
8114 (const_int 1) (const_int 1)]))
8117 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI unsigned average: three-operand vpavgb.
;; Operand 1 is marked commutative ("%"), and the insn is accepted only
;; when ix86_binary_operator_ok (PLUS, V16QImode, operands) holds.
8119 (define_insn "*avx_uavgv16qi3"
8120 [(set (match_operand:V16QI 0 "register_operand" "=x")
8126 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
8128 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
8129 (const_vector:V16QI [(const_int 1) (const_int 1)
8130 (const_int 1) (const_int 1)
8131 (const_int 1) (const_int 1)
8132 (const_int 1) (const_int 1)
8133 (const_int 1) (const_int 1)
8134 (const_int 1) (const_int 1)
8135 (const_int 1) (const_int 1)
8136 (const_int 1) (const_int 1)]))
8138 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8139 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
8140 [(set_attr "type" "sseiadd")
8141 (set_attr "prefix" "vex")
8142 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI unsigned average: two-operand pavgb with
;; operand 1 tied to the destination ("%0", commutative).  Accepted only
;; when ix86_binary_operator_ok (PLUS, V16QImode, operands) holds.
8144 (define_insn "*sse2_uavgv16qi3"
8145 [(set (match_operand:V16QI 0 "register_operand" "=x")
8151 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
8153 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
8154 (const_vector:V16QI [(const_int 1) (const_int 1)
8155 (const_int 1) (const_int 1)
8156 (const_int 1) (const_int 1)
8157 (const_int 1) (const_int 1)
8158 (const_int 1) (const_int 1)
8159 (const_int 1) (const_int 1)
8160 (const_int 1) (const_int 1)
8161 (const_int 1) (const_int 1)]))
8163 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8164 "pavgb\t{%2, %0|%0, %2}"
8165 [(set_attr "type" "sseiadd")
8166 (set_attr "prefix_data16" "1")
8167 (set_attr "mode" "TI")])
;; Expander for the V8HI unsigned-average pattern.  The RTL combines
;; operands 1 and 2 with a constant vector of eight 1s; the preparation
;; statement canonicalises the operands through
;; ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands).
8169 (define_expand "sse2_uavgv8hi3"
8170 [(set (match_operand:V8HI 0 "register_operand" "")
8176 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8178 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8179 (const_vector:V8HI [(const_int 1) (const_int 1)
8180 (const_int 1) (const_int 1)
8181 (const_int 1) (const_int 1)
8182 (const_int 1) (const_int 1)]))
8185 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI unsigned average: three-operand vpavgw.
;; Operand 1 is marked commutative ("%"), and the insn is accepted only
;; when ix86_binary_operator_ok (PLUS, V8HImode, operands) holds.
8187 (define_insn "*avx_uavgv8hi3"
8188 [(set (match_operand:V8HI 0 "register_operand" "=x")
8194 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8196 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8197 (const_vector:V8HI [(const_int 1) (const_int 1)
8198 (const_int 1) (const_int 1)
8199 (const_int 1) (const_int 1)
8200 (const_int 1) (const_int 1)]))
8202 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8203 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
8204 [(set_attr "type" "sseiadd")
8205 (set_attr "prefix" "vex")
8206 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI unsigned average: two-operand pavgw with
;; operand 1 tied to the destination ("%0", commutative).  Accepted only
;; when ix86_binary_operator_ok (PLUS, V8HImode, operands) holds.
8208 (define_insn "*sse2_uavgv8hi3"
8209 [(set (match_operand:V8HI 0 "register_operand" "=x")
8215 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8217 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8218 (const_vector:V8HI [(const_int 1) (const_int 1)
8219 (const_int 1) (const_int 1)
8220 (const_int 1) (const_int 1)
8221 (const_int 1) (const_int 1)]))
8223 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8224 "pavgw\t{%2, %0|%0, %2}"
8225 [(set_attr "type" "sseiadd")
8226 (set_attr "prefix_data16" "1")
8227 (set_attr "mode" "TI")])
8229 ;; The correct representation for this is absolutely enormous, and
8230 ;; surely not generally useful.
;; Hence the operation is modelled as an opaque unspec taking two V16QI
;; inputs and producing a V2DI result.  This is the three-operand VEX
;; form, printed as vpsadbw.
8231 (define_insn "*avx_psadbw"
8232 [(set (match_operand:V2DI 0 "register_operand" "=x")
8233 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
8234 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8237 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8238 [(set_attr "type" "sseiadd")
8239 (set_attr "prefix" "vex")
8240 (set_attr "mode" "TI")])
;; Two-operand psadbw, modelled as an unspec of two V16QI inputs yielding
;; a V2DI result.  Operand 1 is tied to the destination ("0").
8242 (define_insn "sse2_psadbw"
8243 [(set (match_operand:V2DI 0 "register_operand" "=x")
8244 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8245 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8248 "psadbw\t{%2, %0|%0, %2}"
8249 [(set_attr "type" "sseiadd")
8250 (set_attr "atom_unit" "simul")
8251 (set_attr "prefix_data16" "1")
8252 (set_attr "mode" "TI")])
;; 256-bit vmovmskps/vmovmskpd: produces an SI general-register result
;; from an AVX256MODEF2P register operand.  Enabled when
;; AVX256_VEC_FLOAT_MODE_P holds for the iterated mode.
8254 (define_insn "avx_movmsk<ssemodesuffix>256"
8255 [(set (match_operand:SI 0 "register_operand" "=r")
8257 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8259 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8260 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8261 [(set_attr "type" "ssecvt")
8262 (set_attr "prefix" "vex")
8263 (set_attr "mode" "<MODE>")])
;; 128-bit movmskps/movmskpd (VEX-prefixed when applicable via %v):
;; produces an SI general-register result from an SSEMODEF2P register
;; operand.  Enabled when SSE_VEC_FLOAT_MODE_P holds for the iterated mode.
8265 (define_insn "<sse>_movmsk<ssemodesuffix>"
8266 [(set (match_operand:SI 0 "register_operand" "=r")
8268 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8270 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8271 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8272 [(set_attr "type" "ssemov")
8273 (set_attr "prefix" "maybe_vex")
8274 (set_attr "mode" "<MODE>")])
;; pmovmskb: produces an SI general-register result from a V16QI SSE
;; register operand, modelled as an unspec.
8276 (define_insn "sse2_pmovmskb"
8277 [(set (match_operand:SI 0 "register_operand" "=r")
8278 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8281 "%vpmovmskb\t{%1, %0|%0, %1}"
8282 [(set_attr "type" "ssemov")
8283 (set_attr "prefix_data16" "1")
8284 (set_attr "prefix" "maybe_vex")
8285 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination (operand 0) set
;; from an unspec that takes the two V16QI register operands 1 and 2.
8287 (define_expand "sse2_maskmovdqu"
8288 [(set (match_operand:V16QI 0 "memory_operand" "")
8289 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8290 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The destination address lives in the SImode
;; register operand 0 (constraint "D") and never appears in the printed
;; template; the old memory contents are an input of the unspec as well.
;; length_vex is fixed at 3 because the implicit address register would
;; otherwise confuse the default computation.
8295 (define_insn "*sse2_maskmovdqu"
8296 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8297 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8298 (match_operand:V16QI 2 "register_operand" "x")
8299 (mem:V16QI (match_dup 0))]
8301 "TARGET_SSE2 && !TARGET_64BIT"
8302 ;; @@@ check ordering of operands in intel/nonintel syntax
8303 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8304 [(set_attr "type" "ssemov")
8305 (set_attr "prefix_data16" "1")
8306 ;; The implicit %rdi operand confuses default length_vex computation.
8307 (set_attr "length_vex" "3")
8308 (set_attr "prefix" "maybe_vex")
8309 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern, but the address
;; register operand 0 is DImode.  length_vex is computed by hand: 3 + 1
;; when operand 2's register number is at or above FIRST_REX_SSE_REG,
;; otherwise 2 + 1.
8311 (define_insn "*sse2_maskmovdqu_rex64"
8312 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8313 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8314 (match_operand:V16QI 2 "register_operand" "x")
8315 (mem:V16QI (match_dup 0))]
8317 "TARGET_SSE2 && TARGET_64BIT"
8318 ;; @@@ check ordering of operands in intel/nonintel syntax
8319 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8320 [(set_attr "type" "ssemov")
8321 (set_attr "prefix_data16" "1")
8322 ;; The implicit %rdi operand confuses default length_vex computation.
8323 (set (attr "length_vex")
8324 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8325 (set_attr "prefix" "maybe_vex")
8326 (set_attr "mode" "TI")])
;; ldmxcsr: a volatile unspec that reads the SI memory operand 0.
;; Tagged as a memory load and as an MXCSR access for Atom scheduling.
8328 (define_insn "sse_ldmxcsr"
8329 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8333 [(set_attr "type" "sse")
8334 (set_attr "atom_sse_attr" "mxcsr")
8335 (set_attr "prefix" "maybe_vex")
8336 (set_attr "memory" "load")])
;; stmxcsr: stores the result of the volatile unspec UNSPECV_STMXCSR into
;; the SI memory operand 0.  Tagged as a memory store and as an MXCSR
;; access for Atom scheduling.
8338 (define_insn "sse_stmxcsr"
8339 [(set (match_operand:SI 0 "memory_operand" "=m")
8340 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8343 [(set_attr "type" "sse")
8344 (set_attr "atom_sse_attr" "mxcsr")
8345 (set_attr "prefix" "maybe_vex")
8346 (set_attr "memory" "store")])
;; Expander for sfence (available with SSE or 3DNow!A).  The fence is
;; expressed as an UNSPEC_SFENCE over operand 0, which the preparation code
;; creates as a volatile BLKmode MEM with a scratch address.
8348 (define_expand "sse_sfence"
8350 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8351 "TARGET_SSE || TARGET_3DNOW_A"
8353 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8354 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the sfence RTL: a BLKmode operand set from UNSPEC_SFENCE of
;; itself.  No address bytes are encoded (length_address 0) and the memory
;; effect is declared unknown.
8357 (define_insn "*sse_sfence"
8358 [(set (match_operand:BLK 0 "" "")
8359 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8360 "TARGET_SSE || TARGET_3DNOW_A"
8362 [(set_attr "type" "sse")
8363 (set_attr "length_address" "0")
8364 (set_attr "atom_sse_attr" "fence")
8365 (set_attr "memory" "unknown")])
;; clflush: a volatile unspec taking an address operand (constraint "p").
;; Scheduled as a fence on Atom, with unknown memory effect.
8367 (define_insn "sse2_clflush"
8368 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8372 [(set_attr "type" "sse")
8373 (set_attr "atom_sse_attr" "fence")
8374 (set_attr "memory" "unknown")])
;; Expander for mfence.  The fence is expressed as an UNSPEC_MFENCE over
;; operand 0, which the preparation code creates as a volatile BLKmode MEM
;; with a scratch address.
8376 (define_expand "sse2_mfence"
8378 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8381 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8382 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the mfence RTL: a BLKmode operand set from UNSPEC_MFENCE of
;; itself.  Enabled for any 64-bit target or when SSE2 is available.
8385 (define_insn "*sse2_mfence"
8386 [(set (match_operand:BLK 0 "" "")
8387 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8388 "TARGET_64BIT || TARGET_SSE2"
8390 [(set_attr "type" "sse")
8391 (set_attr "length_address" "0")
8392 (set_attr "atom_sse_attr" "fence")
8393 (set_attr "memory" "unknown")])
;; Expander for lfence.  The fence is expressed as an UNSPEC_LFENCE over
;; operand 0, which the preparation code creates as a volatile BLKmode MEM
;; with a scratch address.
8395 (define_expand "sse2_lfence"
8397 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8400 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8401 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the lfence RTL: a BLKmode operand set from UNSPEC_LFENCE of
;; itself.  Unlike the sfence/mfence matchers, the Atom attribute here is
;; "lfence" rather than "fence".
8404 (define_insn "*sse2_lfence"
8405 [(set (match_operand:BLK 0 "" "")
8406 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8409 [(set_attr "type" "sse")
8410 (set_attr "length_address" "0")
8411 (set_attr "atom_sse_attr" "lfence")
8412 (set_attr "memory" "unknown")])
;; mwait: a volatile unspec with two SImode register operands pinned by
;; the "a" and "c" constraints.  The same SImode pattern serves 64-bit
;; code, as the note below explains.  Encoded length is 3 bytes.
8414 (define_insn "sse3_mwait"
8415 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8416 (match_operand:SI 1 "register_operand" "c")]
8419 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8420 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8421 ;; we only need to set up 32bit registers.
8423 [(set_attr "length" "3")])
;; 32-bit monitor: a volatile unspec with three SImode register operands
;; pinned by the "a", "c" and "d" constraints.  Encoded length is 3 bytes.
8425 (define_insn "sse3_monitor"
8426 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8427 (match_operand:SI 1 "register_operand" "c")
8428 (match_operand:SI 2 "register_operand" "d")]
8430 "TARGET_SSE3 && !TARGET_64BIT"
8431 "monitor\t%0, %1, %2"
8432 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 (constraint "a") is DImode, while operands 1
;; and 2 (constraints "c" and "d") stay SImode for the reason given in the
;; note below.  Encoded length is 3 bytes.
8434 (define_insn "sse3_monitor64"
8435 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8436 (match_operand:SI 1 "register_operand" "c")
8437 (match_operand:SI 2 "register_operand" "d")]
8439 "TARGET_SSE3 && TARGET_64BIT"
8440 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8441 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8442 ;; zero extended to 64bit, we only need to set up 32bit registers.
8444 [(set_attr "length" "3")])
8446 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8448 ;; SSSE3 instructions
8450 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX horizontal add of HI elements, printed as three-operand vphaddw.
;; The RTL selects adjacent element pairs (0/1, 2/3, 4/5, 6/7), first from
;; the register operand 1 and then from operand 2 (register or memory).
8452 (define_insn "*avx_phaddwv8hi3"
8453 [(set (match_operand:V8HI 0 "register_operand" "=x")
8459 (match_operand:V8HI 1 "register_operand" "x")
8460 (parallel [(const_int 0)]))
8461 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8463 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8464 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8467 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8468 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8470 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8471 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8476 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8477 (parallel [(const_int 0)]))
8478 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8480 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8481 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8484 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8485 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8490 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8491 [(set_attr "type" "sseiadd")
8492 (set_attr "prefix_extra" "1")
8493 (set_attr "prefix" "vex")
8494 (set_attr "mode" "TI")])
;; SSSE3 horizontal add of HI elements on 128-bit registers, printed as
;; two-operand phaddw with operand 1 tied to the destination ("0").
;; The RTL selects adjacent element pairs (0/1 .. 6/7) of operand 1 and
;; then of operand 2.
8496 (define_insn "ssse3_phaddwv8hi3"
8497 [(set (match_operand:V8HI 0 "register_operand" "=x")
8503 (match_operand:V8HI 1 "register_operand" "0")
8504 (parallel [(const_int 0)]))
8505 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8507 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8511 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8512 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8514 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8515 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8520 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8521 (parallel [(const_int 0)]))
8522 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8524 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8525 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8528 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8529 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8531 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8532 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8534 "phaddw\t{%2, %0|%0, %2}"
8535 [(set_attr "type" "sseiadd")
8536 (set_attr "atom_unit" "complex")
8537 (set_attr "prefix_data16" "1")
8538 (set_attr "prefix_extra" "1")
8539 (set_attr "mode" "TI")])
;; 64-bit (V4HI) form of "phaddw" using MMX registers (constraint "y").
;; Operand 1 is tied to the destination; operand 2 is a register or memory.
;; HI elements are selected in adjacent pairs (0/1, 2/3) from each source.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8541 (define_insn "ssse3_phaddwv4hi3"
8542 [(set (match_operand:V4HI 0 "register_operand" "=y")
8547 (match_operand:V4HI 1 "register_operand" "0")
8548 (parallel [(const_int 0)]))
8549 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8551 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8552 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8556 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8557 (parallel [(const_int 0)]))
8558 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8560 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8561 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8563 "phaddw\t{%2, %0|%0, %2}"
8564 [(set_attr "type" "sseiadd")
8565 (set_attr "atom_unit" "complex")
8566 (set_attr "prefix_extra" "1")
8567 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8568 (set_attr "mode" "DI")])
;; VEX-encoded three-operand "vphaddd" on V4SI.  Unlike the SSSE3 form,
;; operand 1 is an independent register ("x") rather than tied to the
;; destination.  SI elements are selected in adjacent pairs (0/1, 2/3) from
;; operand 1 and then from operand 2 (register or memory).
8570 (define_insn "*avx_phadddv4si3"
8571 [(set (match_operand:V4SI 0 "register_operand" "=x")
8576 (match_operand:V4SI 1 "register_operand" "x")
8577 (parallel [(const_int 0)]))
8578 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8580 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8581 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8585 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8586 (parallel [(const_int 0)]))
8587 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8589 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8590 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8592 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8593 [(set_attr "type" "sseiadd")
8594 (set_attr "prefix_extra" "1")
8595 (set_attr "prefix" "vex")
8596 (set_attr "mode" "TI")])
;; SSSE3 two-operand "phaddd" on V4SI.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be an SSE register or memory.
;; SI elements are selected in adjacent pairs (0/1, 2/3) from each source.
8598 (define_insn "ssse3_phadddv4si3"
8599 [(set (match_operand:V4SI 0 "register_operand" "=x")
8604 (match_operand:V4SI 1 "register_operand" "0")
8605 (parallel [(const_int 0)]))
8606 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8608 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8609 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8613 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8614 (parallel [(const_int 0)]))
8615 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8617 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8618 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8620 "phaddd\t{%2, %0|%0, %2}"
8621 [(set_attr "type" "sseiadd")
8622 (set_attr "atom_unit" "complex")
8623 (set_attr "prefix_data16" "1")
8624 (set_attr "prefix_extra" "1")
8625 (set_attr "mode" "TI")])
;; 64-bit (V2SI) form of "phaddd" using MMX registers (constraint "y").
;; Operand 1 is tied to the destination; operand 2 is a register or memory.
;; Each source contributes its SI elements 0 and 1.
8627 (define_insn "ssse3_phadddv2si3"
8628 [(set (match_operand:V2SI 0 "register_operand" "=y")
8632 (match_operand:V2SI 1 "register_operand" "0")
8633 (parallel [(const_int 0)]))
8634 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8637 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8638 (parallel [(const_int 0)]))
8639 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8641 "phaddd\t{%2, %0|%0, %2}"
8642 [(set_attr "type" "sseiadd")
8643 (set_attr "atom_unit" "complex")
8644 (set_attr "prefix_extra" "1")
8645 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8646 (set_attr "mode" "DI")])
;; VEX-encoded three-operand "vphaddsw" on V8HI.  Operand 1 is an
;; independent SSE register; operand 2 is a register or memory.  HI elements
;; are selected in adjacent pairs (0/1 ... 6/7) from operand 1, then from
;; operand 2.
8648 (define_insn "*avx_phaddswv8hi3"
8649 [(set (match_operand:V8HI 0 "register_operand" "=x")
8655 (match_operand:V8HI 1 "register_operand" "x")
8656 (parallel [(const_int 0)]))
8657 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8659 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8660 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8663 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8664 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8666 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8667 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8672 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8673 (parallel [(const_int 0)]))
8674 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8676 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8677 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8680 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8681 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8683 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8684 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8686 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8687 [(set_attr "type" "sseiadd")
8688 (set_attr "prefix_extra" "1")
8689 (set_attr "prefix" "vex")
8690 (set_attr "mode" "TI")])
;; SSSE3 two-operand "phaddsw" on V8HI.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be an SSE register or memory.
;; HI elements are selected in adjacent pairs (0/1 ... 6/7) from each source.
8692 (define_insn "ssse3_phaddswv8hi3"
8693 [(set (match_operand:V8HI 0 "register_operand" "=x")
8699 (match_operand:V8HI 1 "register_operand" "0")
8700 (parallel [(const_int 0)]))
8701 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8703 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8704 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8707 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8708 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8710 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8711 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8716 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8717 (parallel [(const_int 0)]))
8718 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8720 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8721 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8724 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8725 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8727 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8728 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8730 "phaddsw\t{%2, %0|%0, %2}"
8731 [(set_attr "type" "sseiadd")
8732 (set_attr "atom_unit" "complex")
8733 (set_attr "prefix_data16" "1")
8734 (set_attr "prefix_extra" "1")
8735 (set_attr "mode" "TI")])
;; 64-bit (V4HI) form of "phaddsw" using MMX registers (constraint "y").
;; Operand 1 is tied to the destination; operand 2 is a register or memory.
;; HI elements are selected in adjacent pairs (0/1, 2/3) from each source.
8737 (define_insn "ssse3_phaddswv4hi3"
8738 [(set (match_operand:V4HI 0 "register_operand" "=y")
8743 (match_operand:V4HI 1 "register_operand" "0")
8744 (parallel [(const_int 0)]))
8745 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8747 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8748 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8752 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8753 (parallel [(const_int 0)]))
8754 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8756 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8757 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8759 "phaddsw\t{%2, %0|%0, %2}"
8760 [(set_attr "type" "sseiadd")
8761 (set_attr "atom_unit" "complex")
8762 (set_attr "prefix_extra" "1")
8763 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8764 (set_attr "mode" "DI")])
;; VEX-encoded three-operand "vphsubw" on V8HI.  Operand 1 is an
;; independent SSE register; operand 2 is a register or memory.  HI elements
;; are selected in adjacent pairs (0/1 ... 6/7) from operand 1, then from
;; operand 2.
8766 (define_insn "*avx_phsubwv8hi3"
8767 [(set (match_operand:V8HI 0 "register_operand" "=x")
8773 (match_operand:V8HI 1 "register_operand" "x")
8774 (parallel [(const_int 0)]))
8775 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8777 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8778 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8781 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8782 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8784 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8785 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8790 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8791 (parallel [(const_int 0)]))
8792 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8794 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8795 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8798 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8799 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8801 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8802 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8804 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8805 [(set_attr "type" "sseiadd")
8806 (set_attr "prefix_extra" "1")
8807 (set_attr "prefix" "vex")
8808 (set_attr "mode" "TI")])
;; SSSE3 two-operand "phsubw" on V8HI.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be an SSE register or memory.
;; HI elements are selected in adjacent pairs (0/1 ... 6/7) from each source.
8810 (define_insn "ssse3_phsubwv8hi3"
8811 [(set (match_operand:V8HI 0 "register_operand" "=x")
8817 (match_operand:V8HI 1 "register_operand" "0")
8818 (parallel [(const_int 0)]))
8819 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8821 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8822 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8825 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8826 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8828 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8829 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8834 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8835 (parallel [(const_int 0)]))
8836 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8838 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8839 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8842 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8843 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8845 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8846 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8848 "phsubw\t{%2, %0|%0, %2}"
8849 [(set_attr "type" "sseiadd")
8850 (set_attr "atom_unit" "complex")
8851 (set_attr "prefix_data16" "1")
8852 (set_attr "prefix_extra" "1")
8853 (set_attr "mode" "TI")])
;; 64-bit (V4HI) form of "phsubw" using MMX registers (constraint "y").
;; Operand 1 is tied to the destination; operand 2 is a register or memory.
;; HI elements are selected in adjacent pairs (0/1, 2/3) from each source.
8855 (define_insn "ssse3_phsubwv4hi3"
8856 [(set (match_operand:V4HI 0 "register_operand" "=y")
8861 (match_operand:V4HI 1 "register_operand" "0")
8862 (parallel [(const_int 0)]))
8863 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8865 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8866 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8870 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8871 (parallel [(const_int 0)]))
8872 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8874 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8875 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8877 "phsubw\t{%2, %0|%0, %2}"
8878 [(set_attr "type" "sseiadd")
8879 (set_attr "atom_unit" "complex")
8880 (set_attr "prefix_extra" "1")
8881 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8882 (set_attr "mode" "DI")])
;; VEX-encoded three-operand "vphsubd" on V4SI.  Operand 1 is an
;; independent SSE register; operand 2 is a register or memory.  SI elements
;; are selected in adjacent pairs (0/1, 2/3) from operand 1, then operand 2.
8884 (define_insn "*avx_phsubdv4si3"
8885 [(set (match_operand:V4SI 0 "register_operand" "=x")
8890 (match_operand:V4SI 1 "register_operand" "x")
8891 (parallel [(const_int 0)]))
8892 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8894 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8895 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8899 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8900 (parallel [(const_int 0)]))
8901 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8903 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8904 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8906 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8907 [(set_attr "type" "sseiadd")
8908 (set_attr "prefix_extra" "1")
8909 (set_attr "prefix" "vex")
8910 (set_attr "mode" "TI")])
;; SSSE3 two-operand "phsubd" on V4SI.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be an SSE register or memory.
;; SI elements are selected in adjacent pairs (0/1, 2/3) from each source.
8912 (define_insn "ssse3_phsubdv4si3"
8913 [(set (match_operand:V4SI 0 "register_operand" "=x")
8918 (match_operand:V4SI 1 "register_operand" "0")
8919 (parallel [(const_int 0)]))
8920 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8922 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8923 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8927 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8928 (parallel [(const_int 0)]))
8929 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8931 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8932 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8934 "phsubd\t{%2, %0|%0, %2}"
8935 [(set_attr "type" "sseiadd")
8936 (set_attr "atom_unit" "complex")
8937 (set_attr "prefix_data16" "1")
8938 (set_attr "prefix_extra" "1")
8939 (set_attr "mode" "TI")])
;; 64-bit (V2SI) form of "phsubd" using MMX registers (constraint "y").
;; Operand 1 is tied to the destination; operand 2 is a register or memory.
;; Each source contributes its SI elements 0 and 1.
8941 (define_insn "ssse3_phsubdv2si3"
8942 [(set (match_operand:V2SI 0 "register_operand" "=y")
8946 (match_operand:V2SI 1 "register_operand" "0")
8947 (parallel [(const_int 0)]))
8948 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8951 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8952 (parallel [(const_int 0)]))
8953 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8955 "phsubd\t{%2, %0|%0, %2}"
8956 [(set_attr "type" "sseiadd")
8957 (set_attr "atom_unit" "complex")
8958 (set_attr "prefix_extra" "1")
8959 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8960 (set_attr "mode" "DI")])
;; VEX-encoded three-operand "vphsubsw" on V8HI.  Operand 1 is an
;; independent SSE register; operand 2 is a register or memory.  HI elements
;; are selected in adjacent pairs (0/1 ... 6/7) from operand 1, then from
;; operand 2.
8962 (define_insn "*avx_phsubswv8hi3"
8963 [(set (match_operand:V8HI 0 "register_operand" "=x")
8969 (match_operand:V8HI 1 "register_operand" "x")
8970 (parallel [(const_int 0)]))
8971 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8973 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8974 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8977 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8978 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8980 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8981 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8986 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8987 (parallel [(const_int 0)]))
8988 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8990 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8991 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8994 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8995 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8997 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8998 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
9000 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
9001 [(set_attr "type" "sseiadd")
9002 (set_attr "prefix_extra" "1")
9003 (set_attr "prefix" "vex")
9004 (set_attr "mode" "TI")])
;; SSSE3 two-operand "phsubsw" on V8HI.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be an SSE register or memory.
;; HI elements are selected in adjacent pairs (0/1 ... 6/7) from each source.
9006 (define_insn "ssse3_phsubswv8hi3"
9007 [(set (match_operand:V8HI 0 "register_operand" "=x")
9013 (match_operand:V8HI 1 "register_operand" "0")
9014 (parallel [(const_int 0)]))
9015 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
9017 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
9018 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
9021 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
9022 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
9024 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
9025 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
9030 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9031 (parallel [(const_int 0)]))
9032 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
9034 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
9035 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
9038 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
9039 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
9041 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
9042 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
9044 "phsubsw\t{%2, %0|%0, %2}"
9045 [(set_attr "type" "sseiadd")
9046 (set_attr "atom_unit" "complex")
9047 (set_attr "prefix_data16" "1")
9048 (set_attr "prefix_extra" "1")
9049 (set_attr "mode" "TI")])
;; 64-bit (V4HI) form of "phsubsw" using MMX registers (constraint "y").
;; Operand 1 is tied to the destination; operand 2 is a register or memory.
;; HI elements are selected in adjacent pairs (0/1, 2/3) from each source.
9051 (define_insn "ssse3_phsubswv4hi3"
9052 [(set (match_operand:V4HI 0 "register_operand" "=y")
9057 (match_operand:V4HI 1 "register_operand" "0")
9058 (parallel [(const_int 0)]))
9059 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
9061 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
9062 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
9066 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
9067 (parallel [(const_int 0)]))
9068 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
9070 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
9071 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
9073 "phsubsw\t{%2, %0|%0, %2}"
9074 [(set_attr "type" "sseiadd")
9075 (set_attr "atom_unit" "complex")
9076 (set_attr "prefix_extra" "1")
9077 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9078 (set_attr "mode" "DI")])
;; VEX-encoded three-operand "vpmaddubsw": V16QI sources, V8HI result.
;; Operand 1 is an independent SSE register; operand 2 is a register or
;; memory.  Each source is selected twice: once with a selector starting at
;; element 0 and once (via match_dup) with a selector starting at element 1.
;; NOTE(review): the match_dup selections are given mode V16QI although the
;; selector starts at 1 and ends at 15 -- confirm the vec_select mode matches
;; the number of selected elements.
9080 (define_insn "*avx_pmaddubsw128"
9081 [(set (match_operand:V8HI 0 "register_operand" "=x")
9086 (match_operand:V16QI 1 "register_operand" "x")
9087 (parallel [(const_int 0)
9097 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9098 (parallel [(const_int 0)
9108 (vec_select:V16QI (match_dup 1)
9109 (parallel [(const_int 1)
9118 (vec_select:V16QI (match_dup 2)
9119 (parallel [(const_int 1)
9126 (const_int 15)]))))))]
9128 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9129 [(set_attr "type" "sseiadd")
9130 (set_attr "prefix_extra" "1")
9131 (set_attr "prefix" "vex")
9132 (set_attr "mode" "TI")])
;; SSSE3 two-operand "pmaddubsw": V16QI sources, V8HI result.  Operand 1 is
;; tied to the destination (constraint "0"); operand 2 is a register or
;; memory.  Each source is selected twice: once with a selector starting at
;; element 0 and once (via match_dup) with a selector starting at element 1.
;; NOTE(review): the match_dup selections are given mode V16QI although the
;; selector starts at 1 and ends at 15 -- confirm the vec_select mode matches
;; the number of selected elements.
9134 (define_insn "ssse3_pmaddubsw128"
9135 [(set (match_operand:V8HI 0 "register_operand" "=x")
9140 (match_operand:V16QI 1 "register_operand" "0")
9141 (parallel [(const_int 0)
9151 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9152 (parallel [(const_int 0)
9162 (vec_select:V16QI (match_dup 1)
9163 (parallel [(const_int 1)
9172 (vec_select:V16QI (match_dup 2)
9173 (parallel [(const_int 1)
9180 (const_int 15)]))))))]
9182 "pmaddubsw\t{%2, %0|%0, %2}"
9183 [(set_attr "type" "sseiadd")
9184 (set_attr "atom_unit" "simul")
9185 (set_attr "prefix_data16" "1")
9186 (set_attr "prefix_extra" "1")
9187 (set_attr "mode" "TI")])
;; 64-bit form of "pmaddubsw" using MMX registers (constraint "y"):
;; V8QI sources, V4HI result.  Operand 1 is tied to the destination;
;; operand 2 is a register or memory.  Each source is selected twice, with
;; selectors starting at element 0 and (via match_dup) at element 1.
;; NOTE(review): the match_dup selections are given mode V8QI although the
;; selector starts at 1 and ends at 7 -- confirm the vec_select mode matches
;; the number of selected elements.
9189 (define_insn "ssse3_pmaddubsw"
9190 [(set (match_operand:V4HI 0 "register_operand" "=y")
9195 (match_operand:V8QI 1 "register_operand" "0")
9196 (parallel [(const_int 0)
9202 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9203 (parallel [(const_int 0)
9209 (vec_select:V8QI (match_dup 1)
9210 (parallel [(const_int 1)
9215 (vec_select:V8QI (match_dup 2)
9216 (parallel [(const_int 1)
9219 (const_int 7)]))))))]
9221 "pmaddubsw\t{%2, %0|%0, %2}"
9222 [(set_attr "type" "sseiadd")
9223 (set_attr "atom_unit" "simul")
9224 (set_attr "prefix_extra" "1")
9225 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9226 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw operation.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands) on them
;; before the RTL is generated.  The RTL includes a V8HI constant vector of
;; all ones.
9228 (define_expand "ssse3_pmulhrswv8hi3"
9229 [(set (match_operand:V8HI 0 "register_operand" "")
9236 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9238 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9240 (const_vector:V8HI [(const_int 1) (const_int 1)
9241 (const_int 1) (const_int 1)
9242 (const_int 1) (const_int 1)
9243 (const_int 1) (const_int 1)]))
9246 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; VEX-encoded three-operand "vpmulhrsw" on V8HI, enabled under TARGET_AVX
;; when ix86_binary_operator_ok accepts the operands.  The "%" on operand 1
;; marks operands 1 and 2 as commutative for register allocation.
9248 (define_insn "*avx_pmulhrswv8hi3"
9249 [(set (match_operand:V8HI 0 "register_operand" "=x")
9256 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9258 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9260 (const_vector:V8HI [(const_int 1) (const_int 1)
9261 (const_int 1) (const_int 1)
9262 (const_int 1) (const_int 1)
9263 (const_int 1) (const_int 1)]))
9265 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9266 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9267 [(set_attr "type" "sseimul")
9268 (set_attr "prefix_extra" "1")
9269 (set_attr "prefix" "vex")
9270 (set_attr "mode" "TI")])
;; SSSE3 two-operand "pmulhrsw" on V8HI, enabled under TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to the
;; destination and marked commutative with operand 2 ("%0").
9272 (define_insn "*ssse3_pmulhrswv8hi3"
9273 [(set (match_operand:V8HI 0 "register_operand" "=x")
9280 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9282 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9284 (const_vector:V8HI [(const_int 1) (const_int 1)
9285 (const_int 1) (const_int 1)
9286 (const_int 1) (const_int 1)
9287 (const_int 1) (const_int 1)]))
9289 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9290 "pmulhrsw\t{%2, %0|%0, %2}"
9291 [(set_attr "type" "sseimul")
9292 (set_attr "prefix_data16" "1")
9293 (set_attr "prefix_extra" "1")
9294 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw operation.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands) on them
;; before the RTL is generated.  The RTL includes a V4HI constant vector of
;; all ones.
9296 (define_expand "ssse3_pmulhrswv4hi3"
9297 [(set (match_operand:V4HI 0 "register_operand" "")
9304 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9306 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9308 (const_vector:V4HI [(const_int 1) (const_int 1)
9309 (const_int 1) (const_int 1)]))
9312 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; 64-bit (V4HI) "pmulhrsw" using MMX registers (constraint "y"), enabled
;; under TARGET_SSSE3 when ix86_binary_operator_ok accepts the operands.
;; Operand 1 is tied to the destination and commutative with operand 2.
9314 (define_insn "*ssse3_pmulhrswv4hi3"
9315 [(set (match_operand:V4HI 0 "register_operand" "=y")
9322 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9324 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9326 (const_vector:V4HI [(const_int 1) (const_int 1)
9327 (const_int 1) (const_int 1)]))
9329 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9330 "pmulhrsw\t{%2, %0|%0, %2}"
9331 [(set_attr "type" "sseimul")
9332 (set_attr "prefix_extra" "1")
9333 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9334 (set_attr "mode" "DI")])
;; VEX-encoded three-operand "vpshufb" on V16QI, modelled as an unspec of
;; operand 1 (SSE register) and operand 2 (register or memory).
9336 (define_insn "*avx_pshufbv16qi3"
9337 [(set (match_operand:V16QI 0 "register_operand" "=x")
9338 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9339 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9342 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9343 [(set_attr "type" "sselog1")
9344 (set_attr "prefix_extra" "1")
9345 (set_attr "prefix" "vex")
9346 (set_attr "mode" "TI")])
;; SSSE3 two-operand "pshufb" on V16QI, modelled as an unspec.  Operand 1 is
;; tied to the destination (constraint "0"); operand 2 is a register or
;; memory.
9348 (define_insn "ssse3_pshufbv16qi3"
9349 [(set (match_operand:V16QI 0 "register_operand" "=x")
9350 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9351 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9354 "pshufb\t{%2, %0|%0, %2}";
9355 [(set_attr "type" "sselog1")
9356 (set_attr "prefix_data16" "1")
9357 (set_attr "prefix_extra" "1")
9358 (set_attr "mode" "TI")])
;; 64-bit (V8QI) "pshufb" using MMX registers (constraint "y"), modelled as
;; an unspec.  Operand 1 is tied to the destination; operand 2 is a register
;; or memory.
9360 (define_insn "ssse3_pshufbv8qi3"
9361 [(set (match_operand:V8QI 0 "register_operand" "=y")
9362 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9363 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9366 "pshufb\t{%2, %0|%0, %2}";
9367 [(set_attr "type" "sselog1")
9368 (set_attr "prefix_extra" "1")
9369 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9370 (set_attr "mode" "DI")])
;; VEX-encoded three-operand "vpsign<ssevecsize>", instantiated for each mode
;; of the SSEMODE124 iterator.  Operand 1 is an SSE register; operand 2 is a
;; register or memory.
9372 (define_insn "*avx_psign<mode>3"
9373 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9375 [(match_operand:SSEMODE124 1 "register_operand" "x")
9376 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9379 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9380 [(set_attr "type" "sselog1")
9381 (set_attr "prefix_extra" "1")
9382 (set_attr "prefix" "vex")
9383 (set_attr "mode" "TI")])
;; SSSE3 two-operand "psign<ssevecsize>", instantiated for each mode of the
;; SSEMODE124 iterator.  Operand 1 is tied to the destination (constraint
;; "0"); operand 2 is a register or memory.
9385 (define_insn "ssse3_psign<mode>3"
9386 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9388 [(match_operand:SSEMODE124 1 "register_operand" "0")
9389 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9392 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9393 [(set_attr "type" "sselog1")
9394 (set_attr "prefix_data16" "1")
9395 (set_attr "prefix_extra" "1")
9396 (set_attr "mode" "TI")])
;; MMX-register variant of "psign", instantiated for each mode of the
;; MMXMODEI iterator (mnemonic suffix from <mmxvecsize>).  Operand 1 is tied
;; to the destination; operand 2 is a register or memory.
9398 (define_insn "ssse3_psign<mode>3"
9399 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9401 [(match_operand:MMXMODEI 1 "register_operand" "0")
9402 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9405 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9406 [(set_attr "type" "sselog1")
9407 (set_attr "prefix_extra" "1")
9408 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9409 (set_attr "mode" "DI")])
;; VEX-encoded "vpalignr" on TImode, modelled as an unspec.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing it as the immediate (presumably converting a bit count
;; into the byte count the instruction encodes -- confirm).
9411 (define_insn "*avx_palignrti"
9412 [(set (match_operand:TI 0 "register_operand" "=x")
9413 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9414 (match_operand:TI 2 "nonimmediate_operand" "xm")
9415 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9419 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9420 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9422 [(set_attr "type" "sseishft")
9423 (set_attr "prefix_extra" "1")
9424 (set_attr "length_immediate" "1")
9425 (set_attr "prefix" "vex")
9426 (set_attr "mode" "TI")])
;; SSSE3 two-operand "palignr" on TImode, modelled as an unspec.  Operand 1
;; is tied to the destination.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the immediate (presumably bits -> bytes -- confirm).
9428 (define_insn "ssse3_palignrti"
9429 [(set (match_operand:TI 0 "register_operand" "=x")
9430 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9431 (match_operand:TI 2 "nonimmediate_operand" "xm")
9432 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9436 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9437 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9439 [(set_attr "type" "sseishft")
9440 (set_attr "atom_unit" "sishuf")
9441 (set_attr "prefix_data16" "1")
9442 (set_attr "prefix_extra" "1")
9443 (set_attr "length_immediate" "1")
9444 (set_attr "mode" "TI")])
;; 64-bit (DImode) "palignr" using MMX registers (constraint "y"), modelled
;; as an unspec.  Operand 1 is tied to the destination.  Operand 3 is divided
;; by 8 in the output code before being printed as the immediate.
9446 (define_insn "ssse3_palignrdi"
9447 [(set (match_operand:DI 0 "register_operand" "=y")
9448 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9449 (match_operand:DI 2 "nonimmediate_operand" "ym")
9450 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9454 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9455 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9457 [(set_attr "type" "sseishft")
9458 (set_attr "atom_unit" "sishuf")
9459 (set_attr "prefix_extra" "1")
9460 (set_attr "length_immediate" "1")
9461 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9462 (set_attr "mode" "DI")])
;; Vector absolute value (RTL "abs") for each mode of the SSEMODE124
;; iterator, emitted as "pabs<ssevecsize>".  The "%v" in the template and the
;; "maybe_vex" prefix attribute let this one pattern cover both the legacy
;; and the VEX-encoded spelling.  The source may be a register or memory.
9464 (define_insn "abs<mode>2"
9465 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9466 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9468 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9469 [(set_attr "type" "sselog1")
9470 (set_attr "prefix_data16" "1")
9471 (set_attr "prefix_extra" "1")
9472 (set_attr "prefix" "maybe_vex")
9473 (set_attr "mode" "TI")])
;; Vector absolute value (RTL "abs") for each mode of the MMXMODEI iterator,
;; emitted as "pabs<mmxvecsize>" on MMX registers (constraint "y").  The
;; source may be a register or memory.
9475 (define_insn "abs<mode>2"
9476 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9477 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9479 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9480 [(set_attr "type" "sselog1")
9481 (set_attr "prefix_rep" "0")
9482 (set_attr "prefix_extra" "1")
9483 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9484 (set_attr "mode" "DI")])
9486 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9488 ;; AMD SSE4A instructions
9490 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A "movnts<suffix>" store for each MODEF mode: operand 1 (SSE
;; register) is written to memory operand 0.
9492 (define_insn "sse4a_movnt<mode>"
9493 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9495 [(match_operand:MODEF 1 "register_operand" "x")]
9498 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9499 [(set_attr "type" "ssemov")
9500 (set_attr "mode" "<MODE>")])
;; Vector-source variant of the SSE4A "movnt" store: element 0 of the
;; SSEMODEF2P register operand 1 is selected and written to the scalar-mode
;; memory operand 0.
9502 (define_insn "sse4a_vmmovnt<mode>"
9503 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9504 (unspec:<ssescalarmode>
9505 [(vec_select:<ssescalarmode>
9506 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9507 (parallel [(const_int 0)]))]
9510 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9511 [(set_attr "type" "ssemov")
9512 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of SSE4A "extrq" on V2DI, modelled as an unspec.
;; Operand 1 is tied to the destination; operands 2 and 3 are integer
;; constants printed as the two immediates (length_immediate is 2).
9514 (define_insn "sse4a_extrqi"
9515 [(set (match_operand:V2DI 0 "register_operand" "=x")
9516 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9517 (match_operand 2 "const_int_operand" "")
9518 (match_operand 3 "const_int_operand" "")]
9521 "extrq\t{%3, %2, %0|%0, %2, %3}"
9522 [(set_attr "type" "sse")
9523 (set_attr "prefix_data16" "1")
9524 (set_attr "length_immediate" "2")
9525 (set_attr "mode" "TI")])
;; Register form of SSE4A "extrq" on V2DI, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 is a V16QI SSE register.
9527 (define_insn "sse4a_extrq"
9528 [(set (match_operand:V2DI 0 "register_operand" "=x")
9529 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9530 (match_operand:V16QI 2 "register_operand" "x")]
9533 "extrq\t{%2, %0|%0, %2}"
9534 [(set_attr "type" "sse")
9535 (set_attr "prefix_data16" "1")
9536 (set_attr "mode" "TI")])
;; Immediate form of SSE4A "insertq" on V2DI, modelled as an unspec.
;; Operand 1 is tied to the destination, operand 2 is an SSE register, and
;; operands 3 and 4 are integer constants printed as the two immediates.
;; The attributes explicitly clear prefix_data16 and set prefix_rep.
9538 (define_insn "sse4a_insertqi"
9539 [(set (match_operand:V2DI 0 "register_operand" "=x")
9540 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9541 (match_operand:V2DI 2 "register_operand" "x")
9542 (match_operand 3 "const_int_operand" "")
9543 (match_operand 4 "const_int_operand" "")]
9546 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9547 [(set_attr "type" "sseins")
9548 (set_attr "prefix_data16" "0")
9549 (set_attr "prefix_rep" "1")
9550 (set_attr "length_immediate" "2")
9551 (set_attr "mode" "TI")])
;; Register form of SSE4A "insertq" on V2DI, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 is an SSE register.
;; The attributes explicitly clear prefix_data16 and set prefix_rep.
9553 (define_insn "sse4a_insertq"
9554 [(set (match_operand:V2DI 0 "register_operand" "=x")
9555 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9556 (match_operand:V2DI 2 "register_operand" "x")]
9559 "insertq\t{%2, %0|%0, %2}"
9560 [(set_attr "type" "sseins")
9561 (set_attr "prefix_data16" "0")
9562 (set_attr "prefix_rep" "1")
9563 (set_attr "mode" "TI")])
9565 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9567 ;; Intel SSE4.1 instructions
9569 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded "vblend<suffix>" for each AVXMODEF2P mode, expressed as a
;; vec_merge of operand 2 (register or memory) and operand 1 (register)
;; under the immediate mask in operand 3.  The mask range is limited by the
;; per-mode const_0_to_<blendbits>_operand predicate.
9571 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9572 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9573 (vec_merge:AVXMODEF2P
9574 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9575 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9576 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9578 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9579 [(set_attr "type" "ssemov")
9580 (set_attr "prefix_extra" "1")
9581 (set_attr "length_immediate" "1")
9582 (set_attr "prefix" "vex")
9583 (set_attr "mode" "<avxvecmode>")])
;; VEX-encoded "vblendv<suffix>" for each AVXMODEF2P mode.  All four
;; operands are explicit: operand 1 and operand 3 are SSE registers,
;; operand 2 is a register or memory.
9585 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9586 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9588 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9589 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9590 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9593 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9594 [(set_attr "type" "ssemov")
9595 (set_attr "prefix_extra" "1")
9596 (set_attr "length_immediate" "1")
9597 (set_attr "prefix" "vex")
9598 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 "blend<suffix>" for each SSEMODEF2P mode, expressed as a vec_merge
;; of operand 2 (register or memory) and operand 1 (tied to the destination)
;; under the immediate mask in operand 3.
9600 (define_insn "sse4_1_blend<ssemodesuffix>"
9601 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9602 (vec_merge:SSEMODEF2P
9603 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9604 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9605 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9607 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9608 [(set_attr "type" "ssemov")
9609 (set_attr "prefix_data16" "1")
9610 (set_attr "prefix_extra" "1")
9611 (set_attr "length_immediate" "1")
9612 (set_attr "mode" "<MODE>")])
;; SSE4.1 "blendv<suffix>" for each SSEMODEF2P mode.  Operand 3 is
;; constrained with "Yz", while the destination and the other inputs use the
;; *_not_xmm0 predicates -- presumably so they can never be allocated to the
;; register that operand 3 is pinned to (verify against the constraint and
;; predicate definitions).  Operand 1 is tied to the destination.
9614 (define_insn "sse4_1_blendv<ssemodesuffix>"
9615 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9617 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9618 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9619 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9622 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9623 [(set_attr "type" "ssemov")
9624 (set_attr "prefix_data16" "1")
9625 (set_attr "prefix_extra" "1")
9626 (set_attr "mode" "<MODE>")])
;; VEX-encoded "vdp<suffix>" for each AVXMODEF2P mode.  Operands 1 and 2 are
;; marked commutative ("%x"); operand 2 may be memory; operand 3 is an
;; immediate accepted by const_0_to_255_operand.
9628 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9629 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9631 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9632 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9633 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9636 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9637 [(set_attr "type" "ssemul")
9638 (set_attr "prefix" "vex")
9639 (set_attr "prefix_extra" "1")
9640 (set_attr "length_immediate" "1")
9641 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 "dp<suffix>" for each SSEMODEF2P mode.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0"); operand 2 may be
;; memory; operand 3 is an immediate accepted by const_0_to_255_operand.
9643 (define_insn "sse4_1_dp<ssemodesuffix>"
9644 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9646 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9647 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9648 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9651 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9652 [(set_attr "type" "ssemul")
9653 (set_attr "prefix_data16" "1")
9654 (set_attr "prefix_extra" "1")
9655 (set_attr "length_immediate" "1")
9656 (set_attr "mode" "<MODE>")])
;; movntdqa: loads a V2DI value into an SSE register.  The source must be
;; a memory operand ("m"); the load is wrapped in an unspec.  The template
;; uses the "%v" prefix and the "prefix" attribute is maybe_vex.
9658 (define_insn "sse4_1_movntdqa"
9659 [(set (match_operand:V2DI 0 "register_operand" "=x")
9660 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9663 "%vmovntdqa\t{%1, %0|%0, %1}"
9664 [(set_attr "type" "ssemov")
9665 (set_attr "prefix_extra" "1")
9666 (set_attr "prefix" "maybe_vex")
9667 (set_attr "mode" "TI")])
;; VEX form of mpsadbw: an unspec over two V16QI sources (operand 1 in a
;; register, operand 2 in a register or memory) and a 0..255 immediate.
;; The destination is independent of the sources.
9669 (define_insn "*avx_mpsadbw"
9670 [(set (match_operand:V16QI 0 "register_operand" "=x")
9671 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9672 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9673 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9676 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9677 [(set_attr "type" "sselog1")
9678 (set_attr "prefix" "vex")
9679 (set_attr "prefix_extra" "1")
9680 (set_attr "length_immediate" "1")
9681 (set_attr "mode" "TI")])
;; Non-VEX mpsadbw.  Same unspec shape as the VEX pattern, but operand 1
;; is tied to the destination ("0"), so only %0, %2 and %3 are printed.
9683 (define_insn "sse4_1_mpsadbw"
9684 [(set (match_operand:V16QI 0 "register_operand" "=x")
9685 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9686 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9687 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9690 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9691 [(set_attr "type" "sselog1")
9692 (set_attr "prefix_extra" "1")
9693 (set_attr "length_immediate" "1")
9694 (set_attr "mode" "TI")])
;; VEX form of packusdw: combines two V4SI sources (operand 1 in a
;; register, operand 2 in a register or memory) into one V8HI result.
9696 (define_insn "*avx_packusdw"
9697 [(set (match_operand:V8HI 0 "register_operand" "=x")
9700 (match_operand:V4SI 1 "register_operand" "x"))
9702 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9704 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9705 [(set_attr "type" "sselog")
9706 (set_attr "prefix_extra" "1")
9707 (set_attr "prefix" "vex")
9708 (set_attr "mode" "TI")])
;; Non-VEX packusdw: two V4SI sources produce a V8HI result.  Operand 1
;; is tied to the destination ("0"); operand 2 may be a register or memory.
9710 (define_insn "sse4_1_packusdw"
9711 [(set (match_operand:V8HI 0 "register_operand" "=x")
9714 (match_operand:V4SI 1 "register_operand" "0"))
9716 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9718 "packusdw\t{%2, %0|%0, %2}"
9719 [(set_attr "type" "sselog")
9720 (set_attr "prefix_extra" "1")
9721 (set_attr "mode" "TI")])
;; VEX form of pblendvb.  All four operands are explicit and the selector
;; (operand 3) may live in any SSE register ("x").
;; NOTE(review): length_immediate is 1 although no operand is a constant;
;; presumably the fourth register is encoded in the immediate byte --
;; confirm against the instruction encoding.
9723 (define_insn "*avx_pblendvb"
9724 [(set (match_operand:V16QI 0 "register_operand" "=x")
9725 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9726 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9727 (match_operand:V16QI 3 "register_operand" "x")]
9730 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9731 [(set_attr "type" "ssemov")
9732 (set_attr "prefix_extra" "1")
9733 (set_attr "length_immediate" "1")
9734 (set_attr "prefix" "vex")
9735 (set_attr "mode" "TI")])
;; Non-VEX pblendvb.  Operand 1 is tied to the destination, operand 2 may
;; be a register or memory, and the selector (operand 3) is restricted by
;; the "Yz" constraint.  Operands 0-2 use the *_not_xmm0 predicates.
;; NOTE(review): "Yz" presumably denotes %xmm0 -- confirm in constraints.md.
9737 (define_insn "sse4_1_pblendvb"
9738 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9739 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9740 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9741 (match_operand:V16QI 3 "register_operand" "Yz")]
9744 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9745 [(set_attr "type" "ssemov")
9746 (set_attr "prefix_extra" "1")
9747 (set_attr "mode" "TI")])
;; VEX form of pblendw: operand 2 (register or memory) and operand 1
;; (register) are combined under the 0..255 immediate in operand 3.
;; The destination is a separate register.
9749 (define_insn "*avx_pblendw"
9750 [(set (match_operand:V8HI 0 "register_operand" "=x")
9752 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9753 (match_operand:V8HI 1 "register_operand" "x")
9754 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9756 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9757 [(set_attr "type" "ssemov")
9758 (set_attr "prefix" "vex")
9759 (set_attr "prefix_extra" "1")
9760 (set_attr "length_immediate" "1")
9761 (set_attr "mode" "TI")])
;; Non-VEX pblendw.  Operand 1 is tied to the destination ("0"), so the
;; template prints only %0, %2 and the 0..255 immediate in operand 3.
9763 (define_insn "sse4_1_pblendw"
9764 [(set (match_operand:V8HI 0 "register_operand" "=x")
9766 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9767 (match_operand:V8HI 1 "register_operand" "0")
9768 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9770 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9771 [(set_attr "type" "ssemov")
9772 (set_attr "prefix_extra" "1")
9773 (set_attr "length_immediate" "1")
9774 (set_attr "mode" "TI")])
;; phminposuw, modelled as UNSPEC_PHMINPOSUW applied to a single V8HI
;; source that may be a register or memory.  The result is a V8HI register.
9776 (define_insn "sse4_1_phminposuw"
9777 [(set (match_operand:V8HI 0 "register_operand" "=x")
9778 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9779 UNSPEC_PHMINPOSUW))]
9781 "%vphminposuw\t{%1, %0|%0, %1}"
9782 [(set_attr "type" "sselog1")
9783 (set_attr "prefix_extra" "1")
9784 (set_attr "prefix" "maybe_vex")
9785 (set_attr "mode" "TI")])
;; pmovsxbw / pmovzxbw (mnemonic chosen through <extsuffix>): extends
;; elements selected from the V16QI source, starting at element 0, into a
;; V8HI result.  The source may be a register or memory.
9787 (define_insn "sse4_1_<code>v8qiv8hi2"
9788 [(set (match_operand:V8HI 0 "register_operand" "=x")
9791 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9792 (parallel [(const_int 0)
9801 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9802 [(set_attr "type" "ssemov")
9803 (set_attr "prefix_extra" "1")
9804 (set_attr "prefix" "maybe_vex")
9805 (set_attr "mode" "TI")])
;; pmovsxbd / pmovzxbd (mnemonic chosen through <extsuffix>): extends
;; elements selected from the V16QI source, starting at element 0, into a
;; V4SI result.
9807 (define_insn "sse4_1_<code>v4qiv4si2"
9808 [(set (match_operand:V4SI 0 "register_operand" "=x")
9811 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9812 (parallel [(const_int 0)
9817 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9818 [(set_attr "type" "ssemov")
9819 (set_attr "prefix_extra" "1")
9820 (set_attr "prefix" "maybe_vex")
9821 (set_attr "mode" "TI")])
;; pmovsxwd / pmovzxwd (mnemonic chosen through <extsuffix>): extends
;; elements selected from the V8HI source, starting at element 0, into a
;; V4SI result.
9823 (define_insn "sse4_1_<code>v4hiv4si2"
9824 [(set (match_operand:V4SI 0 "register_operand" "=x")
9827 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9828 (parallel [(const_int 0)
9833 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9834 [(set_attr "type" "ssemov")
9835 (set_attr "prefix_extra" "1")
9836 (set_attr "prefix" "maybe_vex")
9837 (set_attr "mode" "TI")])
;; pmovsxbq / pmovzxbq (mnemonic chosen through <extsuffix>): extends
;; elements selected from the V16QI source, starting at element 0, into a
;; V2DI result.
9839 (define_insn "sse4_1_<code>v2qiv2di2"
9840 [(set (match_operand:V2DI 0 "register_operand" "=x")
9843 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9844 (parallel [(const_int 0)
9847 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9848 [(set_attr "type" "ssemov")
9849 (set_attr "prefix_extra" "1")
9850 (set_attr "prefix" "maybe_vex")
9851 (set_attr "mode" "TI")])
;; pmovsxwq / pmovzxwq (mnemonic chosen through <extsuffix>): extends
;; elements selected from the V8HI source, starting at element 0, into a
;; V2DI result.
9853 (define_insn "sse4_1_<code>v2hiv2di2"
9854 [(set (match_operand:V2DI 0 "register_operand" "=x")
9857 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9858 (parallel [(const_int 0)
9861 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9862 [(set_attr "type" "ssemov")
9863 (set_attr "prefix_extra" "1")
9864 (set_attr "prefix" "maybe_vex")
9865 (set_attr "mode" "TI")])
;; pmovsxdq / pmovzxdq (mnemonic chosen through <extsuffix>): extends
;; elements selected from the V4SI source, starting at element 0, into a
;; V2DI result.
9867 (define_insn "sse4_1_<code>v2siv2di2"
9868 [(set (match_operand:V2DI 0 "register_operand" "=x")
9871 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9872 (parallel [(const_int 0)
9875 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9876 [(set_attr "type" "ssemov")
9877 (set_attr "prefix_extra" "1")
9878 (set_attr "prefix" "maybe_vex")
9879 (set_attr "mode" "TI")])
9881 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9882 ;; setting FLAGS_REG, but they are not really compare instructions.
;; The only result of this pattern is FLAGS_REG (CC mode), set from an
;; unspec of the two inputs; there is no register destination.  Operand 0
;; must be a register, operand 1 may be a register or memory.
9883 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9884 [(set (reg:CC FLAGS_REG)
9885 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9886 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9889 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9890 [(set_attr "type" "ssecomi")
9891 (set_attr "prefix_extra" "1")
9892 (set_attr "prefix" "vex")
9893 (set_attr "mode" "<MODE>")])
9895 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9896 ;; but it is not really a compare instruction.
;; 256-bit vptest on V4DI operands (mode attribute OI).  The only result
;; is FLAGS_REG; operand 1 may be a register or memory.
9897 (define_insn "avx_ptest256"
9898 [(set (reg:CC FLAGS_REG)
9899 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9900 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9903 "vptest\t{%1, %0|%0, %1}"
9904 [(set_attr "type" "ssecomi")
9905 (set_attr "prefix_extra" "1")
9906 (set_attr "prefix" "vex")
9907 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI operands (mode attribute TI).  The only result is
;; FLAGS_REG; the template uses "%v" and the prefix attribute is maybe_vex.
9909 (define_insn "sse4_1_ptest"
9910 [(set (reg:CC FLAGS_REG)
9911 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9912 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9915 "%vptest\t{%1, %0|%0, %1}"
9916 [(set_attr "type" "ssecomi")
9917 (set_attr "prefix_extra" "1")
9918 (set_attr "prefix" "maybe_vex")
9919 (set_attr "mode" "TI")])
;; 256-bit packed vround: an unspec over the source (register or memory)
;; and an immediate in the range 0..15 held in operand 2.
9921 (define_insn "avx_round<ssemodesuffix>256"
9922 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9923 (unspec:AVX256MODEF2P
9924 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9925 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9928 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9929 [(set_attr "type" "ssecvt")
9930 (set_attr "prefix_extra" "1")
9931 (set_attr "length_immediate" "1")
9932 (set_attr "prefix" "vex")
9933 (set_attr "mode" "<MODE>")])
;; 128-bit packed round: source in a register or memory plus a 0..15
;; immediate in operand 2.  One pattern serves both encodings: the
;; template uses "%v" and the prefix attribute is maybe_vex.
9935 (define_insn "sse4_1_round<ssemodesuffix>"
9936 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9938 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9939 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9942 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9943 [(set_attr "type" "ssecvt")
9944 (set_attr "prefix_data16" "1")
9945 (set_attr "prefix_extra" "1")
9946 (set_attr "length_immediate" "1")
9947 (set_attr "prefix" "maybe_vex")
9948 (set_attr "mode" "<MODE>")])
;; VEX scalar round.  The result is a vec_merge of an operation on
;; operand 2 (with the 0..15 immediate in operand 3) and operand 1.
;; All register operands are independent, so four operands are printed.
9950 (define_insn "*avx_round<ssescalarmodesuffix>"
9951 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9952 (vec_merge:SSEMODEF2P
9954 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9955 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9957 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9960 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9961 [(set_attr "type" "ssecvt")
9962 (set_attr "prefix_extra" "1")
9963 (set_attr "length_immediate" "1")
9964 (set_attr "prefix" "vex")
9965 (set_attr "mode" "<MODE>")])
;; Non-VEX scalar round.  Same vec_merge shape as the VEX pattern, but
;; operand 1 is tied to the destination ("0"), so only %0, %2 and the
;; immediate %3 are printed.
9967 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9968 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9969 (vec_merge:SSEMODEF2P
9971 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9972 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9974 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9977 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9978 [(set_attr "type" "ssecvt")
9979 (set_attr "prefix_data16" "1")
9980 (set_attr "prefix_extra" "1")
9981 (set_attr "length_immediate" "1")
9982 (set_attr "mode" "<MODE>")])
9984 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9986 ;; Intel SSE4.2 string/text processing instructions
9988 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in the "c"
;; register (operand 0), a V16QI value constrained to "Yz" (operand 1) and
;; FLAGS_REG.  Inputs are two V16QI strings (operands 2 and 4), their SI
;; lengths in the "a" and "d" registers (operands 3 and 5) and a 0..255
;; immediate (operand 6).
;; The split code checks the REG_UNUSED notes on curr_insn to learn which
;; results are live.  It uses gen_sse4_2_pcmpestri for the index result
;; and gen_sse4_2_pcmpestrm for the mask result, and falls back to
;; gen_sse4_2_pcmpestr_cconly when only the flags are live.
9990 (define_insn_and_split "sse4_2_pcmpestr"
9991 [(set (match_operand:SI 0 "register_operand" "=c,c")
9993 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9994 (match_operand:SI 3 "register_operand" "a,a")
9995 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9996 (match_operand:SI 5 "register_operand" "d,d")
9997 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9999 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10007 (set (reg:CC FLAGS_REG)
10016 && can_create_pseudo_p ()"
10021 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10022 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10023 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10026 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10027 operands[3], operands[4],
10028 operands[5], operands[6]));
10030 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10031 operands[3], operands[4],
10032 operands[5], operands[6]));
10033 if (flags && !(ecx || xmm0))
10034 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10035 operands[2], operands[3],
10036 operands[4], operands[5],
10040 [(set_attr "type" "sselog")
10041 (set_attr "prefix_data16" "1")
10042 (set_attr "prefix_extra" "1")
10043 (set_attr "length_immediate" "1")
10044 (set_attr "memory" "none,load")
10045 (set_attr "mode" "TI")])
;; pcmpestri: produces an SI result in the "c" register and also sets
;; FLAGS_REG.  The string lengths are pinned to the "a" and "d" registers
;; (operands 2 and 4).  The two alternatives differ only in whether the
;; second string (operand 3) is a register or memory, as reflected by the
;; "memory" attribute.
10047 (define_insn "sse4_2_pcmpestri"
10048 [(set (match_operand:SI 0 "register_operand" "=c,c")
10050 [(match_operand:V16QI 1 "register_operand" "x,x")
10051 (match_operand:SI 2 "register_operand" "a,a")
10052 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10053 (match_operand:SI 4 "register_operand" "d,d")
10054 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10056 (set (reg:CC FLAGS_REG)
10065 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10066 [(set_attr "type" "sselog")
10067 (set_attr "prefix_data16" "1")
10068 (set_attr "prefix_extra" "1")
10069 (set_attr "prefix" "maybe_vex")
10070 (set_attr "length_immediate" "1")
10071 (set_attr "memory" "none,load")
10072 (set_attr "mode" "TI")])
;; pcmpestrm: produces a V16QI result in a "Yz" register and also sets
;; FLAGS_REG.  The inputs match pcmpestri: two strings, their lengths in
;; the "a" and "d" registers, and a 0..255 immediate.
10074 (define_insn "sse4_2_pcmpestrm"
10075 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10077 [(match_operand:V16QI 1 "register_operand" "x,x")
10078 (match_operand:SI 2 "register_operand" "a,a")
10079 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10080 (match_operand:SI 4 "register_operand" "d,d")
10081 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10083 (set (reg:CC FLAGS_REG)
10092 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10093 [(set_attr "type" "sselog")
10094 (set_attr "prefix_data16" "1")
10095 (set_attr "prefix_extra" "1")
10096 (set_attr "length_immediate" "1")
10097 (set_attr "prefix" "maybe_vex")
10098 (set_attr "memory" "none,load")
10099 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the sole set is FLAGS_REG, and the register the
;; instruction would otherwise write is clobbered as a scratch.
;; Alternatives 0-1 clobber a V16QI "Yz" scratch and print pcmpestrm;
;; alternatives 2-3 clobber an SI scratch in "c" and print pcmpestri.
;; Within each pair the second alternative takes operand 4 from memory.
10101 (define_insn "sse4_2_pcmpestr_cconly"
10102 [(set (reg:CC FLAGS_REG)
10104 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10105 (match_operand:SI 3 "register_operand" "a,a,a,a")
10106 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10107 (match_operand:SI 5 "register_operand" "d,d,d,d")
10108 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10110 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10111 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10114 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10115 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10116 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10117 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10118 [(set_attr "type" "sselog")
10119 (set_attr "prefix_data16" "1")
10120 (set_attr "prefix_extra" "1")
10121 (set_attr "length_immediate" "1")
10122 (set_attr "memory" "none,load,none,load")
10123 (set_attr "prefix" "maybe_vex")
10124 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value in the "c"
;; register (operand 0), a V16QI value constrained to "Yz" (operand 1) and
;; FLAGS_REG.  Inputs are two V16QI strings (operands 2 and 3) and a
;; 0..255 immediate (operand 4); there are no length operands.
;; The split code checks the REG_UNUSED notes on curr_insn to learn which
;; results are live.  It uses gen_sse4_2_pcmpistri for the index result
;; and gen_sse4_2_pcmpistrm for the mask result, and falls back to
;; gen_sse4_2_pcmpistr_cconly when only the flags are live.
10126 (define_insn_and_split "sse4_2_pcmpistr"
10127 [(set (match_operand:SI 0 "register_operand" "=c,c")
10129 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10130 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10131 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10133 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10139 (set (reg:CC FLAGS_REG)
10146 && can_create_pseudo_p ()"
10151 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10152 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10153 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10156 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10157 operands[3], operands[4]));
10159 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10160 operands[3], operands[4]));
10161 if (flags && !(ecx || xmm0))
10162 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10163 operands[2], operands[3],
10167 [(set_attr "type" "sselog")
10168 (set_attr "prefix_data16" "1")
10169 (set_attr "prefix_extra" "1")
10170 (set_attr "length_immediate" "1")
10171 (set_attr "memory" "none,load")
10172 (set_attr "mode" "TI")])
;; pcmpistri: produces an SI result in the "c" register and also sets
;; FLAGS_REG.  The second alternative takes the second string (operand 2)
;; from memory, as reflected by the "memory" attribute.
10174 (define_insn "sse4_2_pcmpistri"
10175 [(set (match_operand:SI 0 "register_operand" "=c,c")
10177 [(match_operand:V16QI 1 "register_operand" "x,x")
10178 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10179 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10181 (set (reg:CC FLAGS_REG)
10188 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10189 [(set_attr "type" "sselog")
10190 (set_attr "prefix_data16" "1")
10191 (set_attr "prefix_extra" "1")
10192 (set_attr "length_immediate" "1")
10193 (set_attr "prefix" "maybe_vex")
10194 (set_attr "memory" "none,load")
10195 (set_attr "mode" "TI")])
;; pcmpistrm: produces a V16QI result in a "Yz" register and also sets
;; FLAGS_REG.  The second alternative takes operand 2 from memory.
10197 (define_insn "sse4_2_pcmpistrm"
10198 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10200 [(match_operand:V16QI 1 "register_operand" "x,x")
10201 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10202 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10204 (set (reg:CC FLAGS_REG)
10211 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10212 [(set_attr "type" "sselog")
10213 (set_attr "prefix_data16" "1")
10214 (set_attr "prefix_extra" "1")
10215 (set_attr "length_immediate" "1")
10216 (set_attr "prefix" "maybe_vex")
10217 (set_attr "memory" "none,load")
10218 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the sole set is FLAGS_REG, and the register the
;; instruction would otherwise write is clobbered as a scratch.
;; Alternatives 0-1 clobber a V16QI "Yz" scratch and print pcmpistrm;
;; alternatives 2-3 clobber an SI scratch in "c" and print pcmpistri.
;; Within each pair the second alternative takes operand 3 from memory.
10220 (define_insn "sse4_2_pcmpistr_cconly"
10221 [(set (reg:CC FLAGS_REG)
10223 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10224 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10225 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10227 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10228 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10231 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10232 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10233 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10234 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10235 [(set_attr "type" "sselog")
10236 (set_attr "prefix_data16" "1")
10237 (set_attr "prefix_extra" "1")
10238 (set_attr "length_immediate" "1")
10239 (set_attr "memory" "none,load,none,load")
10240 (set_attr "prefix" "maybe_vex")
10241 (set_attr "mode" "TI")])
10243 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10245 ;; XOP instructions
10247 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10249 ;; XOP parallel integer multiply/add instructions.
10250 ;; Note that the XOP multiply/add instructions
10251 ;; a[i] = b[i] * c[i] + d[i];
10252 ;; do not allow the value being added to be a memory operand.
;; vpmacsww on V8HI: operands 1 and 2 are grouped together ("%" marks
;; them commutative; operand 2 may be memory) and operand 3 is the third
;; input.  Operand 3's predicate is nonimmediate_operand, but its "x"
;; constraint only admits a register.
10253 (define_insn "xop_pmacsww"
10254 [(set (match_operand:V8HI 0 "register_operand" "=x")
10257 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10258 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10259 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10261 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10262 [(set_attr "type" "ssemuladd")
10263 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a mult of operands 1 and 2 ("%" marks them
;; commutative; operand 2 may be memory) combined with operand 3, which
;; its "x" constraint restricts to a register.
10265 (define_insn "xop_pmacssww"
10266 [(set (match_operand:V8HI 0 "register_operand" "=x")
10268 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10269 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10270 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10272 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10273 [(set_attr "type" "ssemuladd")
10274 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI: operands 1 and 2 are grouped together ("%" marks
;; them commutative; operand 2 may be memory) and operand 3 is the third
;; input, restricted to a register by its "x" constraint.
10276 (define_insn "xop_pmacsdd"
10277 [(set (match_operand:V4SI 0 "register_operand" "=x")
10280 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10281 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10282 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10284 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10285 [(set_attr "type" "ssemuladd")
10286 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a mult of operands 1 and 2 ("%" marks them
;; commutative; operand 2 may be memory) combined with operand 3, which
;; its "x" constraint restricts to a register.
10288 (define_insn "xop_pmacssdd"
10289 [(set (match_operand:V4SI 0 "register_operand" "=x")
10291 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10292 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10293 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10295 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10296 [(set_attr "type" "ssemuladd")
10297 (set_attr "mode" "TI")])
;; vpmacssdql: the two V4SI sources (operands 1 and 2) are each used
;; through an element selection that starts at element 1.  Operand 3 and
;; the result are V2DI.
10299 (define_insn "xop_pmacssdql"
10300 [(set (match_operand:V2DI 0 "register_operand" "=x")
10305 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10306 (parallel [(const_int 1)
10309 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10310 (parallel [(const_int 1)
10312 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10314 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10315 [(set_attr "type" "ssemuladd")
10316 (set_attr "mode" "TI")])
;; vpmacssdqh: the two V4SI sources (operands 1 and 2) are each used
;; through an element selection that starts at element 0.  Operand 3 and
;; the result are V2DI.
10318 (define_insn "xop_pmacssdqh"
10319 [(set (match_operand:V2DI 0 "register_operand" "=x")
10324 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10325 (parallel [(const_int 0)
10329 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10330 (parallel [(const_int 0)
10332 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10334 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10335 [(set_attr "type" "ssemuladd")
10336 (set_attr "mode" "TI")])
;; vpmacsdql: the two V4SI sources (operands 1 and 2) are each used
;; through an element selection that starts at element 1.  Operand 3 and
;; the result are V2DI.
10338 (define_insn "xop_pmacsdql"
10339 [(set (match_operand:V2DI 0 "register_operand" "=x")
10344 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10345 (parallel [(const_int 1)
10349 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10350 (parallel [(const_int 1)
10352 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10354 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10355 [(set_attr "type" "ssemuladd")
10356 (set_attr "mode" "TI")])
10358 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10359 ;; fake it with a multiply/add. In general, we expect the define_split to
10360 ;; occur before register allocation, so we have to handle the corner case where
10361 ;; the target is the same as operands 1/2
;; Widening multiply of elements 1 and 3 of two V4SI inputs, giving a V2DI
;; result.  The output is early-clobber ("=&x"), so it cannot share a
;; register with an input.  The split condition requires reload_completed;
;; the preparation code sets operands[3] to a V2DI zero constant for the
;; replacement pattern.
10362 (define_insn_and_split "xop_mulv2div2di3_low"
10363 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10367 (match_operand:V4SI 1 "register_operand" "%x")
10368 (parallel [(const_int 1)
10372 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10373 (parallel [(const_int 1)
10374 (const_int 3)])))))]
10377 "&& reload_completed"
10378 [(set (match_dup 0)
10386 (parallel [(const_int 1)
10391 (parallel [(const_int 1)
10395 operands[3] = CONST0_RTX (V2DImode);
10397 [(set_attr "type" "ssemul")
10398 (set_attr "mode" "TI")])
;; vpmacsdqh: the two V4SI sources (operands 1 and 2) are each used
;; through an element selection that starts at element 0.  Operand 3 and
;; the result are V2DI.
10400 (define_insn "xop_pmacsdqh"
10401 [(set (match_operand:V2DI 0 "register_operand" "=x")
10406 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10407 (parallel [(const_int 0)
10411 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10412 (parallel [(const_int 0)
10414 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10416 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10417 [(set_attr "type" "ssemuladd")
10418 (set_attr "mode" "TI")])
10420 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10421 ;; fake it with a multiply/add. In general, we expect the define_split to
10422 ;; occur before register allocation, so we have to handle the corner case where
10423 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of elements 0 and 2 of two V4SI inputs, giving a V2DI
;; result.  The output is early-clobber ("=&x"), so it cannot share a
;; register with an input.  The split condition requires reload_completed;
;; the preparation code sets operands[3] to a V2DI zero constant for the
;; replacement pattern.
10424 (define_insn_and_split "xop_mulv2div2di3_high"
10425 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10429 (match_operand:V4SI 1 "register_operand" "%x")
10430 (parallel [(const_int 0)
10434 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10435 (parallel [(const_int 0)
10436 (const_int 2)])))))]
10439 "&& reload_completed"
10440 [(set (match_dup 0)
10448 (parallel [(const_int 0)
10453 (parallel [(const_int 0)
10457 operands[3] = CONST0_RTX (V2DImode);
10459 [(set_attr "type" "ssemul")
10460 (set_attr "mode" "TI")])
10462 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: the two V8HI sources (operands 1 and 2) are each used
;; through an element selection that starts at element 1.  Operand 3 and
;; the result are V4SI.
10463 (define_insn "xop_pmacsswd"
10464 [(set (match_operand:V4SI 0 "register_operand" "=x")
10469 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10470 (parallel [(const_int 1)
10476 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10477 (parallel [(const_int 1)
10481 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10483 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10484 [(set_attr "type" "ssemuladd")
10485 (set_attr "mode" "TI")])
;; vpmacswd: the two V8HI sources (operands 1 and 2) are each used
;; through an element selection that starts at element 1.  Operand 3 and
;; the result are V4SI.
10487 (define_insn "xop_pmacswd"
10488 [(set (match_operand:V4SI 0 "register_operand" "=x")
10493 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10494 (parallel [(const_int 1)
10500 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10501 (parallel [(const_int 1)
10505 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10507 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10508 [(set_attr "type" "ssemuladd")
10509 (set_attr "mode" "TI")])
;; vpmadcsswd: uses element selections of the V8HI sources that start at
;; element 0 and at element 1 (the latter ending at element 7).
;; Operand 3 and the result are V4SI.
10511 (define_insn "xop_pmadcsswd"
10512 [(set (match_operand:V4SI 0 "register_operand" "=x")
10518 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10519 (parallel [(const_int 0)
10525 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10526 (parallel [(const_int 0)
10534 (parallel [(const_int 1)
10541 (parallel [(const_int 1)
10544 (const_int 7)])))))
10545 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10547 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10548 [(set_attr "type" "ssemuladd")
10549 (set_attr "mode" "TI")])
;; vpmadcswd: uses element selections of the V8HI sources that start at
;; element 0 and at element 1 (the latter ending at element 7).
;; Operand 3 and the result are V4SI.
10551 (define_insn "xop_pmadcswd"
10552 [(set (match_operand:V4SI 0 "register_operand" "=x")
10558 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10559 (parallel [(const_int 0)
10565 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10566 (parallel [(const_int 0)
10574 (parallel [(const_int 1)
10581 (parallel [(const_int 1)
10584 (const_int 7)])))))
10585 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10587 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10588 [(set_attr "type" "ssemuladd")
10589 (set_attr "mode" "TI")])
10591 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in SSEMODE, expressed as if_then_else with
;; operand 3 as the condition and operands 1 and 2 as the two arms.
;; The two alternatives allow either operand 2 or operand 3 to be memory,
;; but not both.
10592 (define_insn "xop_pcmov_<mode>"
10593 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10594 (if_then_else:SSEMODE
10595 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10596 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10597 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10599 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10600 [(set_attr "type" "sse4arg")])
;; vpcmov for every mode in AVX256MODE, expressed as if_then_else with
;; operand 3 as the condition and operands 1 and 2 as the two arms.
;; The two alternatives allow either operand 2 or operand 3 to be memory,
;; but not both.
10602 (define_insn "xop_pcmov_<mode>256"
10603 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10604 (if_then_else:AVX256MODE
10605 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10606 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10607 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10609 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10610 [(set_attr "type" "sse4arg")])
10612 ;; XOP horizontal add/subtract instructions
;; vphaddbw: V16QI source (register or memory) to V8HI result, built from
;; element selections starting at element 0 and at element 1 (the latter
;; ending at element 15).
10613 (define_insn "xop_phaddbw"
10614 [(set (match_operand:V8HI 0 "register_operand" "=x")
10618 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10619 (parallel [(const_int 0)
10630 (parallel [(const_int 1)
10637 (const_int 15)])))))]
10639 "vphaddbw\t{%1, %0|%0, %1}"
10640 [(set_attr "type" "sseiadd1")])
;; vphaddbd: V16QI source (register or memory) to V4SI result, built from
;; four element selections that start at elements 0, 1, 2 and 3.
10642 (define_insn "xop_phaddbd"
10643 [(set (match_operand:V4SI 0 "register_operand" "=x")
10648 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10649 (parallel [(const_int 0)
10656 (parallel [(const_int 1)
10659 (const_int 13)]))))
10664 (parallel [(const_int 2)
10671 (parallel [(const_int 3)
10674 (const_int 15)]))))))]
10676 "vphaddbd\t{%1, %0|%0, %1}"
10677 [(set_attr "type" "sseiadd1")])
;; vphaddbq: V16QI source (register or memory) to V2DI result, built from
;; eight element selections that start at elements 0-3 and 8-11.
10679 (define_insn "xop_phaddbq"
10680 [(set (match_operand:V2DI 0 "register_operand" "=x")
10686 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10687 (parallel [(const_int 0)
10692 (parallel [(const_int 1)
10698 (parallel [(const_int 2)
10703 (parallel [(const_int 3)
10704 (const_int 7)])))))
10710 (parallel [(const_int 8)
10715 (parallel [(const_int 9)
10716 (const_int 13)]))))
10721 (parallel [(const_int 10)
10726 (parallel [(const_int 11)
10727 (const_int 15)])))))))]
10729 "vphaddbq\t{%1, %0|%0, %1}"
10730 [(set_attr "type" "sseiadd1")])
;; vphaddwd: V8HI source (register or memory) to V4SI result, built from
;; element selections starting at element 0 and at element 1 (the latter
;; ending at element 7).
10732 (define_insn "xop_phaddwd"
10733 [(set (match_operand:V4SI 0 "register_operand" "=x")
10737 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10738 (parallel [(const_int 0)
10745 (parallel [(const_int 1)
10748 (const_int 7)])))))]
10750 "vphaddwd\t{%1, %0|%0, %1}"
10751 [(set_attr "type" "sseiadd1")])
;; vphaddwq: V8HI source (register or memory) to V2DI result, built from
;; four element selections that start at elements 0, 1, 2 and 3.
10753 (define_insn "xop_phaddwq"
10754 [(set (match_operand:V2DI 0 "register_operand" "=x")
10759 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10760 (parallel [(const_int 0)
10765 (parallel [(const_int 1)
10771 (parallel [(const_int 2)
10776 (parallel [(const_int 3)
10777 (const_int 7)]))))))]
10779 "vphaddwq\t{%1, %0|%0, %1}"
10780 [(set_attr "type" "sseiadd1")])
;; vphadddq: V4SI source (register or memory) to V2DI result, built from
;; element selections starting at element 0 and at element 1 (the latter
;; ending at element 3).
10782 (define_insn "xop_phadddq"
10783 [(set (match_operand:V2DI 0 "register_operand" "=x")
10787 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10788 (parallel [(const_int 0)
10793 (parallel [(const_int 1)
10794 (const_int 3)])))))]
10796 "vphadddq\t{%1, %0|%0, %1}"
10797 [(set_attr "type" "sseiadd1")])
;; vphaddubw: V16QI source (register or memory) to V8HI result, built from
;; element selections starting at element 0 and at element 1 (the latter
;; ending at element 15).
10799 (define_insn "xop_phaddubw"
10800 [(set (match_operand:V8HI 0 "register_operand" "=x")
10804 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10805 (parallel [(const_int 0)
10816 (parallel [(const_int 1)
10823 (const_int 15)])))))]
10825 "vphaddubw\t{%1, %0|%0, %1}"
10826 [(set_attr "type" "sseiadd1")])
;; vphaddubd: V16QI source (register or memory) to V4SI result, built from
;; four element selections that start at elements 0, 1, 2 and 3.
10828 (define_insn "xop_phaddubd"
10829 [(set (match_operand:V4SI 0 "register_operand" "=x")
10834 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10835 (parallel [(const_int 0)
10842 (parallel [(const_int 1)
10845 (const_int 13)]))))
10850 (parallel [(const_int 2)
10857 (parallel [(const_int 3)
10860 (const_int 15)]))))))]
10862 "vphaddubd\t{%1, %0|%0, %1}"
10863 [(set_attr "type" "sseiadd1")])
;; vphaddubq: V16QI source (register or memory) to V2DI result, built from
;; eight element selections that start at elements 0-3 and 8-11.
10865 (define_insn "xop_phaddubq"
10866 [(set (match_operand:V2DI 0 "register_operand" "=x")
10872 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10873 (parallel [(const_int 0)
10878 (parallel [(const_int 1)
10884 (parallel [(const_int 2)
10889 (parallel [(const_int 3)
10890 (const_int 7)])))))
10896 (parallel [(const_int 8)
10901 (parallel [(const_int 9)
10902 (const_int 13)]))))
10907 (parallel [(const_int 10)
10912 (parallel [(const_int 11)
10913 (const_int 15)])))))))]
10915 "vphaddubq\t{%1, %0|%0, %1}"
10916 [(set_attr "type" "sseiadd1")])
;; vphadduwd: V8HI source (register or memory) to V4SI result, built from
;; element selections starting at element 0 and at element 1 (the latter
;; ending at element 7).
10918 (define_insn "xop_phadduwd"
10919 [(set (match_operand:V4SI 0 "register_operand" "=x")
10923 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10924 (parallel [(const_int 0)
10931 (parallel [(const_int 1)
10934 (const_int 7)])))))]
10936 "vphadduwd\t{%1, %0|%0, %1}"
10937 [(set_attr "type" "sseiadd1")])
;; vphadduwq: V8HI source (register or memory) to V2DI result, built from
;; four element selections that start at elements 0, 1, 2 and 3.
10939 (define_insn "xop_phadduwq"
10940 [(set (match_operand:V2DI 0 "register_operand" "=x")
10945 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10946 (parallel [(const_int 0)
10951 (parallel [(const_int 1)
10957 (parallel [(const_int 2)
10962 (parallel [(const_int 3)
10963 (const_int 7)]))))))]
10965 "vphadduwq\t{%1, %0|%0, %1}"
10966 [(set_attr "type" "sseiadd1")])
;; vphaddudq: V4SI source (register or memory) to V2DI result, built from
;; element selections starting at element 0 and at element 1 (the latter
;; ending at element 3).
10968 (define_insn "xop_phaddudq"
10969 [(set (match_operand:V2DI 0 "register_operand" "=x")
10973 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10974 (parallel [(const_int 0)
10979 (parallel [(const_int 1)
10980 (const_int 3)])))))]
10982 "vphaddudq\t{%1, %0|%0, %1}"
10983 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V16QI source (register or memory) to V8HI result, built from
;; element selections starting at element 0 and at element 1 (the latter
;; ending at element 15).
10985 (define_insn "xop_phsubbw"
10986 [(set (match_operand:V8HI 0 "register_operand" "=x")
10990 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10991 (parallel [(const_int 0)
11002 (parallel [(const_int 1)
11009 (const_int 15)])))))]
11011 "vphsubbw\t{%1, %0|%0, %1}"
11012 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V8HI source (register or memory) to V4SI result, built from
;; element selections starting at element 0 and at element 1 (the latter
;; ending at element 7).
11014 (define_insn "xop_phsubwd"
11015 [(set (match_operand:V4SI 0 "register_operand" "=x")
11019 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11020 (parallel [(const_int 0)
11027 (parallel [(const_int 1)
11030 (const_int 7)])))))]
11032 "vphsubwd\t{%1, %0|%0, %1}"
11033 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V4SI source (register or memory) to V2DI result, built from
;; element selections starting at element 0 and at element 1 (the latter
;; ending at element 3).
11035 (define_insn "xop_phsubdq"
11036 [(set (match_operand:V2DI 0 "register_operand" "=x")
11040 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11041 (parallel [(const_int 0)
11046 (parallel [(const_int 1)
11047 (const_int 3)])))))]
11049 "vphsubdq\t{%1, %0|%0, %1}"
11050 [(set_attr "type" "sseiadd1")])
11052 ;; XOP permute instructions
;; vpperm, modelled as UNSPEC_XOP_PERMUTE over three V16QI inputs.
;; Operand 1 is always a register.  The two alternatives let either
;; operand 2 or operand 3 be memory, and the insn condition additionally
;; rejects the case where both are MEMs.
11053 (define_insn "xop_pperm"
11054 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11056 [(match_operand:V16QI 1 "register_operand" "x,x")
11057 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11058 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11059 UNSPEC_XOP_PERMUTE))]
11060 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11061 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11062 [(set_attr "type" "sse4arg")
11063 (set_attr "mode" "TI")])
11065 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI sources (operands 1 and 2) into a V4SI result using
;; vpperm.  The V16QI operand 3 appears only in a (use ...) and is passed
;; to vpperm as its last source.  The condition rejects operands 2 and 3
;; both being MEMs.
11066 (define_insn "xop_pperm_pack_v2di_v4si"
11067 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11070 (match_operand:V2DI 1 "register_operand" "x,x"))
11072 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11073 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11074 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11075 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11076 [(set_attr "type" "sse4arg")
11077 (set_attr "mode" "TI")])
;; Packs two V4SI sources (operands 1 and 2) into a V8HI result using
;; vpperm.  The V16QI operand 3 appears only in a (use ...) and is passed
;; to vpperm as its last source.  The condition rejects operands 2 and 3
;; both being MEMs.
11079 (define_insn "xop_pperm_pack_v4si_v8hi"
11080 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11083 (match_operand:V4SI 1 "register_operand" "x,x"))
11085 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11086 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11087 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11088 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11089 [(set_attr "type" "sse4arg")
11090 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result with
;; vpperm.  Operand 3 is the V16QI selector, attached to the pattern
;; through a (use ...).  At most one of operands 2 and 3 may be memory.
11092 (define_insn "xop_pperm_pack_v8hi_v16qi"
11093 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11096 (match_operand:V8HI 1 "register_operand" "x,x"))
11098 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11099 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11100 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11101 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11102 [(set_attr "type" "sse4arg")
11103 (set_attr "mode" "TI")])
11105 ;; XOP packed rotate instructions
;; Expander for a vector rotate-left whose count (operand 2) is an SImode
;; scalar.  When the count is not a constant accepted by
;; const_0_to_<sserotatemax>_operand, the scalar is converted to
;; <ssescalarmode> if it is not already in that mode, replicated into all
;; <ssescalarnum> elements of a fresh vector register via vec_init<mode>,
;; and the per-element variable rotate xop_vrotl<mode>3 is emitted.
11106 (define_expand "rotl<mode>3"
11107 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11108 (rotate:SSEMODE1248
11109 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11110 (match_operand:SI 2 "general_operand")))]
11113 /* If we were given a scalar, convert it to parallel */
11114 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11116 rtvec vs = rtvec_alloc (<ssescalarnum>);
11117 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11118 rtx reg = gen_reg_rtx (<MODE>mode);
11119 rtx op2 = operands[2];
11122 if (GET_MODE (op2) != <ssescalarmode>mode)
11124 op2 = gen_reg_rtx (<ssescalarmode>mode);
11125 convert_move (op2, operands[2], false);
11128 for (i = 0; i < <ssescalarnum>; i++)
11129 RTVEC_ELT (vs, i) = op2;
11131 emit_insn (gen_vec_init<mode> (reg, par));
11132 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right whose count (operand 2) is an SImode
;; scalar.  For a count that is not an in-range constant, the scalar is
;; converted to <ssescalarmode> if needed and replicated into a vector
;; (as in rotl<mode>3); that vector is then negated with neg<mode>2 and
;; fed to xop_vrotl<mode>3, i.e. a right rotate is expressed as a
;; variable rotate with negated per-element counts.
11137 (define_expand "rotr<mode>3"
11138 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11139 (rotatert:SSEMODE1248
11140 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11141 (match_operand:SI 2 "general_operand")))]
11144 /* If we were given a scalar, convert it to parallel */
11145 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11147 rtvec vs = rtvec_alloc (<ssescalarnum>);
11148 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11149 rtx neg = gen_reg_rtx (<MODE>mode);
11150 rtx reg = gen_reg_rtx (<MODE>mode);
11151 rtx op2 = operands[2];
11154 if (GET_MODE (op2) != <ssescalarmode>mode)
11156 op2 = gen_reg_rtx (<ssescalarmode>mode);
11157 convert_move (op2, operands[2], false);
11160 for (i = 0; i < <ssescalarnum>; i++)
11161 RTVEC_ELT (vs, i) = op2;
11163 emit_insn (gen_vec_init<mode> (reg, par));
11164 emit_insn (gen_neg<mode>2 (neg, reg));
11165 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate every element of operand 1 left by the immediate count in
;; operand 2 (restricted by const_0_to_<sserotatemax>_operand); emits
;; vprot<ssevecsize> with a one-byte immediate.
11170 (define_insn "xop_rotl<mode>3"
11171 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11172 (rotate:SSEMODE1248
11173 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11174 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11176 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11177 [(set_attr "type" "sseishft")
11178 (set_attr "length_immediate" "1")
11179 (set_attr "mode" "TI")])
;; Rotate every element of operand 1 right by the immediate count in
;; operand 2.  The insn is emitted as a left rotate (vprot<ssevecsize>)
;; by the complementary count, which is placed in operands[3]:
;;   left_count = <element width in bits> - right_count.
;; The element width must come from the scalar element mode
;; (GET_MODE_BITSIZE (<ssescalarmode>mode)), not from
;; <ssescalarnum> * 8: the number of elements times 8 only equals the
;; element width for V4SI and gives 128, 64 and 16 for V16QI, V8HI and
;; V2DI respectively.
11181 (define_insn "xop_rotr<mode>3"
11182 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11183 (rotatert:SSEMODE1248
11184 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11185 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11188 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11189 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11191 [(set_attr "type" "sseishft")
11192 (set_attr "length_immediate" "1")
11193 (set_attr "mode" "TI")])
;; Per-element variable rotate right: operand 2 is a vector of counts.
;; The counts are negated into a fresh register with neg<mode>2 and the
;; result is passed to xop_vrotl<mode>3.
11195 (define_expand "vrotr<mode>3"
11196 [(match_operand:SSEMODE1248 0 "register_operand" "")
11197 (match_operand:SSEMODE1248 1 "register_operand" "")
11198 (match_operand:SSEMODE1248 2 "register_operand" "")]
11201 rtx reg = gen_reg_rtx (<MODE>mode);
11202 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11203 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left: forwards all three operands
;; unchanged to xop_vrotl<mode>3.
11207 (define_expand "vrotl<mode>3"
11208 [(match_operand:SSEMODE1248 0 "register_operand" "")
11209 (match_operand:SSEMODE1248 1 "register_operand" "")
11210 (match_operand:SSEMODE1248 2 "register_operand" "")]
11213 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate with a vector of counts in operand 2; emits
;; vprot<ssevecsize>.  The RTL is an if_then_else whose arms are a rotate
;; (left) of operand 1 and a rotatert by the negation of operand 2
;; (presumably chosen on the sign of the count -- confirm against the
;; comparison in the pattern).  Either operand 1 or operand 2 may be in
;; memory, but the insn condition rejects both being memory.
11217 (define_insn "xop_vrotl<mode>3"
11218 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11219 (if_then_else:SSEMODE1248
11221 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11223 (rotate:SSEMODE1248
11224 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11226 (rotatert:SSEMODE1248
11228 (neg:SSEMODE1248 (match_dup 2)))))]
11229 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11230 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11231 [(set_attr "type" "sseishft")
11232 (set_attr "prefix_data16" "0")
11233 (set_attr "prefix_extra" "2")
11234 (set_attr "mode" "TI")])
11236 ;; XOP packed shift instructions.
11237 ;; FIXME: add V2DI back in
;; Per-element variable logical shift right (SSEMODE124 only, see the
;; FIXME above): the count vector in operand 2 is negated with
;; neg<mode>2 and the result is passed to xop_lshl<mode>3.
11238 (define_expand "vlshr<mode>3"
11239 [(match_operand:SSEMODE124 0 "register_operand" "")
11240 (match_operand:SSEMODE124 1 "register_operand" "")
11241 (match_operand:SSEMODE124 2 "register_operand" "")]
11244 rtx neg = gen_reg_rtx (<MODE>mode);
11245 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11246 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right: the count vector in
;; operand 2 is negated with neg<mode>2 and the result is passed to
;; xop_ashl<mode>3.
11250 (define_expand "vashr<mode>3"
11251 [(match_operand:SSEMODE124 0 "register_operand" "")
11252 (match_operand:SSEMODE124 1 "register_operand" "")
11253 (match_operand:SSEMODE124 2 "register_operand" "")]
11256 rtx neg = gen_reg_rtx (<MODE>mode);
11257 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11258 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable shift left: forwards all three operands
;; unchanged to xop_ashl<mode>3.
11262 (define_expand "vashl<mode>3"
11263 [(match_operand:SSEMODE124 0 "register_operand" "")
11264 (match_operand:SSEMODE124 1 "register_operand" "")
11265 (match_operand:SSEMODE124 2 "register_operand" "")]
11268 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable arithmetic shift with a vector of counts in operand 2; emits
;; vpsha<ssevecsize>.  The RTL is an if_then_else whose arms are an
;; ashift of operand 1 and an ashiftrt by the negation of operand 2
;; (presumably chosen on the sign of the count -- confirm against the
;; comparison in the pattern).  The insn condition rejects operands 1
;; and 2 both being memory.
11272 (define_insn "xop_ashl<mode>3"
11273 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11274 (if_then_else:SSEMODE1248
11276 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11278 (ashift:SSEMODE1248
11279 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11281 (ashiftrt:SSEMODE1248
11283 (neg:SSEMODE1248 (match_dup 2)))))]
11284 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11285 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11286 [(set_attr "type" "sseishft")
11287 (set_attr "prefix_data16" "0")
11288 (set_attr "prefix_extra" "2")
11289 (set_attr "mode" "TI")])
;; Variable logical shift with a vector of counts in operand 2; emits
;; vpshl<ssevecsize>.  The RTL is an if_then_else whose arms are an
;; ashift of operand 1 and an lshiftrt by the negation of operand 2
;; (presumably chosen on the sign of the count -- confirm against the
;; comparison in the pattern).  The insn condition rejects operands 1
;; and 2 both being memory.
11291 (define_insn "xop_lshl<mode>3"
11292 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11293 (if_then_else:SSEMODE1248
11295 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11297 (ashift:SSEMODE1248
11298 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11300 (lshiftrt:SSEMODE1248
11302 (neg:SSEMODE1248 (match_dup 2)))))]
11303 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11304 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11305 [(set_attr "type" "sseishft")
11306 (set_attr "prefix_data16" "0")
11307 (set_attr "prefix_extra" "2")
11308 (set_attr "mode" "TI")])
11310 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar SImode count (register or constant).
;; The count in operands[2] is stored into all 16 elements of a PARALLEL,
;; materialised in a V16QI register with vec_initv16qi, and used as the
;; per-element count of xop_ashlv16qi3.
11311 (define_expand "ashlv16qi3"
11312 [(match_operand:V16QI 0 "register_operand" "")
11313 (match_operand:V16QI 1 "register_operand" "")
11314 (match_operand:SI 2 "nonmemory_operand" "")]
11317 rtvec vs = rtvec_alloc (16);
11318 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11319 rtx reg = gen_reg_rtx (V16QImode);
11321 for (i = 0; i < 16; i++)
11322 RTVEC_ELT (vs, i) = operands[2];
11324 emit_insn (gen_vec_initv16qi (reg, par));
11325 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SImode count (register or constant).
;; The count in operands[2] is stored into all 16 elements of a PARALLEL,
;; materialised in a V16QI register with vec_initv16qi, and used as the
;; per-element count of xop_lshlv16qi3.
11329 (define_expand "lshlv16qi3"
11330 [(match_operand:V16QI 0 "register_operand" "")
11331 (match_operand:V16QI 1 "register_operand" "")
11332 (match_operand:SI 2 "nonmemory_operand" "")]
11335 rtvec vs = rtvec_alloc (16);
11336 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11337 rtx reg = gen_reg_rtx (V16QImode);
11339 for (i = 0; i < 16; i++)
11340 RTVEC_ELT (vs, i) = operands[2];
11342 emit_insn (gen_vec_initv16qi (reg, par));
11343 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count, implemented
;; through xop_ashlv16qi3 with negated counts.
;;  - Constant count: the negated constant is used as the element value,
;;    so the broadcast vector already holds negative counts and is passed
;;    to xop_ashlv16qi3 directly.
;;  - Non-constant count: the broadcast vector is negated with negv16qi2
;;    before being passed to xop_ashlv16qi3.
11347 (define_expand "ashrv16qi3"
11348 [(match_operand:V16QI 0 "register_operand" "")
11349 (match_operand:V16QI 1 "register_operand" "")
11350 (match_operand:SI 2 "nonmemory_operand" "")]
11353 rtvec vs = rtvec_alloc (16);
11354 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11355 rtx reg = gen_reg_rtx (V16QImode);
11357 rtx ele = ((CONST_INT_P (operands[2]))
11358 ? GEN_INT (- INTVAL (operands[2]))
11361 for (i = 0; i < 16; i++)
11362 RTVEC_ELT (vs, i) = ele;
11364 emit_insn (gen_vec_initv16qi (reg, par));
11366 if (!CONST_INT_P (operands[2]))
11368 rtx neg = gen_reg_rtx (V16QImode);
11369 emit_insn (gen_negv16qi2 (neg, reg));
11370 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11373 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count, implemented
;; through xop_ashlv2di3 with a negated count.  The scalar is negated
;; first: a constant is negated at expand time; a value whose mode is
;; not DImode is converted with convert_move and then negated with
;; negdi2; otherwise negdi2 is applied to the operand directly.  The
;; negated scalar is placed in both elements of the count vector.
11378 (define_expand "ashrv2di3"
11379 [(match_operand:V2DI 0 "register_operand" "")
11380 (match_operand:V2DI 1 "register_operand" "")
11381 (match_operand:DI 2 "nonmemory_operand" "")]
11384 rtvec vs = rtvec_alloc (2);
11385 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11386 rtx reg = gen_reg_rtx (V2DImode);
11389 if (CONST_INT_P (operands[2]))
11390 ele = GEN_INT (- INTVAL (operands[2]));
11391 else if (GET_MODE (operands[2]) != DImode)
11393 rtx move = gen_reg_rtx (DImode);
11394 ele = gen_reg_rtx (DImode);
11395 convert_move (move, operands[2], false);
11396 emit_insn (gen_negdi2 (ele, move));
11400 ele = gen_reg_rtx (DImode);
11401 emit_insn (gen_negdi2 (ele, operands[2]));
11404 RTVEC_ELT (vs, 0) = ele;
11405 RTVEC_ELT (vs, 1) = ele;
11406 emit_insn (gen_vec_initv2di (reg, par));
11407 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11411 ;; XOP FRCZ support
;; Packed FRCZ for the SSEMODEF2P modes: operand 1 (register or memory)
;; is the source, operand 0 the result; emits vfrcz<ssemodesuffix>.
11413 (define_insn "xop_frcz<mode>2"
11414 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11416 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11419 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11420 [(set_attr "type" "ssecvt1")
11421 (set_attr "mode" "<MODE>")])
;; Scalar FRCZ, expressed as a vec_merge: operand 2 (register or memory)
;; is the source of the operation and operand 1, tied to the output by
;; the "0" constraint, supplies the remaining merged elements.  Emits
;; vfrcz<ssescalarmodesuffix> using operands 2 and 0 only.
11424 (define_insn "xop_vmfrcz<mode>2"
11425 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11426 (vec_merge:SSEMODEF2P
11428 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11430 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11433 "vfrcz<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
11434 [(set_attr "type" "ssecvt1")
11435 (set_attr "mode" "<MODE>")])
;; Packed FRCZ for the FMA4MODEF4 modes: same operand layout and
;; assembler template (vfrcz<ssemodesuffix>) as xop_frcz<mode>2.
11437 (define_insn "xop_frcz<mode>2256"
11438 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11440 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11443 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11444 [(set_attr "type" "ssecvt1")
11445 (set_attr "mode" "<MODE>")])
;; Integer vector comparison producing a vector result.  Operand 1 is the
;; comparison operator (any code accepted by
;; ix86_comparison_int_operator); it is printed through %Y1 to form the
;; vpcom<cond><ssevecsize> mnemonic.  Operand 2 must be a register,
;; operand 3 may be a register or memory.
11447 (define_insn "xop_maskcmp<mode>3"
11448 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11449 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11450 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11451 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11453 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11454 [(set_attr "type" "sse4arg")
11455 (set_attr "prefix_data16" "0")
11456 (set_attr "prefix_rep" "0")
11457 (set_attr "prefix_extra" "2")
11458 (set_attr "length_immediate" "1")
11459 (set_attr "mode" "TI")])
;; Unsigned counterpart of xop_maskcmp<mode>3: operand 1 is restricted by
;; ix86_comparison_uns_operator and the mnemonic carries a "u" before the
;; element-size suffix (vpcom<cond>u<ssevecsize>).
11461 (define_insn "xop_maskcmp_uns<mode>3"
11462 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11463 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11464 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11465 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11467 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11468 [(set_attr "type" "ssecmp")
11469 (set_attr "prefix_data16" "0")
11470 (set_attr "prefix_rep" "0")
11471 (set_attr "prefix_extra" "2")
11472 (set_attr "length_immediate" "1")
11473 (set_attr "mode" "TI")])
11475 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11476 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11477 ;; the exact instruction generated for the intrinsic.
;; Same comparison and assembler template as xop_maskcmp_uns<mode>3, but
;; the comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP so that it stays
;; opaque to RTL simplification.
11478 (define_insn "xop_maskcmp_uns2<mode>3"
11479 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11480 (unspec:SSEMODE1248
11481 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11482 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11483 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11484 UNSPEC_XOP_UNSIGNED_CMP))]
11486 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11487 [(set_attr "type" "ssecmp")
11488 (set_attr "prefix_data16" "0")
11489 (set_attr "prefix_extra" "2")
11490 (set_attr "length_immediate" "1")
11491 (set_attr "mode" "TI")])
11493 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11494 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse, modelled as UNSPEC_XOP_TRUEFALSE.  Operand 3
;; is a constant selector: a nonzero value emits vpcomtrue<ssevecsize>,
;; zero emits vpcomfalse<ssevecsize>.  Operands 1 and 2 are the two
;; vector inputs named in the assembler template.
11495 (define_insn "xop_pcom_tf<mode>3"
11496 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11497 (unspec:SSEMODE1248
11498 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11499 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11500 (match_operand:SI 3 "const_int_operand" "n")]
11501 UNSPEC_XOP_TRUEFALSE))]
11504 return ((INTVAL (operands[3]) != 0)
11505 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11506 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11508 [(set_attr "type" "ssecmp")
11509 (set_attr "prefix_data16" "0")
11510 (set_attr "prefix_extra" "2")
11511 (set_attr "length_immediate" "1")
11512 (set_attr "mode" "TI")])
;; Two-source permute vpermil2<ssemodesuffix>.  Operands 1 and 2 are the
;; data vectors, operand 3 is a selector vector of mode <avxpermvecmode>
;; (register or memory) and operand 4 is an immediate in the range 0..3.
11514 (define_insn "xop_vpermil2<mode>3"
11515 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11517 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11518 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11519 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11520 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11523 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11524 [(set_attr "type" "sse4arg")
11525 (set_attr "length_immediate" "1")
11526 (set_attr "mode" "<MODE>")])
11528 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded AES encrypt round (vaesenc): non-destructive three-operand
;; form, enabled when both TARGET_AES and TARGET_AVX hold.  Operand 2 may
;; be a register or memory.
11529 (define_insn "*avx_aesenc"
11530 [(set (match_operand:V2DI 0 "register_operand" "=x")
11531 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11532 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11534 "TARGET_AES && TARGET_AVX"
11535 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11536 [(set_attr "type" "sselog1")
11537 (set_attr "prefix_extra" "1")
11538 (set_attr "prefix" "vex")
11539 (set_attr "mode" "TI")])
;; Legacy-encoded AES encrypt round (aesenc): two-operand form in which
;; operand 1 is tied to the destination by the "0" constraint.
11541 (define_insn "aesenc"
11542 [(set (match_operand:V2DI 0 "register_operand" "=x")
11543 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11544 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11547 "aesenc\t{%2, %0|%0, %2}"
11548 [(set_attr "type" "sselog1")
11549 (set_attr "prefix_extra" "1")
11550 (set_attr "mode" "TI")])
;; VEX-encoded AES last encrypt round (vaesenclast), modelled as
;; UNSPEC_AESENCLAST; three-operand form, enabled when both TARGET_AES
;; and TARGET_AVX hold.
11552 (define_insn "*avx_aesenclast"
11553 [(set (match_operand:V2DI 0 "register_operand" "=x")
11554 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11555 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11556 UNSPEC_AESENCLAST))]
11557 "TARGET_AES && TARGET_AVX"
11558 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11559 [(set_attr "type" "sselog1")
11560 (set_attr "prefix_extra" "1")
11561 (set_attr "prefix" "vex")
11562 (set_attr "mode" "TI")])
;; Legacy-encoded AES last encrypt round (aesenclast), modelled as
;; UNSPEC_AESENCLAST; operand 1 is tied to the destination.
11564 (define_insn "aesenclast"
11565 [(set (match_operand:V2DI 0 "register_operand" "=x")
11566 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11567 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11568 UNSPEC_AESENCLAST))]
11570 "aesenclast\t{%2, %0|%0, %2}"
11571 [(set_attr "type" "sselog1")
11572 (set_attr "prefix_extra" "1")
11573 (set_attr "mode" "TI")])
;; VEX-encoded AES decrypt round (vaesdec): non-destructive three-operand
;; form, enabled when both TARGET_AES and TARGET_AVX hold.
11575 (define_insn "*avx_aesdec"
11576 [(set (match_operand:V2DI 0 "register_operand" "=x")
11577 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11578 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11580 "TARGET_AES && TARGET_AVX"
11581 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11582 [(set_attr "type" "sselog1")
11583 (set_attr "prefix_extra" "1")
11584 (set_attr "prefix" "vex")
11585 (set_attr "mode" "TI")])
;; Legacy-encoded AES decrypt round (aesdec): two-operand form in which
;; operand 1 is tied to the destination by the "0" constraint.
11587 (define_insn "aesdec"
11588 [(set (match_operand:V2DI 0 "register_operand" "=x")
11589 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11590 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11593 "aesdec\t{%2, %0|%0, %2}"
11594 [(set_attr "type" "sselog1")
11595 (set_attr "prefix_extra" "1")
11596 (set_attr "mode" "TI")])
;; VEX-encoded AES last decrypt round (vaesdeclast), modelled as
;; UNSPEC_AESDECLAST; three-operand form, enabled when both TARGET_AES
;; and TARGET_AVX hold.
11598 (define_insn "*avx_aesdeclast"
11599 [(set (match_operand:V2DI 0 "register_operand" "=x")
11600 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11601 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11602 UNSPEC_AESDECLAST))]
11603 "TARGET_AES && TARGET_AVX"
11604 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11605 [(set_attr "type" "sselog1")
11606 (set_attr "prefix_extra" "1")
11607 (set_attr "prefix" "vex")
11608 (set_attr "mode" "TI")])
;; Legacy-encoded AES last decrypt round (aesdeclast), modelled as
;; UNSPEC_AESDECLAST; operand 1 is tied to the destination.
11610 (define_insn "aesdeclast"
11611 [(set (match_operand:V2DI 0 "register_operand" "=x")
11612 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11613 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11614 UNSPEC_AESDECLAST))]
11616 "aesdeclast\t{%2, %0|%0, %2}"
11617 [(set_attr "type" "sselog1")
11618 (set_attr "prefix_extra" "1")
11619 (set_attr "mode" "TI")])
;; AES inverse mix columns.  Single-source insn (operand 1 register or
;; memory); the %v template prefix together with the "maybe_vex" prefix
;; attribute lets one pattern serve both the legacy and VEX encodings.
11621 (define_insn "aesimc"
11622 [(set (match_operand:V2DI 0 "register_operand" "=x")
11623 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11626 "%vaesimc\t{%1, %0|%0, %1}"
11627 [(set_attr "type" "sselog1")
11628 (set_attr "prefix_extra" "1")
11629 (set_attr "prefix" "maybe_vex")
11630 (set_attr "mode" "TI")])
;; AES key generation assist, modelled as UNSPEC_AESKEYGENASSIST.
;; Operand 1 is the source (register or memory) and operand 2 an
;; immediate in the range 0..255.  Uses %v / "maybe_vex" so one pattern
;; covers both the legacy and VEX encodings.
11632 (define_insn "aeskeygenassist"
11633 [(set (match_operand:V2DI 0 "register_operand" "=x")
11634 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11635 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11636 UNSPEC_AESKEYGENASSIST))]
11638 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11639 [(set_attr "type" "sselog1")
11640 (set_attr "prefix_extra" "1")
11641 (set_attr "length_immediate" "1")
11642 (set_attr "prefix" "maybe_vex")
11643 (set_attr "mode" "TI")])
;; VEX-encoded vpclmulqdq: three-operand form with an 8-bit immediate in
;; operand 3, enabled when both TARGET_PCLMUL and TARGET_AVX hold.
11645 (define_insn "*vpclmulqdq"
11646 [(set (match_operand:V2DI 0 "register_operand" "=x")
11647 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11648 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11649 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11651 "TARGET_PCLMUL && TARGET_AVX"
11652 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11653 [(set_attr "type" "sselog1")
11654 (set_attr "prefix_extra" "1")
11655 (set_attr "length_immediate" "1")
11656 (set_attr "prefix" "vex")
11657 (set_attr "mode" "TI")])
;; Legacy-encoded pclmulqdq: operand 1 is tied to the destination by the
;; "0" constraint, operand 2 may be a register or memory, operand 3 is an
;; 8-bit immediate.
11659 (define_insn "pclmulqdq"
11660 [(set (match_operand:V2DI 0 "register_operand" "=x")
11661 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11662 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11663 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11666 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11667 [(set_attr "type" "sselog1")
11668 (set_attr "prefix_extra" "1")
11669 (set_attr "length_immediate" "1")
11670 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall at expand time.
;; Element 0 is an UNSPEC_VOLATILE over (const_int 0); it is followed by
;; one SET per SSE register (16 registers when TARGET_64BIT, otherwise 8)
;; that assigns the V8SImode zero constant to that register, so the
;; zeroing of every register is visible in the RTL.
11672 (define_expand "avx_vzeroall"
11673 [(match_par_dup 0 [(const_int 0)])]
11676 int nregs = TARGET_64BIT ? 16 : 8;
11679 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11681 XVECEXP (operands[0], 0, 0)
11682 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11685 for (regno = 0; regno < nregs; regno++)
11686 XVECEXP (operands[0], 0, regno + 1)
11687 = gen_rtx_SET (VOIDmode,
11688 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11689 CONST0_RTX (V8SImode));
;; Matcher for the PARALLEL built by the avx_vzeroall expander: the whole
;; parallel must satisfy vzeroall_operation and start with the
;; UNSPECV_VZEROALL unspec_volatile.  No ModRM byte and no memory access.
11692 (define_insn "*avx_vzeroall"
11693 [(match_parallel 0 "vzeroall_operation"
11694 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11697 [(set_attr "type" "sse")
11698 (set_attr "modrm" "0")
11699 (set_attr "memory" "none")
11700 (set_attr "prefix" "vex")
11701 (set_attr "mode" "OI")])
11703 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Expander that builds the PARALLEL for vzeroupper at expand time.
;; Element 0 is an UNSPEC_VOLATILE (UNSPECV_VZEROUPPER) over
;; (const_int 0); it is followed by one CLOBBER of each SSE register in
;; V8SImode (16 registers when TARGET_64BIT, otherwise 8).
11704 (define_expand "avx_vzeroupper"
11705 [(match_par_dup 0 [(const_int 0)])]
11708 int nregs = TARGET_64BIT ? 16 : 8;
11711 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11713 XVECEXP (operands[0], 0, 0)
11714 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11715 UNSPECV_VZEROUPPER);
11717 for (regno = 0; regno < nregs; regno++)
11718 XVECEXP (operands[0], 0, regno + 1)
11719 = gen_rtx_CLOBBER (VOIDmode,
11720 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; Matcher for the PARALLEL built by the avx_vzeroupper expander: the
;; whole parallel must satisfy vzeroupper_operation and start with the
;; UNSPECV_VZEROUPPER unspec_volatile.  No ModRM byte, no memory access.
11723 (define_insn "*avx_vzeroupper"
11724 [(match_parallel 0 "vzeroupper_operation"
11725 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11728 [(set_attr "type" "sse")
11729 (set_attr "modrm" "0")
11730 (set_attr "memory" "none")
11731 (set_attr "prefix" "vex")
11732 (set_attr "mode" "OI")])
;; Broadcast a scalar into every element of a 256-bit vector.
;; Alternative 0 takes the scalar from memory and emits
;; vbroadcast<ssescalarmodesuffix>.  Alternative 1 takes it from a
;; register (discouraged by "?"); after reload, when operand 1 is a
;; register, the insn is split into a vec_duplicate into the half-width
;; view of the destination register (operands[2], same hard register
;; number as operand 0) followed by a vec_concat of that half with itself.
11734 (define_insn_and_split "vec_dup<mode>"
11735 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11736 (vec_duplicate:AVX256MODE24P
11737 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11740 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11742 "&& reload_completed && REG_P (operands[1])"
11743 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11744 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11745 "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
11746 [(set_attr "type" "ssemov")
11747 (set_attr "prefix_extra" "1")
11748 (set_attr "prefix" "vex")
11749 (set_attr "mode" "V8SF")])
;; Duplicate a 128-bit half (operand 1) into both halves of a 256-bit
;; register.  Three alternatives, in constraint order:
;;   m   source in memory             -> vbroadcastf128
;;   0   source tied to the output    -> vinsertf128 with immediate 1
;;   ?x  source in another register   -> vperm2f128 with immediate 0,
;;       using %t1 to print operand 1
11751 (define_insn "avx_vbroadcastf128_<mode>"
11752 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11753 (vec_concat:AVX256MODE
11754 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11758 vbroadcastf128\t{%1, %0|%0, %1}
11759 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11760 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11761 [(set_attr "type" "ssemov,sselog1,sselog1")
11762 (set_attr "prefix_extra" "1")
11763 (set_attr "length_immediate" "0,1,1")
11764 (set_attr "prefix" "vex")
11765 (set_attr "mode" "V4SF,V8SF,V8SF")])
11767 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11768 ;; If it so happens that the input is in memory, use vbroadcast.
11769 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast recognised from a selector parallel accepted by
;; avx_vbroadcast_operand; operand 3 is the index of the broadcast
;; element.  The output code switches on which_alternative:
;;   - memory source: the address is advanced by elt * 4 bytes to the
;;     selected SFmode element and vbroadcastss is emitted;
;;   - register source: vpermilps is emitted with immediate elt * 0x55,
;;     i.e. the 2-bit element index repeated in all four selector fields.
11770 (define_insn "*avx_vperm_broadcast_v4sf"
11771 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11773 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11774 (match_parallel 2 "avx_vbroadcast_operand"
11775 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11778 int elt = INTVAL (operands[3]);
11779 switch (which_alternative)
11783 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11784 return "vbroadcastss\t{%1, %0|%0, %1}";
11786 operands[2] = GEN_INT (elt * 0x55);
11787 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11789 gcc_unreachable ();
11792 [(set_attr "type" "ssemov,ssemov,sselog1")
11793 (set_attr "prefix_extra" "1")
11794 (set_attr "length_immediate" "0,0,1")
11795 (set_attr "prefix" "vex")
11796 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit floating-point broadcast recognised from a selector parallel
;; accepted by avx_vbroadcast_operand; split after reload.
;; The preparation code handles two situations:
;;   - a two-step register sequence: vpermil replicates the wanted
;;     element inside each 128-bit lane (mask 15/0 for V4DF depending on
;;     the low index bit, otherwise (elt & 3) * 0x55), then vperm2f128
;;     with mask (elt / (<ssescalarnum> / 2)) * 0x11 copies the lane
;;     holding the element into both lanes of the destination;
;;   - otherwise operand 1 is re-addressed to the selected scalar element
;;     so the vec_duplicate in the split pattern broadcasts it.
;; NOTE(review): the test choosing between the two paths is presumably
;; whether operand 1 is a register -- confirm against the full pattern.
11798 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11799 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11800 (vec_select:AVX256MODEF2P
11801 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11802 (match_parallel 2 "avx_vbroadcast_operand"
11803 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11806 "&& reload_completed"
11807 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11809 rtx op0 = operands[0], op1 = operands[1];
11810 int elt = INTVAL (operands[3]);
11816 /* Shuffle element we care about into all elements of the 128-bit lane.
11817 The other lane gets shuffled too, but we don't care. */
11818 if (<MODE>mode == V4DFmode)
11819 mask = (elt & 1 ? 15 : 0);
11821 mask = (elt & 3) * 0x55;
11822 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11824 /* Shuffle the lane we care about into both lanes of the dest. */
11825 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11826 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11830 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11831 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; Expander for vpermil on the AVXMODEFDP modes with an immediate mask.
;; The 8-bit immediate in operand 2 is decoded into an explicit
;; vec_select selector: one mask bit per element chooses index 0 or 1;
;; for V4DF, mask bits 2 and 3 choose index 2 or 3 for the upper two
;; elements.  The selector is wrapped in a PARALLEL of <ssescalarnum>
;; entries.
11834 (define_expand "avx_vpermil<mode>"
11835 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11836 (vec_select:AVXMODEFDP
11837 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11838 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11841 int mask = INTVAL (operands[2]);
11842 rtx perm[<ssescalarnum>];
11844 perm[0] = GEN_INT (mask & 1);
11845 perm[1] = GEN_INT ((mask >> 1) & 1);
11846 if (<MODE>mode == V4DFmode)
11848 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11849 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11853 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for vpermil on the AVXMODEFSP modes with an immediate mask.
;; The 8-bit immediate in operand 2 holds four 2-bit fields; field k
;; selects the source index (0..3) for element k.  For V8SF the same four
;; fields are reused for elements 4..7 with 4 added to each index.  The
;; selector is wrapped in a PARALLEL of <ssescalarnum> entries.
11856 (define_expand "avx_vpermil<mode>"
11857 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11858 (vec_select:AVXMODEFSP
11859 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11860 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11863 int mask = INTVAL (operands[2]);
11864 rtx perm[<ssescalarnum>];
11866 perm[0] = GEN_INT (mask & 3);
11867 perm[1] = GEN_INT ((mask >> 2) & 3);
11868 perm[2] = GEN_INT ((mask >> 4) & 3);
11869 perm[3] = GEN_INT ((mask >> 6) & 3);
11870 if (<MODE>mode == V8SFmode)
11872 perm[4] = GEN_INT ((mask & 3) + 4);
11873 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11874 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11875 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11879 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate form of vpermil<ssemodesuffix>, matched from a vec_select
;; whose selector satisfies avx_vpermilp_<mode>_operand.  At output time
;; the immediate is recovered from the selector as
;; avx_vpermilp_parallel (...) - 1 and replaces operands[2].
11882 (define_insn "*avx_vpermilp<mode>"
11883 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11884 (vec_select:AVXMODEF2P
11885 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11886 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11887 [(match_operand 3 "const_int_operand" "")])))]
11890 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11891 operands[2] = GEN_INT (mask);
11892 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11894 [(set_attr "type" "sselog")
11895 (set_attr "prefix_extra" "1")
11896 (set_attr "length_immediate" "1")
11897 (set_attr "prefix" "vex")
11898 (set_attr "mode" "<MODE>")])
;; Variable form of vpermil<ssemodesuffix>: operand 1 is the data vector
;; (register) and operand 2 the selector vector of mode <avxpermvecmode>
;; (register or memory).
11900 (define_insn "avx_vpermilvar<mode>3"
11901 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11903 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11904 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11907 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11908 [(set_attr "type" "sselog")
11909 (set_attr "prefix_extra" "1")
11910 (set_attr "prefix" "vex")
11911 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128 with an 8-bit immediate in operand 3.
;; The default pattern is the opaque UNSPEC_VPERMIL2F128 form.  When
;; neither bit 3 nor bit 7 of the immediate is set ((mask & 0x88) == 0),
;; the operation is instead built as a vec_select from the vec_concat of
;; operands 1 and 2: the low half of the result takes nelt/2 consecutive
;; elements starting at (mask & 3) * nelt/2 and the high half takes
;; nelt/2 elements starting at ((mask >> 4) & 3) * nelt/2.
11913 (define_expand "avx_vperm2f128<mode>3"
11914 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11915 (unspec:AVX256MODE2P
11916 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11917 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11918 (match_operand:SI 3 "const_0_to_255_operand" "")]
11919 UNSPEC_VPERMIL2F128))]
11922 int mask = INTVAL (operands[3]);
11923 if ((mask & 0x88) == 0)
11925 rtx perm[<ssescalarnum>], t1, t2;
11926 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11928 base = (mask & 3) * nelt2;
11929 for (i = 0; i < nelt2; ++i)
11930 perm[i] = GEN_INT (base + i);
11932 base = ((mask >> 4) & 3) * nelt2;
11933 for (i = 0; i < nelt2; ++i)
11934 perm[i + nelt2] = GEN_INT (base + i);
11936 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11937 operands[1], operands[2]);
11938 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11939 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11940 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11946 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11947 ;; means that in order to represent this properly in rtl we'd have to
11948 ;; nest *another* vec_concat with a zero operand and do the select from
11949 ;; a 4x wide vector. That doesn't seem very nice.
;; Opaque form of vperm2f128: the operation is kept as
;; UNSPEC_VPERMIL2F128 with the raw 8-bit immediate in operand 3, which
;; is printed unchanged in the assembler template.
11950 (define_insn "*avx_vperm2f128<mode>_full"
11951 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11952 (unspec:AVX256MODE2P
11953 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11954 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11955 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11956 UNSPEC_VPERMIL2F128))]
11958 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11959 [(set_attr "type" "sselog")
11960 (set_attr "prefix_extra" "1")
11961 (set_attr "length_immediate" "1")
11962 (set_attr "prefix" "vex")
11963 (set_attr "mode" "V8SF")])
;; Transparent form of vperm2f128: a vec_select from the vec_concat of
;; operands 1 and 2 whose selector satisfies
;; avx_vperm2f128_<mode>_operand.  At output time the immediate is
;; recovered from the selector as avx_vperm2f128_parallel (...) - 1 and
;; replaces operands[3].
11965 (define_insn "*avx_vperm2f128<mode>_nozero"
11966 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11967 (vec_select:AVX256MODE2P
11968 (vec_concat:<ssedoublesizemode>
11969 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11970 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11971 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
11972 [(match_operand 4 "const_int_operand" "")])))]
11975 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11976 operands[3] = GEN_INT (mask);
11977 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11979 [(set_attr "type" "sselog")
11980 (set_attr "prefix_extra" "1")
11981 (set_attr "length_immediate" "1")
11982 (set_attr "prefix" "vex")
11983 (set_attr "mode" "V8SF")])
;; Expander for vinsertf128: inserts the half-width vector in operand 2
;; into the 256-bit vector in operand 1.  Operand 3 is a constant 0 or 1
;; and the switch dispatches on it to vec_set_lo_<mode> or
;; vec_set_hi_<mode>; any other value is unreachable.
11985 (define_expand "avx_vinsertf128<mode>"
11986 [(match_operand:AVX256MODE 0 "register_operand" "")
11987 (match_operand:AVX256MODE 1 "register_operand" "")
11988 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11989 (match_operand:SI 3 "const_0_to_1_operand" "")]
11992 switch (INTVAL (operands[3]))
11995 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11999 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12003 gcc_unreachable ();
;; Replace the low 128-bit half of a four-element 256-bit vector: the
;; result is the concatenation of operand 2 (new low half) with elements
;; 2 and 3 of operand 1.  Emits vinsertf128 with immediate 0.
12008 (define_insn "vec_set_lo_<mode>"
12009 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12010 (vec_concat:AVX256MODE4P
12011 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12012 (vec_select:<avxhalfvecmode>
12013 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12014 (parallel [(const_int 2) (const_int 3)]))))]
12016 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12017 [(set_attr "type" "sselog")
12018 (set_attr "prefix_extra" "1")
12019 (set_attr "length_immediate" "1")
12020 (set_attr "prefix" "vex")
12021 (set_attr "mode" "V8SF")])
;; Replace the high 128-bit half of a four-element 256-bit vector: the
;; result is the concatenation of elements 0 and 1 of operand 1 with
;; operand 2 (new high half).  Emits vinsertf128 with immediate 1.
12023 (define_insn "vec_set_hi_<mode>"
12024 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12025 (vec_concat:AVX256MODE4P
12026 (vec_select:<avxhalfvecmode>
12027 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12028 (parallel [(const_int 0) (const_int 1)]))
12029 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12031 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12032 [(set_attr "type" "sselog")
12033 (set_attr "prefix_extra" "1")
12034 (set_attr "length_immediate" "1")
12035 (set_attr "prefix" "vex")
12036 (set_attr "mode" "V8SF")])
;; Replace the low 128-bit half of an eight-element 256-bit vector: the
;; result is the concatenation of operand 2 (new low half) with elements
;; 4..7 of operand 1.  Emits vinsertf128 with immediate 0.
12038 (define_insn "vec_set_lo_<mode>"
12039 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12040 (vec_concat:AVX256MODE8P
12041 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12042 (vec_select:<avxhalfvecmode>
12043 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12044 (parallel [(const_int 4) (const_int 5)
12045 (const_int 6) (const_int 7)]))))]
12047 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12048 [(set_attr "type" "sselog")
12049 (set_attr "prefix_extra" "1")
12050 (set_attr "length_immediate" "1")
12051 (set_attr "prefix" "vex")
12052 (set_attr "mode" "V8SF")])
;; Replace the high 128-bit half of an eight-element 256-bit vector: the
;; result is the concatenation of elements 0..3 of operand 1 with
;; operand 2 (new high half).  Emits vinsertf128 with immediate 1.
12054 (define_insn "vec_set_hi_<mode>"
12055 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12056 (vec_concat:AVX256MODE8P
12057 (vec_select:<avxhalfvecmode>
12058 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12059 (parallel [(const_int 0) (const_int 1)
12060 (const_int 2) (const_int 3)]))
12061 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12063 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12064 [(set_attr "type" "sselog")
12065 (set_attr "prefix_extra" "1")
12066 (set_attr "length_immediate" "1")
12067 (set_attr "prefix" "vex")
12068 (set_attr "mode" "V8SF")])
;; V16HI variant of the "set low half" pattern: operand 2 is a V8HI
;; register or memory operand, and elements 8..15 of the V16HI operand 1
;; are the ones selected from the original vector.
;; NOTE(review): presumably these are combined by vec_concat/vec_select
;; exactly as in the vec_set_lo_<mode> patterns -- confirm.
;; Emitted as vinsertf128 with immediate 0x0.
12070 (define_insn "vec_set_lo_v16hi"
12071 [(set (match_operand:V16HI 0 "register_operand" "=x")
12073 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12075 (match_operand:V16HI 1 "register_operand" "x")
12076 (parallel [(const_int 8) (const_int 9)
12077 (const_int 10) (const_int 11)
12078 (const_int 12) (const_int 13)
12079 (const_int 14) (const_int 15)]))))]
12081 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12082 [(set_attr "type" "sselog")
12083 (set_attr "prefix_extra" "1")
12084 (set_attr "length_immediate" "1")
12085 (set_attr "prefix" "vex")
12086 (set_attr "mode" "V8SF")])
;; V16HI variant of the "set high half" pattern: elements 0..7 of the
;; V16HI operand 1 are selected from the original vector, followed by
;; operand 2, a V8HI register or memory operand.
;; NOTE(review): presumably these are combined by vec_concat/vec_select
;; exactly as in the vec_set_hi_<mode> patterns -- confirm.
;; Emitted as vinsertf128 with immediate 0x1.
12088 (define_insn "vec_set_hi_v16hi"
12089 [(set (match_operand:V16HI 0 "register_operand" "=x")
12092 (match_operand:V16HI 1 "register_operand" "x")
12093 (parallel [(const_int 0) (const_int 1)
12094 (const_int 2) (const_int 3)
12095 (const_int 4) (const_int 5)
12096 (const_int 6) (const_int 7)]))
12097 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12099 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12100 [(set_attr "type" "sselog")
12101 (set_attr "prefix_extra" "1")
12102 (set_attr "length_immediate" "1")
12103 (set_attr "prefix" "vex")
12104 (set_attr "mode" "V8SF")])
;; V32QI variant of the "set low half" pattern: operand 2 is a V16QI
;; register or memory operand, and elements 16..31 of the V32QI operand 1
;; are the ones selected from the original vector.
;; NOTE(review): presumably these are combined by vec_concat/vec_select
;; exactly as in the vec_set_lo_<mode> patterns -- confirm.
;; Emitted as vinsertf128 with immediate 0x0.
12106 (define_insn "vec_set_lo_v32qi"
12107 [(set (match_operand:V32QI 0 "register_operand" "=x")
12109 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12111 (match_operand:V32QI 1 "register_operand" "x")
12112 (parallel [(const_int 16) (const_int 17)
12113 (const_int 18) (const_int 19)
12114 (const_int 20) (const_int 21)
12115 (const_int 22) (const_int 23)
12116 (const_int 24) (const_int 25)
12117 (const_int 26) (const_int 27)
12118 (const_int 28) (const_int 29)
12119 (const_int 30) (const_int 31)]))))]
12121 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12122 [(set_attr "type" "sselog")
12123 (set_attr "prefix_extra" "1")
12124 (set_attr "length_immediate" "1")
12125 (set_attr "prefix" "vex")
12126 (set_attr "mode" "V8SF")])
;; V32QI variant of the "set high half" pattern: elements 0..15 of the
;; V32QI operand 1 are selected from the original vector, followed by
;; operand 2, a V16QI register or memory operand.
;; NOTE(review): presumably these are combined by vec_concat/vec_select
;; exactly as in the vec_set_hi_<mode> patterns -- confirm.
;; Emitted as vinsertf128 with immediate 0x1.
12128 (define_insn "vec_set_hi_v32qi"
12129 [(set (match_operand:V32QI 0 "register_operand" "=x")
12132 (match_operand:V32QI 1 "register_operand" "x")
12133 (parallel [(const_int 0) (const_int 1)
12134 (const_int 2) (const_int 3)
12135 (const_int 4) (const_int 5)
12136 (const_int 6) (const_int 7)
12137 (const_int 8) (const_int 9)
12138 (const_int 10) (const_int 11)
12139 (const_int 12) (const_int 13)
12140 (const_int 14) (const_int 15)]))
12141 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12143 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12144 [(set_attr "type" "sselog")
12145 (set_attr "prefix_extra" "1")
12146 (set_attr "length_immediate" "1")
12147 (set_attr "prefix" "vex")
12148 (set_attr "mode" "V8SF")])
;; Masked vector load for the AVXMODEF2P modes.  Operand 0 is the
;; destination register, operand 1 the memory source and operand 2 a
;; register of the same vector mode.
;; NOTE(review): operand 2 is presumably the per-element mask -- confirm.
;; Emitted as vmaskmov<ssemodesuffix> with AT&T operand order
;; memory, operand 2, destination.
12150 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
12151 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12153 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12154 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12158 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12159 [(set_attr "type" "sselog1")
12160 (set_attr "prefix_extra" "1")
12161 (set_attr "prefix" "vex")
12162 (set_attr "mode" "<MODE>")])
;; Masked vector store for the AVXMODEF2P modes, represented as an
;; UNSPEC_MASKSTORE.  Operand 0 is the memory destination; operands 1 and
;; 2 are registers of the same vector mode.
;; NOTE(review): from the AT&T template order (%2, %1, %0), operand 1 is
;; presumably the mask and operand 2 the data to store -- confirm.
12164 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
12165 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12167 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12168 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12170 UNSPEC_MASKSTORE))]
12172 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12173 [(set_attr "type" "sselog1")
12174 (set_attr "prefix_extra" "1")
12175 (set_attr "prefix" "vex")
12176 (set_attr "mode" "<MODE>")])
;; Pattern that sets a 256-bit AVX256MODE2P operand (register or memory)
;; from an unspec of a half-width operand 1 (register or memory).
;; It is split once reload has completed: operand 1 is re-expressed in
;; the full <MODE>mode, either as a new REG rtx with the same register
;; number or through gen_lowpart, and a plain move into operand 0 is
;; emitted.
;; NOTE(review): gen_rtx_REG is presumably used when operand 1 is a hard
;; register and gen_lowpart otherwise -- confirm.
12178 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12179 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12180 (unspec:AVX256MODE2P
12181 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12185 "&& reload_completed"
12188 rtx op1 = operands[1];
12190 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12192 op1 = gen_lowpart (<MODE>mode, op1);
12193 emit_move_insn (operands[0], op1);
;; Standard-named expander for initializing a 256-bit vector register
;; (any AVX256MODE mode).  Operand 0 is the destination register and
;; operand 1 is passed through unconstrained; all the work is delegated
;; to ix86_expand_vector_init, called with `false' as its first argument.
12197 (define_expand "vec_init<mode>"
12198 [(match_operand:AVX256MODE 0 "register_operand" "")
12199 (match_operand 1 "" "")]
12202 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Build a 256-bit vector from two half-width vectors: operand 1 (a
;; register) followed by operand 2.
;; Two alternatives exist:
;;   - operand 2 in a register or memory ("xm"): emitted as vinsertf128
;;     with immediate 0x1, using operand 1 printed with the %t modifier
;;     as the base vector;
;;   - operand 2 matching constraint "C": emitted as a vmovaps / vmovapd /
;;     vmovdqa of operand 1 into operand 0 printed with the %x modifier,
;;     the mnemonic being chosen from the insn's "mode" attribute.
;; The per-alternative attributes ("sselog,ssemov", "1,*") agree with
;; this: only the first alternative carries an immediate byte.
12206 (define_insn "*vec_concat<mode>_avx"
12207 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12208 (vec_concat:AVX256MODE
12209 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12210 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12213 switch (which_alternative)
12216 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12218 switch (get_attr_mode (insn))
12221 return "vmovaps\t{%1, %x0|%x0, %1}";
12223 return "vmovapd\t{%1, %x0|%x0, %1}";
12225 return "vmovdqa\t{%1, %x0|%x0, %1}";
12228 gcc_unreachable ();
12231 [(set_attr "type" "sselog,ssemov")
12232 (set_attr "prefix_extra" "1,*")
12233 (set_attr "length_immediate" "1,*")
12234 (set_attr "prefix" "vex")
12235 (set_attr "mode" "<avxvecmode>")])
;; Register form of the 128-bit half-precision to single-precision
;; conversion.  Operand 1 is a V8HI register; the conversion is modelled
;; as a V8SF unspec, from which the V4SF result in operand 0 is selected.
;; The selector lists elements 0, 1, 2 and 3 in order, because the
;; instruction converts the four low half-precision values to four
;; single-precision values in the same order.  A selector that repeats an
;; element would tell the RTL optimizers that two result lanes are always
;; equal, which the hardware does not guarantee.
12237 (define_insn "vcvtph2ps"
12238 [(set (match_operand:V4SF 0 "register_operand" "=x")
12240 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12242 (parallel [(const_int 0) (const_int 1)
12243 (const_int 2) (const_int 3)])))]
12245 "vcvtph2ps\t{%1, %0|%0, %1}"
12246 [(set_attr "type" "ssecvt")
12247 (set_attr "prefix" "vex")
12248 (set_attr "mode" "V4SF")])
;; Memory-source form of the 128-bit half-precision to single-precision
;; conversion: a V4HI memory operand is converted, via
;; UNSPEC_VCVTPH2PS, directly into a V4SF register.
;; NOTE(review): the "mode" attribute is V8SF although the result is
;; V4SF; the register form uses V4SF -- confirm this is intended.
12250 (define_insn "*vcvtph2ps_load"
12251 [(set (match_operand:V4SF 0 "register_operand" "=x")
12252 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12253 UNSPEC_VCVTPH2PS))]
12255 "vcvtph2ps\t{%1, %0|%0, %1}"
12256 [(set_attr "type" "ssecvt")
12257 (set_attr "prefix" "vex")
12258 (set_attr "mode" "V8SF")])
;; 256-bit half-precision to single-precision conversion: a V8HI operand
;; (register or memory) is converted, via UNSPEC_VCVTPH2PS, into a V8SF
;; register.
12260 (define_insn "vcvtph2ps256"
12261 [(set (match_operand:V8SF 0 "register_operand" "=x")
12262 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12263 UNSPEC_VCVTPH2PS))]
12265 "vcvtph2ps\t{%1, %0|%0, %1}"
12266 [(set_attr "type" "ssecvt")
12267 (set_attr "prefix" "vex")
12268 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit single-precision to half-precision
;; conversion into a V8HI register.  The conversion itself is a V4HI
;; unspec of operand 1 (V4SF register) and operand 2 (SI immediate).
;; The preparation statement sets operands[3] to the V4HI zero constant.
;; NOTE(review): operands[3] presumably fills the other half of the V8HI
;; result, matching the const0_operand in "*vcvtps2ph" -- confirm.
12270 (define_expand "vcvtps2ph"
12271 [(set (match_operand:V8HI 0 "register_operand" "")
12273 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12274 (match_operand:SI 2 "immediate_operand" "")]
12278 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination form of the 128-bit single-precision to
;; half-precision conversion.  The V8HI result combines the V4HI unspec
;; of operand 1 (V4SF register) and operand 2 (SI immediate, constraint
;; "N") with operand 3, which must be a V4HI zero constant.
;; Operand 2 is printed as the instruction's immediate.
12280 (define_insn "*vcvtps2ph"
12281 [(set (match_operand:V8HI 0 "register_operand" "=x")
12283 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12284 (match_operand:SI 2 "immediate_operand" "N")]
12286 (match_operand:V4HI 3 "const0_operand" "")))]
12288 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12289 [(set_attr "type" "ssecvt")
12290 (set_attr "prefix" "vex")
12291 (set_attr "mode" "V4SF")])
;; Memory-destination form of the 128-bit single-precision to
;; half-precision conversion: the V4HI result of UNSPEC_VCVTPS2PH on
;; operand 1 (V4SF register) and operand 2 (SI immediate, constraint "N")
;; is stored straight to the V4HI memory operand 0.
12293 (define_insn "*vcvtps2ph_store"
12294 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12295 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12296 (match_operand:SI 2 "immediate_operand" "N")]
12297 UNSPEC_VCVTPS2PH))]
12299 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12300 [(set_attr "type" "ssecvt")
12301 (set_attr "prefix" "vex")
12302 (set_attr "mode" "V4SF")])
;; 256-bit single-precision to half-precision conversion: operand 1
;; (V8SF register) and operand 2 (SI immediate, constraint "N") are
;; converted, via UNSPEC_VCVTPS2PH, into the V8HI operand 0, which may be
;; a register or memory.
12304 (define_insn "vcvtps2ph256"
12305 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12306 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12307 (match_operand:SI 2 "immediate_operand" "N")]
12308 UNSPEC_VCVTPS2PH))]
12310 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12311 [(set_attr "type" "ssecvt")
12312 (set_attr "prefix" "vex")
12313 (set_attr "mode" "V8SF")])