1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Instruction suffix for sign and zero extensions.
;; Code attribute: expands to "sx" for sign_extend and "zx" for zero_extend.
23 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
25 ;; 16 byte integral modes handled by SSE
;; (vectors of 16 QI, 8 HI, 4 SI and 2 DI elements).
26 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
28 ;; All 16-byte vector modes handled by SSE
;; SSEMODE lists the four integer modes plus V4SF and V2DF; SSEMODE16 is
;; the same list with V1TI added.
29 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
30 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
32 ;; 32 byte integral vector modes handled by AVX
;; (vectors of 32 QI, 16 HI, 8 SI and 4 DI elements).
33 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
35 ;; All 32-byte vector modes handled by AVX
;; (the integral modes above plus the float modes V8SF and V4DF).
36 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
38 ;; All QI vector modes handled by AVX
;; (the 32-element and the 16-element byte vector).
39 (define_mode_iterator AVXMODEQI [V32QI V16QI])
41 ;; All DI vector modes handled by AVX
;; (the 4-element and the 2-element DI vector).
42 (define_mode_iterator AVXMODEDI [V4DI V2DI])
44 ;; All vector modes handled by AVX
;; AVXMODE lists the 16-byte modes followed by the 32-byte modes;
;; AVXMODE16 is the same list with V1TI added.
45 (define_mode_iterator AVXMODE
46 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
47 (define_mode_iterator AVXMODE16
48 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte integer modes.  The digits in each name list the
;; element sizes in bytes that the iterator covers: 1 = V16QI, 2 = V8HI,
;; 4 = V4SI, 8 = V2DI.
51 (define_mode_iterator SSEMODE12 [V16QI V8HI])
52 (define_mode_iterator SSEMODE24 [V8HI V4SI])
53 (define_mode_iterator SSEMODE14 [V16QI V4SI])
54 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
55 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
56 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Float groups: SSEMODEF4 has the scalar and the 16-byte vector float
;; modes, SSEMODEF2P only the 16-byte vector float modes.
57 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
58 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Groupings of 32-byte modes.
60 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
61 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
62 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
63 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
64 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; Groupings that mix 16-byte and 32-byte modes: all float vectors,
;; four-element float vectors, DF vectors, SF vectors.
65 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
66 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
67 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
68 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; NOTE(review): AVXMODEDCVTDQ2PS has the same contents as AVXMODEFSP.
69 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
70 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
;; Scalar, 16-byte vector and 32-byte vector float modes.
;; NOTE(review): presumably iterated by the FMA patterns, which are not
;; visible in this part of the file -- confirm.
72 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
74 ;; Int-float size matches
;; Each iterator pairs a float vector mode with the integer vector mode
;; that has the same element count.
75 (define_mode_iterator SSEMODE4S [V4SF V4SI])
76 (define_mode_iterator SSEMODE2D [V2DF V2DI])
78 ;; Modes handled by integer vcond pattern
;; V2DI is only included when TARGET_SSE4_2 holds.
79 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
80 (V2DI "TARGET_SSE4_2")])
82 ;; Modes handled by vec_extract_even/odd pattern.
;; Each entry pairs a mode with the target condition that enables it.
83 (define_mode_iterator SSEMODE_EO
84 [(V4SF "TARGET_SSE")
85 (V2DF "TARGET_SSE2")
86 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
87 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
88 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
90 ;; Modes handled by storent patterns.
;; Each entry pairs a mode with the target condition that enables it.
91 (define_mode_iterator STORENT_MODE
92 [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
93 (SI "TARGET_SSE2") (V2DI "TARGET_SSE2") (V2DF "TARGET_SSE2")
94 (V4SF "TARGET_SSE")
95 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
97 ;; Modes handled by vector float patterns.
;; Each mode is enabled only under the target condition paired with it.
98 (define_mode_iterator VEC_FLOAT_MODE
99 [(V2DF "TARGET_SSE2") (V4SF "TARGET_SSE")
100 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
102 ;; Modes handled by vector extract patterns.
;; All 16-byte modes are enabled under TARGET_SSE; the two 32-byte float
;; modes require TARGET_AVX.
103 (define_mode_iterator VEC_EXTRACT_MODE
104 [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE")
105 (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")
106 (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
107 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
109 ;; Mapping from float mode to required SSE level
;; The value is spliced into pattern names such as "<sse>_movnt<mode>".
110 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
112 ;; Mapping from integer vector mode to mnemonic suffix
;; ("b", "w", "d", "q" for QI, HI, SI and DI elements respectively).
113 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
115 ;; Mapping of the insn mnemonic suffix
;; ssemodesuffix: "ss"/"sd" for the scalar modes, "ps"/"pd" for the vector
;; modes.  Note that the integer modes V8SI and V4DI map to the float
;; suffixes "ps" and "pd".
116 (define_mode_attr ssemodesuffix
117 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
118 (V8SI "ps") (V4DI "pd")])
;; ssescalarmodesuffix: suffix of the scalar form for each mode; among the
;; integer modes V4SI maps to "d" while V8SI and V4DI map to "ss" and "sd".
119 (define_mode_attr ssescalarmodesuffix
120 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V8SI "ss")
121 (V4DF "sd") (V4SI "d") (V4DI "sd")])
123 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
124 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
126 ;; Mapping of vector modes back to the scalar modes
;; (the element mode of each 16-byte vector mode).
127 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
128 (V16QI "QI") (V8HI "HI")
129 (V4SI "SI") (V2DI "DI")])
131 ;; Mapping of vector modes to a vector mode of double size
;; (same element mode, twice the element count).  Covers both the 16-byte
;; and the 32-byte modes.
132 (define_mode_attr ssedoublesizemode
133 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
134 (V8HI "V16HI") (V16QI "V32QI")
135 (V4DF "V8DF") (V8SF "V16SF")
136 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
138 ;; Number of scalar elements in each vector type
;; (given as a decimal string; first row 16-byte modes, second row
;; 32-byte modes).
139 (define_mode_attr ssescalarnum
140 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
141 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Mode attributes used by the AVX patterns.
;; avxvecmode: value used for the insn "mode" attribute -- TI for 16-byte
;; integer vectors, OI for 32-byte integer vectors, and the mode itself for
;; float vectors.
144 (define_mode_attr avxvecmode
145 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
146 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
147 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; avxvecpsmode: for an integer vector mode, the SF vector mode of the same
;; total size (V4SF for 16 bytes, V8SF for 32 bytes).
148 (define_mode_attr avxvecpsmode
149 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
150 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: same element mode, half the element count.
151 (define_mode_attr avxhalfvecmode
152 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
153 (V8SF "V4SF") (V4DF "V2DF")
154 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; avxscalarmode: element mode of each 16-byte and 32-byte vector mode.
155 (define_mode_attr avxscalarmode
156 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
157 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: swaps between the SF and SI vector modes with the same
;; element count.
158 (define_mode_attr avxcvtvecmode
159 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: for a float vector mode, the integer vector mode with
;; the same element count.
160 (define_mode_attr avxpermvecmode
161 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Suffix for each mode: "pd" for DF vectors, "ps" for SF vectors and "si"
;; for SI vectors.
162 (define_mode_attr avxmodesuffixp
163 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
164 (V4DF "pd")])
;; Size suffix spliced into pattern names such as
;; "avx_movu<ssemodesuffix><avxmodesuffix>": empty for the 16-byte modes,
;; "256" for the 32-byte modes.
165 (define_mode_attr avxmodesuffix
166 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
167 (V8SI "256") (V8SF "256") (V4DF "256")])
169 ;; Mapping of immediate bits for blend instructions
;; Each value is the all-ones mask with one bit per vector element,
;; i.e. 2^N - 1 for an N-element mode.
170 (define_mode_attr blendbits
171 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
173 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 1 << (N - 1) for an N-element mode.
174 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
176 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
184 (define_expand "mov<mode>"
185 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
186 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
189 ix86_expand_vector_move (<MODE>mode, operands);
193 (define_insn "*avx_mov<mode>_internal"
194 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
195 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
197 && (register_operand (operands[0], <MODE>mode)
198 || register_operand (operands[1], <MODE>mode))"
200 switch (which_alternative)
203 return standard_sse_constant_opcode (insn, operands[1]);
206 switch (get_attr_mode (insn))
210 return "vmovaps\t{%1, %0|%0, %1}";
213 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
214 return "vmovaps\t{%1, %0|%0, %1}";
216 return "vmovapd\t{%1, %0|%0, %1}";
218 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
219 return "vmovaps\t{%1, %0|%0, %1}";
221 return "vmovdqa\t{%1, %0|%0, %1}";
227 [(set_attr "type" "sselog1,ssemov,ssemov")
228 (set_attr "prefix" "vex")
229 (set_attr "mode" "<avxvecmode>")])
231 ;; All of these patterns are enabled for SSE1 as well as SSE2.
232 ;; This is essential for maintaining stable calling conventions.
234 (define_expand "mov<mode>"
235 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
236 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
239 ix86_expand_vector_move (<MODE>mode, operands);
243 (define_insn "*mov<mode>_internal"
244 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
245 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
247 && (register_operand (operands[0], <MODE>mode)
248 || register_operand (operands[1], <MODE>mode))"
250 switch (which_alternative)
253 return standard_sse_constant_opcode (insn, operands[1]);
256 switch (get_attr_mode (insn))
259 return "movaps\t{%1, %0|%0, %1}";
261 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
262 return "movaps\t{%1, %0|%0, %1}";
264 return "movapd\t{%1, %0|%0, %1}";
266 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
267 return "movaps\t{%1, %0|%0, %1}";
269 return "movdqa\t{%1, %0|%0, %1}";
275 [(set_attr "type" "sselog1,ssemov,ssemov")
277 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
278 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
279 (and (eq_attr "alternative" "2")
280 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
282 (const_string "V4SF")
283 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
284 (const_string "V4SF")
285 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
286 (const_string "V2DF")
288 (const_string "TI")))])
290 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
291 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
292 ;; from memory, we'd prefer to load the memory directly into the %xmm
293 ;; register. To facilitate this happy circumstance, this pattern won't
294 ;; split until after register allocation. If the 64-bit value didn't
295 ;; come from memory, this is the best we can do. This is much better
296 ;; than storing %edx:%eax into a stack temporary and loading an %xmm from it.
299 (define_insn_and_split "movdi_to_sse"
301 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
302 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
303 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
304 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
306 "&& reload_completed"
309 if (register_operand (operands[1], DImode))
311 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
312 Assemble the 64-bit DImode value in an xmm register. */
313 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
314 gen_rtx_SUBREG (SImode, operands[1], 0)));
315 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
316 gen_rtx_SUBREG (SImode, operands[1], 4)));
317 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
320 else if (memory_operand (operands[1], DImode))
321 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
322 operands[1], const0_rtx));
328 [(set (match_operand:V4SF 0 "register_operand" "")
329 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
330 "TARGET_SSE && reload_completed"
333 (vec_duplicate:V4SF (match_dup 1))
337 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
338 operands[2] = CONST0_RTX (V4SFmode);
342 [(set (match_operand:V2DF 0 "register_operand" "")
343 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
344 "TARGET_SSE2 && reload_completed"
345 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
347 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
348 operands[2] = CONST0_RTX (DFmode);
351 (define_expand "push<mode>1"
352 [(match_operand:AVX256MODE 0 "register_operand" "")]
355 ix86_expand_push (<MODE>mode, operands[0]);
359 (define_expand "push<mode>1"
360 [(match_operand:SSEMODE16 0 "register_operand" "")]
363 ix86_expand_push (<MODE>mode, operands[0]);
367 (define_expand "movmisalign<mode>"
368 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
369 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
372 ix86_expand_vector_move_misalign (<MODE>mode, operands);
376 (define_expand "movmisalign<mode>"
377 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
378 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
381 ix86_expand_vector_move_misalign (<MODE>mode, operands);
385 (define_expand "avx_movu<ssemodesuffix><avxmodesuffix>"
386 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "")
388 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "")]
390 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
392 if (MEM_P (operands[0]) && MEM_P (operands[1]))
393 operands[1] = force_reg (<MODE>mode, operands[1]);
396 (define_insn "*avx_movu<ssemodesuffix><avxmodesuffix>"
397 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
399 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
401 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
402 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
403 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
404 [(set_attr "type" "ssemov")
405 (set_attr "movu" "1")
406 (set_attr "prefix" "vex")
407 (set_attr "mode" "<MODE>")])
409 (define_insn "sse2_movq128"
410 [(set (match_operand:V2DI 0 "register_operand" "=x")
413 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
414 (parallel [(const_int 0)]))
417 "%vmovq\t{%1, %0|%0, %1}"
418 [(set_attr "type" "ssemov")
419 (set_attr "prefix" "maybe_vex")
420 (set_attr "mode" "TI")])
422 (define_expand "<sse>_movu<ssemodesuffix>"
423 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
425 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")]
427 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
429 if (MEM_P (operands[0]) && MEM_P (operands[1]))
430 operands[1] = force_reg (<MODE>mode, operands[1]);
433 (define_insn "*<sse>_movu<ssemodesuffix>"
434 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
436 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
438 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
439 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
440 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssemov")
442 (set_attr "movu" "1")
443 (set_attr "mode" "<MODE>")])
445 (define_expand "avx_movdqu<avxmodesuffix>"
446 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "")
448 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "")]
452 if (MEM_P (operands[0]) && MEM_P (operands[1]))
453 operands[1] = force_reg (<MODE>mode, operands[1]);
456 (define_insn "*avx_movdqu<avxmodesuffix>"
457 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
459 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
461 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
462 "vmovdqu\t{%1, %0|%0, %1}"
463 [(set_attr "type" "ssemov")
464 (set_attr "movu" "1")
465 (set_attr "prefix" "vex")
466 (set_attr "mode" "<avxvecmode>")])
468 (define_expand "sse2_movdqu"
469 [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
470 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "")]
474 if (MEM_P (operands[0]) && MEM_P (operands[1]))
475 operands[1] = force_reg (V16QImode, operands[1]);
478 (define_insn "*sse2_movdqu"
479 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
480 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
482 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
483 "movdqu\t{%1, %0|%0, %1}"
484 [(set_attr "type" "ssemov")
485 (set_attr "movu" "1")
486 (set_attr "prefix_data16" "1")
487 (set_attr "mode" "TI")])
489 (define_insn "avx_movnt<mode>"
490 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
492 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
494 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
495 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
496 [(set_attr "type" "ssemov")
497 (set_attr "prefix" "vex")
498 (set_attr "mode" "<MODE>")])
500 (define_insn "<sse>_movnt<mode>"
501 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
503 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
505 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
506 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
507 [(set_attr "type" "ssemov")
508 (set_attr "mode" "<MODE>")])
510 (define_insn "avx_movnt<mode>"
511 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
513 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
516 "vmovntdq\t{%1, %0|%0, %1}"
517 [(set_attr "type" "ssecvt")
518 (set_attr "prefix" "vex")
519 (set_attr "mode" "<avxvecmode>")])
521 (define_insn "sse2_movntv2di"
522 [(set (match_operand:V2DI 0 "memory_operand" "=m")
523 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
526 "movntdq\t{%1, %0|%0, %1}"
527 [(set_attr "type" "ssemov")
528 (set_attr "prefix_data16" "1")
529 (set_attr "mode" "TI")])
531 (define_insn "sse2_movntsi"
532 [(set (match_operand:SI 0 "memory_operand" "=m")
533 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
536 "movnti\t{%1, %0|%0, %1}"
537 [(set_attr "type" "ssemov")
538 (set_attr "prefix_data16" "0")
539 (set_attr "mode" "V2DF")])
541 (define_insn "avx_lddqu<avxmodesuffix>"
542 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
544 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
547 "vlddqu\t{%1, %0|%0, %1}"
548 [(set_attr "type" "ssecvt")
549 (set_attr "movu" "1")
550 (set_attr "prefix" "vex")
551 (set_attr "mode" "<avxvecmode>")])
553 (define_insn "sse3_lddqu"
554 [(set (match_operand:V16QI 0 "register_operand" "=x")
555 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
558 "lddqu\t{%1, %0|%0, %1}"
559 [(set_attr "type" "ssemov")
560 (set_attr "movu" "1")
561 (set_attr "prefix_data16" "0")
562 (set_attr "prefix_rep" "1")
563 (set_attr "mode" "TI")])
565 ; Expand patterns for non-temporal stores. At the moment, only those
566 ; that directly map to insns are defined; it would be possible to
567 ; define patterns for other modes that would expand to several insns.
569 (define_expand "storent<mode>"
570 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
572 [(match_operand:STORENT_MODE 1 "register_operand" "")]
575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
577 ;; Parallel floating point arithmetic
579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for vector float abs and neg; the absneg code iterator supplies
;; <code> and <CODE>.  The condition is empty because every VEC_FLOAT_MODE
;; entry already carries its own target test.  All work is delegated to
;; ix86_expand_fp_absneg_operator, after which expansion is DONE.
581 (define_expand "<code><mode>2"
582 [(set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
583 (absneg:VEC_FLOAT_MODE
584 (match_operand:VEC_FLOAT_MODE 1 "register_operand" "")))]
585 ""
586 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Expander for add/sub on the 32-byte float vector modes (the plusminus
;; code iterator supplies the operation).  Enabled when
;; AVX256_VEC_FLOAT_MODE_P holds for the mode.  Before the pattern is
;; emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy.
588 (define_expand "<plusminus_insn><mode>3"
589 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
590 (plusminus:AVX256MODEF2P
591 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
592 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
593 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
594 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; VEX-encoded packed float add/sub for the 16-byte and 32-byte float
;; modes.  Three-operand form: operand 1 is constrained to any SSE register
;; ("x") rather than tied to the destination, and the template prints all
;; three operands.  Operand 2 may be a register or memory.
;; NOTE(review): <comm> is an attribute defined outside this part of the
;; file; presumably it marks operand 1 commutative for plus -- confirm.
596 (define_insn "*avx_<plusminus_insn><mode>3"
597 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
598 (plusminus:AVXMODEF2P
599 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
600 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
601 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
602 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
603 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
604 [(set_attr "type" "sseadd")
605 (set_attr "prefix" "vex")
606 (set_attr "mode" "<avxvecmode>")])
;; Expander for add/sub on the 16-byte float vector modes (V4SF, V2DF).
;; Enabled when SSE_VEC_FLOAT_MODE_P holds for the mode.  Before the
;; pattern is emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy.
608 (define_expand "<plusminus_insn><mode>3"
609 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
610 (plusminus:SSEMODEF2P
611 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
612 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
613 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
614 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-VEX packed float add/sub for V4SF and V2DF.  Two-operand form:
;; operand 1 is tied to the destination by the "0" constraint, so the
;; template prints only operands 2 and 0.  Operand 2 may be a register or
;; memory.
;; NOTE(review): <comm> is an attribute defined outside this part of the
;; file; presumably it marks operand 1 commutative for plus -- confirm.
616 (define_insn "*<plusminus_insn><mode>3"
617 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
618 (plusminus:SSEMODEF2P
619 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
620 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
621 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
622 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
623 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
624 [(set_attr "type" "sseadd")
625 (set_attr "mode" "<MODE>")])
627 (define_insn "*avx_vm<plusminus_insn><mode>3"
628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
629 (vec_merge:SSEMODEF2P
630 (plusminus:SSEMODEF2P
631 (match_operand:SSEMODEF2P 1 "register_operand" "x")
632 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
635 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
636 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
637 [(set_attr "type" "sseadd")
638 (set_attr "prefix" "vex")
639 (set_attr "mode" "<ssescalarmode>")])
641 (define_insn "<sse>_vm<plusminus_insn><mode>3"
642 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
643 (vec_merge:SSEMODEF2P
644 (plusminus:SSEMODEF2P
645 (match_operand:SSEMODEF2P 1 "register_operand" "0")
646 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
649 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
650 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
651 [(set_attr "type" "sseadd")
652 (set_attr "mode" "<ssescalarmode>")])
;; Expander for multiply on the 32-byte float vector modes.  Enabled when
;; AVX256_VEC_FLOAT_MODE_P holds for the mode.  Before the pattern is
;; emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
654 (define_expand "mul<mode>3"
655 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
656 (mult:AVX256MODEF2P
657 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
658 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
659 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
660 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; VEX-encoded packed float multiply for the 16-byte and 32-byte float
;; modes.  Three-operand form; the "%" on operand 1 marks operands 1 and 2
;; as commutative.  Operand 2 may be a register or memory.
662 (define_insn "*avx_mul<mode>3"
663 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
664 (mult:AVXMODEF2P
665 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
666 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
667 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
668 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
669 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
670 [(set_attr "type" "ssemul")
671 (set_attr "prefix" "vex")
672 (set_attr "mode" "<avxvecmode>")])
;; Expander for multiply on the 16-byte float vector modes (V4SF, V2DF).
;; Enabled when SSE_VEC_FLOAT_MODE_P holds for the mode.  Before the
;; pattern is emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
674 (define_expand "mul<mode>3"
675 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
676 (mult:SSEMODEF2P
677 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
678 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
679 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
680 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Non-VEX packed float multiply for V4SF and V2DF.  Two-operand form:
;; operand 1 is tied to the destination ("0"), and the "%" marks operands
;; 1 and 2 as commutative.  Operand 2 may be a register or memory.
682 (define_insn "*mul<mode>3"
683 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
684 (mult:SSEMODEF2P
685 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
686 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
687 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
688 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
689 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
690 [(set_attr "type" "ssemul")
691 (set_attr "mode" "<MODE>")])
693 (define_insn "*avx_vmmul<mode>3"
694 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
695 (vec_merge:SSEMODEF2P
697 (match_operand:SSEMODEF2P 1 "register_operand" "x")
698 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
701 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
702 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
703 [(set_attr "type" "ssemul")
704 (set_attr "prefix" "vex")
705 (set_attr "mode" "<ssescalarmode>")])
707 (define_insn "<sse>_vmmul<mode>3"
708 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
709 (vec_merge:SSEMODEF2P
711 (match_operand:SSEMODEF2P 1 "register_operand" "0")
712 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
715 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
716 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
717 [(set_attr "type" "ssemul")
718 (set_attr "mode" "<ssescalarmode>")])
720 (define_expand "divv8sf3"
721 [(set (match_operand:V8SF 0 "register_operand" "")
722 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
723 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
726 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
728 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
729 && flag_finite_math_only && !flag_trapping_math
730 && flag_unsafe_math_optimizations)
732 ix86_emit_swdivsf (operands[0], operands[1],
733 operands[2], V8SFmode);
;; Expander for V4DF division.  V4DF is a 32-byte mode, so the pattern is
;; only available under TARGET_AVX.  Before the pattern is emitted the
;; operands are passed through ix86_fixup_binary_operands_no_copy with
;; code DIV.
738 (define_expand "divv4df3"
739 [(set (match_operand:V4DF 0 "register_operand" "")
740 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
741 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
742 "TARGET_AVX"
743 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; VEX-encoded packed float divide for the 16-byte and 32-byte float modes.
;; Three-operand form: operand 1 must be a register, operand 2 may be a
;; register or memory.
745 (define_insn "avx_div<mode>3"
746 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
747 (div:AVXMODEF2P
748 (match_operand:AVXMODEF2P 1 "register_operand" "x")
749 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
750 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
751 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
752 [(set_attr "type" "ssediv")
753 (set_attr "prefix" "vex")
754 (set_attr "mode" "<MODE>")])
756 (define_expand "divv4sf3"
757 [(set (match_operand:V4SF 0 "register_operand" "")
758 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
759 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
762 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
763 && flag_finite_math_only && !flag_trapping_math
764 && flag_unsafe_math_optimizations)
766 ix86_emit_swdivsf (operands[0], operands[1],
767 operands[2], V4SFmode);
772 (define_expand "divv2df3"
773 [(set (match_operand:V2DF 0 "register_operand" "")
774 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
775 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; VEX-encoded packed float divide restricted to the 16-byte float modes
;; (V4SF, V2DF); enabled when AVX128_VEC_FLOAT_MODE_P holds.  Three-operand
;; form: operand 1 must be a register, operand 2 may be a register or
;; memory.
778 (define_insn "*avx_div<mode>3"
779 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
780 (div:SSEMODEF2P
781 (match_operand:SSEMODEF2P 1 "register_operand" "x")
782 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
783 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
784 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
785 [(set_attr "type" "ssediv")
786 (set_attr "prefix" "vex")
787 (set_attr "mode" "<MODE>")])
;; Non-VEX packed float divide for V4SF and V2DF.  Two-operand form:
;; operand 1 is tied to the destination by the "0" constraint, so the
;; template prints only operands 2 and 0.  Operand 2 may be a register or
;; memory.
789 (define_insn "<sse>_div<mode>3"
790 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
791 (div:SSEMODEF2P
792 (match_operand:SSEMODEF2P 1 "register_operand" "0")
793 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
794 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
795 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
796 [(set_attr "type" "ssediv")
797 (set_attr "mode" "<MODE>")])
799 (define_insn "*avx_vmdiv<mode>3"
800 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
801 (vec_merge:SSEMODEF2P
803 (match_operand:SSEMODEF2P 1 "register_operand" "x")
804 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
807 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
808 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
809 [(set_attr "type" "ssediv")
810 (set_attr "prefix" "vex")
811 (set_attr "mode" "<ssescalarmode>")])
813 (define_insn "<sse>_vmdiv<mode>3"
814 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
815 (vec_merge:SSEMODEF2P
817 (match_operand:SSEMODEF2P 1 "register_operand" "0")
818 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
821 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
822 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
823 [(set_attr "type" "ssediv")
824 (set_attr "mode" "<ssescalarmode>")])
826 (define_insn "avx_rcpv8sf2"
827 [(set (match_operand:V8SF 0 "register_operand" "=x")
829 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
831 "vrcpps\t{%1, %0|%0, %1}"
832 [(set_attr "type" "sse")
833 (set_attr "prefix" "vex")
834 (set_attr "mode" "V8SF")])
836 (define_insn "sse_rcpv4sf2"
837 [(set (match_operand:V4SF 0 "register_operand" "=x")
839 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
841 "%vrcpps\t{%1, %0|%0, %1}"
842 [(set_attr "type" "sse")
843 (set_attr "atom_sse_attr" "rcp")
844 (set_attr "prefix" "maybe_vex")
845 (set_attr "mode" "V4SF")])
847 (define_insn "*avx_vmrcpv4sf2"
848 [(set (match_operand:V4SF 0 "register_operand" "=x")
850 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
852 (match_operand:V4SF 2 "register_operand" "x")
855 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
856 [(set_attr "type" "sse")
857 (set_attr "prefix" "vex")
858 (set_attr "mode" "SF")])
860 (define_insn "sse_vmrcpv4sf2"
861 [(set (match_operand:V4SF 0 "register_operand" "=x")
863 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
865 (match_operand:V4SF 2 "register_operand" "0")
868 "rcpss\t{%1, %0|%0, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "atom_sse_attr" "rcp")
871 (set_attr "mode" "SF")])
873 (define_expand "sqrtv8sf2"
874 [(set (match_operand:V8SF 0 "register_operand" "")
875 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
878 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
879 && flag_finite_math_only && !flag_trapping_math
880 && flag_unsafe_math_optimizations)
882 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; VEX-encoded packed single-precision square root on V8SF.  V8SF is a
;; 32-byte mode, so the pattern requires TARGET_AVX.  The source may be a
;; register or memory.
887 (define_insn "avx_sqrtv8sf2"
888 [(set (match_operand:V8SF 0 "register_operand" "=x")
889 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
890 "TARGET_AVX"
891 "vsqrtps\t{%1, %0|%0, %1}"
892 [(set_attr "type" "sse")
893 (set_attr "prefix" "vex")
894 (set_attr "mode" "V8SF")])
896 (define_expand "sqrtv4sf2"
897 [(set (match_operand:V4SF 0 "register_operand" "")
898 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
901 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
902 && flag_finite_math_only && !flag_trapping_math
903 && flag_unsafe_math_optimizations)
905 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Packed single-precision square root on V4SF, available from TARGET_SSE.
;; The "%v" in the template together with prefix "maybe_vex" lets the same
;; pattern emit either the legacy or the VEX mnemonic.  The source may be a
;; register or memory.
910 (define_insn "sse_sqrtv4sf2"
911 [(set (match_operand:V4SF 0 "register_operand" "=x")
912 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
913 "TARGET_SSE"
914 "%vsqrtps\t{%1, %0|%0, %1}"
915 [(set_attr "type" "sse")
916 (set_attr "atom_sse_attr" "sqrt")
917 (set_attr "prefix" "maybe_vex")
918 (set_attr "mode" "V4SF")])
;; VEX-encoded packed double-precision square root on V4DF.  V4DF is a
;; 32-byte mode, so the pattern requires TARGET_AVX.  The source may be a
;; register or memory.
920 (define_insn "sqrtv4df2"
921 [(set (match_operand:V4DF 0 "register_operand" "=x")
922 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
923 "TARGET_AVX"
924 "vsqrtpd\t{%1, %0|%0, %1}"
925 [(set_attr "type" "sse")
926 (set_attr "prefix" "vex")
927 (set_attr "mode" "V4DF")])
;; Packed double-precision square root on V2DF, available from TARGET_SSE2.
;; The "%v" in the template together with prefix "maybe_vex" lets the same
;; pattern emit either the legacy or the VEX mnemonic.  The source may be a
;; register or memory.
929 (define_insn "sqrtv2df2"
930 [(set (match_operand:V2DF 0 "register_operand" "=x")
931 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
932 "TARGET_SSE2"
933 "%vsqrtpd\t{%1, %0|%0, %1}"
934 [(set_attr "type" "sse")
935 (set_attr "prefix" "maybe_vex")
936 (set_attr "mode" "V2DF")])
938 (define_insn "*avx_vmsqrt<mode>2"
939 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
940 (vec_merge:SSEMODEF2P
942 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
943 (match_operand:SSEMODEF2P 2 "register_operand" "x")
945 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
946 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
947 [(set_attr "type" "sse")
948 (set_attr "prefix" "vex")
949 (set_attr "mode" "<ssescalarmode>")])
951 (define_insn "<sse>_vmsqrt<mode>2"
952 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
953 (vec_merge:SSEMODEF2P
955 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
956 (match_operand:SSEMODEF2P 2 "register_operand" "0")
958 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
959 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
960 [(set_attr "type" "sse")
961 (set_attr "atom_sse_attr" "sqrt")
962 (set_attr "mode" "<ssescalarmode>")])
964 (define_expand "rsqrtv8sf2"
965 [(set (match_operand:V8SF 0 "register_operand" "")
967 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
968 "TARGET_AVX && TARGET_SSE_MATH"
970 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; Packed single-precision reciprocal square root estimate on V8SF,
;; represented as UNSPEC_RSQRT.  Operand 1 may be an SSE register or
;; memory.  Always emitted as the VEX-encoded "vrsqrtps".
974 (define_insn "avx_rsqrtv8sf2"
975 [(set (match_operand:V8SF 0 "register_operand" "=x")
977 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
979 "vrsqrtps\t{%1, %0|%0, %1}"
980 [(set_attr "type" "sse")
981 (set_attr "prefix" "vex")
982 (set_attr "mode" "V8SF")])
;; Expander for the V4SF reciprocal square root (UNSPEC_RSQRT).
;; The preparation code passes operands 0 and 1 to ix86_emit_swsqrtsf
;; with V4SFmode and a final argument of 1, mirroring the V8SF expander.
984 (define_expand "rsqrtv4sf2"
985 [(set (match_operand:V4SF 0 "register_operand" "")
987 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
990 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Packed single-precision reciprocal square root estimate on V4SF
;; (UNSPEC_RSQRT).  Operand 1 may be an SSE register or memory.  The
;; "%v" template prefix with prefix "maybe_vex" lets the same pattern
;; print either "rsqrtps" or "vrsqrtps".
994 (define_insn "sse_rsqrtv4sf2"
995 [(set (match_operand:V4SF 0 "register_operand" "=x")
997 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
999 "%vrsqrtps\t{%1, %0|%0, %1}"
1000 [(set_attr "type" "sse")
1001 (set_attr "prefix" "maybe_vex")
1002 (set_attr "mode" "V4SF")])
;; AVX three-operand scalar reciprocal square root ("vrsqrtss").
;; Operand 1 (register or memory) is wrapped in an unspec; operand 2 is
;; a separate SSE register supplying the other source of the
;; three-operand form.  The "mode" attribute is SF because only one
;; element is computed.
1004 (define_insn "*avx_vmrsqrtv4sf2"
1005 [(set (match_operand:V4SF 0 "register_operand" "=x")
1007 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1009 (match_operand:V4SF 2 "register_operand" "x")
1012 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1013 [(set_attr "type" "sse")
1014 (set_attr "prefix" "vex")
1015 (set_attr "mode" "SF")])
;; Legacy-SSE two-operand scalar reciprocal square root ("rsqrtss").
;; Operand 2 is tied to the destination by the matching constraint "0",
;; so only operands 1 and 0 appear in the template.
1017 (define_insn "sse_vmrsqrtv4sf2"
1018 [(set (match_operand:V4SF 0 "register_operand" "=x")
1020 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1022 (match_operand:V4SF 2 "register_operand" "0")
1025 "rsqrtss\t{%1, %0|%0, %1}"
1026 [(set_attr "type" "sse")
1027 (set_attr "mode" "SF")])
1029 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1030 ;; isn't really correct, as those rtl operators aren't defined when
1031 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Standard-name min/max expander for the 256-bit float vector modes
;; (iterates over the smaxmin codes and AVX256MODEF2P).
;; When NaNs must be honoured (!flag_finite_math_only) operand 1 is
;; forced into a register before the operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
1033 (define_expand "<code><mode>3"
1034 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1035 (smaxmin:AVX256MODEF2P
1036 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1037 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1038 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1040 if (!flag_finite_math_only)
1041 operands[1] = force_reg (<MODE>mode, operands[1]);
1042 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Standard-name min/max expander for the 128-bit SSE float vector
;; modes (SSEMODEF2P), enabled by SSE_VEC_FLOAT_MODE_P.
;; As in the 256-bit expander, operand 1 is forced into a register
;; unless flag_finite_math_only is set, then the operands are fixed up
;; with ix86_fixup_binary_operands_no_copy.
1045 (define_expand "<code><mode>3"
1046 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1048 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1049 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1050 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1052 if (!flag_finite_math_only)
1053 operands[1] = force_reg (<MODE>mode, operands[1]);
1054 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max, finite-math variant.
;; Only matches under flag_finite_math_only, which is why operand 1 can
;; be marked commutative ("%x").  ix86_binary_operator_ok additionally
;; vets the operand combination.  Emits the three-operand
;; v<maxmin_float><ssemodesuffix> form.
1057 (define_insn "*avx_<code><mode>3_finite"
1058 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1060 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1061 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1062 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1063 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1064 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "type" "sseadd")
1066 (set_attr "prefix" "vex")
1067 (set_attr "mode" "<MODE>")])
;; Legacy-SSE packed min/max, finite-math variant.
;; Operand 1 is commutative and tied to the destination ("%0"), so the
;; two-operand template prints only operands 2 and 0.  Matches only
;; under flag_finite_math_only and when ix86_binary_operator_ok accepts
;; the operands.
1069 (define_insn "*<code><mode>3_finite"
1070 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1072 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1073 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1074 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1075 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1076 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1077 [(set_attr "type" "sseadd")
1078 (set_attr "mode" "<MODE>")])
;; AVX packed min/max without the finite-math restriction: the insn
;; condition is only AVX_VEC_FLOAT_MODE_P.  Emits the three-operand
;; v<maxmin_float><ssemodesuffix> form; operand 2 may be memory.
;; NOTE(review): operand 1 is still marked commutative ("%x") here,
;; unlike the legacy-SSE counterpart -- confirm this is intended.
1080 (define_insn "*avx_<code><mode>3"
1081 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1083 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1084 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1085 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1086 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1087 [(set_attr "type" "sseadd")
1088 (set_attr "prefix" "vex")
1089 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE packed min/max without the finite-math restriction.
;; Operand 1 must be a register tied to the destination ("0") and is
;; not marked commutative, so operand order is preserved.  Operand 2
;; may be a register or memory.
1091 (define_insn "*<code><mode>3"
1092 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1094 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1095 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1096 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1097 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1098 [(set_attr "type" "sseadd")
1099 (set_attr "mode" "<MODE>")])
;; AVX three-operand scalar ("vm" = vector-merge) min/max.
;; Operand 1 is an SSE register, operand 2 a register or memory; the
;; result of the scalar operation is merged into operand 0 by the
;; vec_merge.  Matches only for AVX128_VEC_FLOAT_MODE_P modes.
;; The "type" attribute is "sseadd", matching the legacy-SSE twin
;; <sse>_vm<code><mode>3 and every other min/max pattern in this file,
;; so that scheduling treats both encodings alike.
1101 (define_insn "*avx_vm<code><mode>3"
1102 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1103 (vec_merge:SSEMODEF2P
1105 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1106 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1109 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1110 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1111 [(set_attr "type" "sseadd")
1112 (set_attr "prefix" "vex")
1113 (set_attr "mode" "<ssescalarmode>")])
;; Legacy-SSE two-operand scalar ("vm") min/max.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.  Emits <maxmin_float><ssescalarmodesuffix>.
1115 (define_insn "<sse>_vm<code><mode>3"
1116 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1117 (vec_merge:SSEMODEF2P
1119 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1120 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1123 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1124 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1125 [(set_attr "type" "sseadd")
1126 (set_attr "mode" "<ssescalarmode>")])
1128 ;; These versions of the min/max patterns implement exactly the operations
1129 ;; min = (op1 < op2 ? op1 : op2)
1130 ;; max = (!(op1 < op2) ? op1 : op2)
1131 ;; Their operands are not commutative, and thus they may be used in the
1132 ;; presence of -0.0 and NaN.
;; AVX form of the order-preserving ("ieee") packed minimum.
;; Operand 1 must be a register and is not marked commutative; operand
;; 2 may be a register or memory.  Emits vmin<ssemodesuffix> with
;; operands in the order op2, op1, dest (AT&T).
1134 (define_insn "*avx_ieee_smin<mode>3"
1135 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1137 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1138 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1140 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1141 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1142 [(set_attr "type" "sseadd")
1143 (set_attr "prefix" "vex")
1144 (set_attr "mode" "<avxvecmode>")])
;; AVX form of the order-preserving ("ieee") packed maximum.
;; Same operand layout as *avx_ieee_smin<mode>3: operand 1 is a
;; non-commutative register, operand 2 a register or memory.  Emits
;; vmax<ssemodesuffix>.
1146 (define_insn "*avx_ieee_smax<mode>3"
1147 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1149 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1150 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1152 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1153 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1154 [(set_attr "type" "sseadd")
1155 (set_attr "prefix" "vex")
1156 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE form of the order-preserving ("ieee") packed minimum.
;; Operand 1 is tied to the destination ("0") and is not commutative;
;; operand 2 may be a register or memory.  Emits min<ssemodesuffix>.
1158 (define_insn "*ieee_smin<mode>3"
1159 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1161 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1162 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1164 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1165 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1166 [(set_attr "type" "sseadd")
1167 (set_attr "mode" "<MODE>")])
;; Legacy-SSE form of the order-preserving ("ieee") packed maximum.
;; Operand 1 is tied to the destination ("0") and is not commutative;
;; operand 2 may be a register or memory.  Emits max<ssemodesuffix>.
1169 (define_insn "*ieee_smax<mode>3"
1170 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1172 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1173 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1175 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1176 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1177 [(set_attr "type" "sseadd")
1178 (set_attr "mode" "<MODE>")])
;; 256-bit single-precision add/subtract ("vaddsubps") on V8SF.
;; The RTL combines two computations on the same operand pair; the
;; `minus' arm reuses operands 1 and 2 through match_dup.  Operand 1
;; must be a register, operand 2 may be a register or memory.
;; NOTE(review): which lanes take the sum and which the difference is
;; decided by the merge selector -- confirm against the full pattern.
1180 (define_insn "avx_addsubv8sf3"
1181 [(set (match_operand:V8SF 0 "register_operand" "=x")
1184 (match_operand:V8SF 1 "register_operand" "x")
1185 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1186 (minus:V8SF (match_dup 1) (match_dup 2))
1189 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1190 [(set_attr "type" "sseadd")
1191 (set_attr "prefix" "vex")
1192 (set_attr "mode" "V8SF")])
;; 256-bit double-precision add/subtract ("vaddsubpd") on V4DF.
;; Same structure as avx_addsubv8sf3: the `minus' arm reuses operands 1
;; and 2 via match_dup; operand 2 may be memory.
1194 (define_insn "avx_addsubv4df3"
1195 [(set (match_operand:V4DF 0 "register_operand" "=x")
1198 (match_operand:V4DF 1 "register_operand" "x")
1199 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1200 (minus:V4DF (match_dup 1) (match_dup 2))
1203 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1204 [(set_attr "type" "sseadd")
1205 (set_attr "prefix" "vex")
1206 (set_attr "mode" "V4DF")])
;; AVX-encoded 128-bit single-precision add/subtract on V4SF.
;; Three-operand "vaddsubps": operand 1 is a free SSE register (not
;; tied to the destination), operand 2 a register or memory.
1208 (define_insn "*avx_addsubv4sf3"
1209 [(set (match_operand:V4SF 0 "register_operand" "=x")
1212 (match_operand:V4SF 1 "register_operand" "x")
1213 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1214 (minus:V4SF (match_dup 1) (match_dup 2))
1217 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1218 [(set_attr "type" "sseadd")
1219 (set_attr "prefix" "vex")
1220 (set_attr "mode" "V4SF")])
;; Legacy-encoded 128-bit single-precision add/subtract ("addsubps").
;; Two-operand form: operand 1 is tied to the destination ("0").
;; prefix_rep is set to "1" so the length computation accounts for the
;; instruction's mandatory prefix byte.
1222 (define_insn "sse3_addsubv4sf3"
1223 [(set (match_operand:V4SF 0 "register_operand" "=x")
1226 (match_operand:V4SF 1 "register_operand" "0")
1227 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1228 (minus:V4SF (match_dup 1) (match_dup 2))
1231 "addsubps\t{%2, %0|%0, %2}"
1232 [(set_attr "type" "sseadd")
1233 (set_attr "prefix_rep" "1")
1234 (set_attr "mode" "V4SF")])
;; AVX-encoded 128-bit double-precision add/subtract on V2DF.
;; Three-operand "vaddsubpd": operand 1 is a free SSE register,
;; operand 2 a register or memory.
1236 (define_insn "*avx_addsubv2df3"
1237 [(set (match_operand:V2DF 0 "register_operand" "=x")
1240 (match_operand:V2DF 1 "register_operand" "x")
1241 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1242 (minus:V2DF (match_dup 1) (match_dup 2))
1245 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1246 [(set_attr "type" "sseadd")
1247 (set_attr "prefix" "vex")
1248 (set_attr "mode" "V2DF")])
;; Legacy-encoded 128-bit double-precision add/subtract ("addsubpd").
;; Two-operand form: operand 1 is tied to the destination ("0").
;; Marked atom_unit "complex" for Atom scheduling.
1250 (define_insn "sse3_addsubv2df3"
1251 [(set (match_operand:V2DF 0 "register_operand" "=x")
1254 (match_operand:V2DF 1 "register_operand" "0")
1255 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1256 (minus:V2DF (match_dup 1) (match_dup 2))
1259 "addsubpd\t{%2, %0|%0, %2}"
1260 [(set_attr "type" "sseadd")
1261 (set_attr "atom_unit" "complex")
1262 (set_attr "mode" "V2DF")])
;; 256-bit double-precision horizontal add/subtract on V4DF
;; (iterates over plus/minus via <plusminus_insn>).
;; The RTL pairs adjacent elements -- (0,1) and (2,3) -- of operand 1
;; and likewise of operand 2, each picked out with vec_select.
;; Emits "vh<plusminus_mnemonic>pd"; operand 2 may be memory.
1264 (define_insn "avx_h<plusminus_insn>v4df3"
1265 [(set (match_operand:V4DF 0 "register_operand" "=x")
1270 (match_operand:V4DF 1 "register_operand" "x")
1271 (parallel [(const_int 0)]))
1272 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1274 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1275 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1279 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1280 (parallel [(const_int 0)]))
1281 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1283 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1284 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1286 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1287 [(set_attr "type" "sseadd")
1288 (set_attr "prefix" "vex")
1289 (set_attr "mode" "V4DF")])
;; 256-bit single-precision horizontal add/subtract on V8SF.
;; Adjacent element pairs are selected with vec_select: first elements
;; 0..3 of operand 1 and of operand 2, then elements 4..7 of operand 1
;; and of operand 2, i.e. the two 128-bit halves are handled as
;; separate groups.  Emits "vh<plusminus_mnemonic>ps".
1291 (define_insn "avx_h<plusminus_insn>v8sf3"
1292 [(set (match_operand:V8SF 0 "register_operand" "=x")
1298 (match_operand:V8SF 1 "register_operand" "x")
1299 (parallel [(const_int 0)]))
1300 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1302 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1303 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1307 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1308 (parallel [(const_int 0)]))
1309 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1311 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1312 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1316 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1317 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1319 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1320 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1323 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1324 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1326 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1327 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1329 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1330 [(set_attr "type" "sseadd")
1331 (set_attr "prefix" "vex")
1332 (set_attr "mode" "V8SF")])
;; AVX-encoded 128-bit single-precision horizontal add/subtract on V4SF.
;; Pairs elements (0,1) and (2,3) of operand 1, then of operand 2.
;; Three-operand "vh<plusminus_mnemonic>ps": operand 1 is a free SSE
;; register, operand 2 a register or memory.
1334 (define_insn "*avx_h<plusminus_insn>v4sf3"
1335 [(set (match_operand:V4SF 0 "register_operand" "=x")
1340 (match_operand:V4SF 1 "register_operand" "x")
1341 (parallel [(const_int 0)]))
1342 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1344 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1345 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1349 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1350 (parallel [(const_int 0)]))
1351 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1353 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1354 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1356 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1357 [(set_attr "type" "sseadd")
1358 (set_attr "prefix" "vex")
1359 (set_attr "mode" "V4SF")])
;; Legacy-encoded 128-bit single-precision horizontal add/subtract.
;; Same element pairing as the AVX V4SF pattern, but in two-operand
;; form: operand 1 is tied to the destination ("0").  prefix_rep "1"
;; accounts for the mandatory prefix byte; atom_unit is "complex".
;; This pattern is named (no leading '*'), so gen_sse3_haddv4sf3 exists
;; and is used by the reduc_splus_v4sf expander.
1361 (define_insn "sse3_h<plusminus_insn>v4sf3"
1362 [(set (match_operand:V4SF 0 "register_operand" "=x")
1367 (match_operand:V4SF 1 "register_operand" "0")
1368 (parallel [(const_int 0)]))
1369 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1371 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1372 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1376 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1377 (parallel [(const_int 0)]))
1378 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1380 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1381 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1383 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1384 [(set_attr "type" "sseadd")
1385 (set_attr "atom_unit" "complex")
1386 (set_attr "prefix_rep" "1")
1387 (set_attr "mode" "V4SF")])
;; AVX-encoded 128-bit double-precision horizontal add/subtract on V2DF.
;; Pairs elements (0,1) of operand 1 and elements (0,1) of operand 2.
;; Three-operand "vh<plusminus_mnemonic>pd"; operand 2 may be memory.
1389 (define_insn "*avx_h<plusminus_insn>v2df3"
1390 [(set (match_operand:V2DF 0 "register_operand" "=x")
1394 (match_operand:V2DF 1 "register_operand" "x")
1395 (parallel [(const_int 0)]))
1396 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1399 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1400 (parallel [(const_int 0)]))
1401 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1403 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1404 [(set_attr "type" "sseadd")
1405 (set_attr "prefix" "vex")
1406 (set_attr "mode" "V2DF")])
;; Legacy-encoded 128-bit double-precision horizontal add/subtract.
;; Two-operand form: operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.  Being a named pattern, it
;; provides gen_sse3_haddv2df3, used by the reduc_splus_v2df expander.
1408 (define_insn "sse3_h<plusminus_insn>v2df3"
1409 [(set (match_operand:V2DF 0 "register_operand" "=x")
1413 (match_operand:V2DF 1 "register_operand" "0")
1414 (parallel [(const_int 0)]))
1415 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1418 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1419 (parallel [(const_int 0)]))
1420 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1422 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1423 [(set_attr "type" "sseadd")
1424 (set_attr "mode" "V2DF")])
;; Sum-reduction expander for V8SF (operand 0 = result vector,
;; operand 1 = input vector, both registers).
;; Sequence: two horizontal adds of the input with itself (tmp, then
;; tmp2), a vperm2f128 of tmp2 with itself using immediate 1 (written
;; back into tmp), and a final vector add of tmp and tmp2 into
;; operand 0.  NOTE(review): immediate 1 presumably swaps the 128-bit
;; halves so the final add combines both lanes -- confirm.
1426 (define_expand "reduc_splus_v8sf"
1427 [(match_operand:V8SF 0 "register_operand" "")
1428 (match_operand:V8SF 1 "register_operand" "")]
1431 rtx tmp = gen_reg_rtx (V8SFmode);
1432 rtx tmp2 = gen_reg_rtx (V8SFmode);
1433 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1434 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1435 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1436 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V4SF.
;; Two strategies appear in the preparation code: a pair of
;; sse3_haddv4sf3 insns (input with itself, then the temporary with
;; itself, writing operand 0), and the generic helper
;; ix86_expand_reduc_v4sf driven by gen_addv4sf3.
;; NOTE(review): the hadd path is presumably chosen when SSE3 is
;; available and the helper otherwise -- confirm the selecting test.
1440 (define_expand "reduc_splus_v4sf"
1441 [(match_operand:V4SF 0 "register_operand" "")
1442 (match_operand:V4SF 1 "register_operand" "")]
1447 rtx tmp = gen_reg_rtx (V4SFmode);
1448 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1449 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1452 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum-reduction expander for V4DF.
;; Sequence: one horizontal add of the input with itself (tmp), a
;; vperm2f128 of tmp with itself using immediate 1 (tmp2), and a
;; vector add of tmp and tmp2 into operand 0.
1456 (define_expand "reduc_splus_v4df"
1457 [(match_operand:V4DF 0 "register_operand" "")
1458 (match_operand:V4DF 1 "register_operand" "")]
1461 rtx tmp = gen_reg_rtx (V4DFmode);
1462 rtx tmp2 = gen_reg_rtx (V4DFmode);
1463 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1464 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1465 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V2DF: a single sse3_haddv2df3 of the
;; input with itself, written straight into operand 0.
1469 (define_expand "reduc_splus_v2df"
1470 [(match_operand:V2DF 0 "register_operand" "")
1471 (match_operand:V2DF 1 "register_operand" "")]
1474 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Max-reduction expander for V4SF: delegates to the generic helper
;; ix86_expand_reduc_v4sf, passing gen_smaxv4sf3 as the combining
;; operation, operand 0 as destination and operand 1 as input.
1478 (define_expand "reduc_smax_v4sf"
1479 [(match_operand:V4SF 0 "register_operand" "")
1480 (match_operand:V4SF 1 "register_operand" "")]
1483 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Min-reduction expander for V4SF: delegates to the generic helper
;; ix86_expand_reduc_v4sf, passing gen_sminv4sf3 as the combining
;; operation, operand 0 as destination and operand 1 as input.
1487 (define_expand "reduc_smin_v4sf"
1488 [(match_operand:V4SF 0 "register_operand" "")
1489 (match_operand:V4SF 1 "register_operand" "")]
1492 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1496 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1498 ;; Parallel floating point comparisons
1500 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare with an explicit predicate.
;; Operand 1 is an SSE register, operand 2 a register or memory, and
;; operand 3 an immediate in the range 0..31 (const_0_to_31_operand)
;; that is printed as the first AT&T operand of vcmp<ssemodesuffix>.
;; length_immediate "1" accounts for that one-byte immediate.
1502 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1503 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1505 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1506 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1507 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1510 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1511 [(set_attr "type" "ssecmp")
1512 (set_attr "length_immediate" "1")
1513 (set_attr "prefix" "vex")
1514 (set_attr "mode" "<MODE>")])
;; AVX scalar floating-point compare with an explicit predicate.
;; Operand 1 is an SSE register, operand 2 a register or memory, and
;; operand 3 an immediate in the range 0..31; the scalar result is
;; merged into operand 0 by the vec_merge.  Emits
;; vcmp<ssescalarmodesuffix> with the immediate first in AT&T order.
;; Operand 0 carries the "=x" constraint: as the output of a
;; define_insn it must be marked as written and restricted to SSE
;; registers, like every other insn pattern in this file.
1516 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1517 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1518 (vec_merge:SSEMODEF2P
1520 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1521 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1522 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1527 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1528 [(set_attr "type" "ssecmp")
1529 (set_attr "length_immediate" "1")
1530 (set_attr "prefix" "vex")
1531 (set_attr "mode" "<ssescalarmode>")])
1533 ;; We don't promote 128-bit vector compare intrinsics, but the
1534 ;; vectorizer may generate 256-bit vector compare instructions.
;; AVX packed compare expressed as an RTL comparison operator.
;; Operator 3 must satisfy avx_comparison_float_operator and is applied
;; to operand 1 (register) and operand 2 (register or memory).
;; "%D3" in the template prints a mnemonic fragment derived from
;; operator 3 (presumably the condition name, e.g. "eq"/"lt" --
;; confirm in print_operand).  length_immediate "1" reserves the
;; predicate byte in the length computation.
1535 (define_insn "*avx_maskcmp<mode>3"
1536 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1537 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1538 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1539 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1540 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1541 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1542 [(set_attr "type" "ssecmp")
1543 (set_attr "prefix" "vex")
1544 (set_attr "length_immediate" "1")
1545 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE compare expressed as an RTL comparison operator, for the
;; SSEMODEF4 modes (the visible condition accepts both scalar
;; SSE_FLOAT_MODE_P and vector SSE_VEC_FLOAT_MODE_P modes).
;; Operator 3 must satisfy sse_comparison_operator.  Two-operand form:
;; operand 1 is tied to the destination ("0"); "%D3" supplies the
;; condition part of the mnemonic.
1547 (define_insn "<sse>_maskcmp<mode>3"
1548 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1549 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1550 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1551 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1553 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1554 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1555 [(set_attr "type" "ssecmp")
1556 (set_attr "length_immediate" "1")
1557 (set_attr "mode" "<MODE>")])
;; AVX three-operand scalar ("vm") compare expressed as an RTL
;; comparison operator.  Operator 3 must satisfy
;; sse_comparison_operator and is applied to operand 1 (register) and
;; operand 2 (register or memory); the scalar result is merged into
;; operand 0 by the vec_merge.  "%D3" supplies the condition part of
;; the vcmp mnemonic.
;; length_immediate is "1" because the comparison predicate is encoded
;; as a one-byte immediate, exactly as in *avx_maskcmp<mode>3 and
;; <sse>_vmmaskcmp<mode>3; without it the insn length is under-counted.
1559 (define_insn "*avx_vmmaskcmp<mode>3"
1560 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1561 (vec_merge:SSEMODEF2P
1562 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1563 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1564 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1567 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1568 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1569 [(set_attr "type" "ssecmp")
1570 (set_attr "prefix" "vex")
(set_attr "length_immediate" "1")
1571 (set_attr "mode" "<ssescalarmode>")])
;; Legacy-SSE two-operand scalar ("vm") compare expressed as an RTL
;; comparison operator (sse_comparison_operator).  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
;; length_immediate "1" accounts for the predicate byte.
1573 (define_insn "<sse>_vmmaskcmp<mode>3"
1574 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1575 (vec_merge:SSEMODEF2P
1576 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1577 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1578 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1581 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1582 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1583 [(set_attr "type" "ssecmp")
1584 (set_attr "length_immediate" "1")
1585 (set_attr "mode" "<ssescalarmode>")])
;; Scalar ordered compare setting the flags register in CCFP mode.
;; Both operands are full vectors (<ssevecmode>), but only element 0 of
;; each is selected for the comparison.  Operand 1 may be memory.
;; "%v" with prefix "maybe_vex" allows either "comis*" or "vcomis*".
;; prefix_rep is forced to "0" and prefix_data16 is chosen by an
;; if_then_else on whether the mode attribute is DF.
1587 (define_insn "<sse>_comi"
1588 [(set (reg:CCFP FLAGS_REG)
1591 (match_operand:<ssevecmode> 0 "register_operand" "x")
1592 (parallel [(const_int 0)]))
1594 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1595 (parallel [(const_int 0)]))))]
1596 "SSE_FLOAT_MODE_P (<MODE>mode)"
1597 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1598 [(set_attr "type" "ssecomi")
1599 (set_attr "prefix" "maybe_vex")
1600 (set_attr "prefix_rep" "0")
1601 (set (attr "prefix_data16")
1602 (if_then_else (eq_attr "mode" "DF")
1604 (const_string "0")))
1605 (set_attr "mode" "<MODE>")])
;; Scalar unordered compare setting the flags register in CCFPU mode.
;; Identical in shape to <sse>_comi: element 0 of each vector operand
;; is compared, operand 1 may be memory, and the template prints
;; "ucomis*" with an optional "v" prefix.  prefix_data16 again depends
;; on whether the mode attribute is DF.
1607 (define_insn "<sse>_ucomi"
1608 [(set (reg:CCFPU FLAGS_REG)
1611 (match_operand:<ssevecmode> 0 "register_operand" "x")
1612 (parallel [(const_int 0)]))
1614 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1615 (parallel [(const_int 0)]))))]
1616 "SSE_FLOAT_MODE_P (<MODE>mode)"
1617 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1618 [(set_attr "type" "ssecomi")
1619 (set_attr "prefix" "maybe_vex")
1620 (set_attr "prefix_rep" "0")
1621 (set (attr "prefix_data16")
1622 (if_then_else (eq_attr "mode" "DF")
1624 (const_string "0")))
1625 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for float vector modes.
;; Operands: 0 = destination, 1 / 2 = values selected when the
;; comparison is true / false, 3 = comparison operator (any operator,
;; empty predicate), 4 and 5 = the values being compared.
;; Enabled for either SSE or AVX float vector modes; the actual
;; expansion is done by ix86_expand_fp_vcond, whose boolean result is
;; captured in `ok'.
1627 (define_expand "vcond<mode>"
1628 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1629 (if_then_else:AVXMODEF2P
1630 (match_operator 3 ""
1631 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1632 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1633 (match_operand:AVXMODEF2P 1 "general_operand" "")
1634 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1635 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1636 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1638 bool ok = ix86_expand_fp_vcond (operands);
1643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1645 ;; Parallel floating point logical operations
1647 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed float and-not ("vandn<ssemodesuffix>").
;; Operand 1 (register) sits inside an inner sub-expression, operand 2
;; (register or memory) is the other input.
;; NOTE(review): presumably computes (~op1) & op2, matching the
;; instruction name -- confirm against the full RTL.
1649 (define_insn "avx_andnot<mode>3"
1650 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1653 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1654 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1655 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1656 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1657 [(set_attr "type" "sselog")
1658 (set_attr "prefix" "vex")
1659 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE packed float and-not ("andn<ssemodesuffix>").
;; Two-operand form: operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
1661 (define_insn "<sse>_andnot<mode>3"
1662 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1665 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1666 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1667 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1668 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1669 [(set_attr "type" "sselog")
1670 (set_attr "mode" "<MODE>")])
;; Standard-name bitwise-logic expander (any_logic codes) for the
;; 256-bit float vector modes.  The only preparation step is
;; ix86_fixup_binary_operands_no_copy, which legitimizes the operand
;; combination before the generated RTL is emitted.
1672 (define_expand "<code><mode>3"
1673 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1674 (any_logic:AVX256MODEF2P
1675 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1676 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1677 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1678 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed float bitwise logic (any_logic codes), three-operand
;; form with a commutative operand 1 ("%x").
;; The output code returns one of two templates: when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" form is used
;; regardless of element type; otherwise the suffix follows the mode.
1680 (define_insn "*avx_<code><mode>3"
1681 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1682 (any_logic:AVXMODEF2P
1683 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1684 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1685 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1686 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1688 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1689 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1691 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1693 [(set_attr "type" "sselog")
1694 (set_attr "prefix" "vex")
1695 (set_attr "mode" "<avxvecmode>")])
;; Standard-name bitwise-logic expander (any_logic codes) for the
;; 128-bit SSE float vector modes.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy before the RTL is emitted.
1697 (define_expand "<code><mode>3"
1698 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1699 (any_logic:SSEMODEF2P
1700 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1701 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1702 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1703 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Legacy-SSE packed float bitwise logic (any_logic codes).
;; Two-operand form: operand 1 is commutative and tied to the
;; destination ("%0").  As in the AVX pattern, the "ps" mnemonic is
;; used whenever TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set,
;; otherwise the suffix follows the mode.
1705 (define_insn "*<code><mode>3"
1706 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1707 (any_logic:SSEMODEF2P
1708 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1709 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1710 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1711 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1713 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1714 return "<logic>ps\t{%2, %0|%0, %2}";
1716 return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
1718 [(set_attr "type" "sselog")
1719 (set_attr "mode" "<MODE>")])
;; Vector copysign expander built from bitwise logic.
;; operands[3] is a sign-bit mask from ix86_build_signbit_mask;
;; operands[4] and operands[5] are fresh temporaries.  The sequence
;; combines operand 1 with the inverted mask, ANDs operand 2 with the
;; mask, and ORs the two temporaries into operand 0 -- i.e. magnitude
;; bits from operand 1 and sign bits from operand 2.
1721 (define_expand "copysign<mode>3"
1724 (not:VEC_FLOAT_MODE (match_dup 3))
1725 (match_operand:VEC_FLOAT_MODE 1 "nonimmediate_operand" "")))
1727 (and:VEC_FLOAT_MODE (match_dup 3)
1728 (match_operand:VEC_FLOAT_MODE 2 "nonimmediate_operand" "")))
1729 (set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
1730 (ior:VEC_FLOAT_MODE (match_dup 4) (match_dup 5)))]
1733 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1735 operands[4] = gen_reg_rtx (<MODE>mode);
1736 operands[5] = gen_reg_rtx (<MODE>mode);
1739 ;; Also define scalar versions. These are used for abs, neg, and
1740 ;; conditional move. Using subregs into vector modes causes register
1741 ;; allocation lossage. These patterns do not allow memory operands
1742 ;; because the native instructions read the full 128-bits.
;; AVX and-not on scalar float modes (MODEF), implemented with the
;; packed instruction "vandnp<ssemodefsuffix>".  All three operands
;; must be SSE registers -- no memory alternative -- and the "mode"
;; attribute is the containing vector mode (<ssevecmode>) because the
;; instruction operates on the whole register.
1744 (define_insn "*avx_andnot<mode>3"
1745 [(set (match_operand:MODEF 0 "register_operand" "=x")
1748 (match_operand:MODEF 1 "register_operand" "x"))
1749 (match_operand:MODEF 2 "register_operand" "x")))]
1750 "AVX_FLOAT_MODE_P (<MODE>mode)"
1751 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1752 [(set_attr "type" "sselog")
1753 (set_attr "prefix" "vex")
1754 (set_attr "mode" "<ssevecmode>")])
;; Legacy-SSE and-not on scalar float modes (MODEF), implemented with
;; the packed instruction "andnp<ssemodefsuffix>".  Operand 1 is tied
;; to the destination ("0"); operand 2 must be a register, never
;; memory.
1756 (define_insn "*andnot<mode>3"
1757 [(set (match_operand:MODEF 0 "register_operand" "=x")
1760 (match_operand:MODEF 1 "register_operand" "0"))
1761 (match_operand:MODEF 2 "register_operand" "x")))]
1762 "SSE_FLOAT_MODE_P (<MODE>mode)"
1763 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1764 [(set_attr "type" "sselog")
1765 (set_attr "mode" "<ssevecmode>")])
;; AVX bitwise logic on scalar float modes (MODEF), implemented with
;; packed instructions.  All operands are SSE registers.  The output
;; code picks "v<logic>ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
;; is set, otherwise "v<logic>p<ssemodefsuffix>".
1767 (define_insn "*avx_<code><mode>3"
1768 [(set (match_operand:MODEF 0 "register_operand" "=x")
1770 (match_operand:MODEF 1 "register_operand" "x")
1771 (match_operand:MODEF 2 "register_operand" "x")))]
1772 "AVX_FLOAT_MODE_P (<MODE>mode)"
1774 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1775 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1777 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1779 [(set_attr "type" "sselog")
1780 (set_attr "prefix" "vex")
1781 (set_attr "mode" "<ssevecmode>")])
;; Legacy-SSE bitwise logic on scalar float modes (MODEF), implemented
;; with packed instructions.  Operand 1 is tied to the destination
;; ("0"); operand 2 must be a register.  The output code picks
;; "<logic>ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set,
;; otherwise "<logic>p<ssemodefsuffix>".
1783 (define_insn "*<code><mode>3"
1784 [(set (match_operand:MODEF 0 "register_operand" "=x")
1786 (match_operand:MODEF 1 "register_operand" "0")
1787 (match_operand:MODEF 2 "register_operand" "x")))]
1788 "SSE_FLOAT_MODE_P (<MODE>mode)"
1790 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1791 return "<logic>ps\t{%2, %0|%0, %2}";
1793 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1795 [(set_attr "type" "sselog")
1796 (set_attr "mode" "<ssevecmode>")])
1798 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1800 ;; FMA4 floating point multiply/accumulate instructions. This section
1801 ;; covers both the scalar and the vector versions of the instructions.
1804 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1806 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1807 ;; combine to generate a multiply/add with two memory references. We then
1808 ;; split this insn, into loading up the destination register with one of the
1809 ;; memory operations. If we don't manage to split the insn, reload will
1810 ;; generate the appropriate moves. The reason this is needed, is that combine
1811 ;; has already folded one of the memory references into both the multiply and
1812 ;; add insns, and it can't generate a new pseudo. I.e.:
1813 ;; (set (reg1) (mem (addr1)))
1814 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1815 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1817 ;; Intrinsic FMA operations.
1819 ;; The standard name for fma is only available with SSE math enabled.
;; Standard-name fused multiply-add expander over the FMAMODE modes.
;; Operands 1..3 may each be a register or memory.  Available when
;; either TARGET_FMA or TARGET_FMA4 is set, and additionally requires
;; TARGET_SSE_MATH.
1820 (define_expand "fma<mode>4"
1821 [(set (match_operand:FMAMODE 0 "register_operand")
1823 (match_operand:FMAMODE 1 "nonimmediate_operand")
1824 (match_operand:FMAMODE 2 "nonimmediate_operand")
1825 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1826 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
1829 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Builtin-facing fused multiply-add expander over the FMAMODE modes.
;; Same operand shape as fma<mode>4, but the condition is only
;; TARGET_FMA || TARGET_FMA4 (no TARGET_SSE_MATH requirement).
1830 (define_expand "fma4i_fmadd_<mode>"
1831 [(set (match_operand:FMAMODE 0 "register_operand")
1833 (match_operand:FMAMODE 1 "nonimmediate_operand")
1834 (match_operand:FMAMODE 2 "nonimmediate_operand")
1835 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1836 "TARGET_FMA || TARGET_FMA4"
;; FMA4 four-operand fused multiply-add ("vfmadd<ssemodesuffix>").
;; Two constraint alternatives: in the first, operand 3 may be memory
;; ("xm") while operand 2 is a register; in the second, operand 2 is
;; memory and operand 3 a register.  So at most one of operands 2 and 3
;; is a memory reference.  Operand 1 is commutative ("%").
1839 (define_insn "*fma4i_fmadd_<mode>"
1840 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1842 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1843 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1844 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1846 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1847 [(set_attr "type" "ssemuladd")
1848 (set_attr "mode" "<MODE>")])
;; FMA4 four-operand fused multiply-subtract
;; ("vfmsub<ssemodesuffix>").  Operand 3 sits inside an extra
;; sub-expression (note the additional closing parenthesis),
;; presumably a negation of the addend -- confirm.  Same two
;; constraint alternatives as *fma4i_fmadd_<mode>.
1850 (define_insn "*fma4i_fmsub_<mode>"
1851 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1853 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1854 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1856 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1858 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1859 [(set_attr "type" "ssemuladd")
1860 (set_attr "mode" "<MODE>")])
;; FMA4 four-operand negated fused multiply-add
;; ("vfnmadd<ssemodesuffix>").  Operand 1 sits inside an extra
;; sub-expression, presumably a negation of the first multiplicand --
;; confirm.  Same two constraint alternatives as *fma4i_fmadd_<mode>.
1862 (define_insn "*fma4i_fnmadd_<mode>"
1863 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1866 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1867 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1868 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1870 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1871 [(set_attr "type" "ssemuladd")
1872 (set_attr "mode" "<MODE>")])
;; FMA4 four-operand negated fused multiply-subtract
;; ("vfnmsub<ssemodesuffix>").  Both operand 1 and operand 3 sit inside
;; extra sub-expressions, presumably negations -- confirm.  Same two
;; constraint alternatives as *fma4i_fmadd_<mode>.
1874 (define_insn "*fma4i_fnmsub_<mode>"
1875 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1878 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1879 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1881 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1883 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1884 [(set_attr "type" "ssemuladd")
1885 (set_attr "mode" "<MODE>")])
1887 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1888 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar ("vm") fused multiply-add over SSEMODEF2P.
;; The three inputs are merged (vec_merge) with operands[4], which the
;; preparation code sets to the all-zero constant of the vector mode,
;; so the elements not computed by the scalar operation are zero.
1890 (define_expand "fma4i_vmfmadd_<mode>"
1891 [(set (match_operand:SSEMODEF2P 0 "register_operand")
1892 (vec_merge:SSEMODEF2P
1894 (match_operand:SSEMODEF2P 1 "nonimmediate_operand")
1895 (match_operand:SSEMODEF2P 2 "nonimmediate_operand")
1896 (match_operand:SSEMODEF2P 3 "nonimmediate_operand"))
1901 operands[4] = CONST0_RTX (<MODE>mode);
;; FMA4 scalar fused multiply-add ("vfmadd<ssescalarmodesuffix>").
;; The scalar result is merged with operand 4, which must be a zero
;; constant (const0_operand).  Constraint alternatives allow memory in
;; operand 3 or in operand 2, but not both.
1904 (define_insn "*fma4i_vmfmadd_<mode>"
1905 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1906 (vec_merge:SSEMODEF2P
1908 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1909 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1910 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1911 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1914 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1915 [(set_attr "type" "ssemuladd")
1916 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fused multiply-subtract
;; ("vfmsub<ssescalarmodesuffix>").  Operand 3 sits inside an extra
;; sub-expression (presumably a negation -- confirm); the scalar result
;; is merged with the zero constant in operand 4.
1918 (define_insn "*fma4i_vmfmsub_<mode>"
1919 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1920 (vec_merge:SSEMODEF2P
1922 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1923 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1925 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
1926 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1929 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1930 [(set_attr "type" "ssemuladd")
1931 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated fused multiply-add
;; ("vfnmadd<ssescalarmodesuffix>").  Operand 1 sits inside an extra
;; sub-expression (presumably a negation -- confirm); the scalar result
;; is merged with the zero constant in operand 4.
1933 (define_insn "*fma4i_vmfnmadd_<mode>"
1934 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1935 (vec_merge:SSEMODEF2P
1938 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1939 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1940 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1941 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1944 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1945 [(set_attr "type" "ssemuladd")
1946 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 negated-multiply-subtract matcher; emits the four-operand
;; vfnmsub<ssescalarmodesuffix>.  Operands 1 and 3 each sit inside one
;; extra rtx wrapper (NOTE(review): presumably negations -- lines not shown).
;; Operand 4 must satisfy const0_operand.
1948 (define_insn "*fma4i_vmfnmsub_<mode>"
1949 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1950 (vec_merge:SSEMODEF2P
1953 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1954 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1956 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
1957 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1960 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1961 [(set_attr "type" "ssemuladd")
1962 (set_attr "mode" "<MODE>")])
1964 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1966 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1968 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1970 ;; It would be possible to represent these without the UNSPEC as
1973 ;; (fma op1 op2 op3)
1974 ;; (fma op1 op2 (neg op3))
1977 ;; But this doesn't seem useful in practice.
;; Named expander for the packed fmaddsub operation over AVXMODEF2P.
;; Takes three nonimmediate inputs in a bracketed operand vector and is
;; available when either TARGET_FMA or TARGET_FMA4 is enabled.
1979 (define_expand "fmaddsub_<mode>"
1980 [(set (match_operand:AVXMODEF2P 0 "register_operand")
1982 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand")
1983 (match_operand:AVXMODEF2P 2 "nonimmediate_operand")
1984 (match_operand:AVXMODEF2P 3 "nonimmediate_operand")]
1986 "TARGET_FMA || TARGET_FMA4"
;; FMA4 packed fmaddsub matcher (four-operand form).
;; Alternative 0 allows memory in operand 3, alternative 1 in operand 2.
;; The mnemonic suffix comes from <ssemodesuffix> so that every mode of
;; the AVXMODEF2P iterator gets its own suffix instead of a hard-coded
;; "ps"; this matches the FMA3 *fma_fmaddsub_<mode> pattern.
1989 (define_insn "*fma4_fmaddsub_<mode>"
1990 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
1992 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
1993 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
1994 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")]
1997 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1998 [(set_attr "type" "ssemuladd")
1999 (set_attr "mode" "<MODE>")])
;; FMA4 packed fmsubadd matcher (four-operand form).  Operand 3 sits
;; inside one extra rtx wrapper (NOTE(review): presumably a negation --
;; wrapper line not shown).
;; The mnemonic suffix comes from <ssemodesuffix> so that every mode of
;; the AVXMODEF2P iterator gets its own suffix instead of a hard-coded
;; "ps"; this matches the FMA3 *fma_fmsubadd_<mode> pattern.
2001 (define_insn "*fma4_fmsubadd_<mode>"
2002 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
2004 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
2005 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
2007 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2010 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2011 [(set_attr "type" "ssemuladd")
2012 (set_attr "mode" "<MODE>")])
2014 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2016 ;; FMA3 floating point multiply/accumulate instructions.
2018 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 multiply-add matcher over FMAMODE.  Each alternative ties one
;; input to the destination (constraint "0") and picks the matching
;; operand ordering: operand 1 tied -> 132 or 213, operand 3 tied -> 231.
;; The middle alternative uses the 213 ordering (the FMA3 orderings are
;; 132/213/231; there is no "312" mnemonic), as the fmaddsub and
;; fmsubadd FMA3 patterns already do for the same operand layout.
2020 (define_insn "*fma_fmadd_<mode>"
2021 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2023 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2024 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2025 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2028 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2029 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2030 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2031 [(set_attr "type" "ssemuladd")
2032 (set_attr "mode" "<MODE>")])
;; FMA3 multiply-subtract matcher over FMAMODE.  Operand 3 sits inside
;; one extra rtx wrapper (NOTE(review): presumably a negation -- wrapper
;; line not shown).  Each alternative ties one input to the destination
;; (constraint "0") and picks the matching 132/213/231 operand ordering.
;; The middle alternative uses 213; "312" is not an FMA3 ordering.
2034 (define_insn "*fma_fmsub_<mode>"
2035 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2037 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2038 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2040 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2043 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2044 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2045 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2046 [(set_attr "type" "ssemuladd")
2047 (set_attr "mode" "<MODE>")])
;; FMA3 negated-multiply-add matcher over FMAMODE; emits vfnmadd.
;; Operand 1 sits inside one extra rtx wrapper (NOTE(review): presumably
;; a negation -- wrapper line not shown).
;; Named *fma_fnmadd so it no longer shares a name with the plain
;; multiply-add pattern; "*" names are not used to build gen_ functions.
;; The middle alternative uses 213; "312" is not an FMA3 ordering.
2049 (define_insn "*fma_fnmadd_<mode>"
2050 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2053 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2054 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2055 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2058 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2059 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2060 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2061 [(set_attr "type" "ssemuladd")
2062 (set_attr "mode" "<MODE>")])
;; FMA3 negated-multiply-subtract matcher over FMAMODE; emits vfnmsub.
;; Operands 1 and 3 each sit inside one extra rtx wrapper
;; (NOTE(review): presumably negations -- wrapper lines not shown).
;; Named *fma_fnmsub so it no longer shares a name with the plain
;; multiply-subtract pattern; "*" names are not used to build gen_ functions.
;; The middle alternative uses 213; "312" is not an FMA3 ordering.
2064 (define_insn "*fma_fnmsub_<mode>"
2065 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2068 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2069 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2071 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2074 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2075 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2076 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2077 [(set_attr "type" "ssemuladd")
2078 (set_attr "mode" "<MODE>")])
;; FMA3 packed fmaddsub matcher over AVXMODEF2P.  Each alternative ties
;; one input to the destination (constraint "0") and selects the
;; corresponding 132, 213 or 231 form of vfmaddsub.
2080 (define_insn "*fma_fmaddsub_<mode>"
2081 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
2083 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
2084 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
2085 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0")]
2089 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2090 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2091 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2092 [(set_attr "type" "ssemuladd")
2093 (set_attr "mode" "<MODE>")])
;; FMA3 packed fmsubadd matcher over AVXMODEF2P.  Operand 3 sits inside
;; one extra rtx wrapper (NOTE(review): presumably a negation -- wrapper
;; line not shown).  Each alternative ties one input to the destination
;; and selects the corresponding 132, 213 or 231 form of vfmsubadd.
2095 (define_insn "*fma_fmsubadd_<mode>"
2096 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
2098 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
2099 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
2101 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0"))]
2105 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2106 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2107 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2108 [(set_attr "type" "ssemuladd")
2109 (set_attr "mode" "<MODE>")])
2111 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2113 ;; Non-intrinsic versions, matched when fused-multiply-add is allowed.
2115 ;; ??? If fused-madd were a generic flag, combine could do this without
2116 ;; needing splitters here in the backend. Irritatingly, combine won't
2117 ;; recognize many of these with mere splits, since only 3 or more insns
2118 ;; are allowed to split during combine. Thankfully, there's always a
2119 ;; split_all_insns pass that runs before reload.
2121 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating multiply and add.
;; Matches a two-level expression over operands 1, 2 and then 3 while
;; fused multiply-add is permitted (TARGET_FUSED_MADD with SSE math and
;; FMA or FMA4) and reload has not started.  The output block is
;; gcc_unreachable (), so the insn is expected to be split, never printed.
2123 (define_insn_and_split "*split_fma"
2124 [(set (match_operand:FMAMODE 0 "register_operand")
2127 (match_operand:FMAMODE 1 "nonimmediate_operand")
2128 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2129 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2130 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2131 && (TARGET_FMA || TARGET_FMA4)
2132 && !(reload_in_progress || reload_completed)"
2133 { gcc_unreachable (); }
2142 ;; Floating multiply and subtract.
;; Same enabling condition as the other split_* patterns in this group;
;; the replacement pattern wraps operand 3 in neg.  The output block is
;; gcc_unreachable (), so the insn is expected to be split, never printed.
2143 (define_insn_and_split "*split_fms"
2144 [(set (match_operand:FMAMODE 0 "register_operand")
2147 (match_operand:FMAMODE 1 "nonimmediate_operand")
2148 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2149 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2150 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2151 && (TARGET_FMA || TARGET_FMA4)
2152 && !(reload_in_progress || reload_completed)"
2153 { gcc_unreachable (); }
2159 (neg:FMAMODE (match_dup 3))))]
2162 ;; Floating point negative multiply and add.
2163 ;; Recognize (-a * b + c) via the canonical form: c - (a * b).
;; The matched expression lists operand 3 first, then the operand 1 /
;; operand 2 pair; the replacement pattern wraps operand 1 in neg.  The
;; output block is gcc_unreachable (), so the insn is expected to be
;; split, never printed.
2164 (define_insn_and_split "*split_fnma"
2165 [(set (match_operand:FMAMODE 0 "register_operand")
2167 (match_operand:FMAMODE 3 "nonimmediate_operand")
2169 (match_operand:FMAMODE 1 "nonimmediate_operand")
2170 (match_operand:FMAMODE 2 "nonimmediate_operand"))))]
2171 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2172 && (TARGET_FMA || TARGET_FMA4)
2173 && !(reload_in_progress || reload_completed)"
2174 { gcc_unreachable (); }
2178 (neg:FMAMODE (match_dup 1))
2183 ;; Floating point negative multiply and subtract.
2185 ;; Recognize (-a * b - c) via the canonical form: (-a * b) - c.
;; Operand 1 is nested one level deeper than operand 2, which is nested
;; one level deeper than operand 3.  The replacement pattern wraps both
;; operand 1 and operand 3 in neg.  The output block is
;; gcc_unreachable (), so the insn is expected to be split, never printed.
2185 (define_insn_and_split "*split_fnms"
2186 [(set (match_operand:FMAMODE 0 "register_operand")
2190 (match_operand:FMAMODE 1 "nonimmediate_operand"))
2191 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2192 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2193 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2194 && (TARGET_FMA || TARGET_FMA4)
2195 && !(reload_in_progress || reload_completed)"
2196 { gcc_unreachable (); }
2200 (neg:FMAMODE (match_dup 1))
2202 (neg:FMAMODE (match_dup 3))))]
2205 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2207 ;; Parallel single-precision floating point conversion operations
2209 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 ("ym") to V2SF with float.
;; Operand 1 is tied to the destination register (constraint "0").
2211 (define_insn "sse_cvtpi2ps"
2212 [(set (match_operand:V4SF 0 "register_operand" "=x")
2215 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2216 (match_operand:V4SF 1 "register_operand" "0")
2219 "cvtpi2ps\t{%2, %0|%0, %2}"
2220 [(set_attr "type" "ssecvt")
2221 (set_attr "mode" "V4SF")])
;; cvtps2pi: a V4SI-mode unspec of the V4SF source, of which elements 0
;; and 1 are selected into a V2SI destination in a "y" register.
2223 (define_insn "sse_cvtps2pi"
2224 [(set (match_operand:V2SI 0 "register_operand" "=y")
2226 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2228 (parallel [(const_int 0) (const_int 1)])))]
2230 "cvtps2pi\t{%1, %0|%0, %1}"
2231 [(set_attr "type" "ssecvt")
2232 (set_attr "unit" "mmx")
2233 (set_attr "mode" "DI")])
;; cvttps2pi: fix (truncating) conversion of the V4SF source, of which
;; elements 0 and 1 are selected into a V2SI destination in a "y" register.
2235 (define_insn "sse_cvttps2pi"
2236 [(set (match_operand:V2SI 0 "register_operand" "=y")
2238 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2239 (parallel [(const_int 0) (const_int 1)])))]
2241 "cvttps2pi\t{%1, %0|%0, %1}"
2242 [(set_attr "type" "ssecvt")
2243 (set_attr "unit" "mmx")
2244 (set_attr "prefix_rep" "0")
2245 (set_attr "mode" "SF")])
;; VEX three-operand vcvtsi2ss: SImode operand 2 (register or memory) is
;; converted to SF; operand 1 is a separate V4SF register input.
2247 (define_insn "*avx_cvtsi2ss"
2248 [(set (match_operand:V4SF 0 "register_operand" "=x")
2251 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2252 (match_operand:V4SF 1 "register_operand" "x")
2255 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2256 [(set_attr "type" "sseicvt")
2257 (set_attr "prefix" "vex")
2258 (set_attr "mode" "SF")])
;; Two-operand cvtsi2ss: SImode operand 2 is converted to SF; operand 1
;; is tied to the destination.  Register and memory sources are separate
;; alternatives so the per-CPU decode attributes can differ.
2260 (define_insn "sse_cvtsi2ss"
2261 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2264 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2265 (match_operand:V4SF 1 "register_operand" "0,0")
2268 "cvtsi2ss\t{%2, %0|%0, %2}"
2269 [(set_attr "type" "sseicvt")
2270 (set_attr "athlon_decode" "vector,double")
2271 (set_attr "amdfam10_decode" "vector,double")
2272 (set_attr "bdver1_decode" "double,direct")
2273 (set_attr "mode" "SF")])
;; VEX three-operand vcvtsi2ssq: DImode operand 2 is converted to SF.
;; Requires TARGET_AVX and TARGET_64BIT.
2275 (define_insn "*avx_cvtsi2ssq"
2276 [(set (match_operand:V4SF 0 "register_operand" "=x")
2279 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2280 (match_operand:V4SF 1 "register_operand" "x")
2282 "TARGET_AVX && TARGET_64BIT"
2283 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2284 [(set_attr "type" "sseicvt")
2285 (set_attr "length_vex" "4")
2286 (set_attr "prefix" "vex")
2287 (set_attr "mode" "SF")])
;; Two-operand cvtsi2ssq: DImode operand 2 is converted to SF; operand 1
;; is tied to the destination.  Requires TARGET_SSE and TARGET_64BIT.
2289 (define_insn "sse_cvtsi2ssq"
2290 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2293 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2294 (match_operand:V4SF 1 "register_operand" "0,0")
2296 "TARGET_SSE && TARGET_64BIT"
2297 "cvtsi2ssq\t{%2, %0|%0, %2}"
2298 [(set_attr "type" "sseicvt")
2299 (set_attr "prefix_rex" "1")
2300 (set_attr "athlon_decode" "vector,double")
2301 (set_attr "amdfam10_decode" "vector,double")
2302 (set_attr "bdver1_decode" "double,direct")
2303 (set_attr "mode" "SF")])
;; (v)cvtss2si to an SImode register: element 0 of the V4SF source is
;; converted under UNSPEC_FIX_NOTRUNC.
2305 (define_insn "sse_cvtss2si"
2306 [(set (match_operand:SI 0 "register_operand" "=r,r")
2309 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2310 (parallel [(const_int 0)]))]
2311 UNSPEC_FIX_NOTRUNC))]
2313 "%vcvtss2si\t{%1, %0|%0, %1}"
2314 [(set_attr "type" "sseicvt")
2315 (set_attr "athlon_decode" "double,vector")
2316 (set_attr "bdver1_decode" "double,double")
2317 (set_attr "prefix_rep" "1")
2318 (set_attr "prefix" "maybe_vex")
2319 (set_attr "mode" "SI")])
;; Variant of (v)cvtss2si whose source is a plain SFmode operand rather
;; than an element selected from a vector; also UNSPEC_FIX_NOTRUNC.
2321 (define_insn "sse_cvtss2si_2"
2322 [(set (match_operand:SI 0 "register_operand" "=r,r")
2323 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2324 UNSPEC_FIX_NOTRUNC))]
2326 "%vcvtss2si\t{%1, %0|%0, %1}"
2327 [(set_attr "type" "sseicvt")
2328 (set_attr "athlon_decode" "double,vector")
2329 (set_attr "amdfam10_decode" "double,double")
2330 (set_attr "bdver1_decode" "double,double")
2331 (set_attr "prefix_rep" "1")
2332 (set_attr "prefix" "maybe_vex")
2333 (set_attr "mode" "SI")])
;; 64-bit (v)cvtss2si to a DImode register: element 0 of the V4SF source
;; is converted under UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE and
;; TARGET_64BIT.
2335 (define_insn "sse_cvtss2siq"
2336 [(set (match_operand:DI 0 "register_operand" "=r,r")
2339 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2340 (parallel [(const_int 0)]))]
2341 UNSPEC_FIX_NOTRUNC))]
2342 "TARGET_SSE && TARGET_64BIT"
2343 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2344 [(set_attr "type" "sseicvt")
2345 (set_attr "athlon_decode" "double,vector")
2346 (set_attr "bdver1_decode" "double,double")
2347 (set_attr "prefix_rep" "1")
2348 (set_attr "prefix" "maybe_vex")
2349 (set_attr "mode" "DI")])
;; 64-bit (v)cvtss2si variant whose source is a plain SFmode operand;
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE and TARGET_64BIT.
2351 (define_insn "sse_cvtss2siq_2"
2352 [(set (match_operand:DI 0 "register_operand" "=r,r")
2353 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2354 UNSPEC_FIX_NOTRUNC))]
2355 "TARGET_SSE && TARGET_64BIT"
2356 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2357 [(set_attr "type" "sseicvt")
2358 (set_attr "athlon_decode" "double,vector")
2359 (set_attr "amdfam10_decode" "double,double")
2360 (set_attr "bdver1_decode" "double,double")
2361 (set_attr "prefix_rep" "1")
2362 (set_attr "prefix" "maybe_vex")
2363 (set_attr "mode" "DI")])
;; (v)cvttss2si to an SImode register, taking element 0 of the V4SF
;; source (register or memory).
2365 (define_insn "sse_cvttss2si"
2366 [(set (match_operand:SI 0 "register_operand" "=r,r")
2369 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2370 (parallel [(const_int 0)]))))]
2372 "%vcvttss2si\t{%1, %0|%0, %1}"
2373 [(set_attr "type" "sseicvt")
2374 (set_attr "athlon_decode" "double,vector")
2375 (set_attr "amdfam10_decode" "double,double")
2376 (set_attr "bdver1_decode" "double,double")
2377 (set_attr "prefix_rep" "1")
2378 (set_attr "prefix" "maybe_vex")
2379 (set_attr "mode" "SI")])
;; 64-bit (v)cvttss2si to a DImode register, taking element 0 of the
;; V4SF source.  Requires TARGET_SSE and TARGET_64BIT.
2381 (define_insn "sse_cvttss2siq"
2382 [(set (match_operand:DI 0 "register_operand" "=r,r")
2385 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2386 (parallel [(const_int 0)]))))]
2387 "TARGET_SSE && TARGET_64BIT"
2388 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2389 [(set_attr "type" "sseicvt")
2390 (set_attr "athlon_decode" "double,vector")
2391 (set_attr "amdfam10_decode" "double,double")
2392 (set_attr "bdver1_decode" "double,double")
2393 (set_attr "prefix_rep" "1")
2394 (set_attr "prefix" "maybe_vex")
2395 (set_attr "mode" "DI")])
;; VEX vcvtdq2ps over the AVXMODEDCVTDQ2PS iterator: float conversion of
;; an <avxcvtvecmode> source (register or memory).
2397 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2398 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2399 (float:AVXMODEDCVTDQ2PS
2400 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2402 "vcvtdq2ps\t{%1, %0|%0, %1}"
2403 [(set_attr "type" "ssecvt")
2404 (set_attr "prefix" "vex")
2405 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: float conversion of a V4SI source to a V4SF register.
2407 (define_insn "sse2_cvtdq2ps"
2408 [(set (match_operand:V4SF 0 "register_operand" "=x")
2409 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2411 "cvtdq2ps\t{%1, %0|%0, %1}"
2412 [(set_attr "type" "ssecvt")
2413 (set_attr "mode" "V4SF")])
;; Expander built from a float conversion of the V4SI input followed by
;; a compare (lt against operand 3), a mask (and with operand 4) and an
;; add.  The preparation code sets operand 3 to a zero vector and
;; operand 4 to a vector of 2**32 (real_ldexp of 1.0 by 32), and creates
;; fresh V4SF pseudos for operands 5..7.
;; NOTE(review): the name suggests this is the unsigned int -> float
;; conversion with a 2**32 correction for negative results -- the set
;; destinations are on lines not shown here; confirm.
2415 (define_expand "sse2_cvtudq2ps"
2417 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2419 (lt:V4SF (match_dup 5) (match_dup 3)))
2421 (and:V4SF (match_dup 6) (match_dup 4)))
2422 (set (match_operand:V4SF 0 "register_operand" "")
2423 (plus:V4SF (match_dup 5) (match_dup 7)))]
2426 REAL_VALUE_TYPE TWO32r;
2430 real_ldexp (&TWO32r, &dconst1, 32);
2431 x = const_double_from_real_value (TWO32r, SFmode);
2433 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2434 operands[4] = force_reg (V4SFmode,
2435 ix86_build_const_vector (V4SFmode, 1, x));
2437 for (i = 5; i < 8; i++)
2438 operands[i] = gen_reg_rtx (V4SFmode);
;; VEX vcvtps2dq over the AVXMODEDCVTPS2DQ iterator; the conversion is
;; expressed as UNSPEC_FIX_NOTRUNC of an <avxcvtvecmode> source.
2441 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2442 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2443 (unspec:AVXMODEDCVTPS2DQ
2444 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2445 UNSPEC_FIX_NOTRUNC))]
2447 "vcvtps2dq\t{%1, %0|%0, %1}"
2448 [(set_attr "type" "ssecvt")
2449 (set_attr "prefix" "vex")
2450 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF source to V4SI register, expressed as
;; UNSPEC_FIX_NOTRUNC.
2452 (define_insn "sse2_cvtps2dq"
2453 [(set (match_operand:V4SI 0 "register_operand" "=x")
2454 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2455 UNSPEC_FIX_NOTRUNC))]
2457 "cvtps2dq\t{%1, %0|%0, %1}"
2458 [(set_attr "type" "ssecvt")
2459 (set_attr "prefix_data16" "1")
2460 (set_attr "mode" "TI")])
;; VEX vcvttps2dq over the AVXMODEDCVTPS2DQ iterator: fix (truncating)
;; conversion of an <avxcvtvecmode> source.
2462 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2463 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2464 (fix:AVXMODEDCVTPS2DQ
2465 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2467 "vcvttps2dq\t{%1, %0|%0, %1}"
2468 [(set_attr "type" "ssecvt")
2469 (set_attr "prefix" "vex")
2470 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: fix (truncating) conversion of a V4SF source to V4SI.
2472 (define_insn "sse2_cvttps2dq"
2473 [(set (match_operand:V4SI 0 "register_operand" "=x")
2474 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2476 "cvttps2dq\t{%1, %0|%0, %1}"
2477 [(set_attr "type" "ssecvt")
2478 (set_attr "prefix_rep" "1")
2479 (set_attr "prefix_data16" "0")
2480 (set_attr "mode" "TI")])
2482 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2484 ;; Parallel double-precision floating point conversion operations
2486 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float conversion of a V2SI source to V2DF.  The "y" and
;; "m" sources are separate alternatives; only the first is marked
;; unit "mmx" and prefix_data16 "1".
2488 (define_insn "sse2_cvtpi2pd"
2489 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2490 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2492 "cvtpi2pd\t{%1, %0|%0, %1}"
2493 [(set_attr "type" "ssecvt")
2494 (set_attr "unit" "mmx,*")
2495 (set_attr "prefix_data16" "1,*")
2496 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF source to a V2SI "y" register, expressed as
;; UNSPEC_FIX_NOTRUNC.
2498 (define_insn "sse2_cvtpd2pi"
2499 [(set (match_operand:V2SI 0 "register_operand" "=y")
2500 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2501 UNSPEC_FIX_NOTRUNC))]
2503 "cvtpd2pi\t{%1, %0|%0, %1}"
2504 [(set_attr "type" "ssecvt")
2505 (set_attr "unit" "mmx")
2506 (set_attr "prefix_data16" "1")
2507 (set_attr "mode" "DI")
2508 (set_attr "bdver1_decode" "double")])
;; cvttpd2pi: fix (truncating) conversion of a V2DF source to a V2SI
;; "y" register.
2510 (define_insn "sse2_cvttpd2pi"
2511 [(set (match_operand:V2SI 0 "register_operand" "=y")
2512 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2514 "cvttpd2pi\t{%1, %0|%0, %1}"
2515 [(set_attr "type" "ssecvt")
2516 (set_attr "unit" "mmx")
2517 (set_attr "prefix_data16" "1")
2518 (set_attr "mode" "TI")
2519 (set_attr "bdver1_decode" "double")])
;; VEX three-operand vcvtsi2sd: SImode operand 2 is converted to DF;
;; operand 1 is a separate V2DF register input.
2521 (define_insn "*avx_cvtsi2sd"
2522 [(set (match_operand:V2DF 0 "register_operand" "=x")
2525 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2526 (match_operand:V2DF 1 "register_operand" "x")
2529 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2530 [(set_attr "type" "sseicvt")
2531 (set_attr "prefix" "vex")
2532 (set_attr "mode" "DF")])
;; Two-operand cvtsi2sd: SImode operand 2 is converted to DF; operand 1
;; is tied to the destination.  Register and memory sources are separate
;; alternatives so the per-CPU decode attributes can differ.
2534 (define_insn "sse2_cvtsi2sd"
2535 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2538 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2539 (match_operand:V2DF 1 "register_operand" "0,0")
2542 "cvtsi2sd\t{%2, %0|%0, %2}"
2543 [(set_attr "type" "sseicvt")
2544 (set_attr "mode" "DF")
2545 (set_attr "athlon_decode" "double,direct")
2546 (set_attr "amdfam10_decode" "vector,double")
2547 (set_attr "bdver1_decode" "double,direct")])
;; VEX three-operand vcvtsi2sdq: DImode operand 2 is converted to DF.
;; Requires TARGET_AVX and TARGET_64BIT.
2549 (define_insn "*avx_cvtsi2sdq"
2550 [(set (match_operand:V2DF 0 "register_operand" "=x")
2553 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2554 (match_operand:V2DF 1 "register_operand" "x")
2556 "TARGET_AVX && TARGET_64BIT"
2557 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2558 [(set_attr "type" "sseicvt")
2559 (set_attr "length_vex" "4")
2560 (set_attr "prefix" "vex")
2561 (set_attr "mode" "DF")])
;; Two-operand cvtsi2sdq: DImode operand 2 is converted to DF; operand 1
;; is tied to the destination.  Requires TARGET_SSE2 and TARGET_64BIT.
2563 (define_insn "sse2_cvtsi2sdq"
2564 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2567 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2568 (match_operand:V2DF 1 "register_operand" "0,0")
2570 "TARGET_SSE2 && TARGET_64BIT"
2571 "cvtsi2sdq\t{%2, %0|%0, %2}"
2572 [(set_attr "type" "sseicvt")
2573 (set_attr "prefix_rex" "1")
2574 (set_attr "mode" "DF")
2575 (set_attr "athlon_decode" "double,direct")
2576 (set_attr "amdfam10_decode" "vector,double")
2577 (set_attr "bdver1_decode" "double,direct")])
;; (v)cvtsd2si to an SImode register: element 0 of the V2DF source is
;; converted under UNSPEC_FIX_NOTRUNC.
2579 (define_insn "sse2_cvtsd2si"
2580 [(set (match_operand:SI 0 "register_operand" "=r,r")
2583 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2584 (parallel [(const_int 0)]))]
2585 UNSPEC_FIX_NOTRUNC))]
2587 "%vcvtsd2si\t{%1, %0|%0, %1}"
2588 [(set_attr "type" "sseicvt")
2589 (set_attr "athlon_decode" "double,vector")
2590 (set_attr "bdver1_decode" "double,double")
2591 (set_attr "prefix_rep" "1")
2592 (set_attr "prefix" "maybe_vex")
2593 (set_attr "mode" "SI")])
;; Variant of (v)cvtsd2si whose source is a plain DFmode operand rather
;; than an element selected from a vector; also UNSPEC_FIX_NOTRUNC.
2595 (define_insn "sse2_cvtsd2si_2"
2596 [(set (match_operand:SI 0 "register_operand" "=r,r")
2597 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2598 UNSPEC_FIX_NOTRUNC))]
2600 "%vcvtsd2si\t{%1, %0|%0, %1}"
2601 [(set_attr "type" "sseicvt")
2602 (set_attr "athlon_decode" "double,vector")
2603 (set_attr "amdfam10_decode" "double,double")
2604 (set_attr "bdver1_decode" "double,double")
2605 (set_attr "prefix_rep" "1")
2606 (set_attr "prefix" "maybe_vex")
2607 (set_attr "mode" "SI")])
;; 64-bit (v)cvtsd2siq to a DImode register: element 0 of the V2DF
;; source is converted under UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2
;; and TARGET_64BIT.
2609 (define_insn "sse2_cvtsd2siq"
2610 [(set (match_operand:DI 0 "register_operand" "=r,r")
2613 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2614 (parallel [(const_int 0)]))]
2615 UNSPEC_FIX_NOTRUNC))]
2616 "TARGET_SSE2 && TARGET_64BIT"
2617 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2618 [(set_attr "type" "sseicvt")
2619 (set_attr "athlon_decode" "double,vector")
2620 (set_attr "bdver1_decode" "double,double")
2621 (set_attr "prefix_rep" "1")
2622 (set_attr "prefix" "maybe_vex")
2623 (set_attr "mode" "DI")])
;; 64-bit (v)cvtsd2siq variant whose source is a plain DFmode operand;
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 and TARGET_64BIT.
2625 (define_insn "sse2_cvtsd2siq_2"
2626 [(set (match_operand:DI 0 "register_operand" "=r,r")
2627 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2628 UNSPEC_FIX_NOTRUNC))]
2629 "TARGET_SSE2 && TARGET_64BIT"
2630 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "sseicvt")
2632 (set_attr "athlon_decode" "double,vector")
2633 (set_attr "amdfam10_decode" "double,double")
2634 (set_attr "bdver1_decode" "double,double")
2635 (set_attr "prefix_rep" "1")
2636 (set_attr "prefix" "maybe_vex")
2637 (set_attr "mode" "DI")])
;; (v)cvttsd2si to an SImode register, taking element 0 of the V2DF
;; source (register or memory).
2639 (define_insn "sse2_cvttsd2si"
2640 [(set (match_operand:SI 0 "register_operand" "=r,r")
2643 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2644 (parallel [(const_int 0)]))))]
2646 "%vcvttsd2si\t{%1, %0|%0, %1}"
2647 [(set_attr "type" "sseicvt")
2648 (set_attr "prefix_rep" "1")
2649 (set_attr "prefix" "maybe_vex")
2650 (set_attr "mode" "SI")
2651 (set_attr "athlon_decode" "double,vector")
2652 (set_attr "amdfam10_decode" "double,double")
2653 (set_attr "bdver1_decode" "double,double")])
;; 64-bit (v)cvttsd2siq to a DImode register, taking element 0 of the
;; V2DF source.  Requires TARGET_SSE2 and TARGET_64BIT.
2655 (define_insn "sse2_cvttsd2siq"
2656 [(set (match_operand:DI 0 "register_operand" "=r,r")
2659 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2660 (parallel [(const_int 0)]))))]
2661 "TARGET_SSE2 && TARGET_64BIT"
2662 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2663 [(set_attr "type" "sseicvt")
2664 (set_attr "prefix_rep" "1")
2665 (set_attr "prefix" "maybe_vex")
2666 (set_attr "mode" "DI")
2667 (set_attr "athlon_decode" "double,vector")
2668 (set_attr "amdfam10_decode" "double,double")
2669 (set_attr "bdver1_decode" "double,double")])
;; VEX vcvtdq2pd: float conversion of a V4SI source to a V4DF register.
2671 (define_insn "avx_cvtdq2pd256"
2672 [(set (match_operand:V4DF 0 "register_operand" "=x")
2673 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2675 "vcvtdq2pd\t{%1, %0|%0, %1}"
2676 [(set_attr "type" "ssecvt")
2677 (set_attr "prefix" "vex")
2678 (set_attr "mode" "V4DF")])
;; vcvtdq2pd variant whose source is elements 0..3 of a V8SI operand;
;; the template prints the source with the %x operand modifier.
2680 (define_insn "*avx_cvtdq2pd256_2"
2681 [(set (match_operand:V4DF 0 "register_operand" "=x")
2684 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2685 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2687 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2688 [(set_attr "type" "ssecvt")
2689 (set_attr "prefix" "vex")
2690 (set_attr "mode" "V4DF")])
;; (v)cvtdq2pd: elements 0 and 1 of the V4SI source feed a V2DF result.
2692 (define_insn "sse2_cvtdq2pd"
2693 [(set (match_operand:V2DF 0 "register_operand" "=x")
2696 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2697 (parallel [(const_int 0) (const_int 1)]))))]
2699 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2700 [(set_attr "type" "ssecvt")
2701 (set_attr "prefix" "maybe_vex")
2702 (set_attr "mode" "V2DF")])
;; VEX vcvtpd2dq with the {y} AT&T suffix: V4DF source to V4SI,
;; expressed as UNSPEC_FIX_NOTRUNC.
2704 (define_insn "avx_cvtpd2dq256"
2705 [(set (match_operand:V4SI 0 "register_operand" "=x")
2706 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2707 UNSPEC_FIX_NOTRUNC))]
2709 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2710 [(set_attr "type" "ssecvt")
2711 (set_attr "prefix" "vex")
2712 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: a V2SI unspec of the V2DF input forms part of
;; the V4SI result; the preparation statement sets operand 2 to the
;; V2SI zero constant.
2714 (define_expand "sse2_cvtpd2dq"
2715 [(set (match_operand:V4SI 0 "register_operand" "")
2717 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2721 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvtpd2dq: V2SI unspec of the V2DF source combined with a
;; V2SI operand 2 that must satisfy const0_operand.  The C output
;; statement picks vcvtpd2dq{x} when TARGET_AVX, else cvtpd2dq.
2723 (define_insn "*sse2_cvtpd2dq"
2724 [(set (match_operand:V4SI 0 "register_operand" "=x")
2726 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2728 (match_operand:V2SI 2 "const0_operand" "")))]
2730 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2731 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2732 [(set_attr "type" "ssecvt")
2733 (set_attr "prefix_rep" "1")
2734 (set_attr "prefix_data16" "0")
2735 (set_attr "prefix" "maybe_vex")
2736 (set_attr "mode" "TI")
2737 (set_attr "amdfam10_decode" "double")
2738 (set_attr "bdver1_decode" "double")])
;; VEX vcvttpd2dq with the {y} AT&T suffix: fix (truncating) conversion
;; of a V4DF source to V4SI.
2740 (define_insn "avx_cvttpd2dq256"
2741 [(set (match_operand:V4SI 0 "register_operand" "=x")
2742 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2744 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2745 [(set_attr "type" "ssecvt")
2746 (set_attr "prefix" "vex")
2747 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: a V2SI fix of the V2DF input forms part of
;; the V4SI result; the preparation statement sets operand 2 to the
;; V2SI zero constant.
2749 (define_expand "sse2_cvttpd2dq"
2750 [(set (match_operand:V4SI 0 "register_operand" "")
2752 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2755 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvttpd2dq: V2SI fix of the V2DF source combined with a
;; V2SI operand 2 that must satisfy const0_operand.  The C output
;; statement picks vcvttpd2dq{x} when TARGET_AVX, else cvttpd2dq.
2757 (define_insn "*sse2_cvttpd2dq"
2758 [(set (match_operand:V4SI 0 "register_operand" "=x")
2760 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2761 (match_operand:V2SI 2 "const0_operand" "")))]
2763 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2764 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2765 [(set_attr "type" "ssecvt")
2766 (set_attr "prefix" "maybe_vex")
2767 (set_attr "mode" "TI")
2768 (set_attr "amdfam10_decode" "double")
2769 (set_attr "bdver1_decode" "double")])
;; VEX three-operand vcvtsd2ss: float_truncate of the V2DF operand 2 to
;; V2SF; operand 1 is a separate V4SF register input.
2771 (define_insn "*avx_cvtsd2ss"
2772 [(set (match_operand:V4SF 0 "register_operand" "=x")
2775 (float_truncate:V2SF
2776 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2777 (match_operand:V4SF 1 "register_operand" "x")
2780 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2781 [(set_attr "type" "ssecvt")
2782 (set_attr "prefix" "vex")
2783 (set_attr "mode" "SF")])
;; Two-operand cvtsd2ss: float_truncate of the V2DF operand 2 to V2SF;
;; operand 1 is tied to the destination.  Register and memory sources
;; are separate alternatives so the decode attributes can differ.
2785 (define_insn "sse2_cvtsd2ss"
2786 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2789 (float_truncate:V2SF
2790 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2791 (match_operand:V4SF 1 "register_operand" "0,0")
2794 "cvtsd2ss\t{%2, %0|%0, %2}"
2795 [(set_attr "type" "ssecvt")
2796 (set_attr "athlon_decode" "vector,double")
2797 (set_attr "amdfam10_decode" "vector,double")
2798 (set_attr "bdver1_decode" "direct,direct")
2799 (set_attr "mode" "SF")])
;; VEX three-operand vcvtss2sd: elements 0 and 1 of the V4SF operand 2
;; are selected as the conversion source; operand 1 is a separate V2DF
;; register input.
2801 (define_insn "*avx_cvtss2sd"
2802 [(set (match_operand:V2DF 0 "register_operand" "=x")
2806 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2807 (parallel [(const_int 0) (const_int 1)])))
2808 (match_operand:V2DF 1 "register_operand" "x")
2811 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2812 [(set_attr "type" "ssecvt")
2813 (set_attr "prefix" "vex")
2814 (set_attr "mode" "DF")])
;; Two-operand cvtss2sd: elements 0 and 1 of the V4SF operand 2 are
;; selected as the conversion source; operand 1 is tied to the
;; destination.
2816 (define_insn "sse2_cvtss2sd"
2817 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2821 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2822 (parallel [(const_int 0) (const_int 1)])))
2823 (match_operand:V2DF 1 "register_operand" "0,0")
2826 "cvtss2sd\t{%2, %0|%0, %2}"
2827 [(set_attr "type" "ssecvt")
2828 (set_attr "amdfam10_decode" "vector,double")
2829 (set_attr "bdver1_decode" "direct,direct")
2830 (set_attr "mode" "DF")])
;; VEX vcvtpd2ps with the {y} AT&T suffix: float_truncate of a V4DF
;; source to a V4SF register.
2832 (define_insn "avx_cvtpd2ps256"
2833 [(set (match_operand:V4SF 0 "register_operand" "=x")
2834 (float_truncate:V4SF
2835 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2837 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2838 [(set_attr "type" "ssecvt")
2839 (set_attr "prefix" "vex")
2840 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: a V2SF float_truncate of the V2DF input forms
;; part of the V4SF result; the preparation statement sets operand 2 to
;; the V2SF zero constant.
2842 (define_expand "sse2_cvtpd2ps"
2843 [(set (match_operand:V4SF 0 "register_operand" "")
2845 (float_truncate:V2SF
2846 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2849 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for cvtpd2ps: V2SF float_truncate of the V2DF source combined
;; with a V2SF operand 2 that must satisfy const0_operand.  The C output
;; statement picks vcvtpd2ps{x} when TARGET_AVX, else cvtpd2ps.
2851 (define_insn "*sse2_cvtpd2ps"
2852 [(set (match_operand:V4SF 0 "register_operand" "=x")
2854 (float_truncate:V2SF
2855 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2856 (match_operand:V2SF 2 "const0_operand" "")))]
2858 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2859 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2860 [(set_attr "type" "ssecvt")
2861 (set_attr "prefix_data16" "1")
2862 (set_attr "prefix" "maybe_vex")
2863 (set_attr "mode" "V4SF")
2864 (set_attr "amdfam10_decode" "double")
2865 (set_attr "bdver1_decode" "double")])
;; VEX vcvtps2pd: V4SF source (register or memory) to a V4DF register.
2867 (define_insn "avx_cvtps2pd256"
2868 [(set (match_operand:V4DF 0 "register_operand" "=x")
2870 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2872 "vcvtps2pd\t{%1, %0|%0, %1}"
2873 [(set_attr "type" "ssecvt")
2874 (set_attr "prefix" "vex")
2875 (set_attr "mode" "V4DF")])
;; vcvtps2pd variant whose source is elements 0..3 of a V8SF operand;
;; the template prints the source with the %x operand modifier.
2877 (define_insn "*avx_cvtps2pd256_2"
2878 [(set (match_operand:V4DF 0 "register_operand" "=x")
2881 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2882 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2884 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2885 [(set_attr "type" "ssecvt")
2886 (set_attr "prefix" "vex")
2887 (set_attr "mode" "V4DF")])
;; (v)cvtps2pd: elements 0 and 1 of the V4SF source feed a V2DF result.
2889 (define_insn "sse2_cvtps2pd"
2890 [(set (match_operand:V2DF 0 "register_operand" "=x")
2893 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2894 (parallel [(const_int 0) (const_int 1)]))))]
2896 "%vcvtps2pd\t{%1, %0|%0, %1}"
2897 [(set_attr "type" "ssecvt")
2898 (set_attr "prefix" "maybe_vex")
2899 (set_attr "mode" "V2DF")
2900 (set_attr "prefix_data16" "0")
2901 (set_attr "amdfam10_decode" "direct")
2902 (set_attr "bdver1_decode" "double")])
;; Two-step expander from V4SF to V2DF: the first step builds a V4SF
;; temporary (operand 2, a fresh pseudo) from a selection starting at
;; index 6; the second converts elements 0 and 1 into the V2DF result.
;; NOTE(review): the selection's remaining indices and its source are on
;; lines not shown here.
2904 (define_expand "vec_unpacks_hi_v4sf"
2909 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2910 (parallel [(const_int 6)
2914 (set (match_operand:V2DF 0 "register_operand" "")
2918 (parallel [(const_int 0) (const_int 1)]))))]
2920 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander from V8SF to V4DF: a selection from operand 1
;; starting at index 4 goes through operand 2, a fresh V4SF pseudo,
;; before the V4DF result is set.
2922 (define_expand "vec_unpacks_hi_v8sf"
2925 (match_operand:V8SF 1 "nonimmediate_operand" "")
2926 (parallel [(const_int 4)
2930 (set (match_operand:V4DF 0 "register_operand" "")
2935 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing V2DF from elements 0 and 1 of the V4SF input.
2938 (define_expand "vec_unpacks_lo_v4sf"
2939 [(set (match_operand:V2DF 0 "register_operand" "")
2942 (match_operand:V4SF 1 "nonimmediate_operand" "")
2943 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing V4DF from elements 0..3 of the V8SF input.
2946 (define_expand "vec_unpacks_lo_v8sf"
2947 [(set (match_operand:V4DF 0 "register_operand" "")
2950 (match_operand:V8SF 1 "nonimmediate_operand" "")
2951 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
;; V8HI -> V4SF: emits vec_unpacks_hi_v8hi into a V4SI temporary, then
;; sse2_cvtdq2ps from that temporary into operand 0.
2954 (define_expand "vec_unpacks_float_hi_v8hi"
2955 [(match_operand:V4SF 0 "register_operand" "")
2956 (match_operand:V8HI 1 "register_operand" "")]
2959 rtx tmp = gen_reg_rtx (V4SImode);
2961 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2962 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: emits vec_unpacks_lo_v8hi into a V4SI temporary, then
;; sse2_cvtdq2ps from that temporary into operand 0.
2966 (define_expand "vec_unpacks_float_lo_v8hi"
2967 [(match_operand:V4SF 0 "register_operand" "")
2968 (match_operand:V8HI 1 "register_operand" "")]
2971 rtx tmp = gen_reg_rtx (V4SImode);
2973 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2974 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: emits vec_unpacku_hi_v8hi into a V4SI temporary, then
;; sse2_cvtdq2ps from that temporary into operand 0.
2978 (define_expand "vec_unpacku_float_hi_v8hi"
2979 [(match_operand:V4SF 0 "register_operand" "")
2980 (match_operand:V8HI 1 "register_operand" "")]
2983 rtx tmp = gen_reg_rtx (V4SImode);
2985 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2986 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: emits vec_unpacku_lo_v8hi into a V4SI temporary, then
;; sse2_cvtdq2ps from that temporary into operand 0.
2990 (define_expand "vec_unpacku_float_lo_v8hi"
2991 [(match_operand:V4SF 0 "register_operand" "")
2992 (match_operand:V8HI 1 "register_operand" "")]
2995 rtx tmp = gen_reg_rtx (V4SImode);
2997 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2998 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander "vec_unpacks_float_hi_v4si": V4SI operand 1 in, V2DF operand 0
;; out.  A selector starting at element 2 is applied to operand 1, the
;; final set uses the selector {0, 1}, and the preparation statement
;; allocates a fresh V4SI pseudo as operand 2.
3002 (define_expand "vec_unpacks_float_hi_v4si"
3005 (match_operand:V4SI 1 "nonimmediate_operand" "")
3006 (parallel [(const_int 2)
3010 (set (match_operand:V2DF 0 "register_operand" "")
3014 (parallel [(const_int 0) (const_int 1)]))))]
3016 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander "vec_unpacks_float_lo_v4si": sets V2DF operand 0 from V4SI
;; operand 1 using the element selector {0, 1}.
3018 (define_expand "vec_unpacks_float_lo_v4si"
3019 [(set (match_operand:V2DF 0 "register_operand" "")
3022 (match_operand:V4SI 1 "nonimmediate_operand" "")
3023 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander "vec_unpacks_float_hi_v8si": V8SI operand 1 in, V4DF operand 0
;; out.  A selector starting at element 4 is applied to operand 1, and the
;; preparation statement allocates a fresh V4SI pseudo as operand 2.
3026 (define_expand "vec_unpacks_float_hi_v8si"
3029 (match_operand:V8SI 1 "nonimmediate_operand" "")
3030 (parallel [(const_int 4)
3034 (set (match_operand:V4DF 0 "register_operand" "")
3038 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander "vec_unpacks_float_lo_v8si": sets V4DF operand 0 from V8SI
;; operand 1 using the element selector {0, 1, 2, 3}.
3040 (define_expand "vec_unpacks_float_lo_v8si"
3041 [(set (match_operand:V4DF 0 "register_operand" "")
3044 (match_operand:V8SI 1 "nonimmediate_operand" "")
3045 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
;; Expander "vec_unpacku_float_hi_v4si": V4SI operand 1 in (selector starts
;; at element 2), V2DF operand 0 out.
;; Visible steps: operand 6 is compared `lt' against operand 3, operand 7 is
;; ANDed with operand 4, and operand 0 = operand 6 + operand 8.
;; The preparation code sets operand 3 to a zero V2DF register, operand 4 to
;; a V2DF constant built from 2**32 (real_ldexp of 1.0 by 32), operand 5 to
;; a V4SI pseudo and operands 6-8 to V2DF pseudos.
;; NOTE(review): presumably this adds 2**32 to lanes that came out negative
;; from a signed conversion; the lines that set operands 5-8 are not
;; visible here -- confirm against the full pattern.
3048 (define_expand "vec_unpacku_float_hi_v4si"
3051 (match_operand:V4SI 1 "nonimmediate_operand" "")
3052 (parallel [(const_int 2)
3060 (parallel [(const_int 0) (const_int 1)]))))
3062 (lt:V2DF (match_dup 6) (match_dup 3)))
3064 (and:V2DF (match_dup 7) (match_dup 4)))
3065 (set (match_operand:V2DF 0 "register_operand" "")
3066 (plus:V2DF (match_dup 6) (match_dup 8)))]
3069 REAL_VALUE_TYPE TWO32r;
3073 real_ldexp (&TWO32r, &dconst1, 32);
3074 x = const_double_from_real_value (TWO32r, DFmode);
3076 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3077 operands[4] = force_reg (V2DFmode,
3078 ix86_build_const_vector (V2DFmode, 1, x));
3080 operands[5] = gen_reg_rtx (V4SImode);
3082 for (i = 6; i < 9; i++)
3083 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander "vec_unpacku_float_lo_v4si": V4SI operand 1 in (selector
;; {0, 1}), V2DF operand 0 out.
;; Visible steps: operand 5 is compared `lt' against operand 3, operand 6 is
;; ANDed with operand 4, and operand 0 = operand 5 + operand 7.
;; The preparation code sets operand 3 to a zero V2DF register, operand 4 to
;; a V2DF constant built from 2**32 (real_ldexp of 1.0 by 32) and operands
;; 5-7 to V2DF pseudos.
;; NOTE(review): presumably this adds 2**32 to lanes that came out negative
;; from a signed conversion; the lines that set operands 5-7 are not
;; visible here -- confirm against the full pattern.
3086 (define_expand "vec_unpacku_float_lo_v4si"
3090 (match_operand:V4SI 1 "nonimmediate_operand" "")
3091 (parallel [(const_int 0) (const_int 1)]))))
3093 (lt:V2DF (match_dup 5) (match_dup 3)))
3095 (and:V2DF (match_dup 6) (match_dup 4)))
3096 (set (match_operand:V2DF 0 "register_operand" "")
3097 (plus:V2DF (match_dup 5) (match_dup 7)))]
3100 REAL_VALUE_TYPE TWO32r;
3104 real_ldexp (&TWO32r, &dconst1, 32);
3105 x = const_double_from_real_value (TWO32r, DFmode);
3107 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3108 operands[4] = force_reg (V2DFmode,
3109 ix86_build_const_vector (V2DFmode, 1, x));
3111 for (i = 5; i < 8; i++)
3112 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander "vec_pack_trunc_v4df": V4DF operands 1 and 2 are each narrowed
;; with float_truncate:V4SF, and V8SF operand 0 receives the result.
;; Operands 3 and 4 are fresh V4SF pseudos created by the preparation code.
;; NOTE(review): presumably operands 3/4 hold the two truncated halves that
;; are concatenated into operand 0; those lines are not visible here.
3115 (define_expand "vec_pack_trunc_v4df"
3117 (float_truncate:V4SF
3118 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3120 (float_truncate:V4SF
3121 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3122 (set (match_operand:V8SF 0 "register_operand" "")
3128 operands[3] = gen_reg_rtx (V4SFmode);
3129 operands[4] = gen_reg_rtx (V4SFmode);
;; Expander "vec_pack_trunc_v2df": two V2DF inputs (operands 1, 2) -> one
;; V4SF result (operand 0).  Each input is converted with gen_sse2_cvtpd2ps
;; into its own V4SF temporary (r1, r2); gen_sse_movlhps then combines r1
;; and r2 into operand 0.
3132 (define_expand "vec_pack_trunc_v2df"
3133 [(match_operand:V4SF 0 "register_operand" "")
3134 (match_operand:V2DF 1 "nonimmediate_operand" "")
3135 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3140 r1 = gen_reg_rtx (V4SFmode);
3141 r2 = gen_reg_rtx (V4SFmode);
3143 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3144 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3145 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander "vec_pack_sfix_trunc_v2df": two V2DF inputs (operands 1, 2) ->
;; one V4SI result (operand 0).  Each input goes through gen_sse2_cvttpd2dq
;; into a V4SI temporary (r1, r2); the temporaries and the destination are
;; then viewed as V2DI via gen_lowpart and merged with
;; gen_vec_interleave_lowv2di.
3149 (define_expand "vec_pack_sfix_trunc_v2df"
3150 [(match_operand:V4SI 0 "register_operand" "")
3151 (match_operand:V2DF 1 "nonimmediate_operand" "")
3152 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3157 r1 = gen_reg_rtx (V4SImode);
3158 r2 = gen_reg_rtx (V4SImode);
3160 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3161 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3162 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3163 gen_lowpart (V2DImode, r1),
3164 gen_lowpart (V2DImode, r2)));
;; Expander "vec_pack_sfix_v2df": same shape as vec_pack_sfix_trunc_v2df,
;; except that each V2DF input is converted with gen_sse2_cvtpd2dq (rather
;; than the cvttpd2dq generator) before the V2DI-view interleave into
;; operand 0.
3168 (define_expand "vec_pack_sfix_v2df"
3169 [(match_operand:V4SI 0 "register_operand" "")
3170 (match_operand:V2DF 1 "nonimmediate_operand" "")
3171 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3176 r1 = gen_reg_rtx (V4SImode);
3177 r2 = gen_reg_rtx (V4SImode);
3179 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3180 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3181 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3182 gen_lowpart (V2DImode, r1),
3183 gen_lowpart (V2DImode, r2)));
3187 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3189 ;; Parallel single-precision floating point element swizzling
3191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "sse_movhlps_exp": V4SF operands 0, 1 and 2 may each be a
;; register or memory; the selector starts at element 6.  Before the
;; pattern is emitted the operands are legitimized by
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands).
3193 (define_expand "sse_movhlps_exp"
3194 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3197 (match_operand:V4SF 1 "nonimmediate_operand" "")
3198 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3199 (parallel [(const_int 6)
3204 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX (VEX-encoded) movhlps pattern.  Enabled for TARGET_AVX unless
;; operands 1 and 2 are both memory.  Three alternatives:
;;   0: all registers            -> vmovhlps
;;   1: operand 2 offsettable mem -> vmovlps reading %H2
;;   2: operand 0 memory (operand 1 tied to it) -> vmovhps store of operand 2
3206 (define_insn "*avx_movhlps"
3207 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3210 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3211 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3212 (parallel [(const_int 6)
3216 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3218 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3219 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3220 vmovhps\t{%2, %0|%0, %2}"
3221 [(set_attr "type" "ssemov")
3222 (set_attr "prefix" "vex")
3223 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movhlps pattern (two-operand forms; operand 1 is tied to
;; operand 0 in every alternative).  Enabled for TARGET_SSE unless operands
;; 1 and 2 are both memory.  Alternatives emit movhlps (registers), movlps
;; reading %H2 (offsettable memory source) or movhps (memory destination).
3225 (define_insn "sse_movhlps"
3226 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3229 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3230 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3231 (parallel [(const_int 6)
3235 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3237 movhlps\t{%2, %0|%0, %2}
3238 movlps\t{%H2, %0|%0, %H2}
3239 movhps\t{%2, %0|%0, %2}"
3240 [(set_attr "type" "ssemov")
3241 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander "sse_movlhps_exp": V4SF operands 0, 1 and 2 may each be a
;; register or memory; the selector starts at element 0.  Before the
;; pattern is emitted the operands are legitimized by
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands).
3243 (define_expand "sse_movlhps_exp"
3244 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3247 (match_operand:V4SF 1 "nonimmediate_operand" "")
3248 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3249 (parallel [(const_int 0)
3254 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX (VEX-encoded) movlhps pattern, enabled for TARGET_AVX when
;; ix86_binary_operator_ok accepts the operands.  Three alternatives:
;;   0: all registers      -> vmovlhps
;;   1: operand 2 memory   -> vmovhps load
;;   2: operand 0 offsettable memory (operand 1 tied to it)
;;                         -> vmovlps store of operand 2 to %H0
3256 (define_insn "*avx_movlhps"
3257 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3260 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3261 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3262 (parallel [(const_int 0)
3266 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3268 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3269 vmovhps\t{%2, %1, %0|%0, %1, %2}
3270 vmovlps\t{%2, %H0|%H0, %2}"
3271 [(set_attr "type" "ssemov")
3272 (set_attr "prefix" "vex")
3273 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movlhps pattern (operand 1 tied to operand 0 in every
;; alternative), enabled for TARGET_SSE when ix86_binary_operator_ok
;; accepts the operands.  Alternatives emit movlhps (registers), movhps
;; (memory source) or movlps to %H0 (offsettable memory destination).
3275 (define_insn "sse_movlhps"
3276 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3279 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3280 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3281 (parallel [(const_int 0)
3285 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3287 movlhps\t{%2, %0|%0, %2}
3288 movhps\t{%2, %0|%0, %2}
3289 movlps\t{%2, %H0|%H0, %2}"
3290 [(set_attr "type" "ssemov")
3291 (set_attr "mode" "V4SF,V2SF,V2SF")])
3293 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; "avx_unpckhps256": V8SF operands; the selector {2,10,3,11,6,14,7,15}
;; indexes operands 1 and 2 (operand 2 may be memory).  Emits the
;; three-operand vunpckhps.
3294 (define_insn "avx_unpckhps256"
3295 [(set (match_operand:V8SF 0 "register_operand" "=x")
3298 (match_operand:V8SF 1 "register_operand" "x")
3299 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3300 (parallel [(const_int 2) (const_int 10)
3301 (const_int 3) (const_int 11)
3302 (const_int 6) (const_int 14)
3303 (const_int 7) (const_int 15)])))]
3305 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3306 [(set_attr "type" "sselog")
3307 (set_attr "prefix" "vex")
3308 (set_attr "mode" "V8SF")])
;; VEX-encoded high interleave of two V4SF values: selector {2,6,3,7} over
;; operands 1 and 2 (operand 2 may be memory).  Emits the three-operand
;; vunpckhps.
3310 (define_insn "*avx_interleave_highv4sf"
3311 [(set (match_operand:V4SF 0 "register_operand" "=x")
3314 (match_operand:V4SF 1 "register_operand" "x")
3315 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3316 (parallel [(const_int 2) (const_int 6)
3317 (const_int 3) (const_int 7)])))]
3319 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3320 [(set_attr "type" "sselog")
3321 (set_attr "prefix" "vex")
3322 (set_attr "mode" "V4SF")])
;; Expander "vec_interleave_highv8sf" (V8SF operands 0, 1, 2).
;; Three selections are visible: {0,8,1,9,4,12,5,13} and
;; {2,10,3,11,6,14,7,15} -- the same index vectors used by avx_unpcklps256
;; and avx_unpckhps256 -- followed by a final selection
;; {4,5,6,7,12,13,14,15} whose result is stored in operand 0.
;; Operands 3 and 4 are fresh V8SF pseudos created by the preparation code.
;; NOTE(review): presumably operands 3/4 receive the two in-lane unpack
;; results and the last step picks their upper halves; the lines naming
;; those destinations are not visible here -- confirm.
3324 (define_expand "vec_interleave_highv8sf"
3328 (match_operand:V8SF 1 "register_operand" "x")
3329 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3330 (parallel [(const_int 0) (const_int 8)
3331 (const_int 1) (const_int 9)
3332 (const_int 4) (const_int 12)
3333 (const_int 5) (const_int 13)])))
3339 (parallel [(const_int 2) (const_int 10)
3340 (const_int 3) (const_int 11)
3341 (const_int 6) (const_int 14)
3342 (const_int 7) (const_int 15)])))
3343 (set (match_operand:V8SF 0 "register_operand" "")
3348 (parallel [(const_int 4) (const_int 5)
3349 (const_int 6) (const_int 7)
3350 (const_int 12) (const_int 13)
3351 (const_int 14) (const_int 15)])))]
3354 operands[3] = gen_reg_rtx (V8SFmode);
3355 operands[4] = gen_reg_rtx (V8SFmode);
;; Legacy SSE high interleave of two V4SF values: selector {2,6,3,7} over
;; operands 1 and 2.  Operand 1 is tied to the destination; emits the
;; two-operand unpckhps.
3358 (define_insn "vec_interleave_highv4sf"
3359 [(set (match_operand:V4SF 0 "register_operand" "=x")
3362 (match_operand:V4SF 1 "register_operand" "0")
3363 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3364 (parallel [(const_int 2) (const_int 6)
3365 (const_int 3) (const_int 7)])))]
3367 "unpckhps\t{%2, %0|%0, %2}"
3368 [(set_attr "type" "sselog")
3369 (set_attr "mode" "V4SF")])
3371 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; "avx_unpcklps256": V8SF operands; the selector {0,8,1,9,4,12,5,13}
;; indexes operands 1 and 2 (operand 2 may be memory).  Emits the
;; three-operand vunpcklps.
3372 (define_insn "avx_unpcklps256"
3373 [(set (match_operand:V8SF 0 "register_operand" "=x")
3376 (match_operand:V8SF 1 "register_operand" "x")
3377 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3378 (parallel [(const_int 0) (const_int 8)
3379 (const_int 1) (const_int 9)
3380 (const_int 4) (const_int 12)
3381 (const_int 5) (const_int 13)])))]
3383 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3384 [(set_attr "type" "sselog")
3385 (set_attr "prefix" "vex")
3386 (set_attr "mode" "V8SF")])
;; VEX-encoded low interleave of two V4SF values: selector {0,4,1,5} over
;; operands 1 and 2 (operand 2 may be memory).  Emits the three-operand
;; vunpcklps.
3388 (define_insn "*avx_interleave_lowv4sf"
3389 [(set (match_operand:V4SF 0 "register_operand" "=x")
3392 (match_operand:V4SF 1 "register_operand" "x")
3393 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3394 (parallel [(const_int 0) (const_int 4)
3395 (const_int 1) (const_int 5)])))]
3397 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3398 [(set_attr "type" "sselog")
3399 (set_attr "prefix" "vex")
3400 (set_attr "mode" "V4SF")])
;; Expander "vec_interleave_lowv8sf" (V8SF operands 0, 1, 2).
;; Three selections are visible: {0,8,1,9,4,12,5,13} and
;; {2,10,3,11,6,14,7,15} -- the same index vectors used by avx_unpcklps256
;; and avx_unpckhps256 -- followed by a final selection
;; {0,1,2,3,8,9,10,11} whose result is stored in operand 0.
;; Operands 3 and 4 are fresh V8SF pseudos created by the preparation code.
;; NOTE(review): presumably operands 3/4 receive the two in-lane unpack
;; results and the last step picks their lower halves; the lines naming
;; those destinations are not visible here -- confirm.
3402 (define_expand "vec_interleave_lowv8sf"
3406 (match_operand:V8SF 1 "register_operand" "x")
3407 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3408 (parallel [(const_int 0) (const_int 8)
3409 (const_int 1) (const_int 9)
3410 (const_int 4) (const_int 12)
3411 (const_int 5) (const_int 13)])))
3417 (parallel [(const_int 2) (const_int 10)
3418 (const_int 3) (const_int 11)
3419 (const_int 6) (const_int 14)
3420 (const_int 7) (const_int 15)])))
3421 (set (match_operand:V8SF 0 "register_operand" "")
3426 (parallel [(const_int 0) (const_int 1)
3427 (const_int 2) (const_int 3)
3428 (const_int 8) (const_int 9)
3429 (const_int 10) (const_int 11)])))]
3432 operands[3] = gen_reg_rtx (V8SFmode);
3433 operands[4] = gen_reg_rtx (V8SFmode);
;; Legacy SSE low interleave of two V4SF values: selector {0,4,1,5} over
;; operands 1 and 2.  Operand 1 is tied to the destination; emits the
;; two-operand unpcklps.
3436 (define_insn "vec_interleave_lowv4sf"
3437 [(set (match_operand:V4SF 0 "register_operand" "=x")
3440 (match_operand:V4SF 1 "register_operand" "0")
3441 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3442 (parallel [(const_int 0) (const_int 4)
3443 (const_int 1) (const_int 5)])))]
3445 "unpcklps\t{%2, %0|%0, %2}"
3446 [(set_attr "type" "sselog")
3447 (set_attr "mode" "V4SF")])
3449 ;; These are modeled with the same vec_concat as the others so that we
3450 ;; capture users of shufps that can use the new instructions.
;; "avx_movshdup256": V8SF operand 1 (register or memory) with the selector
;; {1,1,3,3,5,5,7,7}, i.e. every odd-indexed index appears twice.  Emits
;; vmovshdup.
3451 (define_insn "avx_movshdup256"
3452 [(set (match_operand:V8SF 0 "register_operand" "=x")
3455 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3457 (parallel [(const_int 1) (const_int 1)
3458 (const_int 3) (const_int 3)
3459 (const_int 5) (const_int 5)
3460 (const_int 7) (const_int 7)])))]
3462 "vmovshdup\t{%1, %0|%0, %1}"
3463 [(set_attr "type" "sse")
3464 (set_attr "prefix" "vex")
3465 (set_attr "mode" "V8SF")])
;; "sse3_movshdup": 128-bit (V4SF) pattern whose selector starts at element
;; 1.  The template uses the %v prefix marker together with the
;; "maybe_vex" prefix attribute, so the same pattern emits either movshdup
;; or vmovshdup.
3467 (define_insn "sse3_movshdup"
3468 [(set (match_operand:V4SF 0 "register_operand" "=x")
3471 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3473 (parallel [(const_int 1)
3478 "%vmovshdup\t{%1, %0|%0, %1}"
3479 [(set_attr "type" "sse")
3480 (set_attr "prefix_rep" "1")
3481 (set_attr "prefix" "maybe_vex")
3482 (set_attr "mode" "V4SF")])
;; "avx_movsldup256": V8SF operand 1 (register or memory) with the selector
;; {0,0,2,2,4,4,6,6}, i.e. every even-indexed index appears twice.  Emits
;; vmovsldup.
3484 (define_insn "avx_movsldup256"
3485 [(set (match_operand:V8SF 0 "register_operand" "=x")
3488 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3490 (parallel [(const_int 0) (const_int 0)
3491 (const_int 2) (const_int 2)
3492 (const_int 4) (const_int 4)
3493 (const_int 6) (const_int 6)])))]
3495 "vmovsldup\t{%1, %0|%0, %1}"
3496 [(set_attr "type" "sse")
3497 (set_attr "prefix" "vex")
3498 (set_attr "mode" "V8SF")])
;; "sse3_movsldup": 128-bit (V4SF) pattern whose selector starts at element
;; 0.  The template uses the %v prefix marker together with the
;; "maybe_vex" prefix attribute, so the same pattern emits either movsldup
;; or vmovsldup.
3500 (define_insn "sse3_movsldup"
3501 [(set (match_operand:V4SF 0 "register_operand" "=x")
3504 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3506 (parallel [(const_int 0)
3511 "%vmovsldup\t{%1, %0|%0, %1}"
3512 [(set_attr "type" "sse")
3513 (set_attr "prefix_rep" "1")
3514 (set_attr "prefix" "maybe_vex")
3515 (set_attr "mode" "V4SF")])
;; Expander "avx_shufps256": takes the shuffle immediate as operand 3 and
;; decodes it into eight explicit element indices for avx_shufps256_1.
;; Each 2-bit field of the mask is used twice:
;;   bits 0-1, 2-3 -> indices in 0..3, and the same values + 4
;;   bits 4-5, 6-7 -> indices in 8..11 (field + 8), and 12..15 (field + 12)
3517 (define_expand "avx_shufps256"
3518 [(match_operand:V8SF 0 "register_operand" "")
3519 (match_operand:V8SF 1 "register_operand" "")
3520 (match_operand:V8SF 2 "nonimmediate_operand" "")
3521 (match_operand:SI 3 "const_int_operand" "")]
3524 int mask = INTVAL (operands[3]);
3525 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3526 GEN_INT ((mask >> 0) & 3),
3527 GEN_INT ((mask >> 2) & 3),
3528 GEN_INT (((mask >> 4) & 3) + 8),
3529 GEN_INT (((mask >> 6) & 3) + 8),
3530 GEN_INT (((mask >> 0) & 3) + 4),
3531 GEN_INT (((mask >> 2) & 3) + 4),
3532 GEN_INT (((mask >> 4) & 3) + 12),
3533 GEN_INT (((mask >> 6) & 3) + 12)));
3537 ;; Each 2-bit field in the mask selects 2 elements.
;; "avx_shufps256_1": V8SF shuffle expressed with eight explicit indices
;; (operands 3-10).  The insn condition requires operands 7-10 to equal
;; operands 3-6 plus 4, so both four-element halves use the same pattern.
;; The output code re-encodes operands 3-6 into the 8-bit immediate
;; (two bits per index, subtracting 8 from operands 5 and 6) and emits
;; vshufps.
3538 (define_insn "avx_shufps256_1"
3539 [(set (match_operand:V8SF 0 "register_operand" "=x")
3542 (match_operand:V8SF 1 "register_operand" "x")
3543 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3544 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3545 (match_operand 4 "const_0_to_3_operand" "")
3546 (match_operand 5 "const_8_to_11_operand" "")
3547 (match_operand 6 "const_8_to_11_operand" "")
3548 (match_operand 7 "const_4_to_7_operand" "")
3549 (match_operand 8 "const_4_to_7_operand" "")
3550 (match_operand 9 "const_12_to_15_operand" "")
3551 (match_operand 10 "const_12_to_15_operand" "")])))]
3553 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3554 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3555 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3556 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3559 mask = INTVAL (operands[3]);
3560 mask |= INTVAL (operands[4]) << 2;
3561 mask |= (INTVAL (operands[5]) - 8) << 4;
3562 mask |= (INTVAL (operands[6]) - 8) << 6;
3563 operands[3] = GEN_INT (mask);
3565 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3567 [(set_attr "type" "sselog")
3568 (set_attr "length_immediate" "1")
3569 (set_attr "prefix" "vex")
3570 (set_attr "mode" "V8SF")])
;; Expander "sse_shufps": takes the shuffle immediate as operand 3 and
;; decodes it into four explicit element indices for sse_shufps_v4sf.
;; Bits 0-1 and 2-3 give indices in 0..3; bits 4-5 and 6-7 give indices in
;; 4..7 (field value + 4).
3572 (define_expand "sse_shufps"
3573 [(match_operand:V4SF 0 "register_operand" "")
3574 (match_operand:V4SF 1 "register_operand" "")
3575 (match_operand:V4SF 2 "nonimmediate_operand" "")
3576 (match_operand:SI 3 "const_int_operand" "")]
3579 int mask = INTVAL (operands[3]);
3580 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3581 GEN_INT ((mask >> 0) & 3),
3582 GEN_INT ((mask >> 2) & 3),
3583 GEN_INT (((mask >> 4) & 3) + 4),
3584 GEN_INT (((mask >> 6) & 3) + 4)));
;; VEX-encoded shufps over the SSEMODE4S iterator modes: selects four
;; elements from the concatenation of operands 1 and 2, with indices given
;; by operands 3-4 (range 0..3) and 5-6 (range 4..7).  The output code
;; packs the four indices into the 8-bit immediate (two bits each,
;; subtracting 4 from operands 5 and 6) and emits three-operand vshufps.
3588 (define_insn "*avx_shufps_<mode>"
3589 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3590 (vec_select:SSEMODE4S
3591 (vec_concat:<ssedoublesizemode>
3592 (match_operand:SSEMODE4S 1 "register_operand" "x")
3593 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3594 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3595 (match_operand 4 "const_0_to_3_operand" "")
3596 (match_operand 5 "const_4_to_7_operand" "")
3597 (match_operand 6 "const_4_to_7_operand" "")])))]
3601 mask |= INTVAL (operands[3]) << 0;
3602 mask |= INTVAL (operands[4]) << 2;
3603 mask |= (INTVAL (operands[5]) - 4) << 4;
3604 mask |= (INTVAL (operands[6]) - 4) << 6;
3605 operands[3] = GEN_INT (mask);
3607 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3609 [(set_attr "type" "sselog")
3610 (set_attr "length_immediate" "1")
3611 (set_attr "prefix" "vex")
3612 (set_attr "mode" "V4SF")])
;; Legacy SSE shufps over the SSEMODE4S iterator modes; operand 1 is tied
;; to the destination.  Selects four elements from the concatenation of
;; operands 1 and 2, with indices given by operands 3-4 (range 0..3) and
;; 5-6 (range 4..7).  The output code packs the indices into the 8-bit
;; immediate (two bits each, subtracting 4 from operands 5 and 6) and
;; emits two-operand shufps.
3614 (define_insn "sse_shufps_<mode>"
3615 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3616 (vec_select:SSEMODE4S
3617 (vec_concat:<ssedoublesizemode>
3618 (match_operand:SSEMODE4S 1 "register_operand" "0")
3619 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3620 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3621 (match_operand 4 "const_0_to_3_operand" "")
3622 (match_operand 5 "const_4_to_7_operand" "")
3623 (match_operand 6 "const_4_to_7_operand" "")])))]
3627 mask |= INTVAL (operands[3]) << 0;
3628 mask |= INTVAL (operands[4]) << 2;
3629 mask |= (INTVAL (operands[5]) - 4) << 4;
3630 mask |= (INTVAL (operands[6]) - 4) << 6;
3631 operands[3] = GEN_INT (mask);
3633 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3635 [(set_attr "type" "sselog")
3636 (set_attr "length_immediate" "1")
3637 (set_attr "mode" "V4SF")])
;; "sse_storehps": V2SF operand 0 receives elements {2, 3} of V4SF operand
;; 1.  Three alternatives, all using the %v (maybe-VEX) prefix marker:
;;   0: memory destination            -> movhps store
;;   1: register to register          -> movhlps
;;   2: offsettable memory source     -> movlps load from %H1
3639 (define_insn "sse_storehps"
3640 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3642 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3643 (parallel [(const_int 2) (const_int 3)])))]
3646 %vmovhps\t{%1, %0|%0, %1}
3647 %vmovhlps\t{%1, %d0|%d0, %1}
3648 %vmovlps\t{%H1, %d0|%d0, %H1}"
3649 [(set_attr "type" "ssemov")
3650 (set_attr "prefix" "maybe_vex")
3651 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander "sse_loadhps_exp": combines elements {0, 1} of V4SF operand 1
;; with V2SF operand 2 into V4SF operand 0.  The operands are legitimized
;; first by ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands).
3653 (define_expand "sse_loadhps_exp"
3654 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3657 (match_operand:V4SF 1 "nonimmediate_operand" "")
3658 (parallel [(const_int 0) (const_int 1)]))
3659 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3661 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; VEX-encoded loadhps: elements {0, 1} of V4SF operand 1 combined with
;; V2SF operand 2.  Three alternatives:
;;   0: operand 2 in memory   -> vmovhps load
;;   1: operand 2 in register -> vmovlhps
;;   2: operand 0 offsettable memory (operand 1 tied to it)
;;                            -> vmovlps store of operand 2 to %H0
3663 (define_insn "*avx_loadhps"
3664 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3667 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3668 (parallel [(const_int 0) (const_int 1)]))
3669 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3672 vmovhps\t{%2, %1, %0|%0, %1, %2}
3673 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3674 vmovlps\t{%2, %H0|%H0, %2}"
3675 [(set_attr "type" "ssemov")
3676 (set_attr "prefix" "vex")
3677 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE loadhps: elements {0, 1} of V4SF operand 1 (always tied to
;; operand 0) combined with V2SF operand 2.  Alternatives emit movhps
;; (memory source), movlhps (register source) or movlps to %H0
;; (offsettable memory destination).
3679 (define_insn "sse_loadhps"
3680 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3683 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3684 (parallel [(const_int 0) (const_int 1)]))
3685 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3688 movhps\t{%2, %0|%0, %2}
3689 movlhps\t{%2, %0|%0, %2}
3690 movlps\t{%2, %H0|%H0, %2}"
3691 [(set_attr "type" "ssemov")
3692 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX-encoded storelps: V2SF operand 0 receives elements {0, 1} of V4SF
;; operand 1.  Three alternatives:
;;   0: memory destination   -> vmovlps store
;;   1: register to register -> vmovaps (full-register copy)
;;   2: memory source        -> vmovlps load merged into operand 0
;; The "mode" attribute of alternative 1 is V4SF: vmovaps is a
;; packed-single move, and this matches the sse_storelps pattern.
3694 (define_insn "*avx_storelps"
3695 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3697 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3698 (parallel [(const_int 0) (const_int 1)])))]
3701 vmovlps\t{%1, %0|%0, %1}
3702 vmovaps\t{%1, %0|%0, %1}
3703 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3704 [(set_attr "type" "ssemov")
3705 (set_attr "prefix" "vex")
3706 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE storelps: V2SF operand 0 receives elements {0, 1} of V4SF
;; operand 1.  Alternatives emit movlps (memory destination), movaps
;; (register to register) or movlps (memory source).
3708 (define_insn "sse_storelps"
3709 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3711 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3712 (parallel [(const_int 0) (const_int 1)])))]
3715 movlps\t{%1, %0|%0, %1}
3716 movaps\t{%1, %0|%0, %1}
3717 movlps\t{%1, %0|%0, %1}"
3718 [(set_attr "type" "ssemov")
3719 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander "sse_loadlps_exp": combines V2SF operand 2 with elements
;; {2, 3} of V4SF operand 1 into V4SF operand 0.  The operands are
;; legitimized first by ix86_fixup_binary_operands (UNKNOWN, V4SFmode,
;; operands).
3721 (define_expand "sse_loadlps_exp"
3722 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3724 (match_operand:V2SF 2 "nonimmediate_operand" "")
3726 (match_operand:V4SF 1 "nonimmediate_operand" "")
3727 (parallel [(const_int 2) (const_int 3)]))))]
3729 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; VEX-encoded loadlps: V2SF operand 2 combined with elements {2, 3} of
;; V4SF operand 1.  Three alternatives:
;;   0: all registers -> vshufps with immediate 0xe4
;;   1: operand 2 in memory -> three-operand vmovlps load
;;   2: operand 0 in memory (operand 1 tied to it) -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: it has three register operands
;; and the pattern's "prefix" attribute is "vex", so the legacy "shufps"
;; spelling would not assemble.
3731 (define_insn "*avx_loadlps"
3732 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3734 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3736 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3737 (parallel [(const_int 2) (const_int 3)]))))]
3740 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3741 vmovlps\t{%2, %1, %0|%0, %1, %2}
3742 vmovlps\t{%2, %0|%0, %2}"
3743 [(set_attr "type" "sselog,ssemov,ssemov")
3744 (set_attr "length_immediate" "1,*,*")
3745 (set_attr "prefix" "vex")
3746 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE loadlps: V2SF operand 2 combined with elements {2, 3} of
;; V4SF operand 1.  Three alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied -> movlps load
;;   2: operand 0 in memory, operand 1 tied -> movlps store
3748 (define_insn "sse_loadlps"
3749 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3751 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3753 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3754 (parallel [(const_int 2) (const_int 3)]))))]
3757 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3758 movlps\t{%2, %0|%0, %2}
3759 movlps\t{%2, %0|%0, %2}"
3760 [(set_attr "type" "sselog,ssemov,ssemov")
3761 (set_attr "length_immediate" "1,*,*")
3762 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; VEX-encoded movss between V4SF registers: three-operand vmovss with
;; operands 1 and 2 as sources and operand 0 as destination.
3764 (define_insn "*avx_movss"
3765 [(set (match_operand:V4SF 0 "register_operand" "=x")
3767 (match_operand:V4SF 2 "register_operand" "x")
3768 (match_operand:V4SF 1 "register_operand" "x")
3771 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3772 [(set_attr "type" "ssemov")
3773 (set_attr "prefix" "vex")
3774 (set_attr "mode" "SF")])
;; Legacy SSE movss between V4SF registers: operand 1 is tied to the
;; destination and operand 2 is the source of the two-operand movss.
3776 (define_insn "sse_movss"
3777 [(set (match_operand:V4SF 0 "register_operand" "=x")
3779 (match_operand:V4SF 2 "register_operand" "x")
3780 (match_operand:V4SF 1 "register_operand" "0")
3783 "movss\t{%2, %0|%0, %2}"
3784 [(set_attr "type" "ssemov")
3785 (set_attr "mode" "SF")])
;; Expander "vec_dupv4sf": V4SF operand 0 from the scalar SF operand 1.
;; The preparation code may force operand 1 into a register; because
;; operand 1 is a scalar SF value, the register is requested in SFmode
;; (asking for V4SFmode would mismatch the operand's declared mode).
3787 (define_expand "vec_dupv4sf"
3788 [(set (match_operand:V4SF 0 "register_operand" "")
3790 (match_operand:SF 1 "nonimmediate_operand" "")))]
3794 operands[1] = force_reg (SFmode, operands[1]);
;; VEX-encoded pattern producing V4SF operand 0 from SF operand 1.
;; Two alternatives:
;;   0: register source -> vshufps with immediate 0 (operand 1 used twice)
;;   1: memory source   -> vbroadcastss
3797 (define_insn "*vec_dupv4sf_avx"
3798 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3800 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3803 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3804 vbroadcastss\t{%1, %0|%0, %1}"
3805 [(set_attr "type" "sselog1,ssemov")
3806 (set_attr "length_immediate" "1,0")
3807 (set_attr "prefix_extra" "0,1")
3808 (set_attr "prefix" "vex")
3809 (set_attr "mode" "V4SF")])
;; Legacy SSE pattern producing V4SF operand 0 from SF operand 1, which
;; must already live in the destination register (constraint "0").
;; Emits shufps with immediate 0 on that register.
3811 (define_insn "*vec_dupv4sf"
3812 [(set (match_operand:V4SF 0 "register_operand" "=x")
3814 (match_operand:SF 1 "register_operand" "0")))]
3816 "shufps\t{$0, %0, %0|%0, %0, 0}"
3817 [(set_attr "type" "sselog1")
3818 (set_attr "length_immediate" "1")
3819 (set_attr "mode" "V4SF")])
;; AVX-era pattern building V2SF operand 0 from SF operands 1 and 2.
;; Five alternatives (constraint "C" appears where a move-only template is
;; used for operand 2):
;;   0: x,x    -> vunpcklps
;;   1: x,m    -> vinsertps with immediate 0x10
;;   2: m,C    -> vmovss load of operand 1
;;   3: *y regs -> punpckldq (MMX)
;;   4: *y, m,C -> movd (MMX)
;; The "prefix" attribute is "orig" for alternatives 3 and 4 and "vex"
;; for the others.
3821 (define_insn "*vec_concatv2sf_avx"
3822 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3824 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3825 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3828 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3829 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3830 vmovss\t{%1, %0|%0, %1}
3831 punpckldq\t{%2, %0|%0, %2}
3832 movd\t{%1, %0|%0, %1}"
3833 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3834 (set_attr "length_immediate" "*,1,*,*,*")
3835 (set_attr "prefix_extra" "*,1,*,*,*")
3836 (set (attr "prefix")
3837 (if_then_else (eq_attr "alternative" "3,4")
3838 (const_string "orig")
3839 (const_string "vex")))
3840 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3842 ;; Although insertps takes register source, we prefer
3843 ;; unpcklps with register source since it is shorter.
;; Non-VEX pattern building V2SF operand 0 from SF operands 1 and 2.
;; Alternatives: unpcklps (register operand 2), insertps with immediate
;; 0x10 (memory operand 2), movss load of operand 1, and two MMX forms
;; (punpckldq, movd).  Operand 1 is tied to the destination in the
;; alternatives that combine two values (0, 1 and 3).
3844 (define_insn "*vec_concatv2sf_sse4_1"
3845 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3847 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3848 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3851 unpcklps\t{%2, %0|%0, %2}
3852 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3853 movss\t{%1, %0|%0, %1}
3854 punpckldq\t{%2, %0|%0, %2}
3855 movd\t{%1, %0|%0, %1}"
3856 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3857 (set_attr "prefix_data16" "*,1,*,*,*")
3858 (set_attr "prefix_extra" "*,1,*,*,*")
3859 (set_attr "length_immediate" "*,1,*,*,*")
3860 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3862 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3863 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3864 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Baseline pattern building V2SF operand 0 from SF operands 1 and 2
;; (operand 2 restricted to reg_or_0_operand).  Alternatives: unpcklps,
;; movss load of operand 1, and the MMX forms punpckldq and movd.
3865 (define_insn "*vec_concatv2sf_sse"
3866 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3868 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3869 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3872 unpcklps\t{%2, %0|%0, %2}
3873 movss\t{%1, %0|%0, %1}
3874 punpckldq\t{%2, %0|%0, %2}
3875 movd\t{%1, %0|%0, %1}"
3876 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3877 (set_attr "mode" "V4SF,SF,DI,DI")])
;; VEX-encoded pattern building V4SF operand 0 from V2SF operands 1 and 2.
;; Operand 2 in a register -> vmovlhps; operand 2 in memory -> vmovhps.
3879 (define_insn "*vec_concatv4sf_avx"
3880 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3882 (match_operand:V2SF 1 "register_operand" " x,x")
3883 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3886 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3887 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3888 [(set_attr "type" "ssemov")
3889 (set_attr "prefix" "vex")
3890 (set_attr "mode" "V4SF,V2SF")])
;; Legacy SSE pattern building V4SF operand 0 from V2SF operands 1 and 2;
;; operand 1 is tied to the destination.  Operand 2 in a register ->
;; movlhps; operand 2 in memory -> movhps.
3892 (define_insn "*vec_concatv4sf_sse"
3893 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3895 (match_operand:V2SF 1 "register_operand" " 0,0")
3896 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3899 movlhps\t{%2, %0|%0, %2}
3900 movhps\t{%2, %0|%0, %2}"
3901 [(set_attr "type" "ssemov")
3902 (set_attr "mode" "V4SF,V2SF")])
;; Expander "vec_init<mode>" for every mode in the SSEMODE iterator.
;; All work is delegated to ix86_expand_vector_init (false, operands[0],
;; operands[1]); operand 1 has no predicate or mode of its own here.
3904 (define_expand "vec_init<mode>"
3905 [(match_operand:SSEMODE 0 "register_operand" "")
3906 (match_operand 1 "" "")]
3909 ix86_expand_vector_init (false, operands[0], operands[1]);
;; VEX-encoded vec_merge of a duplicated scalar (operand 2) into vector
;; operand 1, over the SSEMODE4S modes.  Six alternatives; the five visible
;; templates are vinsertps $0xe, vmov<ssescalarmodesuffix>, vmovd, vmovss
;; and vpinsrd $0.  Operand 1 is "C" in the first three alternatives, a
;; register in the next two, and tied to the memory destination in the
;; last.
;; NOTE(review): the template line for the sixth (memory-destination)
;; alternative is not visible here.
3913 (define_insn "*vec_set<mode>_0_avx"
3914 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3915 (vec_merge:SSEMODE4S
3916 (vec_duplicate:SSEMODE4S
3917 (match_operand:<ssescalarmode> 2
3918 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3919 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3923 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3924 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3925 vmovd\t{%2, %0|%0, %2}
3926 vmovss\t{%2, %1, %0|%0, %1, %2}
3927 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3929 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3930 (set_attr "prefix_extra" "*,*,*,*,1,*")
3931 (set_attr "length_immediate" "*,*,*,*,1,*")
3932 (set_attr "prefix" "vex")
3933 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Non-VEX counterpart of *vec_set<mode>_0_avx: vec_merge of a duplicated
;; scalar (operand 2) into vector operand 1 over the SSEMODE4S modes.
;; The five visible templates are insertps $0xe, mov<ssescalarmodesuffix>,
;; movd, movss and pinsrd $0; operand 1 is tied to the destination
;; wherever it is not "C".
;; NOTE(review): the last alternative's operand 2 constraint is "*rfF",
;; whereas the AVX and SSE2 siblings use "x*rfF" -- confirm whether the
;; missing "x" is intentional.
3935 (define_insn "*vec_set<mode>_0_sse4_1"
3936 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3937 (vec_merge:SSEMODE4S
3938 (vec_duplicate:SSEMODE4S
3939 (match_operand:<ssescalarmode> 2
3940 "general_operand" " x,m,*r,x,*rm,*rfF"))
3941 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
3945 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
3946 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3947 movd\t{%2, %0|%0, %2}
3948 movss\t{%2, %0|%0, %2}
3949 pinsrd\t{$0, %2, %0|%0, %2, 0}
3951 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3952 (set_attr "prefix_extra" "*,*,*,*,1,*")
3953 (set_attr "length_immediate" "*,*,*,*,1,*")
3954 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Four-alternative variant of the element-0 vec_merge pattern over the
;; SSEMODE4S modes.  The three visible templates are
;; mov<ssescalarmodesuffix> (memory scalar), movd (general register
;; scalar) and movss (SSE register scalar, operand 1 tied to the
;; destination); the fourth alternative has a memory destination.
3956 (define_insn "*vec_set<mode>_0_sse2"
3957 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
3958 (vec_merge:SSEMODE4S
3959 (vec_duplicate:SSEMODE4S
3960 (match_operand:<ssescalarmode> 2
3961 "general_operand" " m,*r,x,x*rfF"))
3962 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
3966 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3967 movd\t{%2, %0|%0, %2}
3968 movss\t{%2, %0|%0, %2}
3970 [(set_attr "type" "ssemov")
3971 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Three-alternative variant of the element-0 vec_merge pattern over the
;; SSEMODE4S modes.  The first two alternatives (memory scalar with "C"
;; vector, and SSE register scalar with operand 1 tied to the destination)
;; both emit movss; the third alternative has a memory destination.
3973 (define_insn "vec_set<mode>_0"
3974 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
3975 (vec_merge:SSEMODE4S
3976 (vec_duplicate:SSEMODE4S
3977 (match_operand:<ssescalarmode> 2
3978 "general_operand" " m,x,x*rfF"))
3979 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
3983 movss\t{%2, %0|%0, %2}
3984 movss\t{%2, %0|%0, %2}
3986 [(set_attr "type" "ssemov")
3987 (set_attr "mode" "SF,SF,*")])
3989 ;; A subset is vec_setv4sf.
;; VEX-encoded insertion of SF operand 2 into V4SF operand 1.  Operand 3
;; is a power-of-two constant (1..8); the output code converts it to the
;; vinsertps immediate as exact_log2 (operand 3) << 4.
3990 (define_insn "*vec_setv4sf_avx"
3991 [(set (match_operand:V4SF 0 "register_operand" "=x")
3994 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3995 (match_operand:V4SF 1 "register_operand" "x")
3996 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3999 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4000 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4002 [(set_attr "type" "sselog")
4003 (set_attr "prefix_extra" "1")
4004 (set_attr "length_immediate" "1")
4005 (set_attr "prefix" "vex")
4006 (set_attr "mode" "V4SF")])
;; Non-VEX insertion of SF operand 2 into V4SF operand 1 (tied to the
;; destination).  Operand 3 is a power-of-two constant (1..8); the output
;; code converts it to the insertps immediate as
;; exact_log2 (operand 3) << 4.
4008 (define_insn "*vec_setv4sf_sse4_1"
4009 [(set (match_operand:V4SF 0 "register_operand" "=x")
4012 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4013 (match_operand:V4SF 1 "register_operand" "0")
4014 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4017 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4018 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4020 [(set_attr "type" "sselog")
4021 (set_attr "prefix_data16" "1")
4022 (set_attr "prefix_extra" "1")
4023 (set_attr "length_immediate" "1")
4024 (set_attr "mode" "V4SF")])
;; VEX-encoded insertps modeled as an unspec over V4SF operand 2 (register
;; or memory), V4SF operand 1 (register) and an immediate operand 3 in
;; 0..255, which is passed through unchanged to vinsertps.
4026 (define_insn "*avx_insertps"
4027 [(set (match_operand:V4SF 0 "register_operand" "=x")
4028 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4029 (match_operand:V4SF 1 "register_operand" "x")
4030 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4033 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4034 [(set_attr "type" "sselog")
4035 (set_attr "prefix" "vex")
4036 (set_attr "prefix_extra" "1")
4037 (set_attr "length_immediate" "1")
4038 (set_attr "mode" "V4SF")])
;; Non-VEX insertps modeled as an unspec over V4SF operand 2 (register
;; only), V4SF operand 1 (tied to the destination) and an immediate
;; operand 3 in 0..255, which is passed through unchanged to insertps.
4040 (define_insn "sse4_1_insertps"
4041 [(set (match_operand:V4SF 0 "register_operand" "=x")
4042 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4043 (match_operand:V4SF 1 "register_operand" "0")
4044 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4047 "insertps\t{%3, %2, %0|%0, %2, %3}";
4048 [(set_attr "type" "sselog")
4049 (set_attr "prefix_data16" "1")
4050 (set_attr "prefix_extra" "1")
4051 (set_attr "length_immediate" "1")
4052 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (condition: TARGET_SSE && reload_completed) of a
;; vec_merge/vec_duplicate of a non-memory scalar operand 1 into a memory
;; destination in one of the SSEMODE4S modes.  The replacement is a plain
;; move to the destination re-addressed in the scalar mode at offset 0.
;; NOTE(review): the keyword line opening this pattern and the rest of the
;; merge expression are not visible here.
4055 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
4056 (vec_merge:SSEMODE4S
4057 (vec_duplicate:SSEMODE4S
4058 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4061 "TARGET_SSE && reload_completed"
4064 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Expander "vec_set<mode>" for every mode in the SSEMODE iterator:
;; operand 0 is the vector register, operand 1 the scalar register and
;; operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_set (false, operands[0], operands[1],
;; INTVAL (operands[2])).
4069 (define_expand "vec_set<mode>"
4070 [(match_operand:SSEMODE 0 "register_operand" "")
4071 (match_operand:<ssescalarmode> 1 "register_operand" "")
4072 (match_operand 2 "const_int_operand" "")]
4075 ix86_expand_vector_set (false, operands[0], operands[1],
4076 INTVAL (operands[2]));
;; Extraction of element 0 of V4SF operand 1 into SF operand 0 (SSE
;; register, memory, x87 register or general register).  Valid for
;; TARGET_SSE unless both operands are memory.  After reload it is split:
;; operand 1 is re-expressed in SFmode (gen_rtx_REG on its register number,
;; or gen_lowpart) and an ordinary move into operand 0 is emitted.
4080 (define_insn_and_split "*vec_extractv4sf_0"
4081 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4083 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4084 (parallel [(const_int 0)])))]
4085 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4087 "&& reload_completed"
4090 rtx op1 = operands[1];
4092 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4094 op1 = gen_lowpart (SFmode, op1);
4095 emit_move_insn (operands[0], op1);
;; Expander "avx_vextractf128<mode>" for the AVX256MODE modes: operand 0
;; is the half-width destination, operand 1 the 256-bit source register and
;; operand 2 a constant 0 or 1.  The switch on operand 2 dispatches to
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>.
4099 (define_expand "avx_vextractf128<mode>"
4100 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4101 (match_operand:AVX256MODE 1 "register_operand" "")
4102 (match_operand:SI 2 "const_0_to_1_operand" "")]
4105 switch (INTVAL (operands[2]))
4108 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4111 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low-half extraction for the AVX256MODE4P modes: selects elements {0, 1}
;; of operand 1 into the half-width operand 0.  After reload it is split:
;; operand 1 is re-expressed in the half-width mode (gen_rtx_REG on its
;; register number, or gen_lowpart) and an ordinary move is emitted.
4119 (define_insn_and_split "vec_extract_lo_<mode>"
4120 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4121 (vec_select:<avxhalfvecmode>
4122 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
4123 (parallel [(const_int 0) (const_int 1)])))]
4126 "&& reload_completed"
4129 rtx op1 = operands[1];
4131 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4133 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4134 emit_move_insn (operands[0], op1);
;; Select elements 2 and 3 of an AVX256MODE4P register into a half-width
;; value using vextractf128 with immediate 1.  The result goes either to an
;; SSE register or directly to memory (second alternative is a store).
4138 (define_insn "vec_extract_hi_<mode>"
4139 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4140 (vec_select:<avxhalfvecmode>
4141 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4142 (parallel [(const_int 2) (const_int 3)])))]
4144 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4145 [(set_attr "type" "sselog")
4146 (set_attr "prefix_extra" "1")
4147 (set_attr "length_immediate" "1")
4148 (set_attr "memory" "none,store")
4149 (set_attr "prefix" "vex")
4150 (set_attr "mode" "V8SF")])
;; Select elements 0..3 of an AVX256MODE8P vector into a half-width value.
;; After reload this is split into a plain half-mode move of the source:
;; the same-numbered register in <avxhalfvecmode>, or gen_lowpart.
4152 (define_insn_and_split "vec_extract_lo_<mode>"
4153 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4154 (vec_select:<avxhalfvecmode>
4155 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
4156 (parallel [(const_int 0) (const_int 1)
4157 (const_int 2) (const_int 3)])))]
4160 "&& reload_completed"
4163 rtx op1 = operands[1];
4165 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4167 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4168 emit_move_insn (operands[0], op1);
;; Select elements 4..7 of an AVX256MODE8P register into a half-width value
;; using vextractf128 with immediate 1; destination is an SSE register or
;; memory (second alternative is a store).
4172 (define_insn "vec_extract_hi_<mode>"
4173 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4174 (vec_select:<avxhalfvecmode>
4175 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4176 (parallel [(const_int 4) (const_int 5)
4177 (const_int 6) (const_int 7)])))]
4179 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4180 [(set_attr "type" "sselog")
4181 (set_attr "prefix_extra" "1")
4182 (set_attr "length_immediate" "1")
4183 (set_attr "memory" "none,store")
4184 (set_attr "prefix" "vex")
4185 (set_attr "mode" "V8SF")])
;; Select elements 0..7 of a V16HI vector as a V8HI.  After reload this is
;; split into a plain V8HImode move of the source: the same-numbered
;; register in V8HImode, or gen_lowpart.
4187 (define_insn_and_split "vec_extract_lo_v16hi"
4188 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4190 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4191 (parallel [(const_int 0) (const_int 1)
4192 (const_int 2) (const_int 3)
4193 (const_int 4) (const_int 5)
4194 (const_int 6) (const_int 7)])))]
4197 "&& reload_completed"
4200 rtx op1 = operands[1];
4202 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4204 op1 = gen_lowpart (V8HImode, op1);
4205 emit_move_insn (operands[0], op1);
;; Select elements 8..15 of a V16HI register as a V8HI using vextractf128
;; with immediate 1; destination is an SSE register or memory.
4209 (define_insn "vec_extract_hi_v16hi"
4210 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4212 (match_operand:V16HI 1 "register_operand" "x,x")
4213 (parallel [(const_int 8) (const_int 9)
4214 (const_int 10) (const_int 11)
4215 (const_int 12) (const_int 13)
4216 (const_int 14) (const_int 15)])))]
4218 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4219 [(set_attr "type" "sselog")
4220 (set_attr "prefix_extra" "1")
4221 (set_attr "length_immediate" "1")
4222 (set_attr "memory" "none,store")
4223 (set_attr "prefix" "vex")
4224 (set_attr "mode" "V8SF")])
;; Select elements 0..15 of a V32QI vector as a V16QI.  After reload this is
;; split into a plain V16QImode move of the source: the same-numbered
;; register in V16QImode, or gen_lowpart.
4226 (define_insn_and_split "vec_extract_lo_v32qi"
4227 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4229 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4230 (parallel [(const_int 0) (const_int 1)
4231 (const_int 2) (const_int 3)
4232 (const_int 4) (const_int 5)
4233 (const_int 6) (const_int 7)
4234 (const_int 8) (const_int 9)
4235 (const_int 10) (const_int 11)
4236 (const_int 12) (const_int 13)
4237 (const_int 14) (const_int 15)])))]
4240 "&& reload_completed"
4243 rtx op1 = operands[1];
4245 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4247 op1 = gen_lowpart (V16QImode, op1);
4248 emit_move_insn (operands[0], op1);
;; Select elements 16..31 of a V32QI register as a V16QI using vextractf128
;; with immediate 1; destination is an SSE register or memory.
4252 (define_insn "vec_extract_hi_v32qi"
4253 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4255 (match_operand:V32QI 1 "register_operand" "x,x")
4256 (parallel [(const_int 16) (const_int 17)
4257 (const_int 18) (const_int 19)
4258 (const_int 20) (const_int 21)
4259 (const_int 22) (const_int 23)
4260 (const_int 24) (const_int 25)
4261 (const_int 26) (const_int 27)
4262 (const_int 28) (const_int 29)
4263 (const_int 30) (const_int 31)])))]
4265 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4266 [(set_attr "type" "sselog")
4267 (set_attr "prefix_extra" "1")
4268 (set_attr "length_immediate" "1")
4269 (set_attr "memory" "none,store")
4270 (set_attr "prefix" "vex")
4271 (set_attr "mode" "V8SF")])
;; Extract one SF element of a V4SF register, chosen by the constant in
;; operand 2 (0..3), into a general register or memory ("rm") with
;; extractps.  The "%v" template prefix and "maybe_vex" attribute let the
;; same pattern serve the VEX-encoded form.
4273 (define_insn "*sse4_1_extractps"
4274 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4276 (match_operand:V4SF 1 "register_operand" "x")
4277 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4279 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4280 [(set_attr "type" "sselog")
4281 (set_attr "prefix_data16" "1")
4282 (set_attr "prefix_extra" "1")
4283 (set_attr "length_immediate" "1")
4284 (set_attr "prefix" "maybe_vex")
4285 (set_attr "mode" "V4SF")])
;; Extract element operand 2 (0..3) from a V4SF held in offsettable memory.
;; The split replaces the extraction with an SFmode move from the source
;; address advanced by 4 * index bytes.
4287 (define_insn_and_split "*vec_extract_v4sf_mem"
4288 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4290 (match_operand:V4SF 1 "memory_operand" "o")
4291 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4297 int i = INTVAL (operands[2]);
4299 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Standard-name expander vec_extract<mode> for the VEC_EXTRACT_MODE modes.
;; Forwards the scalar destination (operand 0), the source vector
;; (operand 1) and the constant in operand 2 (as a host integer) to
;; ix86_expand_vector_extract.
4303 (define_expand "vec_extract<mode>"
4304 [(match_operand:<avxscalarmode> 0 "register_operand" "")
4305 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4306 (match_operand 2 "const_int_operand" "")]
4309 ix86_expand_vector_extract (false, operands[0], operands[1],
4310 INTVAL (operands[2]));
4314 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4316 ;; Parallel double-precision floating point element swizzling
4318 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4320 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: the V4DF result takes elements 1, 5, 3 and 7 of the
;; value formed from operands 1 and 2 (operand 2 may be memory).
4321 (define_insn "avx_unpckhpd256"
4322 [(set (match_operand:V4DF 0 "register_operand" "=x")
4325 (match_operand:V4DF 1 "register_operand" "x")
4326 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4327 (parallel [(const_int 1) (const_int 5)
4328 (const_int 3) (const_int 7)])))]
4330 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4331 [(set_attr "type" "sselog")
4332 (set_attr "prefix" "vex")
4333 (set_attr "mode" "V4DF")])
;; Expander for vec_interleave_highv4df built from three selections.  Two
;; fresh V4DF pseudos (operands 3 and 4) are allocated in the preparation
;; code; the visible selections use indices {0,4,2,6} and {1,5,3,7} on
;; operands 1 and 2, and the final set of operand 0 uses {2,3,6,7}.
;; NOTE(review): presumably the first two results land in operands 3 and 4
;; and feed the last selection -- confirm against the full pattern.
4335 (define_expand "vec_interleave_highv4df"
4339 (match_operand:V4DF 1 "register_operand" "x")
4340 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4341 (parallel [(const_int 0) (const_int 4)
4342 (const_int 2) (const_int 6)])))
4348 (parallel [(const_int 1) (const_int 5)
4349 (const_int 3) (const_int 7)])))
4350 (set (match_operand:V4DF 0 "register_operand" "")
4355 (parallel [(const_int 2) (const_int 3)
4356 (const_int 6) (const_int 7)])))]
4359 operands[3] = gen_reg_rtx (V4DFmode);
4360 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for vec_interleave_highv2df.  Both inputs may be memory; when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a V2DF register.
4364 (define_expand "vec_interleave_highv2df"
4365 [(set (match_operand:V2DF 0 "register_operand" "")
4368 (match_operand:V2DF 1 "nonimmediate_operand" "")
4369 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4370 (parallel [(const_int 1)
4374 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4375 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX form of the V2DF high interleave.  Four alternatives, in order:
;;   reg, reg          -> vunpckhpd
;;   mem, same as op 1 -> vmovddup from %H1
;;   mem, reg          -> vmovlpd from %H1 merged with operand 2
;;   memory dest       -> vmovhpd store of operand 1
;; Valid only when ix86_vec_interleave_v2df_operator_ok (operands, 1).
4378 (define_insn "*avx_interleave_highv2df"
4379 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4382 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4383 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4384 (parallel [(const_int 1)
4386 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4388 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4389 vmovddup\t{%H1, %0|%0, %H1}
4390 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4391 vmovhpd\t{%1, %0|%0, %1}"
4392 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4393 (set_attr "prefix" "vex")
4394 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 form of the V2DF high interleave (two-operand encodings, so one
;; input is tied to the destination).  Alternatives, in order: unpckhpd,
;; movddup from %H1, movlpd from %H1, and a movhpd store to memory.
;; Valid only when ix86_vec_interleave_v2df_operator_ok (operands, 1).
4396 (define_insn "*sse3_interleave_highv2df"
4397 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4400 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4401 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4402 (parallel [(const_int 1)
4404 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4406 unpckhpd\t{%2, %0|%0, %2}
4407 movddup\t{%H1, %0|%0, %H1}
4408 movlpd\t{%H1, %0|%0, %H1}
4409 movhpd\t{%1, %0|%0, %1}"
4410 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4411 (set_attr "prefix_data16" "*,*,1,1")
4412 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF high interleave; same as the SSE3 variant minus
;; the movddup alternative.  Alternatives, in order: unpckhpd, movlpd from
;; %H1, and a movhpd store to memory.
;; Valid only when ix86_vec_interleave_v2df_operator_ok (operands, 1).
4414 (define_insn "*sse2_interleave_highv2df"
4415 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4418 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4419 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4420 (parallel [(const_int 1)
4422 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4424 unpckhpd\t{%2, %0|%0, %2}
4425 movlpd\t{%H1, %0|%0, %H1}
4426 movhpd\t{%1, %0|%0, %1}"
4427 [(set_attr "type" "sselog,ssemov,ssemov")
4428 (set_attr "prefix_data16" "*,1,1")
4429 (set_attr "mode" "V2DF,V1DF,V1DF")])
4431 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup: a V4DF selection with indices
;; {0,4,2,6} whose visible input is the single operand 1 (register or
;; memory).
;; NOTE(review): presumably operand 1 supplies both halves of the selected
;; value (matching the vmovddup alternative of *avx_unpcklpd256) -- confirm.
4432 (define_expand "avx_movddup256"
4433 [(set (match_operand:V4DF 0 "register_operand" "")
4436 (match_operand:V4DF 1 "nonimmediate_operand" "")
4438 (parallel [(const_int 0) (const_int 4)
4439 (const_int 2) (const_int 6)])))]
;; Expander for the 256-bit unpcklpd: the V4DF result takes elements 0, 4,
;; 2 and 6 of the value formed from register operand 1 and operand 2
;; (register or memory).
4442 (define_expand "avx_unpcklpd256"
4443 [(set (match_operand:V4DF 0 "register_operand" "")
4446 (match_operand:V4DF 1 "register_operand" "")
4447 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4448 (parallel [(const_int 0) (const_int 4)
4449 (const_int 2) (const_int 6)])))]
;; Matcher shared by the 256-bit movddup and unpcklpd expanders (index
;; vector {0,4,2,6}).  When both inputs are the same operand the first
;; alternative emits vmovddup; otherwise vunpcklpd is used.  The condition
;; only lets operand 1 be memory when it is rtx-equal to operand 2.
4452 (define_insn "*avx_unpcklpd256"
4453 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4456 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4457 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4458 (parallel [(const_int 0) (const_int 4)
4459 (const_int 2) (const_int 6)])))]
4461 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4463 vmovddup\t{%1, %0|%0, %1}
4464 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4465 [(set_attr "type" "sselog")
4466 (set_attr "prefix" "vex")
4467 (set_attr "mode" "V4DF")])
;; Expander for vec_interleave_lowv4df built from three selections.  Two
;; fresh V4DF pseudos (operands 3 and 4) are allocated in the preparation
;; code; the visible selections use indices {0,4,2,6} and {1,5,3,7} on
;; operands 1 and 2, and the final set of operand 0 uses {0,1,4,5}.
;; NOTE(review): presumably the first two results land in operands 3 and 4
;; and feed the last selection -- confirm against the full pattern.
4469 (define_expand "vec_interleave_lowv4df"
4473 (match_operand:V4DF 1 "register_operand" "x")
4474 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4475 (parallel [(const_int 0) (const_int 4)
4476 (const_int 2) (const_int 6)])))
4482 (parallel [(const_int 1) (const_int 5)
4483 (const_int 3) (const_int 7)])))
4484 (set (match_operand:V4DF 0 "register_operand" "")
4489 (parallel [(const_int 0) (const_int 1)
4490 (const_int 4) (const_int 5)])))]
4493 operands[3] = gen_reg_rtx (V4DFmode);
4494 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for vec_interleave_lowv2df.  Both inputs may be memory; when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a V2DF register.
4497 (define_expand "vec_interleave_lowv2df"
4498 [(set (match_operand:V2DF 0 "register_operand" "")
4501 (match_operand:V2DF 1 "nonimmediate_operand" "")
4502 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4503 (parallel [(const_int 0)
4507 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4508 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX form of the V2DF low interleave.  Four alternatives, in order:
;;   reg, reg          -> vunpcklpd
;;   mem, same as op 1 -> vmovddup
;;   reg, mem          -> vmovhpd load of operand 2
;;   memory dest       -> vmovlpd store of operand 2 to %H0
;; Valid only when ix86_vec_interleave_v2df_operator_ok (operands, 0).
4511 (define_insn "*avx_interleave_lowv2df"
4512 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4515 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4516 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4517 (parallel [(const_int 0)
4519 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4521 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4522 vmovddup\t{%1, %0|%0, %1}
4523 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4524 vmovlpd\t{%2, %H0|%H0, %2}"
4525 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4526 (set_attr "prefix" "vex")
4527 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 form of the V2DF low interleave (two-operand encodings).
;; Alternatives, in order: unpcklpd, movddup, movhpd load of operand 2, and
;; a movlpd store of operand 2 to %H0 when the destination is memory.
;; Valid only when ix86_vec_interleave_v2df_operator_ok (operands, 0).
4529 (define_insn "*sse3_interleave_lowv2df"
4530 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4533 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4534 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4535 (parallel [(const_int 0)
4537 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4539 unpcklpd\t{%2, %0|%0, %2}
4540 movddup\t{%1, %0|%0, %1}
4541 movhpd\t{%2, %0|%0, %2}
4542 movlpd\t{%2, %H0|%H0, %2}"
4543 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4544 (set_attr "prefix_data16" "*,*,1,1")
4545 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF low interleave; same as the SSE3 variant minus the
;; movddup alternative.  Alternatives, in order: unpcklpd, movhpd load of
;; operand 2, and a movlpd store of operand 2 to %H0.
;; Valid only when ix86_vec_interleave_v2df_operator_ok (operands, 0).
4547 (define_insn "*sse2_interleave_lowv2df"
4548 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4551 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4552 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4553 (parallel [(const_int 0)
4555 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4557 unpcklpd\t{%2, %0|%0, %2}
4558 movhpd\t{%2, %0|%0, %2}
4559 movlpd\t{%2, %H0|%H0, %2}"
4560 [(set_attr "type" "sselog,ssemov,ssemov")
4561 (set_attr "prefix_data16" "*,1,1")
4562 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload splitter (TARGET_SSE3) for a V2DF store to memory whose
;; source is a selection from register operand 1.  It is replaced by two
;; DFmode stores of the low double of that register (the same-numbered
;; register viewed in DFmode), at byte offsets 0 and 8 of the destination.
4565 [(set (match_operand:V2DF 0 "memory_operand" "")
4568 (match_operand:V2DF 1 "register_operand" "")
4570 (parallel [(const_int 0)
4572 "TARGET_SSE3 && reload_completed"
4575 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4576 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4577 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Splitter (TARGET_SSE3) for a two-index selection from a V2DF memory
;; operand where the second index equals the first plus 2.  The result is
;; rewritten as a vec_duplicate of the DF found at byte offset
;; 8 * operand 2 of the memory operand.
4582 [(set (match_operand:V2DF 0 "register_operand" "")
4585 (match_operand:V2DF 1 "memory_operand" "")
4587 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4588 (match_operand:SI 3 "const_int_operand" "")])))]
4589 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4590 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4592 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander that turns the vshufpd immediate (operand 3) into explicit
;; element indices for avx_shufpd256_1: bit 1 chooses 5 or 4, bit 2 chooses
;; 3 or 2, and bit 3 chooses 7 or 6.
;; NOTE(review): bit 0 presumably chooses 1 or 0 for the first index; that
;; argument is not visible here -- confirm.
4595 (define_expand "avx_shufpd256"
4596 [(match_operand:V4DF 0 "register_operand" "")
4597 (match_operand:V4DF 1 "register_operand" "")
4598 (match_operand:V4DF 2 "nonimmediate_operand" "")
4599 (match_operand:SI 3 "const_int_operand" "")]
4602 int mask = INTVAL (operands[3]);
4603 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4605 GEN_INT (mask & 2 ? 5 : 4),
4606 GEN_INT (mask & 4 ? 3 : 2),
4607 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the shuffle written as four explicit indices
;; (operands 3..6, constrained to 0-1, 4-5, 2-3 and 6-7).  The output code
;; folds them back into the 4-bit immediate -- one bit per index, after
;; subtracting each range's base -- and stores it in operands[3] for the
;; template.
4611 (define_insn "avx_shufpd256_1"
4612 [(set (match_operand:V4DF 0 "register_operand" "=x")
4615 (match_operand:V4DF 1 "register_operand" "x")
4616 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4617 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4618 (match_operand 4 "const_4_to_5_operand" "")
4619 (match_operand 5 "const_2_to_3_operand" "")
4620 (match_operand 6 "const_6_to_7_operand" "")])))]
4624 mask = INTVAL (operands[3]);
4625 mask |= (INTVAL (operands[4]) - 4) << 1;
4626 mask |= (INTVAL (operands[5]) - 2) << 2;
4627 mask |= (INTVAL (operands[6]) - 6) << 3;
4628 operands[3] = GEN_INT (mask);
4630 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4632 [(set_attr "type" "sselog")
4633 (set_attr "length_immediate" "1")
4634 (set_attr "prefix" "vex")
4635 (set_attr "mode" "V4DF")])
;; Expander that turns the shufpd immediate (operand 3) into explicit
;; element indices for sse2_shufpd_v2df: bit 1 chooses 3 or 2 for the
;; second index.
;; NOTE(review): bit 0 presumably chooses 1 or 0 for the first index; that
;; argument is not visible here -- confirm.
4637 (define_expand "sse2_shufpd"
4638 [(match_operand:V2DF 0 "register_operand" "")
4639 (match_operand:V2DF 1 "register_operand" "")
4640 (match_operand:V2DF 2 "nonimmediate_operand" "")
4641 (match_operand:SI 3 "const_int_operand" "")]
4644 int mask = INTVAL (operands[3]);
4645 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4647 GEN_INT (mask & 2 ? 3 : 2)));
;; Standard-name expander vec_extract_even<mode> for the SSEMODE_EO modes.
;; Delegates to ix86_expand_vec_extract_even_odd with a final argument of 0.
4651 (define_expand "vec_extract_even<mode>"
4652 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4653 (match_operand:SSEMODE_EO 1 "register_operand" "")
4654 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4657 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Standard-name expander vec_extract_odd<mode> for the SSEMODE_EO modes.
;; Delegates to ix86_expand_vec_extract_even_odd with a final argument of 1.
4661 (define_expand "vec_extract_odd<mode>"
4662 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4663 (match_operand:SSEMODE_EO 1 "register_operand" "")
4664 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4667 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4671 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand form of the V2DI high interleave, emitted as
;; vpunpckhqdq; operand 2 may be memory.
4672 (define_insn "*avx_interleave_highv2di"
4673 [(set (match_operand:V2DI 0 "register_operand" "=x")
4676 (match_operand:V2DI 1 "register_operand" "x")
4677 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4678 (parallel [(const_int 1)
4681 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4682 [(set_attr "type" "sselog")
4683 (set_attr "prefix" "vex")
4684 (set_attr "mode" "TI")])
;; Two-operand form of the V2DI high interleave, emitted as punpckhqdq;
;; operand 1 is tied to the destination and operand 2 may be memory.
4686 (define_insn "vec_interleave_highv2di"
4687 [(set (match_operand:V2DI 0 "register_operand" "=x")
4690 (match_operand:V2DI 1 "register_operand" "0")
4691 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4692 (parallel [(const_int 1)
4695 "punpckhqdq\t{%2, %0|%0, %2}"
4696 [(set_attr "type" "sselog")
4697 (set_attr "prefix_data16" "1")
4698 (set_attr "mode" "TI")])
;; AVX three-operand form of the V2DI low interleave, emitted as
;; vpunpcklqdq; operand 2 may be memory.
4700 (define_insn "*avx_interleave_lowv2di"
4701 [(set (match_operand:V2DI 0 "register_operand" "=x")
4704 (match_operand:V2DI 1 "register_operand" "x")
4705 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4706 (parallel [(const_int 0)
4709 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4710 [(set_attr "type" "sselog")
4711 (set_attr "prefix" "vex")
4712 (set_attr "mode" "TI")])
;; Two-operand form of the V2DI low interleave, emitted as punpcklqdq;
;; operand 1 is tied to the destination and operand 2 may be memory.
4714 (define_insn "vec_interleave_lowv2di"
4715 [(set (match_operand:V2DI 0 "register_operand" "=x")
4718 (match_operand:V2DI 1 "register_operand" "0")
4719 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4720 (parallel [(const_int 0)
4723 "punpcklqdq\t{%2, %0|%0, %2}"
4724 [(set_attr "type" "sselog")
4725 (set_attr "prefix_data16" "1")
4726 (set_attr "mode" "TI")])
;; AVX vshufpd on the SSEMODE2D modes with the shuffle written as two
;; explicit indices into the concatenation of operands 1 and 2 (operand 3
;; in 0-1, operand 4 in 2-3).  The output code rebuilds the 2-bit immediate
;; from them and stores it in operands[3] for the template.
4728 (define_insn "*avx_shufpd_<mode>"
4729 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4730 (vec_select:SSEMODE2D
4731 (vec_concat:<ssedoublesizemode>
4732 (match_operand:SSEMODE2D 1 "register_operand" "x")
4733 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4734 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4735 (match_operand 4 "const_2_to_3_operand" "")])))]
4739 mask = INTVAL (operands[3]);
4740 mask |= (INTVAL (operands[4]) - 2) << 1;
4741 operands[3] = GEN_INT (mask);
4743 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4745 [(set_attr "type" "sselog")
4746 (set_attr "length_immediate" "1")
4747 (set_attr "prefix" "vex")
4748 (set_attr "mode" "V2DF")])
;; Two-operand shufpd on the SSEMODE2D modes (operand 1 tied to the
;; destination).  The shuffle is written as two explicit indices into the
;; concatenation of operands 1 and 2; the output code rebuilds the 2-bit
;; immediate from them and stores it in operands[3] for the template.
4750 (define_insn "sse2_shufpd_<mode>"
4751 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4752 (vec_select:SSEMODE2D
4753 (vec_concat:<ssedoublesizemode>
4754 (match_operand:SSEMODE2D 1 "register_operand" "0")
4755 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4756 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4757 (match_operand 4 "const_2_to_3_operand" "")])))]
4761 mask = INTVAL (operands[3]);
4762 mask |= (INTVAL (operands[4]) - 2) << 1;
4763 operands[3] = GEN_INT (mask);
4765 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4767 [(set_attr "type" "sselog")
4768 (set_attr "length_immediate" "1")
4769 (set_attr "mode" "V2DF")])
4771 ;; Avoid combining registers from different units in a single alternative,
4772 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: extract element 1 (the high double) of a V2DF as a DF.  Source and
;; destination may not both be memory.  The first two alternatives emit
;; vmovhpd (store to memory) and vunpckhpd (register to register); the
;; remaining three take the source from offsettable memory.
4773 (define_insn "*avx_storehpd"
4774 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4776 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4777 (parallel [(const_int 1)])))]
4778 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4780 vmovhpd\t{%1, %0|%0, %1}
4781 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4785 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4786 (set_attr "prefix" "vex")
4787 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: extract element 1 (the high double) of a V2DF as a DF.  Source and
;; destination may not both be memory.  The first alternative is a movhpd
;; store to memory; the second works in place on a register (operand 1 tied
;; to the destination); the remaining three take the source from
;; offsettable memory.
4789 (define_insn "sse2_storehpd"
4790 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4792 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4793 (parallel [(const_int 1)])))]
4794 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4796 movhpd\t{%1, %0|%0, %1}
4801 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4802 (set_attr "prefix_data16" "1,*,*,*,*")
4803 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload splitter: extracting element 1 of a V2DF that lives in
;; memory into a DF register becomes a plain DFmode load from byte offset 8
;; of that memory operand.
4806 [(set (match_operand:DF 0 "register_operand" "")
4808 (match_operand:V2DF 1 "memory_operand" "")
4809 (parallel [(const_int 1)])))]
4810 "TARGET_SSE2 && reload_completed"
4811 [(set (match_dup 0) (match_dup 1))]
4812 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4814 ;; Avoid combining registers from different units in a single alternative,
4815 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low double) of a V2DF as a DF.  Source and
;; destination may not both be memory.  The first alternative is a
;; [v]movlpd store to memory; the other four cover register and memory
;; sources for SSE, x87 and general-register destinations.
4816 (define_insn "sse2_storelpd"
4817 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4819 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4820 (parallel [(const_int 0)])))]
4821 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4823 %vmovlpd\t{%1, %0|%0, %1}
4828 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4829 (set_attr "prefix_data16" "1,*,*,*,*")
4830 (set_attr "prefix" "maybe_vex")
4831 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload splitter: extracting element 0 of a V2DF into a DF register
;; becomes a plain DFmode move of the source, re-expressed either as the
;; same-numbered register in DFmode or through gen_lowpart.
4834 [(set (match_operand:DF 0 "register_operand" "")
4836 (match_operand:V2DF 1 "nonimmediate_operand" "")
4837 (parallel [(const_int 0)])))]
4838 "TARGET_SSE2 && reload_completed"
4841 rtx op1 = operands[1];
4843 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4845 op1 = gen_lowpart (DFmode, op1);
4846 emit_move_insn (operands[0], op1);
;; Expander front end for the loadhpd patterns: pairs element 0 of V2DF
;; operand 1 with DF operand 2.  All operands may be memory here, so the
;; preparation statement runs ix86_fixup_binary_operands on them first.
4850 (define_expand "sse2_loadhpd_exp"
4851 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4854 (match_operand:V2DF 1 "nonimmediate_operand" "")
4855 (parallel [(const_int 0)]))
4856 (match_operand:DF 2 "nonimmediate_operand" "")))]
4858 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4860 ;; Avoid combining registers from different units in a single alternative,
4861 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: pair element 0 of V2DF operand 1 with DF operand 2.  Operands 1
;; and 2 may not both be memory.  Register destinations use vmovhpd (memory
;; operand 2) or vunpcklpd (register operand 2); the three memory-destination
;; alternatives have operand 1 tied to the destination.
4862 (define_insn "*avx_loadhpd"
4863 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4866 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4867 (parallel [(const_int 0)]))
4868 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4869 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4871 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4872 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4876 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4877 (set_attr "prefix" "vex")
4878 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: pair element 0 of V2DF operand 1 with DF operand 2, i.e. replace
;; the high double of operand 1.  Operands 1 and 2 may not both be memory.
;; Register destinations use movhpd (memory operand 2) or unpcklpd
;; (register operand 2), both with operand 1 tied to the destination; the
;; three memory-destination alternatives also tie operand 1 to the
;; destination.
;;
;; The former alternative that tied operand 2 to the destination and
;; emitted "shufpd $1, %1, %0" has been removed.  With immediate 1, SHUFPD
;; fills the low result lane from the destination's own high quadword, so
;; it could never place element 0 of operand 1 there, and no single SHUFPD
;; can build the required value with operand 2 in the destination register.
4880 (define_insn "sse2_loadhpd"
4881 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4884 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4885 (parallel [(const_int 0)]))
4886 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4887 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4889 movhpd\t{%2, %0|%0, %2}
4890 unpcklpd\t{%2, %0|%0, %2}
4895 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4896 (set_attr "prefix_data16" "1,*,*,*,*")
4898 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload splitter for updating a V2DF in memory in place when the
;; source keeps element 0 of that same memory operand and supplies DF
;; register operand 1 as the other element.  Only the new element needs
;; writing, so it becomes a DFmode store to byte offset 8.
4901 [(set (match_operand:V2DF 0 "memory_operand" "")
4903 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4904 (match_operand:DF 1 "register_operand" "")))]
4905 "TARGET_SSE2 && reload_completed"
4906 [(set (match_dup 0) (match_dup 1))]
4907 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander front end for the loadlpd patterns: pairs DF operand 2 with
;; element 1 of V2DF operand 1.  All operands may be memory here, so the
;; preparation statement runs ix86_fixup_binary_operands on them first.
4909 (define_expand "sse2_loadlpd_exp"
4910 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4912 (match_operand:DF 2 "nonimmediate_operand" "")
4914 (match_operand:V2DF 1 "nonimmediate_operand" "")
4915 (parallel [(const_int 1)]))))]
4917 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4919 ;; Avoid combining registers from different units in a single alternative,
4920 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: pair DF operand 2 with element 1 of V2DF operand 1.  Operands 1
;; and 2 may not both be memory.  The register-destination alternatives, in
;; order: vmovsd load (operand 1 is the "C" constant), vmovlpd load,
;; register vmovsd merge, and vmovhpd from %H1 when operand 1 is in memory.
;; The three memory-destination alternatives tie operand 1 to the
;; destination.
4921 (define_insn "*avx_loadlpd"
4922 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4924 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4926 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4927 (parallel [(const_int 1)]))))]
4928 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4930 vmovsd\t{%2, %0|%0, %2}
4931 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4932 vmovsd\t{%2, %1, %0|%0, %1, %2}
4933 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4937 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4938 (set_attr "prefix" "vex")
4939 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2: pair DF operand 2 (new low element) with element 1 of V2DF
;; operand 1 (kept high element).  Operands 1 and 2 may not both be memory.
;; Register-destination alternatives, in order: movsd load (operand 1 is
;; the "C" constant), movlpd load, register movsd merge, shufpd, and movhpd
;; from %H1 when operand 1 is in memory.  The three memory-destination
;; alternatives tie operand 1 to the destination.
;;
;; In the shufpd alternative operand 2 is tied to the destination and
;; operand 1 is a separate register.  With immediate 2, SHUFPD keeps the
;; destination's low quadword and takes the high quadword from its source
;; operand, so the source must be operand 1; naming operand 2 there would
;; merely re-read the destination and lose operand 1's high element.
4941 (define_insn "sse2_loadlpd"
4942 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4944 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4946 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4947 (parallel [(const_int 1)]))))]
4948 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4950 movsd\t{%2, %0|%0, %2}
4951 movlpd\t{%2, %0|%0, %2}
4952 movsd\t{%2, %0|%0, %2}
4953 shufpd\t{$2, %1, %0|%0, %1, 2}
4954 movhpd\t{%H1, %0|%0, %H1}
4958 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4959 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4960 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4961 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Post-reload splitter for updating a V2DF in memory in place when the
;; source supplies DF register operand 1 as the first (low) element and
;; keeps element 1 of that same memory operand.  Only the low element
;; changes, so the replacement is a DFmode store to byte offset 0; storing
;; at offset 8 would overwrite the element that is meant to be preserved.
4964 [(set (match_operand:V2DF 0 "memory_operand" "")
4966 (match_operand:DF 1 "register_operand" "")
4967 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4968 "TARGET_SSE2 && reload_completed"
4969 [(set (match_dup 0) (match_dup 1))]
4970 "operands[0] = adjust_address (operands[0], DFmode, 0);")
4972 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 fallback for extracting element 1 of a V2DF as a DF,
;; using single-precision move instructions.  Alternatives, in order:
;; movhps store to memory, movhlps register to register, and movlps load
;; from %H1.  Source and destination may not both be memory.
4974 (define_insn "*vec_extractv2df_1_sse"
4975 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4977 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4978 (parallel [(const_int 1)])))]
4979 "!TARGET_SSE2 && TARGET_SSE
4980 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4982 movhps\t{%1, %0|%0, %1}
4983 movhlps\t{%1, %0|%0, %1}
4984 movlps\t{%H1, %0|%0, %H1}"
4985 [(set_attr "type" "ssemov")
4986 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 fallback for extracting element 0 of a V2DF as a DF,
;; using single-precision move instructions.  Alternatives, in order:
;; movlps store to memory, movaps register to register, and movlps load
;; from memory.  Source and destination may not both be memory.
4988 (define_insn "*vec_extractv2df_0_sse"
4989 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4991 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4992 (parallel [(const_int 0)])))]
4993 "!TARGET_SSE2 && TARGET_SSE
4994 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4996 movlps\t{%1, %0|%0, %1}
4997 movaps\t{%1, %0|%0, %1}
4998 movlps\t{%1, %0|%0, %1}"
4999 [(set_attr "type" "ssemov")
5000 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX movsd-style merge of two V2DF operands (operand 2 is listed first in
;; the pattern, operand 1 second).  Alternatives, in order: register vmovsd,
;; vmovlpd load of operand 2, vmovlpd store into a memory destination tied
;; to operand 1, vmovhps load from %H1, and vmovhps store of operand 1 to
;; %H0 when the destination is memory tied to operand 2.
5002 (define_insn "*avx_movsd"
5003 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5005 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5006 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5010 vmovsd\t{%2, %1, %0|%0, %1, %2}
5011 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5012 vmovlpd\t{%2, %0|%0, %2}
5013 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5014 vmovhps\t{%1, %H0|%H0, %1}"
5015 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5016 (set_attr "prefix" "vex")
5017 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 movsd-style merge of two V2DF operands: as the first alternative
;; (movsd %2 into a destination tied to operand 1) shows, the result takes
;; its low element from operand 2 and keeps the high element of operand 1.
;; Alternatives, in order: register movsd, movlpd load of operand 2, movlpd
;; store into a memory destination, shufpd, movhps load from %H1, and
;; movhps store of operand 1 to %H0.
;;
;; In the shufpd alternative operand 2 is tied to the destination and
;; operand 1 is a separate register.  With immediate 2, SHUFPD keeps the
;; destination's low quadword and takes the high quadword from its source
;; operand, so the source must be operand 1; naming operand 2 there would
;; merely re-read the destination and lose operand 1's high element.
5019 (define_insn "sse2_movsd"
5020 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5022 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5023 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5027 movsd\t{%2, %0|%0, %2}
5028 movlpd\t{%2, %0|%0, %2}
5029 movlpd\t{%2, %0|%0, %2}
5030 shufpd\t{$2, %1, %0|%0, %1, 2}
5031 movhps\t{%H1, %0|%0, %H1}
5032 movhps\t{%1, %H0|%H0, %1}"
5033 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5034 (set_attr "prefix_data16" "*,1,1,*,*,*")
5035 (set_attr "length_immediate" "*,*,*,1,*,*")
5036 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Build a V2DF from a single DF operand (register or memory) with
;; movddup; "%v" and "maybe_vex" let the pattern cover the VEX form too.
5038 (define_insn "*vec_dupv2df_sse3"
5039 [(set (match_operand:V2DF 0 "register_operand" "=x")
5041 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5043 "%vmovddup\t{%1, %0|%0, %1}"
5044 [(set_attr "type" "sselog1")
5045 (set_attr "prefix" "maybe_vex")
5046 (set_attr "mode" "DF")])
;; Build a V2DF from a single DF register operand that is tied to the
;; destination (constraint "0"), operating in place.
;; NOTE(review): the output template is not visible here -- confirm which
;; instruction is emitted.
5048 (define_insn "vec_dupv2df"
5049 [(set (match_operand:V2DF 0 "register_operand" "=x")
5051 (match_operand:DF 1 "register_operand" "0")))]
5054 [(set_attr "type" "sselog1")
5055 (set_attr "mode" "V2DF")])
;; V2DF construction from DF operand 1 (register or memory) that is emitted
;; as a single movddup of that operand.
;; NOTE(review): presumably matches a concatenation of operand 1 with
;; itself -- the second arm is not visible here; confirm.
5057 (define_insn "*vec_concatv2df_sse3"
5058 [(set (match_operand:V2DF 0 "register_operand" "=x")
5060 (match_operand:DF 1 "nonimmediate_operand" "xm")
5063 "%vmovddup\t{%1, %0|%0, %1}"
5064 [(set_attr "type" "sselog1")
5065 (set_attr "prefix" "maybe_vex")
5066 (set_attr "mode" "DF")])
;; AVX: build a V2DF from two DF operands.  Alternatives, in order:
;; vunpcklpd for two registers, vmovhpd when operand 2 is in memory, and a
;; vmovsd load of operand 1 when operand 2 is the "C" constant.
5068 (define_insn "*vec_concatv2df_avx"
5069 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5071 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5072 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5075 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5076 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5077 vmovsd\t{%1, %0|%0, %1}"
5078 [(set_attr "type" "ssemov")
5079 (set_attr "prefix" "vex")
5080 (set_attr "mode" "DF,V1DF,DF")])
;; Build a V2DF from two DF operands with two-operand encodings.  The first
;; three alternatives use the "Y2" register class and emit unpcklpd, movhpd
;; and movsd; the last two use the plain "x" class and fall back to the
;; single-precision movlhps and movhps forms.
5082 (define_insn "*vec_concatv2df"
5083 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5085 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5086 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5089 unpcklpd\t{%2, %0|%0, %2}
5090 movhpd\t{%2, %0|%0, %2}
5091 movsd\t{%1, %0|%0, %1}
5092 movlhps\t{%2, %0|%0, %2}
5093 movhps\t{%2, %0|%0, %2}"
5094 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5095 (set_attr "prefix_data16" "*,1,*,*,*")
5096 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5098 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5100 ;; Parallel integral arithmetic
5102 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander neg<mode>2 for the 16-byte integer vector modes.  The
;; preparation statement materialises an all-zero vector of the same mode
;; in a register as operands[2].
;; NOTE(review): presumably the negation is then expressed as
;; operands[2] minus operand 1 -- confirm against the full pattern.
5104 (define_expand "neg<mode>2"
5105 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5108 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5110 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the <plusminus_insn> operation on the 16-byte integer
;; vector modes.  Both inputs may be memory at expand time; the operands
;; are legitimised by ix86_fixup_binary_operands_no_copy.
5112 (define_expand "<plusminus_insn><mode>3"
5113 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5115 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5116 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5118 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the <plusminus_insn> operation on the 16-byte
;; integer vector modes.  The mnemonic is assembled from "vp", the
;; operation mnemonic and the element-size suffix.  Requires
;; ix86_binary_operator_ok for the operands.
5120 (define_insn "*avx_<plusminus_insn><mode>3"
5121 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5123 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5124 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5125 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5126 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5127 [(set_attr "type" "sseiadd")
5128 (set_attr "prefix" "vex")
5129 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the <plusminus_insn> operation on the 16-byte
;; integer vector modes; operand 1 is tied to the destination.  The
;; mnemonic is assembled from "p", the operation mnemonic and the
;; element-size suffix.  Requires ix86_binary_operator_ok for the operands.
5131 (define_insn "*<plusminus_insn><mode>3"
5132 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5134 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5135 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5136 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5137 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5138 [(set_attr "type" "sseiadd")
5139 (set_attr "prefix_data16" "1")
5140 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus operations on the SSEMODE12 modes
;; (V16QI and V8HI).  Both inputs may be memory at expand time; the
;; operands are legitimised by ix86_fixup_binary_operands_no_copy.
5142 (define_expand "sse2_<plusminus_insn><mode>3"
5143 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5144 (sat_plusminus:SSEMODE12
5145 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5146 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5148 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the sat_plusminus operations on the SSEMODE12
;; modes (V16QI and V8HI).  Requires ix86_binary_operator_ok for the
;; operands.
5150 (define_insn "*avx_<plusminus_insn><mode>3"
5151 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5152 (sat_plusminus:SSEMODE12
5153 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5154 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5155 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5156 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5157 [(set_attr "type" "sseiadd")
5158 (set_attr "prefix" "vex")
5159 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the sat_plusminus operations on the SSEMODE12
;; modes (V16QI and V8HI); operand 1 is tied to the destination.  Requires
;; ix86_binary_operator_ok for the operands.
5161 (define_insn "*sse2_<plusminus_insn><mode>3"
5162 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5163 (sat_plusminus:SSEMODE12
5164 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5165 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5166 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5167 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5168 [(set_attr "type" "sseiadd")
5169 (set_attr "prefix_data16" "1")
5170 (set_attr "mode" "TI")])
;; V16QI multiply, provided as an insn-and-split.  The split code allocates
;; six V16QI temporaries: t[0]..t[3] receive each input interleaved with
;; itself (high and low halves), t[4] and t[5] receive the two V8HI
;; products, and the even bytes of those products are merged into
;; operand 0 by ix86_expand_vec_extract_even_odd.
5172 (define_insn_and_split "mulv16qi3"
5173 [(set (match_operand:V16QI 0 "register_operand" "")
5174 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5175 (match_operand:V16QI 2 "register_operand" "")))]
5177 && can_create_pseudo_p ()"
5185 for (i = 0; i < 6; ++i)
5186 t[i] = gen_reg_rtx (V16QImode);
5188 /* Unpack data such that we've got a source byte in each low byte of
5189 each word. We don't care what goes into the high byte of each word.
5190 Rather than trying to get zero in there, most convenient is to let
5191 it be a copy of the low byte. */
5192 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5193 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5194 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5195 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5197 /* Multiply words. The end-of-line annotations here give a picture of what
5198 the output of that instruction looks like. Dot means don't care; the
5199 letters are the bytes of the result with A being the most significant. */
5200 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5201 gen_lowpart (V8HImode, t[0]),
5202 gen_lowpart (V8HImode, t[1])));
5203 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5204 gen_lowpart (V8HImode, t[2]),
5205 gen_lowpart (V8HImode, t[3])));
5207 /* Extract the even bytes and merge them back together. */
5208 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Expander for the V8HI multiply.  Both inputs may be registers or memory;
;; the preparation statement passes them to
;; ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
5212 (define_expand "mulv8hi3"
5213 [(set (match_operand:V8HI 0 "register_operand" "")
5214 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5215 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5217 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI multiply: emits three-operand vpmullw.  Operand 1
;; is marked commutative ("%") and is not tied to the destination.
5219 (define_insn "*avx_mulv8hi3"
5220 [(set (match_operand:V8HI 0 "register_operand" "=x")
5221 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5222 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5223 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5224 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5225 [(set_attr "type" "sseimul")
5226 (set_attr "prefix" "vex")
5227 (set_attr "mode" "TI")])
;; Legacy SSE2 form of the V8HI multiply: emits two-operand pmullw with
;; operand 1 tied to the destination and marked commutative ("%0").
5229 (define_insn "*mulv8hi3"
5230 [(set (match_operand:V8HI 0 "register_operand" "=x")
5231 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5232 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5233 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5234 "pmullw\t{%2, %0|%0, %2}"
5235 [(set_attr "type" "sseimul")
5236 (set_attr "prefix_data16" "1")
5237 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply (<s>mulv8hi3_highpart).
;; The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
5239 (define_expand "<s>mulv8hi3_highpart"
5240 [(set (match_operand:V8HI 0 "register_operand" "")
5245 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5247 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5250 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI high-part multiply: emits three-operand
;; vpmulh<u>w, with operand 1 commutative and not tied to the destination.
5252 (define_insn "*avx_<s>mulv8hi3_highpart"
5253 [(set (match_operand:V8HI 0 "register_operand" "=x")
5258 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5260 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5262 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5263 "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5264 [(set_attr "type" "sseimul")
5265 (set_attr "prefix" "vex")
5266 (set_attr "mode" "TI")])
;; Legacy SSE2 form of the V8HI high-part multiply: emits two-operand
;; pmulh<u>w with operand 1 tied to the destination.
5268 (define_insn "*<s>mulv8hi3_highpart"
5269 [(set (match_operand:V8HI 0 "register_operand" "=x")
5274 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5276 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5278 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5279 "pmulh<u>w\t{%2, %0|%0, %2}"
5280 [(set_attr "type" "sseimul")
5281 (set_attr "prefix_data16" "1")
5282 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from two V4SI inputs, using elements
;; 0 and 2 of each input (see the parallel selectors).  The operands are
;; fixed up for MULT in V4SImode before the pattern is emitted.
5284 (define_expand "sse2_umulv2siv2di3"
5285 [(set (match_operand:V2DI 0 "register_operand" "")
5289 (match_operand:V4SI 1 "nonimmediate_operand" "")
5290 (parallel [(const_int 0) (const_int 2)])))
5293 (match_operand:V4SI 2 "nonimmediate_operand" "")
5294 (parallel [(const_int 0) (const_int 2)])))))]
5296 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the V4SI -> V2DI multiply of elements 0 and 2: emits
;; three-operand vpmuludq.
5298 (define_insn "*avx_umulv2siv2di3"
5299 [(set (match_operand:V2DI 0 "register_operand" "=x")
5303 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5304 (parallel [(const_int 0) (const_int 2)])))
5307 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5308 (parallel [(const_int 0) (const_int 2)])))))]
5309 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5310 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5311 [(set_attr "type" "sseimul")
5312 (set_attr "prefix" "vex")
5313 (set_attr "mode" "TI")])
;; Legacy SSE2 form of the V4SI -> V2DI multiply of elements 0 and 2:
;; emits two-operand pmuludq with operand 1 tied to the destination.
5315 (define_insn "*sse2_umulv2siv2di3"
5316 [(set (match_operand:V2DI 0 "register_operand" "=x")
5320 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5321 (parallel [(const_int 0) (const_int 2)])))
5324 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5325 (parallel [(const_int 0) (const_int 2)])))))]
5326 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5327 "pmuludq\t{%2, %0|%0, %2}"
5328 [(set_attr "type" "sseimul")
5329 (set_attr "prefix_data16" "1")
5330 (set_attr "mode" "TI")])
;; Expander for the SSE4.1-named V4SI -> V2DI multiply of elements 0 and 2
;; of each input.  The operands are fixed up for MULT in V4SImode.
5332 (define_expand "sse4_1_mulv2siv2di3"
5333 [(set (match_operand:V2DI 0 "register_operand" "")
5337 (match_operand:V4SI 1 "nonimmediate_operand" "")
5338 (parallel [(const_int 0) (const_int 2)])))
5341 (match_operand:V4SI 2 "nonimmediate_operand" "")
5342 (parallel [(const_int 0) (const_int 2)])))))]
5344 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the V4SI -> V2DI multiply of elements 0 and 2: emits
;; three-operand vpmuldq.
5346 (define_insn "*avx_mulv2siv2di3"
5347 [(set (match_operand:V2DI 0 "register_operand" "=x")
5351 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5352 (parallel [(const_int 0) (const_int 2)])))
5355 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5356 (parallel [(const_int 0) (const_int 2)])))))]
5357 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5358 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5359 [(set_attr "type" "sseimul")
5360 (set_attr "prefix_extra" "1")
5361 (set_attr "prefix" "vex")
5362 (set_attr "mode" "TI")])
;; SSE4.1 form of the V4SI -> V2DI multiply of elements 0 and 2: emits
;; two-operand pmuldq with operand 1 tied to the destination.
5364 (define_insn "*sse4_1_mulv2siv2di3"
5365 [(set (match_operand:V2DI 0 "register_operand" "=x")
5369 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5370 (parallel [(const_int 0) (const_int 2)])))
5373 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5374 (parallel [(const_int 0) (const_int 2)])))))]
5375 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5376 "pmuldq\t{%2, %0|%0, %2}"
5377 [(set_attr "type" "sseimul")
5378 (set_attr "prefix_extra" "1")
5379 (set_attr "mode" "TI")])
;; Expander for pmaddwd: a V4SI result built from two V8HI inputs.  Each
;; input is used twice -- once directly and once via match_dup -- with
;; V4HI selections starting at element 0 and at element 1 respectively.
;; The operands are fixed up for MULT in V8HImode.
5381 (define_expand "sse2_pmaddwd"
5382 [(set (match_operand:V4SI 0 "register_operand" "")
5387 (match_operand:V8HI 1 "nonimmediate_operand" "")
5388 (parallel [(const_int 0)
5394 (match_operand:V8HI 2 "nonimmediate_operand" "")
5395 (parallel [(const_int 0)
5401 (vec_select:V4HI (match_dup 1)
5402 (parallel [(const_int 1)
5407 (vec_select:V4HI (match_dup 2)
5408 (parallel [(const_int 1)
5411 (const_int 7)]))))))]
5413 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of pmaddwd: same operand shape as the sse2_pmaddwd expander,
;; emitted as three-operand vpmaddwd.
5415 (define_insn "*avx_pmaddwd"
5416 [(set (match_operand:V4SI 0 "register_operand" "=x")
5421 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5422 (parallel [(const_int 0)
5428 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5429 (parallel [(const_int 0)
5435 (vec_select:V4HI (match_dup 1)
5436 (parallel [(const_int 1)
5441 (vec_select:V4HI (match_dup 2)
5442 (parallel [(const_int 1)
5445 (const_int 7)]))))))]
5446 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5447 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5448 [(set_attr "type" "sseiadd")
5449 (set_attr "prefix" "vex")
5450 (set_attr "mode" "TI")])
;; Legacy SSE2 form of pmaddwd: two-operand output with operand 1 tied to
;; the destination.  Also carries an atom_unit attribute ("simul").
5452 (define_insn "*sse2_pmaddwd"
5453 [(set (match_operand:V4SI 0 "register_operand" "=x")
5458 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5459 (parallel [(const_int 0)
5465 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5466 (parallel [(const_int 0)
5472 (vec_select:V4HI (match_dup 1)
5473 (parallel [(const_int 1)
5478 (vec_select:V4HI (match_dup 2)
5479 (parallel [(const_int 1)
5482 (const_int 7)]))))))]
5483 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5484 "pmaddwd\t{%2, %0|%0, %2}"
5485 [(set_attr "type" "sseiadd")
5486 (set_attr "atom_unit" "simul")
5487 (set_attr "prefix_data16" "1")
5488 (set_attr "mode" "TI")])
;; Expander for the V4SI multiply on register operands.  The operand
;; fix-up for MULT is applied only when TARGET_SSE4_1 or TARGET_AVX is set.
5490 (define_expand "mulv4si3"
5491 [(set (match_operand:V4SI 0 "register_operand" "")
5492 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5493 (match_operand:V4SI 2 "register_operand" "")))]
5496 if (TARGET_SSE4_1 || TARGET_AVX)
5497 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX form of the V4SI multiply: emits three-operand vpmulld.
5500 (define_insn "*avx_mulv4si3"
5501 [(set (match_operand:V4SI 0 "register_operand" "=x")
5502 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5503 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5504 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5505 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5506 [(set_attr "type" "sseimul")
5507 (set_attr "prefix_extra" "1")
5508 (set_attr "prefix" "vex")
5509 (set_attr "mode" "TI")])
;; SSE4.1 form of the V4SI multiply: emits two-operand pmulld with
;; operand 1 tied to the destination.
5511 (define_insn "*sse4_1_mulv4si3"
5512 [(set (match_operand:V4SI 0 "register_operand" "=x")
5513 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5514 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5515 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5516 "pmulld\t{%2, %0|%0, %2}"
5517 [(set_attr "type" "sseimul")
5518 (set_attr "prefix_extra" "1")
5519 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2-only targets (neither SSE4.1 nor AVX), provided
;; as an insn-and-split that needs fresh pseudos.  The split multiplies
;; elements 2 and 0, shifts both inputs down so elements 3 and 1 can be
;; multiplied the same way, then shuffles and interleaves the two partial
;; results into operand 0.
5521 (define_insn_and_split "*sse2_mulv4si3"
5522 [(set (match_operand:V4SI 0 "register_operand" "")
5523 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5524 (match_operand:V4SI 2 "register_operand" "")))]
5525 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5526 && can_create_pseudo_p ()"
5531 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5537 t1 = gen_reg_rtx (V4SImode);
5538 t2 = gen_reg_rtx (V4SImode);
5539 t3 = gen_reg_rtx (V4SImode);
5540 t4 = gen_reg_rtx (V4SImode);
5541 t5 = gen_reg_rtx (V4SImode);
5542 t6 = gen_reg_rtx (V4SImode);
5543 thirtytwo = GEN_INT (32);
5545 /* Multiply elements 2 and 0. */
5546 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5549 /* Shift both input vectors down one element, so that elements 3
5550 and 1 are now in the slots for elements 2 and 0. For K8, at
5551 least, this is faster than using a shuffle. */
5552 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5553 gen_lowpart (V1TImode, op1),
5555 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5556 gen_lowpart (V1TImode, op2),
5558 /* Multiply elements 3 and 1. */
5559 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5562 /* Move the results in element 2 down to element 1; we don't care
5563 what goes in elements 2 and 3. */
5564 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5565 const0_rtx, const0_rtx));
5566 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5567 const0_rtx, const0_rtx));
5569 /* Merge the parts back together. */
5570 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, provided as an insn-and-split that needs fresh pseudos.
;; Two expansion sequences are visible below: one built on the XOP helpers
;; (gen_xop_phadddq / gen_xop_pmacsdql) and one built from 32x32->64
;; multiplies (gen_sse2_umulv2siv2di3), 32-bit shifts and V2DI adds.
5574 (define_insn_and_split "mulv2di3"
5575 [(set (match_operand:V2DI 0 "register_operand" "")
5576 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5577 (match_operand:V2DI 2 "register_operand" "")))]
5579 && can_create_pseudo_p ()"
5584 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5593 /* op1: A,B,C,D, op2: E,F,G,H */
5594 op1 = gen_lowpart (V4SImode, op1);
5595 op2 = gen_lowpart (V4SImode, op2);
5597 t1 = gen_reg_rtx (V4SImode);
5598 t2 = gen_reg_rtx (V4SImode);
5599 t3 = gen_reg_rtx (V2DImode);
5600 t4 = gen_reg_rtx (V2DImode);
5603 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5609 /* t2: (B*E),(A*F),(D*G),(C*H) */
5610 emit_insn (gen_mulv4si3 (t2, t1, op2));
5612 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5613 emit_insn (gen_xop_phadddq (t3, t2));
5615 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5616 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5618 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5619 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5623 t1 = gen_reg_rtx (V2DImode);
5624 t2 = gen_reg_rtx (V2DImode);
5625 t3 = gen_reg_rtx (V2DImode);
5626 t4 = gen_reg_rtx (V2DImode);
5627 t5 = gen_reg_rtx (V2DImode);
5628 t6 = gen_reg_rtx (V2DImode);
5629 thirtytwo = GEN_INT (32);
5631 /* Multiply low parts. */
5632 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5633 gen_lowpart (V4SImode, op2)));
5635 /* Shift input vectors right 32 bits so we can multiply high parts. */
5636 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5637 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5639 /* Multiply high parts by low parts. */
5640 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5641 gen_lowpart (V4SImode, t3)));
5642 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5643 gen_lowpart (V4SImode, t2)));
5645 /* Shift them back. */
5646 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5647 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5649 /* Add the three parts together. */
5650 emit_insn (gen_addv2di3 (t6, t1, t4));
5651 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed V8HI multiply, high half.  Computes the low 16-bit
;; products (gen_mulv8hi3) and the signed high parts
;; (gen_smulv8hi3_highpart), then interleaves their high halves into
;; operand 0 viewed as V8HI.
5656 (define_expand "vec_widen_smult_hi_v8hi"
5657 [(match_operand:V4SI 0 "register_operand" "")
5658 (match_operand:V8HI 1 "register_operand" "")
5659 (match_operand:V8HI 2 "register_operand" "")]
5662 rtx op1, op2, t1, t2, dest;
5666 t1 = gen_reg_rtx (V8HImode);
5667 t2 = gen_reg_rtx (V8HImode);
5668 dest = gen_lowpart (V8HImode, operands[0]);
5670 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5671 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5672 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed V8HI multiply, low half.  Computes the low 16-bit
;; products (gen_mulv8hi3) and the signed high parts
;; (gen_smulv8hi3_highpart), then interleaves their low halves into
;; operand 0 viewed as V8HI.
5676 (define_expand "vec_widen_smult_lo_v8hi"
5677 [(match_operand:V4SI 0 "register_operand" "")
5678 (match_operand:V8HI 1 "register_operand" "")
5679 (match_operand:V8HI 2 "register_operand" "")]
5682 rtx op1, op2, t1, t2, dest;
5686 t1 = gen_reg_rtx (V8HImode);
5687 t2 = gen_reg_rtx (V8HImode);
5688 dest = gen_lowpart (V8HImode, operands[0]);
5690 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5691 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5692 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply, high half.  Computes the low 16-bit
;; products (gen_mulv8hi3) and the unsigned high parts
;; (gen_umulv8hi3_highpart), then interleaves their high halves into
;; operand 0 viewed as V8HI.
5696 (define_expand "vec_widen_umult_hi_v8hi"
5697 [(match_operand:V4SI 0 "register_operand" "")
5698 (match_operand:V8HI 1 "register_operand" "")
5699 (match_operand:V8HI 2 "register_operand" "")]
5702 rtx op1, op2, t1, t2, dest;
5706 t1 = gen_reg_rtx (V8HImode);
5707 t2 = gen_reg_rtx (V8HImode);
5708 dest = gen_lowpart (V8HImode, operands[0]);
5710 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5711 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5712 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply, low half.  Computes the low 16-bit
;; products (gen_mulv8hi3) and the unsigned high parts
;; (gen_umulv8hi3_highpart), then interleaves their low halves into
;; operand 0 viewed as V8HI.
5716 (define_expand "vec_widen_umult_lo_v8hi"
5717 [(match_operand:V4SI 0 "register_operand" "")
5718 (match_operand:V8HI 1 "register_operand" "")
5719 (match_operand:V8HI 2 "register_operand" "")]
5722 rtx op1, op2, t1, t2, dest;
5726 t1 = gen_reg_rtx (V8HImode);
5727 t2 = gen_reg_rtx (V8HImode);
5728 dest = gen_lowpart (V8HImode, operands[0]);
5730 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5731 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5732 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed V4SI multiply, high half.  Both inputs are shuffled
;; into V4SI temporaries with gen_sse2_pshufd_1 and the result is produced
;; by gen_xop_mulv2div2di3_high.
5736 (define_expand "vec_widen_smult_hi_v4si"
5737 [(match_operand:V2DI 0 "register_operand" "")
5738 (match_operand:V4SI 1 "register_operand" "")
5739 (match_operand:V4SI 2 "register_operand" "")]
5744 t1 = gen_reg_rtx (V4SImode);
5745 t2 = gen_reg_rtx (V4SImode);
5747 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5752 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5757 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed V4SI multiply, low half.  Both inputs are shuffled
;; into V4SI temporaries with gen_sse2_pshufd_1 and the result is produced
;; by gen_xop_mulv2div2di3_low.
5761 (define_expand "vec_widen_smult_lo_v4si"
5762 [(match_operand:V2DI 0 "register_operand" "")
5763 (match_operand:V4SI 1 "register_operand" "")
5764 (match_operand:V4SI 2 "register_operand" "")]
5769 t1 = gen_reg_rtx (V4SImode);
5770 t2 = gen_reg_rtx (V4SImode);
5772 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5777 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5782 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned V4SI multiply, high half.  Each input is interleaved
;; with itself (high half) so the wanted elements land in the slots read
;; by gen_sse2_umulv2siv2di3, which then writes operand 0.
5786 (define_expand "vec_widen_umult_hi_v4si"
5787 [(match_operand:V2DI 0 "register_operand" "")
5788 (match_operand:V4SI 1 "register_operand" "")
5789 (match_operand:V4SI 2 "register_operand" "")]
5792 rtx op1, op2, t1, t2;
5796 t1 = gen_reg_rtx (V4SImode);
5797 t2 = gen_reg_rtx (V4SImode);
5799 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5800 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5801 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned V4SI multiply, low half.  Each input is interleaved
;; with itself (low half) so the wanted elements land in the slots read
;; by gen_sse2_umulv2siv2di3, which then writes operand 0.
5805 (define_expand "vec_widen_umult_lo_v4si"
5806 [(match_operand:V2DI 0 "register_operand" "")
5807 (match_operand:V4SI 1 "register_operand" "")
5808 (match_operand:V4SI 2 "register_operand" "")]
5811 rtx op1, op2, t1, t2;
5815 t1 = gen_reg_rtx (V4SImode);
5816 t2 = gen_reg_rtx (V4SImode);
5818 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5819 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5820 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product of two V8HI inputs accumulated into a V4SI value:
;; operand 0 = operand 3 + pmaddwd (operand 1, operand 2), using a fresh
;; V4SI temporary for the pmaddwd result.
5824 (define_expand "sdot_prodv8hi"
5825 [(match_operand:V4SI 0 "register_operand" "")
5826 (match_operand:V8HI 1 "register_operand" "")
5827 (match_operand:V8HI 2 "register_operand" "")
5828 (match_operand:V4SI 3 "register_operand" "")]
5831 rtx t = gen_reg_rtx (V4SImode);
5832 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5833 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product of two V4SI inputs accumulated into a V2DI value.
;; t1 = umul (op1, op2) + op3; t2 and t3 are op1 and op2 after a V1TI
;; right shift; t4 = umul (t2, t3); operand 0 = t1 + t4.
5837 (define_expand "udot_prodv4si"
5838 [(match_operand:V2DI 0 "register_operand" "")
5839 (match_operand:V4SI 1 "register_operand" "")
5840 (match_operand:V4SI 2 "register_operand" "")
5841 (match_operand:V2DI 3 "register_operand" "")]
5846 t1 = gen_reg_rtx (V2DImode);
5847 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5848 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5850 t2 = gen_reg_rtx (V4SImode);
5851 t3 = gen_reg_rtx (V4SImode);
5852 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5853 gen_lowpart (V1TImode, operands[1]),
5855 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5856 gen_lowpart (V1TImode, operands[2]),
5859 t4 = gen_reg_rtx (V2DImode);
5860 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5862 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX form of the vector shift emitting vpsra<ssevecsize> on SSEMODE24
;; (V8HI, V4SI).  The SImode shift count is a register or a constant
;; ("xN"); length_immediate depends on whether it is a const_int.
5866 (define_insn "*avx_ashr<mode>3"
5867 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5869 (match_operand:SSEMODE24 1 "register_operand" "x")
5870 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5872 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5873 [(set_attr "type" "sseishft")
5874 (set_attr "prefix" "vex")
5875 (set (attr "length_immediate")
5876 (if_then_else (match_operand 2 "const_int_operand" "")
5878 (const_string "0")))
5879 (set_attr "mode" "TI")])
;; Legacy form of the vector shift emitting psra<ssevecsize> on SSEMODE24
;; (V8HI, V4SI), with operand 1 tied to the destination.  The SImode
;; shift count is a register or a constant ("xN"); length_immediate
;; depends on whether it is a const_int.
5881 (define_insn "ashr<mode>3"
5882 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5884 (match_operand:SSEMODE24 1 "register_operand" "0")
5885 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5887 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5888 [(set_attr "type" "sseishft")
5889 (set_attr "prefix_data16" "1")
5890 (set (attr "length_immediate")
5891 (if_then_else (match_operand 2 "const_int_operand" "")
5893 (const_string "0")))
5894 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) right shift emitting vpsrldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before it is printed as the instruction's immediate.
5896 (define_insn "*avx_lshrv1ti3"
5897 [(set (match_operand:V1TI 0 "register_operand" "=x")
5899 (match_operand:V1TI 1 "register_operand" "x")
5900 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5903 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5904 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5906 [(set_attr "type" "sseishft")
5907 (set_attr "prefix" "vex")
5908 (set_attr "length_immediate" "1")
5909 (set_attr "mode" "TI")])
;; AVX logical right shift on the SSEMODE248 modes, emitting three-operand
;; vpsrl<ssevecsize>.  The SImode shift count is a register or a constant
;; ("xN"); length_immediate depends on whether it is a const_int.
5911 (define_insn "*avx_lshr<mode>3"
5912 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5913 (lshiftrt:SSEMODE248
5914 (match_operand:SSEMODE248 1 "register_operand" "x")
5915 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5917 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5918 [(set_attr "type" "sseishft")
5919 (set_attr "prefix" "vex")
5920 (set (attr "length_immediate")
5921 (if_then_else (match_operand 2 "const_int_operand" "")
5923 (const_string "0")))
5924 (set_attr "mode" "TI")])
;; Legacy SSE2 whole-register (V1TI) right shift emitting psrldq, with
;; operand 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before it
;; is printed as the instruction's immediate.
5926 (define_insn "sse2_lshrv1ti3"
5927 [(set (match_operand:V1TI 0 "register_operand" "=x")
5929 (match_operand:V1TI 1 "register_operand" "0")
5930 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5933 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5934 return "psrldq\t{%2, %0|%0, %2}";
5936 [(set_attr "type" "sseishft")
5937 (set_attr "prefix_data16" "1")
5938 (set_attr "length_immediate" "1")
5939 (set_attr "atom_unit" "sishuf")
5940 (set_attr "mode" "TI")])
;; Legacy logical right shift on the SSEMODE248 modes, emitting
;; two-operand psrl<ssevecsize> with operand 1 tied to the destination.
;; The SImode shift count is a register or a constant ("xN");
;; length_immediate depends on whether it is a const_int.
5942 (define_insn "lshr<mode>3"
5943 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5944 (lshiftrt:SSEMODE248
5945 (match_operand:SSEMODE248 1 "register_operand" "0")
5946 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5948 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5949 [(set_attr "type" "sseishft")
5950 (set_attr "prefix_data16" "1")
5951 (set (attr "length_immediate")
5952 (if_then_else (match_operand 2 "const_int_operand" "")
5954 (const_string "0")))
5955 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) left shift emitting vpslldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before it is printed as the instruction's immediate.
5957 (define_insn "*avx_ashlv1ti3"
5958 [(set (match_operand:V1TI 0 "register_operand" "=x")
5959 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
5960 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5963 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5964 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5966 [(set_attr "type" "sseishft")
5967 (set_attr "prefix" "vex")
5968 (set_attr "length_immediate" "1")
5969 (set_attr "mode" "TI")])
;; AVX left shift on the SSEMODE248 modes, emitting three-operand
;; vpsll<ssevecsize>.  The SImode shift count is a register or a constant
;; ("xN"); length_immediate depends on whether it is a const_int.
5971 (define_insn "*avx_ashl<mode>3"
5972 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5974 (match_operand:SSEMODE248 1 "register_operand" "x")
5975 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5977 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5978 [(set_attr "type" "sseishft")
5979 (set_attr "prefix" "vex")
5980 (set (attr "length_immediate")
5981 (if_then_else (match_operand 2 "const_int_operand" "")
5983 (const_string "0")))
5984 (set_attr "mode" "TI")])
;; Legacy SSE2 whole-register (V1TI) left shift emitting pslldq, with
;; operand 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before it
;; is printed as the instruction's immediate.
5986 (define_insn "sse2_ashlv1ti3"
5987 [(set (match_operand:V1TI 0 "register_operand" "=x")
5988 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
5989 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5992 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5993 return "pslldq\t{%2, %0|%0, %2}";
5995 [(set_attr "type" "sseishft")
5996 (set_attr "prefix_data16" "1")
5997 (set_attr "length_immediate" "1")
5998 (set_attr "mode" "TI")])
;; Legacy left shift on the SSEMODE248 modes, emitting two-operand
;; psll<ssevecsize> with operand 1 tied to the destination.  The SImode
;; shift count is a register or a constant ("xN"); length_immediate
;; depends on whether it is a const_int.
6000 (define_insn "ashl<mode>3"
6001 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6003 (match_operand:SSEMODE248 1 "register_operand" "0")
6004 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6006 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6007 [(set_attr "type" "sseishft")
6008 (set_attr "prefix_data16" "1")
6009 (set (attr "length_immediate")
6010 (if_then_else (match_operand 2 "const_int_operand" "")
6012 (const_string "0")))
6013 (set_attr "mode" "TI")])
;; Whole-vector shift-left expander for the SSEMODEI modes.  The shift
;; amount must satisfy const_0_to_255_mul_8_operand.  Operands 0 and 1 are
;; re-viewed in V1TImode with gen_lowpart before the pattern is emitted.
6015 (define_expand "vec_shl_<mode>"
6016 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6018 (match_operand:SSEMODEI 1 "register_operand" "")
6019 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6022 operands[0] = gen_lowpart (V1TImode, operands[0]);
6023 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-vector shift-right expander for the SSEMODEI modes.  The shift
;; amount must satisfy const_0_to_255_mul_8_operand.  Operands 0 and 1 are
;; re-viewed in V1TImode with gen_lowpart before the pattern is emitted.
6026 (define_expand "vec_shr_<mode>"
6027 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6029 (match_operand:SSEMODEI 1 "register_operand" "")
6030 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6033 operands[0] = gen_lowpart (V1TImode, operands[0]);
6034 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX integer max/min pattern over SSEMODE124, emitting three-operand
;; vp<maxmin_int><ssevecsize>.  The prefix_extra attribute is chosen by
;; testing whether operand 0 has mode V16QI.
6037 (define_insn "*avx_<code><mode>3"
6038 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6040 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6041 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6042 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6043 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6044 [(set_attr "type" "sseiadd")
6045 (set (attr "prefix_extra")
6046 (if_then_else (match_operand:V16QI 0 "" "")
6048 (const_string "1")))
6049 (set_attr "prefix" "vex")
6050 (set_attr "mode" "TI")])
;; Expander for the V16QI max/min operation named <code>v16qi3.  The
;; operands are fixed up for <CODE> in V16QImode before emission.
6052 (define_expand "<code>v16qi3"
6053 [(set (match_operand:V16QI 0 "register_operand" "")
6055 (match_operand:V16QI 1 "nonimmediate_operand" "")
6056 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6058 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; Legacy SSE2 V16QI max/min: emits two-operand p<maxmin_int>b with
;; operand 1 tied to the destination and marked commutative.
6060 (define_insn "*<code>v16qi3"
6061 [(set (match_operand:V16QI 0 "register_operand" "=x")
6063 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6064 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6065 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6066 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6067 [(set_attr "type" "sseiadd")
6068 (set_attr "prefix_data16" "1")
6069 (set_attr "mode" "TI")])
;; AVX integer max/min pattern over SSEMODE124, emitting three-operand
;; vp<maxmin_int><ssevecsize>.  The prefix_extra attribute is chosen by
;; testing whether operand 0 has mode V8HI.
6071 (define_insn "*avx_<code><mode>3"
6072 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6074 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6075 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6076 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6077 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6078 [(set_attr "type" "sseiadd")
6079 (set (attr "prefix_extra")
6080 (if_then_else (match_operand:V8HI 0 "" "")
6082 (const_string "1")))
6083 (set_attr "prefix" "vex")
6084 (set_attr "mode" "TI")])
;; Expander for the V8HI max/min operation named <code>v8hi3.  The
;; operands are fixed up for <CODE> in V8HImode before emission.
6086 (define_expand "<code>v8hi3"
6087 [(set (match_operand:V8HI 0 "register_operand" "")
6089 (match_operand:V8HI 1 "nonimmediate_operand" "")
6090 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6092 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; Legacy SSE2 V8HI max/min: emits two-operand p<maxmin_int>w with
;; operand 1 tied to the destination and marked commutative.
6094 (define_insn "*<code>v8hi3"
6095 [(set (match_operand:V8HI 0 "register_operand" "=x")
6097 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6098 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6099 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6100 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6101 [(set_attr "type" "sseiadd")
6102 (set_attr "prefix_data16" "1")
6103 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI maximum.  One path only fixes up the
;; operands for UMAX.  The other emits gen_sse2_ussubv8hi3 (op1, op2)
;; into op3 followed by gen_addv8hi3 (op3, op2) into operand 0; op3 is
;; operand 0 itself unless that equals operand 2, in which case a fresh
;; V8HI pseudo is used so operand 2 is still intact for the add.
6105 (define_expand "umaxv8hi3"
6106 [(set (match_operand:V8HI 0 "register_operand" "")
6107 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6108 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6112 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6115 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6116 if (rtx_equal_p (op3, op2))
6117 op3 = gen_reg_rtx (V8HImode);
6118 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6119 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for the signed maximum on SSEMODE14 (V16QI, V4SI).  One path
;; only fixes up the operands for SMAX.  The other builds a six-entry
;; xops array -- destination, operand 1, operand 2, a GT comparison of
;; operands 1 and 2, then operands 1 and 2 again -- and hands it to
;; ix86_expand_int_vcond.
6124 (define_expand "smax<mode>3"
6125 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6126 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6127 (match_operand:SSEMODE14 2 "register_operand" "")))]
6131 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6137 xops[0] = operands[0];
6138 xops[1] = operands[1];
6139 xops[2] = operands[2];
6140 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6141 xops[4] = operands[1];
6142 xops[5] = operands[2];
6143 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min pattern over SSEMODE14 (V16QI, V4SI): emits two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination.
6149 (define_insn "*sse4_1_<code><mode>3"
6150 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6152 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6153 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6154 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6155 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6156 [(set_attr "type" "sseiadd")
6157 (set_attr "prefix_extra" "1")
6158 (set_attr "mode" "TI")])
;; Expander for the signed V2DI maximum.  Builds a six-entry xops array --
;; destination, operand 1, operand 2, a GT comparison of operands 1 and 2,
;; then operands 1 and 2 again -- and hands it to ix86_expand_int_vcond.
6160 (define_expand "smaxv2di3"
6161 [(set (match_operand:V2DI 0 "register_operand" "")
6162 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6163 (match_operand:V2DI 2 "register_operand" "")))]
6169 xops[0] = operands[0];
6170 xops[1] = operands[1];
6171 xops[2] = operands[2];
6172 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6173 xops[4] = operands[1];
6174 xops[5] = operands[2];
6175 ok = ix86_expand_int_vcond (xops);
;; Expander for the unsigned V4SI maximum.  One path only fixes up the
;; operands for UMAX.  The other builds a six-entry xops array --
;; destination, operand 1, operand 2, a GTU comparison of operands 1 and
;; 2, then operands 1 and 2 again -- and hands it to ix86_expand_int_vcond.
6180 (define_expand "umaxv4si3"
6181 [(set (match_operand:V4SI 0 "register_operand" "")
6182 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6183 (match_operand:V4SI 2 "register_operand" "")))]
6187 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6193 xops[0] = operands[0];
6194 xops[1] = operands[1];
6195 xops[2] = operands[2];
6196 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6197 xops[4] = operands[1];
6198 xops[5] = operands[2];
6199 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min pattern over SSEMODE24 (V8HI, V4SI): emits two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination.
6205 (define_insn "*sse4_1_<code><mode>3"
6206 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6208 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6209 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6210 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6211 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6212 [(set_attr "type" "sseiadd")
6213 (set_attr "prefix_extra" "1")
6214 (set_attr "mode" "TI")])
;; Expander for the unsigned V2DI maximum.  Builds a six-entry xops array
;; -- destination, operand 1, operand 2, a GTU comparison of operands 1
;; and 2, then operands 1 and 2 again -- and hands it to
;; ix86_expand_int_vcond.
6216 (define_expand "umaxv2di3"
6217 [(set (match_operand:V2DI 0 "register_operand" "")
6218 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6219 (match_operand:V2DI 2 "register_operand" "")))]
6225 xops[0] = operands[0];
6226 xops[1] = operands[1];
6227 xops[2] = operands[2];
6228 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6229 xops[4] = operands[1];
6230 xops[5] = operands[2];
6231 ok = ix86_expand_int_vcond (xops);
;; Expander for the signed minimum on SSEMODE14 (V16QI, V4SI).  One path
;; only fixes up the operands for SMIN.  The other builds a six-entry
;; xops array for ix86_expand_int_vcond using a GT comparison of operands
;; 1 and 2; note that xops[1] and xops[2] are operands 2 and 1, i.e. in
;; swapped order.
6236 (define_expand "smin<mode>3"
6237 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6238 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6239 (match_operand:SSEMODE14 2 "register_operand" "")))]
6243 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6249 xops[0] = operands[0];
6250 xops[1] = operands[2];
6251 xops[2] = operands[1];
6252 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6253 xops[4] = operands[1];
6254 xops[5] = operands[2];
6255 ok = ix86_expand_int_vcond (xops);
;; Expander for the signed V2DI minimum.  Builds a six-entry xops array
;; for ix86_expand_int_vcond using a GT comparison of operands 1 and 2;
;; note that xops[1] and xops[2] are operands 2 and 1, i.e. in swapped
;; order.
6261 (define_expand "sminv2di3"
6262 [(set (match_operand:V2DI 0 "register_operand" "")
6263 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6264 (match_operand:V2DI 2 "register_operand" "")))]
6270 xops[0] = operands[0];
6271 xops[1] = operands[2];
6272 xops[2] = operands[1];
6273 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6274 xops[4] = operands[1];
6275 xops[5] = operands[2];
6276 ok = ix86_expand_int_vcond (xops);
;; Expander for the unsigned minimum on SSEMODE24 (V8HI, V4SI).  One path
;; only fixes up the operands for UMIN.  The other builds a six-entry
;; xops array for ix86_expand_int_vcond using a GTU comparison of
;; operands 1 and 2; note that xops[1] and xops[2] are operands 2 and 1,
;; i.e. in swapped order.
6281 (define_expand "umin<mode>3"
6282 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6283 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6284 (match_operand:SSEMODE24 2 "register_operand" "")))]
6288 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6294 xops[0] = operands[0];
6295 xops[1] = operands[2];
6296 xops[2] = operands[1];
6297 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6298 xops[4] = operands[1];
6299 xops[5] = operands[2];
6300 ok = ix86_expand_int_vcond (xops);
;; Expander for the unsigned V2DI minimum.  Builds a six-entry xops array
;; for ix86_expand_int_vcond using a GTU comparison of operands 1 and 2;
;; note that xops[1] and xops[2] are operands 2 and 1, i.e. in swapped
;; order.
6306 (define_expand "uminv2di3"
6307 [(set (match_operand:V2DI 0 "register_operand" "")
6308 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6309 (match_operand:V2DI 2 "register_operand" "")))]
6315 xops[0] = operands[0];
6316 xops[1] = operands[2];
6317 xops[2] = operands[1];
6318 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6319 xops[4] = operands[1];
6320 xops[5] = operands[2];
6321 ok = ix86_expand_int_vcond (xops);
6326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6328 ;; Parallel integral comparisons
6330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the integer vector equality comparison on SSEMODE124.
;; Enabled for SSE2 when XOP is not in use; the operands are fixed up for
;; EQ before the pattern is emitted.
6332 (define_expand "sse2_eq<mode>3"
6333 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6335 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6336 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6337 "TARGET_SSE2 && !TARGET_XOP "
6338 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX equality compare for the SSEMODE1248 modes, emitted as the
;; three-operand vpcmpeq<ssevecsize>.  The "%" on operand 1 marks the
;; input pair as commutative; the insn additionally requires
;; ix86_binary_operator_ok (EQ, ...).  The prefix_extra attribute
;; depends on whether operand 0 is V2DI.
6340 (define_insn "*avx_eq<mode>3"
6341 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6343 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6344 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6345 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6346 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6347 [(set_attr "type" "ssecmp")
6348 (set (attr "prefix_extra")
6349 (if_then_else (match_operand:V2DI 0 "" "")
6351 (const_string "*")))
6352 (set_attr "prefix" "vex")
6353 (set_attr "mode" "TI")])
;; SSE2 (non-XOP) equality compare for the SSEMODE124 modes, emitted
;; as the two-operand pcmpeq<ssevecsize>.  Operand 1 is tied to the
;; destination ("%0", commutative with operand 2), so only %0 and %2
;; appear in the template.
6355 (define_insn "*sse2_eq<mode>3"
6356 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6358 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6359 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6360 "TARGET_SSE2 && !TARGET_XOP
6361 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6362 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6363 [(set_attr "type" "ssecmp")
6364 (set_attr "prefix_data16" "1")
6365 (set_attr "mode" "TI")])
;; Named expander for the V2DI equality compare.  Inputs may be
;; registers or memory; ix86_fixup_binary_operands_no_copy
;; (EQ, V2DImode, ...) adjusts them before the insn is generated.
6367 (define_expand "sse4_1_eqv2di3"
6368 [(set (match_operand:V2DI 0 "register_operand" "")
6370 (match_operand:V2DI 1 "nonimmediate_operand" "")
6371 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6373 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 V2DI equality compare, emitted as the two-operand pcmpeqq.
;; Operand 1 is tied to the destination and commutative with
;; operand 2 ("%0"); operand 2 may be a register or memory.
6375 (define_insn "*sse4_1_eqv2di3"
6376 [(set (match_operand:V2DI 0 "register_operand" "=x")
6378 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6379 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6380 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6381 "pcmpeqq\t{%2, %0|%0, %2}"
6382 [(set_attr "type" "ssecmp")
6383 (set_attr "prefix_extra" "1")
6384 (set_attr "mode" "TI")])
;; AVX greater-than compare for the SSEMODE1248 modes, emitted as the
;; three-operand vpcmpgt<ssevecsize>.  Operand 1 must be a register
;; and carries no "%" modifier, so the inputs are not interchangeable.
;; The prefix_extra attribute depends on whether operand 0 is V2DI.
6386 (define_insn "*avx_gt<mode>3"
6387 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6389 (match_operand:SSEMODE1248 1 "register_operand" "x")
6390 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6392 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6393 [(set_attr "type" "ssecmp")
6394 (set (attr "prefix_extra")
6395 (if_then_else (match_operand:V2DI 0 "" "")
6397 (const_string "*")))
6398 (set_attr "prefix" "vex")
6399 (set_attr "mode" "TI")])
;; SSE2 (non-XOP) greater-than compare for the SSEMODE124 modes,
;; emitted as the two-operand pcmpgt<ssevecsize>.  Operand 1 is a
;; register tied to the destination ("0"); operand 2 may be a
;; register or memory.
6401 (define_insn "sse2_gt<mode>3"
6402 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6404 (match_operand:SSEMODE124 1 "register_operand" "0")
6405 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6406 "TARGET_SSE2 && !TARGET_XOP"
6407 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6408 [(set_attr "type" "ssecmp")
6409 (set_attr "prefix_data16" "1")
6410 (set_attr "mode" "TI")])
;; V2DI greater-than compare, emitted as the two-operand pcmpgtq.
;; Operand 1 is a register tied to the destination; operand 2 may be
;; a register or memory.
6412 (define_insn "sse4_2_gtv2di3"
6413 [(set (match_operand:V2DI 0 "register_operand" "=x")
6415 (match_operand:V2DI 1 "register_operand" "0")
6416 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6418 "pcmpgtq\t{%2, %0|%0, %2}"
6419 [(set_attr "type" "ssecmp")
6420 (set_attr "prefix_extra" "1")
6421 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODE124C8 modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The whole operand array is handed to ix86_expand_int_vcond, whose
;; boolean result is kept in `ok'.
6423 (define_expand "vcond<mode>"
6424 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6425 (if_then_else:SSEMODE124C8
6426 (match_operator 3 ""
6427 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6428 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6429 (match_operand:SSEMODE124C8 1 "general_operand" "")
6430 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6433 bool ok = ix86_expand_int_vcond (operands);
;; Counterpart of vcond<mode> under the vcondu name (presumably the
;; unsigned-comparison variant -- the operator predicate itself is
;; empty).  Same operand layout, and the expansion is likewise
;; delegated to ix86_expand_int_vcond with the result kept in `ok'.
6438 (define_expand "vcondu<mode>"
6439 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6440 (if_then_else:SSEMODE124C8
6441 (match_operator 3 ""
6442 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6443 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6444 (match_operand:SSEMODE124C8 1 "general_operand" "")
6445 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6448 bool ok = ix86_expand_int_vcond (operands);
6453 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6455 ;; Parallel bitwise logical operations
6457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the 16-byte integer modes (SSEMODEI),
;; expressed as an XOR.  The preparation code builds a CONST_VECTOR
;; with GET_MODE_NUNITS (<MODE>mode) elements, each constm1_rtx (all
;; bits set), forces it into a register and stores it in operands[2].
6459 (define_expand "one_cmpl<mode>2"
6460 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6461 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6465 int i, n = GET_MODE_NUNITS (<MODE>mode);
6466 rtvec v = rtvec_alloc (n);
6468 for (i = 0; i < n; ++i)
6469 RTVEC_ELT (v, i) = constm1_rtx;
6471 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not for the 32-byte integer modes (AVX256MODEI): operand 1
;; is complemented (not:...) before being combined with operand 2.
;; Emitted with the packed-single form vandnps; the insn mode comes
;; from <avxvecpsmode>.
6474 (define_insn "*avx_andnot<mode>3"
6475 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6477 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6478 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6480 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6481 [(set_attr "type" "sselog")
6482 (set_attr "prefix" "vex")
6483 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the 16-byte integer modes on targets that have SSE but
;; not SSE2.  Uses the packed-single instruction andnps (insn mode
;; V4SF); operand 1 is complemented and tied to the destination.
6485 (define_insn "*sse_andnot<mode>3"
6486 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6488 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6489 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6490 "(TARGET_SSE && !TARGET_SSE2)"
6491 "andnps\t{%2, %0|%0, %2}"
6492 [(set_attr "type" "sselog")
6493 (set_attr "mode" "V4SF")])
;; AVX and-not for the 16-byte integer modes (SSEMODEI), emitted as
;; the three-operand vpandn.  Operand 1 is the complemented input and
;; must be a register; operand 2 may be a register or memory.
6495 (define_insn "*avx_andnot<mode>3"
6496 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6498 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6499 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6501 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6502 [(set_attr "type" "sselog")
6503 (set_attr "prefix" "vex")
6504 (set_attr "mode" "TI")])
;; And-not for the 16-byte integer modes (SSEMODEI), emitted as the
;; two-operand pandn.  Operand 1 is the complemented input and is
;; tied to the destination register.
6506 (define_insn "sse2_andnot<mode>3"
6507 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6509 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6510 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6512 "pandn\t{%2, %0|%0, %2}"
6513 [(set_attr "type" "sselog")
6514 (set_attr "prefix_data16" "1")
6515 (set_attr "mode" "TI")])
;; And-not on TFmode values, emitted as pandn on the whole register
;; (insn mode TI).  Operand 1 is the complemented input and is tied
;; to the destination.
6517 (define_insn "*andnottf3"
6518 [(set (match_operand:TF 0 "register_operand" "=x")
6520 (not:TF (match_operand:TF 1 "register_operand" "0"))
6521 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6523 "pandn\t{%2, %0|%0, %2}"
6524 [(set_attr "type" "sselog")
6525 (set_attr "prefix_data16" "1")
6526 (set_attr "mode" "TI")])
;; Named expander for the two-input operations selected by <code> on
;; the 16-byte integer modes (SSEMODEI).  Inputs may be registers or
;; memory; ix86_fixup_binary_operands_no_copy adjusts them for the
;; matching insn patterns.
6528 (define_expand "<code><mode>3"
6529 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6531 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6532 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6534 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX bitwise logic (any_logic code iterator) for the 32-byte integer
;; modes (AVX256MODEI).  Emitted with the packed-single mnemonic
;; v<logic>ps; the insn mode comes from <avxvecpsmode>.  Operands 1
;; and 2 are commutative ("%x").
6536 (define_insn "*avx_<code><mode>3"
6537 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6538 (any_logic:AVX256MODEI
6539 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6540 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6542 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6543 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6544 [(set_attr "type" "sselog")
6545 (set_attr "prefix" "vex")
6546 (set_attr "mode" "<avxvecpsmode>")])
;; <code> operation on the 16-byte integer modes for targets that
;; have SSE but not SSE2.  Uses the packed-single mnemonic <logic>ps
;; (insn mode V4SF).  Operand 1 is tied to the destination and
;; commutative with operand 2.
6548 (define_insn "*sse_<code><mode>3"
6549 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6551 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6552 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6553 "(TARGET_SSE && !TARGET_SSE2)
6554 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6555 "<logic>ps\t{%2, %0|%0, %2}"
6556 [(set_attr "type" "sselog")
6557 (set_attr "mode" "V4SF")])
;; AVX <code> operation on the 16-byte integer modes (SSEMODEI),
;; emitted as the three-operand integer form vp<logic>.  Operands 1
;; and 2 are commutative ("%x").
6559 (define_insn "*avx_<code><mode>3"
6560 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6562 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6563 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6565 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6566 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6567 [(set_attr "type" "sselog")
6568 (set_attr "prefix" "vex")
6569 (set_attr "mode" "TI")])
;; SSE2 <code> operation on the 16-byte integer modes (SSEMODEI),
;; emitted as the two-operand integer form p<logic>.  Operand 1 is
;; tied to the destination and commutative with operand 2.
6571 (define_insn "*sse2_<code><mode>3"
6572 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6574 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6575 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6576 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6577 "p<logic>\t{%2, %0|%0, %2}"
6578 [(set_attr "type" "sselog")
6579 (set_attr "prefix_data16" "1")
6580 (set_attr "mode" "TI")])
;; Named expander for the <code> operations on TFmode values.  Inputs
;; may be registers or memory; ix86_fixup_binary_operands_no_copy
;; (<CODE>, TFmode, ...) adjusts them for the matching insn.
6582 (define_expand "<code>tf3"
6583 [(set (match_operand:TF 0 "register_operand" "")
6585 (match_operand:TF 1 "nonimmediate_operand" "")
6586 (match_operand:TF 2 "nonimmediate_operand" "")))]
6588 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 <code> operation on TFmode values, emitted as the integer
;; instruction p<logic> (insn mode TI).  Operand 1 is tied to the
;; destination and commutative with operand 2.
6590 (define_insn "*<code>tf3"
6591 [(set (match_operand:TF 0 "register_operand" "=x")
6593 (match_operand:TF 1 "nonimmediate_operand" "%0")
6594 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6595 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6596 "p<logic>\t{%2, %0|%0, %2}"
6597 [(set_attr "type" "sselog")
6598 (set_attr "prefix_data16" "1")
6599 (set_attr "mode" "TI")])
6601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6603 ;; Parallel integral element swizzling
6605 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI registers into one V16QI register.  Both inputs are
;; reinterpreted as V16QI with gen_lowpart and the result is produced
;; by ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): 0 presumably selects the even elements, i.e. the low
;; byte of each halfword -- confirm against the helper.
6607 (define_expand "vec_pack_trunc_v8hi"
6608 [(match_operand:V16QI 0 "register_operand" "")
6609 (match_operand:V8HI 1 "register_operand" "")
6610 (match_operand:V8HI 2 "register_operand" "")]
6613 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6614 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6615 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V4SI registers into one V8HI register.  Both inputs are
;; reinterpreted as V8HI with gen_lowpart and the result is produced
;; by ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): 0 presumably selects the even elements -- confirm
;; against the helper.
6619 (define_expand "vec_pack_trunc_v4si"
6620 [(match_operand:V8HI 0 "register_operand" "")
6621 (match_operand:V4SI 1 "register_operand" "")
6622 (match_operand:V4SI 2 "register_operand" "")]
6625 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6626 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6627 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V2DI registers into one V4SI register.  Both inputs are
;; reinterpreted as V4SI with gen_lowpart and the result is produced
;; by ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): 0 presumably selects the even elements -- confirm
;; against the helper.
6631 (define_expand "vec_pack_trunc_v2di"
6632 [(match_operand:V4SI 0 "register_operand" "")
6633 (match_operand:V2DI 1 "register_operand" "")
6634 (match_operand:V2DI 2 "register_operand" "")]
6637 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6638 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6639 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; AVX pack of two V8HI inputs into a V16QI result, emitted as the
;; three-operand vpacksswb.  Operand 1 must be a register; operand 2
;; may be a register or memory.
6643 (define_insn "*avx_packsswb"
6644 [(set (match_operand:V16QI 0 "register_operand" "=x")
6647 (match_operand:V8HI 1 "register_operand" "x"))
6649 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6651 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6652 [(set_attr "type" "sselog")
6653 (set_attr "prefix" "vex")
6654 (set_attr "mode" "TI")])
;; Pack of two V8HI inputs into a V16QI result, emitted as the
;; two-operand packsswb.  Operand 1 is tied to the destination.
6656 (define_insn "sse2_packsswb"
6657 [(set (match_operand:V16QI 0 "register_operand" "=x")
6660 (match_operand:V8HI 1 "register_operand" "0"))
6662 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6664 "packsswb\t{%2, %0|%0, %2}"
6665 [(set_attr "type" "sselog")
6666 (set_attr "prefix_data16" "1")
6667 (set_attr "mode" "TI")])
;; AVX pack of two V4SI inputs into a V8HI result, emitted as the
;; three-operand vpackssdw.  Operand 1 must be a register; operand 2
;; may be a register or memory.
6669 (define_insn "*avx_packssdw"
6670 [(set (match_operand:V8HI 0 "register_operand" "=x")
6673 (match_operand:V4SI 1 "register_operand" "x"))
6675 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6677 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6678 [(set_attr "type" "sselog")
6679 (set_attr "prefix" "vex")
6680 (set_attr "mode" "TI")])
;; Pack of two V4SI inputs into a V8HI result, emitted as the
;; two-operand packssdw.  Operand 1 is tied to the destination.
6682 (define_insn "sse2_packssdw"
6683 [(set (match_operand:V8HI 0 "register_operand" "=x")
6686 (match_operand:V4SI 1 "register_operand" "0"))
6688 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6690 "packssdw\t{%2, %0|%0, %2}"
6691 [(set_attr "type" "sselog")
6692 (set_attr "prefix_data16" "1")
6693 (set_attr "mode" "TI")])
;; AVX pack of two V8HI inputs into a V16QI result, emitted as the
;; three-operand vpackuswb.  Operand 1 must be a register; operand 2
;; may be a register or memory.
6695 (define_insn "*avx_packuswb"
6696 [(set (match_operand:V16QI 0 "register_operand" "=x")
6699 (match_operand:V8HI 1 "register_operand" "x"))
6701 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6703 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6704 [(set_attr "type" "sselog")
6705 (set_attr "prefix" "vex")
6706 (set_attr "mode" "TI")])
;; Pack of two V8HI inputs into a V16QI result, emitted as the
;; two-operand packuswb.  Operand 1 is tied to the destination.
6708 (define_insn "sse2_packuswb"
6709 [(set (match_operand:V16QI 0 "register_operand" "=x")
6712 (match_operand:V8HI 1 "register_operand" "0"))
6714 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6716 "packuswb\t{%2, %0|%0, %2}"
6717 [(set_attr "type" "sselog")
6718 (set_attr "prefix_data16" "1")
6719 (set_attr "mode" "TI")])
;; AVX byte interleave (vpunpckhbw).  The selection list alternates
;; indices 8..15 with indices 24..31 -- presumably the upper eight
;; bytes of operand 1 and of operand 2 when the indices address the
;; concatenation of the two inputs.
6721 (define_insn "*avx_interleave_highv16qi"
6722 [(set (match_operand:V16QI 0 "register_operand" "=x")
6725 (match_operand:V16QI 1 "register_operand" "x")
6726 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6727 (parallel [(const_int 8) (const_int 24)
6728 (const_int 9) (const_int 25)
6729 (const_int 10) (const_int 26)
6730 (const_int 11) (const_int 27)
6731 (const_int 12) (const_int 28)
6732 (const_int 13) (const_int 29)
6733 (const_int 14) (const_int 30)
6734 (const_int 15) (const_int 31)])))]
6736 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6737 [(set_attr "type" "sselog")
6738 (set_attr "prefix" "vex")
6739 (set_attr "mode" "TI")])
;; Byte interleave, two-operand form (punpckhbw) with operand 1 tied
;; to the destination.  The selection list alternates indices 8..15
;; with indices 24..31 -- presumably the upper eight bytes of each
;; input when the indices address the concatenation of operands 1
;; and 2.
6741 (define_insn "vec_interleave_highv16qi"
6742 [(set (match_operand:V16QI 0 "register_operand" "=x")
6745 (match_operand:V16QI 1 "register_operand" "0")
6746 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6747 (parallel [(const_int 8) (const_int 24)
6748 (const_int 9) (const_int 25)
6749 (const_int 10) (const_int 26)
6750 (const_int 11) (const_int 27)
6751 (const_int 12) (const_int 28)
6752 (const_int 13) (const_int 29)
6753 (const_int 14) (const_int 30)
6754 (const_int 15) (const_int 31)])))]
6756 "punpckhbw\t{%2, %0|%0, %2}"
6757 [(set_attr "type" "sselog")
6758 (set_attr "prefix_data16" "1")
6759 (set_attr "mode" "TI")])
;; AVX byte interleave (vpunpcklbw).  The selection list alternates
;; indices 0..7 with indices 16..23 -- presumably the lower eight
;; bytes of operand 1 and of operand 2 when the indices address the
;; concatenation of the two inputs.
6761 (define_insn "*avx_interleave_lowv16qi"
6762 [(set (match_operand:V16QI 0 "register_operand" "=x")
6765 (match_operand:V16QI 1 "register_operand" "x")
6766 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6767 (parallel [(const_int 0) (const_int 16)
6768 (const_int 1) (const_int 17)
6769 (const_int 2) (const_int 18)
6770 (const_int 3) (const_int 19)
6771 (const_int 4) (const_int 20)
6772 (const_int 5) (const_int 21)
6773 (const_int 6) (const_int 22)
6774 (const_int 7) (const_int 23)])))]
6776 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6777 [(set_attr "type" "sselog")
6778 (set_attr "prefix" "vex")
6779 (set_attr "mode" "TI")])
;; Byte interleave, two-operand form (punpcklbw) with operand 1 tied
;; to the destination.  The selection list alternates indices 0..7
;; with indices 16..23 -- presumably the lower eight bytes of each
;; input when the indices address the concatenation of operands 1
;; and 2.
6781 (define_insn "vec_interleave_lowv16qi"
6782 [(set (match_operand:V16QI 0 "register_operand" "=x")
6785 (match_operand:V16QI 1 "register_operand" "0")
6786 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6787 (parallel [(const_int 0) (const_int 16)
6788 (const_int 1) (const_int 17)
6789 (const_int 2) (const_int 18)
6790 (const_int 3) (const_int 19)
6791 (const_int 4) (const_int 20)
6792 (const_int 5) (const_int 21)
6793 (const_int 6) (const_int 22)
6794 (const_int 7) (const_int 23)])))]
6796 "punpcklbw\t{%2, %0|%0, %2}"
6797 [(set_attr "type" "sselog")
6798 (set_attr "prefix_data16" "1")
6799 (set_attr "mode" "TI")])
;; AVX halfword interleave (vpunpckhwd).  The selection list
;; alternates indices 4..7 with indices 12..15 -- presumably the upper
;; four halfwords of each input when the indices address the
;; concatenation of operands 1 and 2.
6801 (define_insn "*avx_interleave_highv8hi"
6802 [(set (match_operand:V8HI 0 "register_operand" "=x")
6805 (match_operand:V8HI 1 "register_operand" "x")
6806 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6807 (parallel [(const_int 4) (const_int 12)
6808 (const_int 5) (const_int 13)
6809 (const_int 6) (const_int 14)
6810 (const_int 7) (const_int 15)])))]
6812 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6813 [(set_attr "type" "sselog")
6814 (set_attr "prefix" "vex")
6815 (set_attr "mode" "TI")])
;; Halfword interleave, two-operand form (punpckhwd) with operand 1
;; tied to the destination.  The selection list alternates indices
;; 4..7 with indices 12..15 -- presumably the upper four halfwords of
;; each input when the indices address the concatenation of operands
;; 1 and 2.
6817 (define_insn "vec_interleave_highv8hi"
6818 [(set (match_operand:V8HI 0 "register_operand" "=x")
6821 (match_operand:V8HI 1 "register_operand" "0")
6822 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6823 (parallel [(const_int 4) (const_int 12)
6824 (const_int 5) (const_int 13)
6825 (const_int 6) (const_int 14)
6826 (const_int 7) (const_int 15)])))]
6828 "punpckhwd\t{%2, %0|%0, %2}"
6829 [(set_attr "type" "sselog")
6830 (set_attr "prefix_data16" "1")
6831 (set_attr "mode" "TI")])
;; AVX halfword interleave (vpunpcklwd).  The selection list
;; alternates indices 0..3 with indices 8..11 -- presumably the lower
;; four halfwords of each input when the indices address the
;; concatenation of operands 1 and 2.
6833 (define_insn "*avx_interleave_lowv8hi"
6834 [(set (match_operand:V8HI 0 "register_operand" "=x")
6837 (match_operand:V8HI 1 "register_operand" "x")
6838 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6839 (parallel [(const_int 0) (const_int 8)
6840 (const_int 1) (const_int 9)
6841 (const_int 2) (const_int 10)
6842 (const_int 3) (const_int 11)])))]
6844 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6845 [(set_attr "type" "sselog")
6846 (set_attr "prefix" "vex")
6847 (set_attr "mode" "TI")])
;; Halfword interleave, two-operand form (punpcklwd) with operand 1
;; tied to the destination.  The selection list alternates indices
;; 0..3 with indices 8..11 -- presumably the lower four halfwords of
;; each input when the indices address the concatenation of operands
;; 1 and 2.
6849 (define_insn "vec_interleave_lowv8hi"
6850 [(set (match_operand:V8HI 0 "register_operand" "=x")
6853 (match_operand:V8HI 1 "register_operand" "0")
6854 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6855 (parallel [(const_int 0) (const_int 8)
6856 (const_int 1) (const_int 9)
6857 (const_int 2) (const_int 10)
6858 (const_int 3) (const_int 11)])))]
6860 "punpcklwd\t{%2, %0|%0, %2}"
6861 [(set_attr "type" "sselog")
6862 (set_attr "prefix_data16" "1")
6863 (set_attr "mode" "TI")])
;; AVX doubleword interleave (vpunpckhdq).  The selection list is
;; 2, 6, 3, 7 -- presumably the upper two elements of each input when
;; the indices address the concatenation of operands 1 and 2.
6865 (define_insn "*avx_interleave_highv4si"
6866 [(set (match_operand:V4SI 0 "register_operand" "=x")
6869 (match_operand:V4SI 1 "register_operand" "x")
6870 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6871 (parallel [(const_int 2) (const_int 6)
6872 (const_int 3) (const_int 7)])))]
6874 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6875 [(set_attr "type" "sselog")
6876 (set_attr "prefix" "vex")
6877 (set_attr "mode" "TI")])
;; Doubleword interleave, two-operand form (punpckhdq) with operand 1
;; tied to the destination.  The selection list is 2, 6, 3, 7 --
;; presumably the upper two elements of each input when the indices
;; address the concatenation of operands 1 and 2.
6879 (define_insn "vec_interleave_highv4si"
6880 [(set (match_operand:V4SI 0 "register_operand" "=x")
6883 (match_operand:V4SI 1 "register_operand" "0")
6884 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6885 (parallel [(const_int 2) (const_int 6)
6886 (const_int 3) (const_int 7)])))]
6888 "punpckhdq\t{%2, %0|%0, %2}"
6889 [(set_attr "type" "sselog")
6890 (set_attr "prefix_data16" "1")
6891 (set_attr "mode" "TI")])
;; AVX doubleword interleave (vpunpckldq).  The selection list is
;; 0, 4, 1, 5 -- presumably the lower two elements of each input when
;; the indices address the concatenation of operands 1 and 2.
6893 (define_insn "*avx_interleave_lowv4si"
6894 [(set (match_operand:V4SI 0 "register_operand" "=x")
6897 (match_operand:V4SI 1 "register_operand" "x")
6898 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6899 (parallel [(const_int 0) (const_int 4)
6900 (const_int 1) (const_int 5)])))]
6902 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6903 [(set_attr "type" "sselog")
6904 (set_attr "prefix" "vex")
6905 (set_attr "mode" "TI")])
;; Doubleword interleave, two-operand form (punpckldq) with operand 1
;; tied to the destination.  The selection list is 0, 4, 1, 5 --
;; presumably the lower two elements of each input when the indices
;; address the concatenation of operands 1 and 2.
6907 (define_insn "vec_interleave_lowv4si"
6908 [(set (match_operand:V4SI 0 "register_operand" "=x")
6911 (match_operand:V4SI 1 "register_operand" "0")
6912 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6913 (parallel [(const_int 0) (const_int 4)
6914 (const_int 1) (const_int 5)])))]
6916 "punpckldq\t{%2, %0|%0, %2}"
6917 [(set_attr "type" "sselog")
6918 (set_attr "prefix_data16" "1")
6919 (set_attr "mode" "TI")])
;; AVX element insert for the SSEMODE124 modes: the scalar operand 2
;; is broadcast (vec_duplicate) and merged into operand 1 under the
;; single-bit mask operand 3.  Before output the power-of-two mask is
;; converted to an element index with exact_log2.  A memory source is
;; printed as %2, any other source with the %k2 modifier.  The
;; prefix_extra attribute depends on whether operand 0 is V8HI.
6921 (define_insn "*avx_pinsr<ssevecsize>"
6922 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6923 (vec_merge:SSEMODE124
6924 (vec_duplicate:SSEMODE124
6925 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6926 (match_operand:SSEMODE124 1 "register_operand" "x")
6927 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6930 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6931 if (MEM_P (operands[2]))
6932 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6934 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6936 [(set_attr "type" "sselog")
6937 (set (attr "prefix_extra")
6938 (if_then_else (match_operand:V8HI 0 "" "")
6940 (const_string "1")))
6941 (set_attr "length_immediate" "1")
6942 (set_attr "prefix" "vex")
6943 (set_attr "mode" "TI")])
;; Byte insert into a V16QI register (pinsrb).  The QImode operand 2
;; is broadcast and merged into operand 1 (tied to the destination)
;; under the single-bit mask operand 3, which is converted to an
;; element index with exact_log2 before output.  A memory source is
;; printed as %2, any other source with the %k2 modifier.
6945 (define_insn "*sse4_1_pinsrb"
6946 [(set (match_operand:V16QI 0 "register_operand" "=x")
6948 (vec_duplicate:V16QI
6949 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6950 (match_operand:V16QI 1 "register_operand" "0")
6951 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6954 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6955 if (MEM_P (operands[2]))
6956 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6958 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6960 [(set_attr "type" "sselog")
6961 (set_attr "prefix_extra" "1")
6962 (set_attr "length_immediate" "1")
6963 (set_attr "mode" "TI")])
;; Halfword insert into a V8HI register (pinsrw).  Operand 1 is tied
;; to the destination; the single-bit mask operand 3 (1..128) is
;; converted to an element index with exact_log2 before output.  A
;; memory source is printed as %2, any other source with %k2.
6965 (define_insn "*sse2_pinsrw"
6966 [(set (match_operand:V8HI 0 "register_operand" "=x")
6969 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6970 (match_operand:V8HI 1 "register_operand" "0")
6971 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6974 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6975 if (MEM_P (operands[2]))
6976 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6978 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6980 [(set_attr "type" "sselog")
6981 (set_attr "prefix_data16" "1")
6982 (set_attr "length_immediate" "1")
6983 (set_attr "mode" "TI")])
6985 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert into a V4SI register (pinsrd).  Operand 1 is
;; tied to the destination; the single-bit mask operand 3 (1..8) is
;; converted to an element index with exact_log2 before output.
6986 (define_insn "*sse4_1_pinsrd"
6987 [(set (match_operand:V4SI 0 "register_operand" "=x")
6990 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6991 (match_operand:V4SI 1 "register_operand" "0")
6992 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6995 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6996 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6998 [(set_attr "type" "sselog")
6999 (set_attr "prefix_extra" "1")
7000 (set_attr "length_immediate" "1")
7001 (set_attr "mode" "TI")])
;; AVX quadword insert into a V2DI register (vpinsrq), available only
;; for 64-bit targets.  The single-bit mask operand 3 (1..2) is
;; converted to an element index with exact_log2 before output.
7003 (define_insn "*avx_pinsrq"
7004 [(set (match_operand:V2DI 0 "register_operand" "=x")
7007 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7008 (match_operand:V2DI 1 "register_operand" "x")
7009 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7010 "TARGET_AVX && TARGET_64BIT"
7012 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7013 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7015 [(set_attr "type" "sselog")
7016 (set_attr "prefix_extra" "1")
7017 (set_attr "length_immediate" "1")
7018 (set_attr "prefix" "vex")
7019 (set_attr "mode" "TI")])
;; SSE4.1 quadword insert into a V2DI register (pinsrq), available
;; only for 64-bit targets.  Operand 1 is tied to the destination;
;; the single-bit mask operand 3 (1..2) is converted to an element
;; index with exact_log2 before output.
7021 (define_insn "*sse4_1_pinsrq"
7022 [(set (match_operand:V2DI 0 "register_operand" "=x")
7025 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7026 (match_operand:V2DI 1 "register_operand" "0")
7027 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7028 "TARGET_SSE4_1 && TARGET_64BIT"
7030 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7031 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7033 [(set_attr "type" "sselog")
7034 (set_attr "prefix_rex" "1")
7035 (set_attr "prefix_extra" "1")
7036 (set_attr "length_immediate" "1")
7037 (set_attr "mode" "TI")])
;; Extract byte element 0..15 (operand 2) of a V16QI register into a
;; general register of an SWI48 mode, emitted as [v]pextrb.  The
;; destination is printed with the %k0 modifier.
7039 (define_insn "*sse4_1_pextrb_<mode>"
7040 [(set (match_operand:SWI48 0 "register_operand" "=r")
7043 (match_operand:V16QI 1 "register_operand" "x")
7044 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7046 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7047 [(set_attr "type" "sselog")
7048 (set_attr "prefix_extra" "1")
7049 (set_attr "length_immediate" "1")
7050 (set_attr "prefix" "maybe_vex")
7051 (set_attr "mode" "TI")])
;; Extract byte element 0..15 (operand 2) of a V16QI register and
;; store it directly to a QImode memory location, emitted as
;; [v]pextrb.
7053 (define_insn "*sse4_1_pextrb_memory"
7054 [(set (match_operand:QI 0 "memory_operand" "=m")
7056 (match_operand:V16QI 1 "register_operand" "x")
7057 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7059 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7060 [(set_attr "type" "sselog")
7061 (set_attr "prefix_extra" "1")
7062 (set_attr "length_immediate" "1")
7063 (set_attr "prefix" "maybe_vex")
7064 (set_attr "mode" "TI")])
;; Extract halfword element 0..7 (operand 2) of a V8HI register into
;; a general register of an SWI48 mode, emitted as [v]pextrw.  The
;; destination is printed with the %k0 modifier.
7066 (define_insn "*sse2_pextrw_<mode>"
7067 [(set (match_operand:SWI48 0 "register_operand" "=r")
7070 (match_operand:V8HI 1 "register_operand" "x")
7071 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7073 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7074 [(set_attr "type" "sselog")
7075 (set_attr "prefix_data16" "1")
7076 (set_attr "length_immediate" "1")
7077 (set_attr "prefix" "maybe_vex")
7078 (set_attr "mode" "TI")])
;; Extract halfword element 0..7 (operand 2) of a V8HI register and
;; store it directly to an HImode memory location, emitted as
;; [v]pextrw.
7080 (define_insn "*sse4_1_pextrw_memory"
7081 [(set (match_operand:HI 0 "memory_operand" "=m")
7083 (match_operand:V8HI 1 "register_operand" "x")
7084 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7086 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7087 [(set_attr "type" "sselog")
7088 (set_attr "prefix_extra" "1")
7089 (set_attr "length_immediate" "1")
7090 (set_attr "prefix" "maybe_vex")
7091 (set_attr "mode" "TI")])
;; Extract doubleword element 0..3 (operand 2) of a V4SI register
;; into an SImode general register or memory location, emitted as
;; [v]pextrd.
7093 (define_insn "*sse4_1_pextrd"
7094 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7096 (match_operand:V4SI 1 "register_operand" "x")
7097 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7099 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7100 [(set_attr "type" "sselog")
7101 (set_attr "prefix_extra" "1")
7102 (set_attr "length_immediate" "1")
7103 (set_attr "prefix" "maybe_vex")
7104 (set_attr "mode" "TI")])
;; 64-bit-target variant of the doubleword extract whose destination
;; is a DImode general register.  The same [v]pextrd instruction is
;; emitted, with the destination printed through the %k0 modifier.
7106 (define_insn "*sse4_1_pextrd_zext"
7107 [(set (match_operand:DI 0 "register_operand" "=r")
7110 (match_operand:V4SI 1 "register_operand" "x")
7111 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7112 "TARGET_64BIT && TARGET_SSE4_1"
7113 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7114 [(set_attr "type" "sselog")
7115 (set_attr "prefix_extra" "1")
7116 (set_attr "length_immediate" "1")
7117 (set_attr "prefix" "maybe_vex")
7118 (set_attr "mode" "TI")])
7120 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extract quadword element 0..1 (operand 2) of a V2DI register into
;; a DImode general register or memory location, emitted as
;; [v]pextrq.  Requires SSE4.1 on a 64-bit target.
7121 (define_insn "*sse4_1_pextrq"
7122 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7124 (match_operand:V2DI 1 "register_operand" "x")
7125 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7126 "TARGET_SSE4_1 && TARGET_64BIT"
7127 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7128 [(set_attr "type" "sselog")
7129 (set_attr "prefix_rex" "1")
7130 (set_attr "prefix_extra" "1")
7131 (set_attr "length_immediate" "1")
7132 (set_attr "prefix" "maybe_vex")
7133 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).
;; The mask is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7)
;; which are passed as separate selectors to sse2_pshufd_1.
7135 (define_expand "sse2_pshufd"
7136 [(match_operand:V4SI 0 "register_operand" "")
7137 (match_operand:V4SI 1 "nonimmediate_operand" "")
7138 (match_operand:SI 2 "const_int_operand" "")]
7141 int mask = INTVAL (operands[2]);
7142 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7143 GEN_INT ((mask >> 0) & 3),
7144 GEN_INT ((mask >> 2) & 3),
7145 GEN_INT ((mask >> 4) & 3),
7146 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with one selector (0..3) per result element in
;; operands 2..5.  At output time the four selectors are packed into
;; a single immediate -- operand 2 in bits 0-1, operand 3 in bits 2-3,
;; operand 4 in bits 4-5, operand 5 in bits 6-7 -- which replaces
;; operands[2] for the [v]pshufd template.
7150 (define_insn "sse2_pshufd_1"
7151 [(set (match_operand:V4SI 0 "register_operand" "=x")
7153 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7154 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7155 (match_operand 3 "const_0_to_3_operand" "")
7156 (match_operand 4 "const_0_to_3_operand" "")
7157 (match_operand 5 "const_0_to_3_operand" "")])))]
7161 mask |= INTVAL (operands[2]) << 0;
7162 mask |= INTVAL (operands[3]) << 2;
7163 mask |= INTVAL (operands[4]) << 4;
7164 mask |= INTVAL (operands[5]) << 6;
7165 operands[2] = GEN_INT (mask);
7167 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7169 [(set_attr "type" "sselog1")
7170 (set_attr "prefix_data16" "1")
7171 (set_attr "prefix" "maybe_vex")
7172 (set_attr "length_immediate" "1")
7173 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).
;; The mask is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7)
;; which are passed as separate selectors to sse2_pshuflw_1.
7175 (define_expand "sse2_pshuflw"
7176 [(match_operand:V8HI 0 "register_operand" "")
7177 (match_operand:V8HI 1 "nonimmediate_operand" "")
7178 (match_operand:SI 2 "const_int_operand" "")]
7181 int mask = INTVAL (operands[2]);
7182 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7183 GEN_INT ((mask >> 0) & 3),
7184 GEN_INT ((mask >> 2) & 3),
7185 GEN_INT ((mask >> 4) & 3),
7186 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four selectors (operands 2..5) each range
;; over 0..3.  At output time the selectors are packed into a single
;; immediate (2 bits each, operand 2 lowest) which replaces
;; operands[2] for the [v]pshuflw template.
7190 (define_insn "sse2_pshuflw_1"
7191 [(set (match_operand:V8HI 0 "register_operand" "=x")
7193 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7194 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7195 (match_operand 3 "const_0_to_3_operand" "")
7196 (match_operand 4 "const_0_to_3_operand" "")
7197 (match_operand 5 "const_0_to_3_operand" "")
7205 mask |= INTVAL (operands[2]) << 0;
7206 mask |= INTVAL (operands[3]) << 2;
7207 mask |= INTVAL (operands[4]) << 4;
7208 mask |= INTVAL (operands[5]) << 6;
7209 operands[2] = GEN_INT (mask);
7211 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7213 [(set_attr "type" "sselog")
7214 (set_attr "prefix_data16" "0")
7215 (set_attr "prefix_rep" "1")
7216 (set_attr "prefix" "maybe_vex")
7217 (set_attr "length_immediate" "1")
7218 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).
;; Each 2-bit field of the mask is biased by 4, giving selectors in
;; the range 4..7, before being passed to sse2_pshufhw_1.
7220 (define_expand "sse2_pshufhw"
7221 [(match_operand:V8HI 0 "register_operand" "")
7222 (match_operand:V8HI 1 "nonimmediate_operand" "")
7223 (match_operand:SI 2 "const_int_operand" "")]
7226 int mask = INTVAL (operands[2]);
7227 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7228 GEN_INT (((mask >> 0) & 3) + 4),
7229 GEN_INT (((mask >> 2) & 3) + 4),
7230 GEN_INT (((mask >> 4) & 3) + 4),
7231 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose selection list starts with the fixed index 0
;; and ends with four selectors (operands 2..5) that each range over
;; 4..7.  At output time the bias of 4 is removed from each selector
;; and the results are packed into a single immediate (2 bits each,
;; operand 2 lowest) which replaces operands[2] for [v]pshufhw.
7235 (define_insn "sse2_pshufhw_1"
7236 [(set (match_operand:V8HI 0 "register_operand" "=x")
7238 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7239 (parallel [(const_int 0)
7243 (match_operand 2 "const_4_to_7_operand" "")
7244 (match_operand 3 "const_4_to_7_operand" "")
7245 (match_operand 4 "const_4_to_7_operand" "")
7246 (match_operand 5 "const_4_to_7_operand" "")])))]
7250 mask |= (INTVAL (operands[2]) - 4) << 0;
7251 mask |= (INTVAL (operands[3]) - 4) << 2;
7252 mask |= (INTVAL (operands[4]) - 4) << 4;
7253 mask |= (INTVAL (operands[5]) - 4) << 6;
7254 operands[2] = GEN_INT (mask);
7256 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7258 [(set_attr "type" "sselog")
7259 (set_attr "prefix_rep" "1")
7260 (set_attr "prefix_data16" "0")
7261 (set_attr "prefix" "maybe_vex")
7262 (set_attr "length_immediate" "1")
7263 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1, register or
;; memory) into a V4SI register.  The preparation statement supplies
;; the all-zero V4SI constant as operands[2].
7265 (define_expand "sse2_loadd"
7266 [(set (match_operand:V4SI 0 "register_operand" "")
7269 (match_operand:SI 1 "nonimmediate_operand" ""))
7273 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX load of an SImode value (operand 2) into a V4SI register, with
;; three alternatives:
;;   1. source in memory, operand 1 the constant "C"    -> vmovd
;;   2. source in a general register, operand 1 "C"     -> vmovd
;;   3. source in an SSE register, operand 1 a register -> vmovss
7275 (define_insn "*avx_loadld"
7276 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7279 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7280 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7284 vmovd\t{%2, %0|%0, %2}
7285 vmovd\t{%2, %0|%0, %2}
7286 vmovss\t{%2, %1, %0|%0, %1, %2}"
7287 [(set_attr "type" "ssemov")
7288 (set_attr "prefix" "vex")
7289 (set_attr "mode" "TI,TI,V4SF")])
;; Load of an SImode value (operand 2) into a V4SI register, with
;; four alternatives:
;;   1. memory source, operand 1 "C", Y2 destination    -> movd
;;   2. general-register source, operand 1 "C", Yi dest -> movd
;;   3. memory source, operand 1 "C", x destination     -> movss
;;   4. SSE-register source, operand 1 tied to dest     -> movss
7291 (define_insn "sse2_loadld"
7292 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7295 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7296 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7300 movd\t{%2, %0|%0, %2}
7301 movd\t{%2, %0|%0, %2}
7302 movss\t{%2, %0|%0, %2}
7303 movss\t{%2, %0|%0, %2}"
7304 [(set_attr "type" "ssemov")
7305 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of a V4SI register to an SImode destination.
;; After reload -- when inter-unit moves are allowed, or the
;; destination is memory, or it is not a general register -- the insn
;; is split into a plain SImode move whose source is the same hard
;; register (REGNO (operands[1])) viewed in SImode.
7307 (define_insn_and_split "sse2_stored"
7308 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7310 (match_operand:V4SI 1 "register_operand" "x,Yi")
7311 (parallel [(const_int 0)])))]
7314 "&& reload_completed
7315 && (TARGET_INTER_UNIT_MOVES
7316 || MEM_P (operands [0])
7317 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7318 [(set (match_dup 0) (match_dup 1))]
7319 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element 0..3 (operand 2) from a V4SI value in offsettable
;; memory ("o") into a general register.  The split replaces it with
;; an ordinary SImode move from the address adjusted by i*4 bytes,
;; where i is the element index.
7321 (define_insn_and_split "*vec_ext_v4si_mem"
7322 [(set (match_operand:SI 0 "register_operand" "=r")
7324 (match_operand:V4SI 1 "memory_operand" "o")
7325 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7331 int i = INTVAL (operands[2]);
7333 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander that stores element 0 of a V2DI register to a
;; DImode register or memory destination.
7337 (define_expand "sse_storeq"
7338 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7340 (match_operand:V2DI 1 "register_operand" "")
7341 (parallel [(const_int 0)])))]
;; 64-bit-target store of element 0 of a V2DI operand to a DImode
;; destination; destination and source may not both be memory.
;; Three alternatives: SSE register -> memory/SSE register,
;; Yi register -> general register, and offsettable memory -> general
;; register.  Only the last one carries type/prefix/mode attributes
;; (imov, maybe_vex, DI) and is emitted directly as [v]mov{q}.
7344 (define_insn "*sse2_storeq_rex64"
7345 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7347 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7348 (parallel [(const_int 0)])))]
7349 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7353 %vmov{q}\t{%1, %0|%0, %1}"
7354 [(set_attr "type" "*,*,imov")
7355 (set_attr "prefix" "*,*,maybe_vex")
7356 (set_attr "mode" "*,*,DI")])
;; Store element 0 of a V2DI register to a DImode memory location or
;; SSE register, together with the split that follows it.  The split
;; fires when inter-unit moves are allowed, or the destination is
;; memory, or it is not a general register, and rewrites the
;; extraction as a plain DImode move whose source is the same hard
;; register (REGNO (operands[1])) viewed in DImode.
7358 (define_insn "*sse2_storeq"
7359 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7361 (match_operand:V2DI 1 "register_operand" "x")
7362 (parallel [(const_int 0)])))]
7367 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7369 (match_operand:V2DI 1 "register_operand" "")
7370 (parallel [(const_int 0)])))]
7373 && (TARGET_INTER_UNIT_MOVES
7374 || MEM_P (operands [0])
7375 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7376 [(set (match_dup 0) (match_dup 1))]
7377 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; *vec_extractv2di_1_rex64_avx: extract element 1 of a V2DI (operand 1)
;; into a DImode destination (operand 0), VEX-encoded.
;; Alternatives:
;;   0: memory           <- SSE reg     vmovhps
;;   1: SSE reg          <- SSE reg     vpsrldq by 8 bytes (3-operand form)
;;   2: SSE reg          <- memory      vmovq from %H1
;;   3: general register <- memory      vmov{q} from %H1
;; Memory-to-memory is excluded by the condition.
;; NOTE(review): %H1 is presumably the memory operand offset to its upper
;; 8 bytes -- confirm against the operand printer.
7379 (define_insn "*vec_extractv2di_1_rex64_avx"
7380 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7382 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7383 (parallel [(const_int 1)])))]
7386 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7388 vmovhps\t{%1, %0|%0, %1}
7389 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7390 vmovq\t{%H1, %0|%0, %H1}
7391 vmov{q}\t{%H1, %0|%0, %H1}"
7392 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7393 (set_attr "length_immediate" "*,1,*,*")
7394 (set_attr "memory" "*,none,*,*")
7395 (set_attr "prefix" "vex")
7396 (set_attr "mode" "V2SF,TI,TI,DI")])
;; *vec_extractv2di_1_rex64: 64-bit, non-VEX extraction of element 1 of a
;; V2DI (operand 1) into a DImode destination (operand 0).
;; Alternatives:
;;   0: memory           <- SSE reg           movhps
;;   1: SSE reg          <- same reg ("0")    psrldq by 8 bytes, in place
;;   2: SSE reg          <- memory            movq from %H1
;;   3: general register <- memory            mov{q} from %H1
;; Unlike the AVX variant, alternative 1 ties the source to the destination
;; because psrldq is a two-operand instruction here.
7398 (define_insn "*vec_extractv2di_1_rex64"
7399 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7401 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7402 (parallel [(const_int 1)])))]
7403 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7405 movhps\t{%1, %0|%0, %1}
7406 psrldq\t{$8, %0|%0, 8}
7407 movq\t{%H1, %0|%0, %H1}
7408 mov{q}\t{%H1, %0|%0, %H1}"
7409 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7410 (set_attr "length_immediate" "*,1,*,*")
7411 (set_attr "memory" "*,none,*,*")
7412 (set_attr "mode" "V2SF,TI,TI,DI")])
;; *vec_extractv2di_1_avx: VEX-encoded extraction of element 1 of a V2DI
;; (operand 1) into memory or an SSE register (operand 0).  There is no
;; general-register alternative in this variant.
;; Alternatives:
;;   0: memory  <- SSE reg   vmovhps
;;   1: SSE reg <- SSE reg   vpsrldq by 8 bytes
;;   2: SSE reg <- memory    vmovq from %H1
7414 (define_insn "*vec_extractv2di_1_avx"
7415 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7417 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7418 (parallel [(const_int 1)])))]
7421 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7423 vmovhps\t{%1, %0|%0, %1}
7424 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7425 vmovq\t{%H1, %0|%0, %H1}"
7426 [(set_attr "type" "ssemov,sseishft1,ssemov")
7427 (set_attr "length_immediate" "*,1,*")
7428 (set_attr "memory" "*,none,*")
7429 (set_attr "prefix" "vex")
7430 (set_attr "mode" "V2SF,TI,TI")])
;; *vec_extractv2di_1_sse2: SSE2 (non-VEX) extraction of element 1 of a
;; V2DI (operand 1) into memory or an SSE register (operand 0).
;; Alternatives:
;;   0: memory  <- SSE reg          movhps
;;   1: SSE reg <- same reg ("0")   psrldq by 8 bytes, in place
;;   2: SSE reg <- memory           movq from %H1
7432 (define_insn "*vec_extractv2di_1_sse2"
7433 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7435 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7436 (parallel [(const_int 1)])))]
7438 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7440 movhps\t{%1, %0|%0, %1}
7441 psrldq\t{$8, %0|%0, 8}
7442 movq\t{%H1, %0|%0, %H1}"
7443 [(set_attr "type" "ssemov,sseishft1,ssemov")
7444 (set_attr "length_immediate" "*,1,*")
7445 (set_attr "memory" "*,none,*")
7446 (set_attr "mode" "V2SF,TI,TI")])
;; *vec_extractv2di_1_sse: SSE1-only fallback (enabled when SSE is
;; available but SSE2 is not) for extracting element 1 of a V2DI.
;; It uses only single-precision move instructions:
;;   0: memory  <- SSE reg   movhps
;;   1: SSE reg <- SSE reg   movhlps
;;   2: SSE reg <- memory    movlps from %H1
7448 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7449 (define_insn "*vec_extractv2di_1_sse"
7450 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7452 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7453 (parallel [(const_int 1)])))]
7454 "!TARGET_SSE2 && TARGET_SSE
7455 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7457 movhps\t{%1, %0|%0, %1}
7458 movhlps\t{%1, %0|%0, %1}
7459 movlps\t{%H1, %0|%0, %H1}"
7460 [(set_attr "type" "ssemov")
7461 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_dupv4si_avx: build a V4SI (operand 0) from a single SImode value
;; (operand 1), VEX-encoded.
;;   0: source in an SSE register -> vpshufd with selector 0
;;   1: source in memory          -> vbroadcastss
;; NOTE(review): operand 1 uses the "register_operand" predicate although
;; alternative 1's constraint is "m"; a memory source can therefore only
;; arise through reload.  Confirm whether "nonimmediate_operand" was
;; intended.
7463 (define_insn "*vec_dupv4si_avx"
7464 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7466 (match_operand:SI 1 "register_operand" "x,m")))]
7469 vpshufd\t{$0, %1, %0|%0, %1, 0}
7470 vbroadcastss\t{%1, %0|%0, %1}"
7471 [(set_attr "type" "sselog1,ssemov")
7472 (set_attr "length_immediate" "1,0")
7473 (set_attr "prefix_extra" "0,1")
7474 (set_attr "prefix" "vex")
7475 (set_attr "mode" "TI,V4SF")])
;; *vec_dupv4si: build a V4SI (operand 0) from a single SImode register
;; value (operand 1).
;;   0: "Y2" registers -> %vpshufd with selector 0 (integer domain, TI mode)
;;   1: source tied to the destination ("0") -> shufps with selector 0 on
;;      the destination itself (V4SF mode)
7477 (define_insn "*vec_dupv4si"
7478 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7480 (match_operand:SI 1 "register_operand" " Y2,0")))]
7483 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7484 shufps\t{$0, %0, %0|%0, %0, 0}"
7485 [(set_attr "type" "sselog1")
7486 (set_attr "length_immediate" "1")
7487 (set_attr "mode" "TI,V4SF")])
;; *vec_dupv2di_avx: build a V2DI (operand 0) from a single DImode value
;; (operand 1), VEX-encoded.
;;   0: source in an SSE register -> vpunpcklqdq of the source with itself
;;   1: source in memory          -> vmovddup
7489 (define_insn "*vec_dupv2di_avx"
7490 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7492 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7495 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7496 vmovddup\t{%1, %0|%0, %1}"
7497 [(set_attr "type" "sselog1")
7498 (set_attr "prefix" "vex")
7499 (set_attr "mode" "TI,DF")])
;; *vec_dupv2di_sse3: build a V2DI (operand 0) from a single DImode value
;; (operand 1) without VEX encoding.  Two alternatives: source tied to the
;; destination ("0"), or source in memory ("m").
;; The movddup template shown pairs with the DF entry of the mode
;; attribute, i.e. the memory alternative.
7501 (define_insn "*vec_dupv2di_sse3"
7502 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7504 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7508 movddup\t{%1, %0|%0, %1}"
7509 [(set_attr "type" "sselog1")
7510 (set_attr "mode" "TI,DF")])
;; *vec_dupv2di: register-only variant that builds a V2DI (operand 0) from
;; a DImode register (operand 1).  In both alternatives the source is tied
;; to the destination ("0"); the first is restricted to "Y2" registers.
;; Per the attributes, alternative 0 is an integer-domain shuffle (sselog1,
;; TI mode) and alternative 1 a single-precision move (ssemov, V4SF mode).
7512 (define_insn "*vec_dupv2di"
7513 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7515 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7520 [(set_attr "type" "sselog1,ssemov")
7521 (set_attr "mode" "TI,V4SF")])
;; *vec_concatv2si_avx: form a V2SI (operand 0) from two SImode values
;; (operand 1 = element 0, operand 2 = element 1).
;; Alternatives:
;;   0: SSE reg, op2 in general reg/memory -> vpinsrd into lane 1
;;   1: SSE reg, both inputs in SSE regs   -> vpunpckldq
;;   2: SSE reg, op2 is constant zero ("C") -> vmovd of operand 1
;;   3: MMX reg ("y"), op1 tied to dest    -> punpckldq
;;   4: MMX reg, op2 is constant zero      -> movd of operand 1
;; The MMX alternatives (3 and 4) keep the legacy encoding ("orig" prefix);
;; the others are VEX-encoded.
7523 (define_insn "*vec_concatv2si_avx"
7524 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7526 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7527 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7530 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7531 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7532 vmovd\t{%1, %0|%0, %1}
7533 punpckldq\t{%2, %0|%0, %2}
7534 movd\t{%1, %0|%0, %1}"
7535 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7536 (set_attr "prefix_extra" "1,*,*,*,*")
7537 (set_attr "length_immediate" "1,*,*,*,*")
7538 (set (attr "prefix")
7539 (if_then_else (eq_attr "alternative" "3,4")
7540 (const_string "orig")
7541 (const_string "vex")))
7542 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; *vec_concatv2si_sse4_1: non-VEX counterpart of the AVX pattern; forms a
;; V2SI (operand 0) from two SImode values.  The two-operand instructions
;; require operand 1 to be tied to the destination ("0") in the
;; insert/unpack alternatives.
;;   0: pinsrd into lane 1 (op2 in general reg/memory)
;;   1: punpckldq (op2 in SSE reg)
;;   2: movd of operand 1 (op2 is constant zero)
;;   3: MMX punpckldq
;;   4: MMX movd (op2 is constant zero)
7544 (define_insn "*vec_concatv2si_sse4_1"
7545 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7547 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7548 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7551 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7552 punpckldq\t{%2, %0|%0, %2}
7553 movd\t{%1, %0|%0, %1}
7554 punpckldq\t{%2, %0|%0, %2}
7555 movd\t{%1, %0|%0, %1}"
7556 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7557 (set_attr "prefix_extra" "1,*,*,*,*")
7558 (set_attr "length_immediate" "1,*,*,*,*")
7559 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; *vec_concatv2si_sse2: form a V2SI (operand 0) from two SImode values
;; using only register or constant-zero second operands
;; ("reg_or_0_operand").
;;   0: SSE punpckldq (op1 tied to dest, op2 in SSE reg)
;;   1: SSE movd of operand 1 (op2 is constant zero)
;;   2: MMX punpckldq (op1 tied to dest, op2 in MMX reg)
;;   3: MMX movd of operand 1 (op2 is constant zero)
7561 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7562 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7563 ;; alternatives pretty much forces the MMX alternative to be chosen.
7564 (define_insn "*vec_concatv2si_sse2"
7565 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7567 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7568 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7571 punpckldq\t{%2, %0|%0, %2}
7572 movd\t{%1, %0|%0, %1}
7573 punpckldq\t{%2, %0|%0, %2}
7574 movd\t{%1, %0|%0, %1}"
7575 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7576 (set_attr "mode" "TI,TI,DI,DI")])
;; *vec_concatv2si_sse: variant of the V2SI concatenation that uses
;; single-precision instructions for the SSE alternatives.
;;   0: unpcklps (op1 tied to dest, op2 in SSE reg)
;;   1: movss load of operand 1 from memory (op2 is constant zero)
;;   2: MMX punpckldq
;;   3: MMX movd of operand 1 (op2 is constant zero)
7578 (define_insn "*vec_concatv2si_sse"
7579 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7581 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7582 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7585 unpcklps\t{%2, %0|%0, %2}
7586 movss\t{%1, %0|%0, %1}
7587 punpckldq\t{%2, %0|%0, %2}
7588 movd\t{%1, %0|%0, %1}"
7589 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7590 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; *vec_concatv4si_1_avx: form a V4SI (operand 0) from two V2SI halves
;; (operand 1 and operand 2), VEX-encoded.
;;   0: both halves in SSE registers -> vpunpcklqdq
;;   1: second half in memory        -> vmovhps
7592 (define_insn "*vec_concatv4si_1_avx"
7593 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7595 (match_operand:V2SI 1 "register_operand" " x,x")
7596 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7599 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7600 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7601 [(set_attr "type" "sselog,ssemov")
7602 (set_attr "prefix" "vex")
7603 (set_attr "mode" "TI,V2SF")])
;; *vec_concatv4si_1: non-VEX form of the V2SI + V2SI -> V4SI
;; concatenation.  Operand 1 is always tied to the destination ("0").
;;   0: punpcklqdq ("Y2" registers)
;;   1: movlhps    (second half in an SSE register)
;;   2: movhps     (second half in memory)
7605 (define_insn "*vec_concatv4si_1"
7606 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7608 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7609 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7612 punpcklqdq\t{%2, %0|%0, %2}
7613 movlhps\t{%2, %0|%0, %2}
7614 movhps\t{%2, %0|%0, %2}"
7615 [(set_attr "type" "sselog,ssemov,ssemov")
7616 (set_attr "mode" "TI,V4SF,V2SF")])
;; *vec_concatv2di_avx: 32-bit AVX form of building a V2DI (operand 0)
;; from two DImode values (operand 1 and operand 2).
;;   0: op1 in memory, op2 constant zero  -> vmovq load
;;   1: op1 in MMX reg, op2 constant zero -> movq2dq (legacy encoding,
;;      hence the "orig" prefix for this alternative only)
;;   2: both in SSE registers             -> vpunpcklqdq
;;   3: op2 in memory                     -> vmovhps
7618 (define_insn "*vec_concatv2di_avx"
7619 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7621 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7622 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7623 "!TARGET_64BIT && TARGET_AVX"
7625 vmovq\t{%1, %0|%0, %1}
7626 movq2dq\t{%1, %0|%0, %1}
7627 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7628 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7629 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7630 (set (attr "prefix")
7631 (if_then_else (eq_attr "alternative" "1")
7632 (const_string "orig")
7633 (const_string "vex")))
7634 (set_attr "mode" "TI,TI,TI,V2SF")])
;; vec_concatv2di: 32-bit SSE (non-VEX) form of building a V2DI
;; (operand 0) from two DImode values.
;;   0: op1 in memory or "Y2" reg, op2 constant zero -> movq
;;   1: op1 in MMX reg, op2 constant zero            -> movq2dq
;;   2: op1 tied to dest, op2 in "Y2" reg            -> punpcklqdq
;;   3: op1 tied to dest, op2 in SSE reg             -> movlhps
;;   4: op1 tied to dest, op2 in memory              -> movhps
7636 (define_insn "vec_concatv2di"
7637 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7639 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7640 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7641 "!TARGET_64BIT && TARGET_SSE"
7643 movq\t{%1, %0|%0, %1}
7644 movq2dq\t{%1, %0|%0, %1}
7645 punpcklqdq\t{%2, %0|%0, %2}
7646 movlhps\t{%2, %0|%0, %2}
7647 movhps\t{%2, %0|%0, %2}"
7648 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7649 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; *vec_concatv2di_rex64_avx: 64-bit AVX form of building a V2DI
;; (operand 0) from two DImode values.
;;   0: op2 in general reg/memory          -> vpinsrq into lane 1
;;   1: op1 in memory, op2 constant zero   -> vmovq
;;   2: op1 in general reg ("Yi" dest), op2 constant zero -> vmovq
;;   3: op1 in MMX reg, op2 constant zero  -> movq2dq (legacy encoding)
;;   4: both in SSE registers              -> vpunpcklqdq
;;   5: op2 in memory                      -> vmovhps
;; Every alternative except 3 is VEX-encoded.
7651 (define_insn "*vec_concatv2di_rex64_avx"
7652 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7654 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7655 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7656 "TARGET_64BIT && TARGET_AVX"
7658 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7659 vmovq\t{%1, %0|%0, %1}
7660 vmovq\t{%1, %0|%0, %1}
7661 movq2dq\t{%1, %0|%0, %1}
7662 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7663 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7664 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7665 (set_attr "prefix_extra" "1,*,*,*,*,*")
7666 (set_attr "length_immediate" "1,*,*,*,*,*")
7667 (set (attr "prefix")
7668 (if_then_else (eq_attr "alternative" "3")
7669 (const_string "orig")
7670 (const_string "vex")))
7671 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; *vec_concatv2di_rex64_sse4_1: 64-bit SSE4.1 (non-VEX) form of building
;; a V2DI (operand 0) from two DImode values.
;;   0: op1 tied to dest, op2 in general reg/memory -> pinsrq into lane 1
;;   1: op1 in memory or SSE reg, op2 constant zero -> movq
;;   2: op1 in general reg, op2 constant zero       -> movq
;;   3: op1 in MMX reg, op2 constant zero           -> movq2dq
;;   4: op1 tied to dest, op2 in SSE reg            -> punpcklqdq
;;   5: op1 tied to dest, op2 in SSE reg            -> movlhps
;;   6: op1 tied to dest, op2 in memory             -> movhps
;; prefix_rex is forced to 1 for alternatives 0 and 2.
7673 (define_insn "*vec_concatv2di_rex64_sse4_1"
7674 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7676 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7677 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7678 "TARGET_64BIT && TARGET_SSE4_1"
7680 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7681 movq\t{%1, %0|%0, %1}
7682 movq\t{%1, %0|%0, %1}
7683 movq2dq\t{%1, %0|%0, %1}
7684 punpcklqdq\t{%2, %0|%0, %2}
7685 movlhps\t{%2, %0|%0, %2}
7686 movhps\t{%2, %0|%0, %2}"
7687 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7688 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7689 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7690 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7691 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; *vec_concatv2di_rex64_sse: 64-bit baseline SSE form of building a V2DI
;; (operand 0) from two DImode values; it has no pinsrq alternative.
;;   0: op1 in memory or "Y2" reg, op2 constant zero -> movq
;;   1: op1 in general reg, op2 constant zero        -> movq (prefix_rex 1)
;;   2: op1 in MMX reg, op2 constant zero            -> movq2dq
;;   3: op1 tied to dest, op2 in "Y2" reg            -> punpcklqdq
;;   4: op1 tied to dest, op2 in SSE reg             -> movlhps
;;   5: op1 tied to dest, op2 in memory              -> movhps
7693 (define_insn "*vec_concatv2di_rex64_sse"
7694 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7696 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7697 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7698 "TARGET_64BIT && TARGET_SSE"
7700 movq\t{%1, %0|%0, %1}
7701 movq\t{%1, %0|%0, %1}
7702 movq2dq\t{%1, %0|%0, %1}
7703 punpcklqdq\t{%2, %0|%0, %2}
7704 movlhps\t{%2, %0|%0, %2}
7705 movhps\t{%2, %0|%0, %2}"
7706 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7707 (set_attr "prefix_rex" "*,1,*,*,*,*")
7708 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; vec_unpacku_hi_v16qi: expander widening a V16QI register (operand 1)
;; into a V8HI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers.
7710 (define_expand "vec_unpacku_hi_v16qi"
7711 [(match_operand:V8HI 0 "register_operand" "")
7712 (match_operand:V16QI 1 "register_operand" "")]
7716 ix86_expand_sse4_unpack (operands, true, true);
7718 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v16qi: expander widening a V16QI register (operand 1)
;; into a V8HI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (signed, high half) -- confirm against the helpers.
7722 (define_expand "vec_unpacks_hi_v16qi"
7723 [(match_operand:V8HI 0 "register_operand" "")
7724 (match_operand:V16QI 1 "register_operand" "")]
7728 ix86_expand_sse4_unpack (operands, false, true);
7730 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v16qi: expander widening a V16QI register (operand 1)
;; into a V8HI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (unsigned, low half) -- confirm against the helpers.
7734 (define_expand "vec_unpacku_lo_v16qi"
7735 [(match_operand:V8HI 0 "register_operand" "")
7736 (match_operand:V16QI 1 "register_operand" "")]
7740 ix86_expand_sse4_unpack (operands, true, false);
7742 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v16qi: expander widening a V16QI register (operand 1)
;; into a V8HI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (signed, low half) -- confirm against the helpers.
7746 (define_expand "vec_unpacks_lo_v16qi"
7747 [(match_operand:V8HI 0 "register_operand" "")
7748 (match_operand:V16QI 1 "register_operand" "")]
7752 ix86_expand_sse4_unpack (operands, false, false);
7754 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v8hi: expander widening a V8HI register (operand 1)
;; into a V4SI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers.
7758 (define_expand "vec_unpacku_hi_v8hi"
7759 [(match_operand:V4SI 0 "register_operand" "")
7760 (match_operand:V8HI 1 "register_operand" "")]
7764 ix86_expand_sse4_unpack (operands, true, true);
7766 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v8hi: expander widening a V8HI register (operand 1)
;; into a V4SI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (signed, high half) -- confirm against the helpers.
7770 (define_expand "vec_unpacks_hi_v8hi"
7771 [(match_operand:V4SI 0 "register_operand" "")
7772 (match_operand:V8HI 1 "register_operand" "")]
7776 ix86_expand_sse4_unpack (operands, false, true);
7778 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v8hi: expander widening a V8HI register (operand 1)
;; into a V4SI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (unsigned, low half) -- confirm against the helpers.
7782 (define_expand "vec_unpacku_lo_v8hi"
7783 [(match_operand:V4SI 0 "register_operand" "")
7784 (match_operand:V8HI 1 "register_operand" "")]
7788 ix86_expand_sse4_unpack (operands, true, false);
7790 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v8hi: expander widening a V8HI register (operand 1)
;; into a V4SI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (signed, low half) -- confirm against the helpers.
7794 (define_expand "vec_unpacks_lo_v8hi"
7795 [(match_operand:V4SI 0 "register_operand" "")
7796 (match_operand:V8HI 1 "register_operand" "")]
7800 ix86_expand_sse4_unpack (operands, false, false);
7802 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v4si: expander widening a V4SI register (operand 1)
;; into a V2DI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers.
7806 (define_expand "vec_unpacku_hi_v4si"
7807 [(match_operand:V2DI 0 "register_operand" "")
7808 (match_operand:V4SI 1 "register_operand" "")]
7812 ix86_expand_sse4_unpack (operands, true, true);
7814 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v4si: expander widening a V4SI register (operand 1)
;; into a V2DI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (signed, high half) -- confirm against the helpers.
7818 (define_expand "vec_unpacks_hi_v4si"
7819 [(match_operand:V2DI 0 "register_operand" "")
7820 (match_operand:V4SI 1 "register_operand" "")]
7824 ix86_expand_sse4_unpack (operands, false, true);
7826 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v4si: expander widening a V4SI register (operand 1)
;; into a V2DI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (unsigned, low half) -- confirm against the helpers.
7830 (define_expand "vec_unpacku_lo_v4si"
7831 [(match_operand:V2DI 0 "register_operand" "")
7832 (match_operand:V4SI 1 "register_operand" "")]
7836 ix86_expand_sse4_unpack (operands, true, false);
7838 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v4si: expander widening a V4SI register (operand 1)
;; into a V2DI register (operand 0).  The work is delegated to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false).
;; NOTE(review): going by the pattern name, the flags presumably mean
;; (signed, low half) -- confirm against the helpers.
7842 (define_expand "vec_unpacks_lo_v4si"
7843 [(match_operand:V2DI 0 "register_operand" "")
7844 (match_operand:V4SI 1 "register_operand" "")]
7848 ix86_expand_sse4_unpack (operands, false, false);
7850 ix86_expand_sse_unpack (operands, false, false);
7854 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7858 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: named expander for the V16QI average pattern.
;; Operands 1 and 2 are the V16QI inputs (register or memory) and operand 0
;; the register result; the pattern also contains an all-ones V16QI
;; constant vector.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy with
;; PLUS so that the operands are legitimised for a binary operation before
;; the insn patterns below are matched.
;; NOTE(review): the all-ones vector is presumably the rounding term of the
;; average -- the surrounding arithmetic is not visible here.
7860 (define_expand "sse2_uavgv16qi3"
7861 [(set (match_operand:V16QI 0 "register_operand" "")
7867 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7869 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7870 (const_vector:V16QI [(const_int 1) (const_int 1)
7871 (const_int 1) (const_int 1)
7872 (const_int 1) (const_int 1)
7873 (const_int 1) (const_int 1)
7874 (const_int 1) (const_int 1)
7875 (const_int 1) (const_int 1)
7876 (const_int 1) (const_int 1)
7877 (const_int 1) (const_int 1)]))
7880 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; *avx_uavgv16qi3: AVX insn for the V16QI average pattern; emits the
;; three-operand vpavgb.  Operand 1 is marked commutative ("%") and must be
;; a register; operand 2 may be a register or memory.
;; Enabled for TARGET_AVX when ix86_binary_operator_ok accepts the
;; operands.
7882 (define_insn "*avx_uavgv16qi3"
7883 [(set (match_operand:V16QI 0 "register_operand" "=x")
7889 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7891 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7892 (const_vector:V16QI [(const_int 1) (const_int 1)
7893 (const_int 1) (const_int 1)
7894 (const_int 1) (const_int 1)
7895 (const_int 1) (const_int 1)
7896 (const_int 1) (const_int 1)
7897 (const_int 1) (const_int 1)
7898 (const_int 1) (const_int 1)
7899 (const_int 1) (const_int 1)]))
7901 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7902 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7903 [(set_attr "type" "sseiadd")
7904 (set_attr "prefix" "vex")
7905 (set_attr "mode" "TI")])
;; *sse2_uavgv16qi3: SSE2 insn for the V16QI average pattern; emits the
;; two-operand pavgb, so operand 1 is tied to the destination ("%0", also
;; commutative).  Operand 2 may be a register or memory.
;; Enabled for TARGET_SSE2 when ix86_binary_operator_ok accepts the
;; operands.
7907 (define_insn "*sse2_uavgv16qi3"
7908 [(set (match_operand:V16QI 0 "register_operand" "=x")
7914 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7916 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7917 (const_vector:V16QI [(const_int 1) (const_int 1)
7918 (const_int 1) (const_int 1)
7919 (const_int 1) (const_int 1)
7920 (const_int 1) (const_int 1)
7921 (const_int 1) (const_int 1)
7922 (const_int 1) (const_int 1)
7923 (const_int 1) (const_int 1)
7924 (const_int 1) (const_int 1)]))
7926 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7927 "pavgb\t{%2, %0|%0, %2}"
7928 [(set_attr "type" "sseiadd")
7929 (set_attr "prefix_data16" "1")
7930 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: named expander for the V8HI average pattern.
;; Operands 1 and 2 are the V8HI inputs (register or memory) and operand 0
;; the register result; the pattern also contains an all-ones V8HI
;; constant vector.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy with
;; PLUS to legitimise the operands before the insn patterns are matched.
7932 (define_expand "sse2_uavgv8hi3"
7933 [(set (match_operand:V8HI 0 "register_operand" "")
7939 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7941 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7942 (const_vector:V8HI [(const_int 1) (const_int 1)
7943 (const_int 1) (const_int 1)
7944 (const_int 1) (const_int 1)
7945 (const_int 1) (const_int 1)]))
7948 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; *avx_uavgv8hi3: AVX insn for the V8HI average pattern; emits the
;; three-operand vpavgw.  Operand 1 is commutative ("%") and must be a
;; register; operand 2 may be a register or memory.
7950 (define_insn "*avx_uavgv8hi3"
7951 [(set (match_operand:V8HI 0 "register_operand" "=x")
7957 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7959 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7960 (const_vector:V8HI [(const_int 1) (const_int 1)
7961 (const_int 1) (const_int 1)
7962 (const_int 1) (const_int 1)
7963 (const_int 1) (const_int 1)]))
7965 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7966 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7967 [(set_attr "type" "sseiadd")
7968 (set_attr "prefix" "vex")
7969 (set_attr "mode" "TI")])
;; *sse2_uavgv8hi3: SSE2 insn for the V8HI average pattern; emits the
;; two-operand pavgw, so operand 1 is tied to the destination ("%0", also
;; commutative).  Operand 2 may be a register or memory.
7971 (define_insn "*sse2_uavgv8hi3"
7972 [(set (match_operand:V8HI 0 "register_operand" "=x")
7978 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7980 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7981 (const_vector:V8HI [(const_int 1) (const_int 1)
7982 (const_int 1) (const_int 1)
7983 (const_int 1) (const_int 1)
7984 (const_int 1) (const_int 1)]))
7986 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7987 "pavgw\t{%2, %0|%0, %2}"
7988 [(set_attr "type" "sseiadd")
7989 (set_attr "prefix_data16" "1")
7990 (set_attr "mode" "TI")])
;; *avx_psadbw: AVX form of psadbw, modelled as an opaque unspec over two
;; V16QI inputs that produces a V2DI result rather than as explicit
;; arithmetic RTL (see the remark below).  Emits the three-operand vpsadbw;
;; operand 2 may be a register or memory.
7992 ;; The correct representation for this is absolutely enormous, and
7993 ;; surely not generally useful.
7994 (define_insn "*avx_psadbw"
7995 [(set (match_operand:V2DI 0 "register_operand" "=x")
7996 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7997 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8000 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8001 [(set_attr "type" "sseiadd")
8002 (set_attr "prefix" "vex")
8003 (set_attr "mode" "TI")])
;; sse2_psadbw: SSE2 form of psadbw, modelled as an unspec over two V16QI
;; inputs producing a V2DI result.  Emits the two-operand psadbw, so
;; operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory.
8005 (define_insn "sse2_psadbw"
8006 [(set (match_operand:V2DI 0 "register_operand" "=x")
8007 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8008 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8011 "psadbw\t{%2, %0|%0, %2}"
8012 [(set_attr "type" "sseiadd")
8013 (set_attr "atom_unit" "simul")
8014 (set_attr "prefix_data16" "1")
8015 (set_attr "mode" "TI")])
;; avx_movmsk<ssemodesuffix>256: movmsk for the 256-bit floating-point
;; vector modes iterated by AVX256MODEF2P.  Takes the vector in an SSE
;; register (operand 1) and writes an SImode result to a general register
;; (operand 0), emitting vmovmsk<ssemodesuffix>.
8017 (define_insn "avx_movmsk<ssemodesuffix>256"
8018 [(set (match_operand:SI 0 "register_operand" "=r")
8020 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8022 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8023 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8024 [(set_attr "type" "ssecvt")
8025 (set_attr "prefix" "vex")
8026 (set_attr "mode" "<MODE>")])
;; <sse>_movmsk<ssemodesuffix>: movmsk for the 128-bit floating-point
;; vector modes iterated by SSEMODEF2P.  Takes the vector in an SSE
;; register (operand 1) and writes an SImode result to a general register
;; (operand 0).  The "%v" template prefix together with the "maybe_vex"
;; attribute lets the same pattern serve both legacy and VEX encodings.
8028 (define_insn "<sse>_movmsk<ssemodesuffix>"
8029 [(set (match_operand:SI 0 "register_operand" "=r")
8031 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8033 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8034 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8035 [(set_attr "type" "ssemov")
8036 (set_attr "prefix" "maybe_vex")
8037 (set_attr "mode" "<MODE>")])
;; sse2_pmovmskb: pmovmskb modelled as an unspec; takes a V16QI SSE
;; register (operand 1) and writes an SImode result to a general register
;; (operand 0).  Emitted as pmovmskb or, under AVX, vpmovmskb ("%v").
8039 (define_insn "sse2_pmovmskb"
8040 [(set (match_operand:SI 0 "register_operand" "=r")
8041 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8044 "%vpmovmskb\t{%1, %0|%0, %1}"
8045 [(set_attr "type" "ssemov")
8046 (set_attr "prefix_data16" "1")
8047 (set_attr "prefix" "maybe_vex")
8048 (set_attr "mode" "SI")])
;; sse2_maskmovdqu: named expander for the masked byte store.  Operand 0 is
;; the V16QI memory destination; operands 1 and 2 are V16QI registers that
;; feed the unspec (the insns below print them as the two explicit
;; maskmovdqu operands).
8050 (define_expand "sse2_maskmovdqu"
8051 [(set (match_operand:V16QI 0 "memory_operand" "")
8052 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8053 (match_operand:V16QI 2 "register_operand" "")
;; *sse2_maskmovdqu: 32-bit form of the masked byte store.  The destination
;; address is operand 0, an SImode register constrained to "D"; the same
;; memory location also appears as an input of the unspec, since the store
;; only partially overwrites it.  Operands 1 and 2 are V16QI SSE registers
;; and are the only operands printed in the assembly template.
;; length_vex is fixed at 3 because the implicit address register would
;; otherwise mislead the default length computation (see comment below).
8058 (define_insn "*sse2_maskmovdqu"
8059 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8060 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8061 (match_operand:V16QI 2 "register_operand" "x")
8062 (mem:V16QI (match_dup 0))]
8064 "TARGET_SSE2 && !TARGET_64BIT"
8065 ;; @@@ check ordering of operands in intel/nonintel syntax
8066 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8067 [(set_attr "type" "ssemov")
8068 (set_attr "prefix_data16" "1")
8069 ;; The implicit %rdi operand confuses default length_vex computation.
8070 (set_attr "length_vex" "3")
8071 (set_attr "prefix" "maybe_vex")
8072 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64: 64-bit form of the masked byte store; identical
;; to the 32-bit pattern except that the address register (operand 0,
;; constraint "D") is DImode.
;; length_vex is computed explicitly: 3 + 1 when operand 2 is a REX SSE
;; register (REGNO >= FIRST_REX_SSE_REG), otherwise 2 + 1.
8074 (define_insn "*sse2_maskmovdqu_rex64"
8075 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8076 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8077 (match_operand:V16QI 2 "register_operand" "x")
8078 (mem:V16QI (match_dup 0))]
8080 "TARGET_SSE2 && TARGET_64BIT"
8081 ;; @@@ check ordering of operands in intel/nonintel syntax
8082 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8083 [(set_attr "type" "ssemov")
8084 (set_attr "prefix_data16" "1")
8085 ;; The implicit %rdi operand confuses default length_vex computation.
8086 (set (attr "length_vex")
8087 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8088 (set_attr "prefix" "maybe_vex")
8089 (set_attr "mode" "TI")])
;; sse_ldmxcsr: volatile unspec taking an SImode memory operand
;; (operand 0).  Attributes classify it as a memory load touching MXCSR
;; (atom_sse_attr "mxcsr"); being unspec_volatile, it is not moved or
;; deleted by the optimisers.
8091 (define_insn "sse_ldmxcsr"
8092 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8096 [(set_attr "type" "sse")
8097 (set_attr "atom_sse_attr" "mxcsr")
8098 (set_attr "prefix" "maybe_vex")
8099 (set_attr "memory" "load")])
;; sse_stmxcsr: stores the result of the volatile UNSPECV_STMXCSR unspec
;; into an SImode memory operand (operand 0).  Attributes classify it as a
;; memory store touching MXCSR.
8101 (define_insn "sse_stmxcsr"
8102 [(set (match_operand:SI 0 "memory_operand" "=m")
8103 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8106 [(set_attr "type" "sse")
8107 (set_attr "atom_sse_attr" "mxcsr")
8108 (set_attr "prefix" "maybe_vex")
8109 (set_attr "memory" "store")])
;; sse_sfence: named expander for the store fence, available with SSE or
;; the 3DNow! extensions flag (TARGET_3DNOW_A).
;; The preparation code fabricates operand 0 as a volatile BLKmode MEM with
;; a scratch address, so the UNSPEC_SFENCE set is expressed as a write to
;; (and read of) an unknown block of memory.
8111 (define_expand "sse_sfence"
8113 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8114 "TARGET_SSE || TARGET_3DNOW_A"
8116 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8117 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: insn matching the UNSPEC_SFENCE set produced by the
;; expander.  Operand 0 has an empty predicate, so any BLKmode operand is
;; accepted.  length_address is 0 because the memory operand is not encoded
;; in the instruction; the "memory" attribute is "unknown".
8120 (define_insn "*sse_sfence"
8121 [(set (match_operand:BLK 0 "" "")
8122 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8123 "TARGET_SSE || TARGET_3DNOW_A"
8125 [(set_attr "type" "sse")
8126 (set_attr "length_address" "0")
8127 (set_attr "atom_sse_attr" "fence")
8128 (set_attr "memory" "unknown")])
;; sse2_clflush: volatile unspec whose single operand is an address
;; (predicate "address_operand", constraint "p") rather than a memory
;; reference.  It is scheduled as a fence-class SSE instruction with
;; unknown memory effects.
8130 (define_insn "sse2_clflush"
8131 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8135 [(set_attr "type" "sse")
8136 (set_attr "atom_sse_attr" "fence")
8137 (set_attr "memory" "unknown")])
;; sse2_mfence: named expander for the full memory fence.  As with
;; sse_sfence, operand 0 is fabricated as a volatile BLKmode MEM with a
;; scratch address and wrapped in UNSPEC_MFENCE.
8139 (define_expand "sse2_mfence"
8141 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8144 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8145 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: insn matching the UNSPEC_MFENCE set.  Enabled on any
;; 64-bit target or whenever SSE2 is available.  The memory operand is not
;; encoded (length_address 0).
8148 (define_insn "*sse2_mfence"
8149 [(set (match_operand:BLK 0 "" "")
8150 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8151 "TARGET_64BIT || TARGET_SSE2"
8153 [(set_attr "type" "sse")
8154 (set_attr "length_address" "0")
8155 (set_attr "atom_sse_attr" "fence")
8156 (set_attr "memory" "unknown")])
;; sse2_lfence: named expander for the load fence.  Operand 0 is fabricated
;; as a volatile BLKmode MEM with a scratch address and wrapped in
;; UNSPEC_LFENCE, mirroring the sfence/mfence expanders.
8158 (define_expand "sse2_lfence"
8160 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8163 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8164 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: insn matching the UNSPEC_LFENCE set.  Unlike the other two
;; fences it has its own Atom scheduling class (atom_sse_attr "lfence"
;; rather than "fence").
8167 (define_insn "*sse2_lfence"
8168 [(set (match_operand:BLK 0 "" "")
8169 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8172 [(set_attr "type" "sse")
8173 (set_attr "length_address" "0")
8174 (set_attr "atom_sse_attr" "lfence")
8175 (set_attr "memory" "unknown")])
;; sse3_mwait: volatile unspec for mwait.  Its two SImode inputs are pinned
;; to the "a" and "c" registers by their constraints.  A single pattern
;; serves both 32- and 64-bit targets (see the comment below), and the
;; instruction length is fixed at 3 bytes.
8177 (define_insn "sse3_mwait"
8178 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8179 (match_operand:SI 1 "register_operand" "c")]
8182 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8183 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8184 ;; we only need to set up 32bit registers.
8186 [(set_attr "length" "3")])
;; sse3_monitor: 32-bit form of monitor.  All three SImode inputs are
;; pinned to fixed registers ("a", "c", "d") and printed explicitly in the
;; template.  Instruction length is fixed at 3 bytes.
8188 (define_insn "sse3_monitor"
8189 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8190 (match_operand:SI 1 "register_operand" "c")
8191 (match_operand:SI 2 "register_operand" "d")]
8193 "TARGET_SSE3 && !TARGET_64BIT"
8194 "monitor\t%0, %1, %2"
8195 [(set_attr "length" "3")])
;; sse3_monitor64: 64-bit form of monitor.  Operand 0 (in "a") is DImode,
;; while the "c" and "d" inputs stay SImode for the reason given in the
;; comment below.  Instruction length is fixed at 3 bytes.
8197 (define_insn "sse3_monitor64"
8198 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8199 (match_operand:SI 1 "register_operand" "c")
8200 (match_operand:SI 2 "register_operand" "d")]
8202 "TARGET_SSE3 && TARGET_64BIT"
8203 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8204 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8205 ;; zero extended to 64bit, we only need to set up 32bit registers.
8207 [(set_attr "length" "3")])
8209 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8211 ;; SSSE3 instructions
8213 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; *avx_phaddwv8hi3: AVX horizontal word add on V8HI, emitted as the
;; three-operand vphaddw.
;; The RTL spells the operation out element by element: adjacent HImode
;; element pairs (0,1), (2,3), (4,5), (6,7) are selected first from
;; operand 1 (register) and then from operand 2 (register or memory).
8215 (define_insn "*avx_phaddwv8hi3"
8216 [(set (match_operand:V8HI 0 "register_operand" "=x")
8222 (match_operand:V8HI 1 "register_operand" "x")
8223 (parallel [(const_int 0)]))
8224 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8226 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8227 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8230 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8231 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8233 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8234 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8239 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8240 (parallel [(const_int 0)]))
8241 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8243 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8244 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8247 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8248 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8250 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8251 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8253 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8254 [(set_attr "type" "sseiadd")
8255 (set_attr "prefix_extra" "1")
8256 (set_attr "prefix" "vex")
8257 (set_attr "mode" "TI")])
;; ssse3_phaddwv8hi3: SSSE3 horizontal word add on V8HI, emitted as the
;; two-operand phaddw; operand 1 is therefore tied to the destination
;; ("0").  Adjacent HImode element pairs (0,1) .. (6,7) are selected first
;; from operand 1 and then from operand 2 (register or memory).
8259 (define_insn "ssse3_phaddwv8hi3"
8260 [(set (match_operand:V8HI 0 "register_operand" "=x")
8266 (match_operand:V8HI 1 "register_operand" "0")
8267 (parallel [(const_int 0)]))
8268 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8270 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8271 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8274 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8275 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8277 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8278 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8283 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8284 (parallel [(const_int 0)]))
8285 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8287 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8288 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8291 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8292 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8294 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8295 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8297 "phaddw\t{%2, %0|%0, %2}"
8298 [(set_attr "type" "sseiadd")
8299 (set_attr "atom_unit" "complex")
8300 (set_attr "prefix_data16" "1")
8301 (set_attr "prefix_extra" "1")
8302 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: MMX-register ("y") form of the horizontal word add
;; on V4HI, emitted as phaddw.  Operand 1 is tied to the destination;
;; operand 2 may be an MMX register or memory.  Element pairs (0,1) and
;; (2,3) are selected from each input.
;; prefix_rex is computed per insn from whether an extended register is
;; mentioned.
8304 (define_insn "ssse3_phaddwv4hi3"
8305 [(set (match_operand:V4HI 0 "register_operand" "=y")
8310 (match_operand:V4HI 1 "register_operand" "0")
8311 (parallel [(const_int 0)]))
8312 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8314 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8315 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8319 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8320 (parallel [(const_int 0)]))
8321 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8323 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8324 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8326 "phaddw\t{%2, %0|%0, %2}"
8327 [(set_attr "type" "sseiadd")
8328 (set_attr "atom_unit" "complex")
8329 (set_attr "prefix_extra" "1")
8330 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8331 (set_attr "mode" "DI")])
;; *avx_phadddv4si3: AVX horizontal doubleword add on V4SI, emitted as the
;; three-operand vphaddd.  Adjacent SImode element pairs (0,1) and (2,3)
;; are selected first from operand 1 (register) and then from operand 2
;; (register or memory).
8333 (define_insn "*avx_phadddv4si3"
8334 [(set (match_operand:V4SI 0 "register_operand" "=x")
8339 (match_operand:V4SI 1 "register_operand" "x")
8340 (parallel [(const_int 0)]))
8341 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8343 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8344 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8348 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8349 (parallel [(const_int 0)]))
8350 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8352 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8353 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8355 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8356 [(set_attr "type" "sseiadd")
8357 (set_attr "prefix_extra" "1")
8358 (set_attr "prefix" "vex")
8359 (set_attr "mode" "TI")])
;; ssse3_phadddv4si3: SSSE3 horizontal doubleword add on V4SI, emitted as
;; the two-operand phaddd with operand 1 tied to the destination ("0").
;; Element pairs (0,1) and (2,3) are selected from operand 1 and then from
;; operand 2 (register or memory).
8361 (define_insn "ssse3_phadddv4si3"
8362 [(set (match_operand:V4SI 0 "register_operand" "=x")
8367 (match_operand:V4SI 1 "register_operand" "0")
8368 (parallel [(const_int 0)]))
8369 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8371 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8372 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8376 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8377 (parallel [(const_int 0)]))
8378 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8380 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8381 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8383 "phaddd\t{%2, %0|%0, %2}"
8384 [(set_attr "type" "sseiadd")
8385 (set_attr "atom_unit" "complex")
8386 (set_attr "prefix_data16" "1")
8387 (set_attr "prefix_extra" "1")
8388 (set_attr "mode" "TI")])
;; ssse3_phadddv2si3: MMX-register ("y") form of the horizontal doubleword
;; add on V2SI, emitted as phaddd.  Operand 1 is tied to the destination;
;; operand 2 may be an MMX register or memory.  The single element pair
;; (0,1) is selected from each input.
;; prefix_rex is computed per insn from whether an extended register is
;; mentioned.
8390 (define_insn "ssse3_phadddv2si3"
8391 [(set (match_operand:V2SI 0 "register_operand" "=y")
8395 (match_operand:V2SI 1 "register_operand" "0")
8396 (parallel [(const_int 0)]))
8397 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8400 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8401 (parallel [(const_int 0)]))
8402 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8404 "phaddd\t{%2, %0|%0, %2}"
8405 [(set_attr "type" "sseiadd")
8406 (set_attr "atom_unit" "complex")
8407 (set_attr "prefix_extra" "1")
8408 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8409 (set_attr "mode" "DI")])
;; *avx_phaddswv8hi3: AVX form of phaddsw on V8HI, emitted as the
;; three-operand vphaddsw.  The element selection is the same as for
;; vphaddw: pairs (0,1) .. (6,7) of operand 1 (register), then of operand 2
;; (register or memory).
;; NOTE(review): per the instruction set reference phaddsw is the
;; saturating variant; the RTL code expressing that is not visible here.
8411 (define_insn "*avx_phaddswv8hi3"
8412 [(set (match_operand:V8HI 0 "register_operand" "=x")
8418 (match_operand:V8HI 1 "register_operand" "x")
8419 (parallel [(const_int 0)]))
8420 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8422 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8423 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8426 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8427 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8429 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8430 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8435 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8436 (parallel [(const_int 0)]))
8437 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8439 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8440 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8443 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8444 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8446 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8447 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8449 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8450 [(set_attr "type" "sseiadd")
8451 (set_attr "prefix_extra" "1")
8452 (set_attr "prefix" "vex")
8453 (set_attr "mode" "TI")])
;; Non-VEX V8HI pattern that emits the two-operand "phaddsw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").  The pattern walks adjacent element pairs
;; (0,1) ... (6,7), first of operand 1 and then of operand 2.
8455 (define_insn "ssse3_phaddswv8hi3"
8456 [(set (match_operand:V8HI 0 "register_operand" "=x")
8462 (match_operand:V8HI 1 "register_operand" "0")
8463 (parallel [(const_int 0)]))
8464 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8466 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8467 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8470 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8471 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8473 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8474 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8479 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8480 (parallel [(const_int 0)]))
8481 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8483 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8484 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8490 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8491 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8493 "phaddsw\t{%2, %0|%0, %2}"
8494 [(set_attr "type" "sseiadd")
8495 (set_attr "atom_unit" "complex")
8496 (set_attr "prefix_data16" "1")
8497 (set_attr "prefix_extra" "1")
8498 (set_attr "mode" "TI")])
;; MMX-register (V4HI) pattern that emits "phaddsw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be an MMX
;; register or memory ("ym").  The pattern walks element pairs (0,1) and
;; (2,3), first of operand 1 and then of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8500 (define_insn "ssse3_phaddswv4hi3"
8501 [(set (match_operand:V4HI 0 "register_operand" "=y")
8506 (match_operand:V4HI 1 "register_operand" "0")
8507 (parallel [(const_int 0)]))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8510 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8511 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8515 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8516 (parallel [(const_int 0)]))
8517 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8519 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8520 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8522 "phaddsw\t{%2, %0|%0, %2}"
8523 [(set_attr "type" "sseiadd")
8524 (set_attr "atom_unit" "complex")
8525 (set_attr "prefix_extra" "1")
8526 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8527 (set_attr "mode" "DI")])
;; VEX-encoded V8HI pattern that emits the three-operand "vphsubw".
;; Operand 0 is a separate destination; operand 1 must be an SSE register
;; ("x") and operand 2 may be an SSE register or memory ("xm").
;; The pattern walks adjacent element pairs (0,1) ... (6,7), first of
;; operand 1 and then of operand 2.
;; The leading '*' keeps this pattern from getting a gen_* function.
8529 (define_insn "*avx_phsubwv8hi3"
8530 [(set (match_operand:V8HI 0 "register_operand" "=x")
8536 (match_operand:V8HI 1 "register_operand" "x")
8537 (parallel [(const_int 0)]))
8538 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8540 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8541 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8544 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8545 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8547 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8548 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8553 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8554 (parallel [(const_int 0)]))
8555 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8557 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8558 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8561 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8562 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8564 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8565 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8567 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8568 [(set_attr "type" "sseiadd")
8569 (set_attr "prefix_extra" "1")
8570 (set_attr "prefix" "vex")
8571 (set_attr "mode" "TI")])
;; Non-VEX V8HI pattern that emits the two-operand "phsubw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").  The pattern walks adjacent element pairs
;; (0,1) ... (6,7), first of operand 1 and then of operand 2.
8573 (define_insn "ssse3_phsubwv8hi3"
8574 [(set (match_operand:V8HI 0 "register_operand" "=x")
8580 (match_operand:V8HI 1 "register_operand" "0")
8581 (parallel [(const_int 0)]))
8582 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8584 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8585 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8588 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8589 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8591 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8592 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8597 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8598 (parallel [(const_int 0)]))
8599 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8601 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8602 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8605 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8606 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8608 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8609 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8611 "phsubw\t{%2, %0|%0, %2}"
8612 [(set_attr "type" "sseiadd")
8613 (set_attr "atom_unit" "complex")
8614 (set_attr "prefix_data16" "1")
8615 (set_attr "prefix_extra" "1")
8616 (set_attr "mode" "TI")])
;; MMX-register (V4HI) pattern that emits "phsubw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be an MMX
;; register or memory ("ym").  The pattern walks element pairs (0,1) and
;; (2,3), first of operand 1 and then of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8618 (define_insn "ssse3_phsubwv4hi3"
8619 [(set (match_operand:V4HI 0 "register_operand" "=y")
8624 (match_operand:V4HI 1 "register_operand" "0")
8625 (parallel [(const_int 0)]))
8626 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8628 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8629 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8633 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8634 (parallel [(const_int 0)]))
8635 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8637 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8638 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8640 "phsubw\t{%2, %0|%0, %2}"
8641 [(set_attr "type" "sseiadd")
8642 (set_attr "atom_unit" "complex")
8643 (set_attr "prefix_extra" "1")
8644 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8645 (set_attr "mode" "DI")])
;; VEX-encoded V4SI pattern that emits the three-operand "vphsubd".
;; Operand 0 is a separate destination; operand 1 must be an SSE register
;; ("x") and operand 2 may be an SSE register or memory ("xm").
;; The pattern walks element pairs (0,1) and (2,3), first of operand 1
;; and then of operand 2.
;; The leading '*' keeps this pattern from getting a gen_* function.
8647 (define_insn "*avx_phsubdv4si3"
8648 [(set (match_operand:V4SI 0 "register_operand" "=x")
8653 (match_operand:V4SI 1 "register_operand" "x")
8654 (parallel [(const_int 0)]))
8655 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8657 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8658 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8662 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8663 (parallel [(const_int 0)]))
8664 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8666 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8667 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8669 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8670 [(set_attr "type" "sseiadd")
8671 (set_attr "prefix_extra" "1")
8672 (set_attr "prefix" "vex")
8673 (set_attr "mode" "TI")])
;; Non-VEX V4SI pattern that emits the two-operand "phsubd".
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").  The pattern walks element pairs (0,1) and
;; (2,3), first of operand 1 and then of operand 2.
8675 (define_insn "ssse3_phsubdv4si3"
8676 [(set (match_operand:V4SI 0 "register_operand" "=x")
8681 (match_operand:V4SI 1 "register_operand" "0")
8682 (parallel [(const_int 0)]))
8683 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8685 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8686 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8690 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8691 (parallel [(const_int 0)]))
8692 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8694 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8695 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8697 "phsubd\t{%2, %0|%0, %2}"
8698 [(set_attr "type" "sseiadd")
8699 (set_attr "atom_unit" "complex")
8700 (set_attr "prefix_data16" "1")
8701 (set_attr "prefix_extra" "1")
8702 (set_attr "mode" "TI")])
;; MMX-register (V2SI) pattern that emits "phsubd".
;; Operand 1 is tied to the destination ("0"); operand 2 may be an MMX
;; register or memory ("ym").  The pattern selects elements 0 and 1 of
;; operand 1 and then elements 0 and 1 of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8704 (define_insn "ssse3_phsubdv2si3"
8705 [(set (match_operand:V2SI 0 "register_operand" "=y")
8709 (match_operand:V2SI 1 "register_operand" "0")
8710 (parallel [(const_int 0)]))
8711 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8714 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8715 (parallel [(const_int 0)]))
8716 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8718 "phsubd\t{%2, %0|%0, %2}"
8719 [(set_attr "type" "sseiadd")
8720 (set_attr "atom_unit" "complex")
8721 (set_attr "prefix_extra" "1")
8722 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8723 (set_attr "mode" "DI")])
;; VEX-encoded V8HI pattern that emits the three-operand "vphsubsw".
;; Operand 0 is a separate destination; operand 1 must be an SSE register
;; ("x") and operand 2 may be an SSE register or memory ("xm").
;; The pattern walks adjacent element pairs (0,1) ... (6,7), first of
;; operand 1 and then of operand 2.
;; The leading '*' keeps this pattern from getting a gen_* function.
8725 (define_insn "*avx_phsubswv8hi3"
8726 [(set (match_operand:V8HI 0 "register_operand" "=x")
8732 (match_operand:V8HI 1 "register_operand" "x")
8733 (parallel [(const_int 0)]))
8734 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8736 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8737 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8740 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8741 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8743 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8744 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8749 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8750 (parallel [(const_int 0)]))
8751 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8753 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8754 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8757 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8758 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8760 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8761 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8763 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8764 [(set_attr "type" "sseiadd")
8765 (set_attr "prefix_extra" "1")
8766 (set_attr "prefix" "vex")
8767 (set_attr "mode" "TI")])
;; Non-VEX V8HI pattern that emits the two-operand "phsubsw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").  The pattern walks adjacent element pairs
;; (0,1) ... (6,7), first of operand 1 and then of operand 2.
8769 (define_insn "ssse3_phsubswv8hi3"
8770 [(set (match_operand:V8HI 0 "register_operand" "=x")
8776 (match_operand:V8HI 1 "register_operand" "0")
8777 (parallel [(const_int 0)]))
8778 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8780 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8781 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8784 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8785 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8787 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8788 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8793 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8794 (parallel [(const_int 0)]))
8795 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8797 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8798 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8801 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8802 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8804 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8805 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8807 "phsubsw\t{%2, %0|%0, %2}"
8808 [(set_attr "type" "sseiadd")
8809 (set_attr "atom_unit" "complex")
8810 (set_attr "prefix_data16" "1")
8811 (set_attr "prefix_extra" "1")
8812 (set_attr "mode" "TI")])
;; MMX-register (V4HI) pattern that emits "phsubsw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be an MMX
;; register or memory ("ym").  The pattern walks element pairs (0,1) and
;; (2,3), first of operand 1 and then of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8814 (define_insn "ssse3_phsubswv4hi3"
8815 [(set (match_operand:V4HI 0 "register_operand" "=y")
8820 (match_operand:V4HI 1 "register_operand" "0")
8821 (parallel [(const_int 0)]))
8822 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8824 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8825 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8829 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8830 (parallel [(const_int 0)]))
8831 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8833 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8834 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8836 "phsubsw\t{%2, %0|%0, %2}"
8837 [(set_attr "type" "sseiadd")
8838 (set_attr "atom_unit" "complex")
8839 (set_attr "prefix_extra" "1")
8840 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8841 (set_attr "mode" "DI")])
;; VEX-encoded pattern that emits the three-operand "vpmaddubsw".
;; Takes two V16QI sources (operand 1 in an SSE register, operand 2 in an
;; SSE register or memory) and produces a V8HI result in operand 0.
;; Each source is selected twice: once with a selector starting at index
;; 0 and once with a selector starting at index 1.
;; The leading '*' keeps this pattern from getting a gen_* function.
8843 (define_insn "*avx_pmaddubsw128"
8844 [(set (match_operand:V8HI 0 "register_operand" "=x")
8849 (match_operand:V16QI 1 "register_operand" "x")
8850 (parallel [(const_int 0)
8860 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8861 (parallel [(const_int 0)
8871 (vec_select:V16QI (match_dup 1)
8872 (parallel [(const_int 1)
8881 (vec_select:V16QI (match_dup 2)
8882 (parallel [(const_int 1)
8889 (const_int 15)]))))))]
8891 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8892 [(set_attr "type" "sseiadd")
8893 (set_attr "prefix_extra" "1")
8894 (set_attr "prefix" "vex")
8895 (set_attr "mode" "TI")])
;; Non-VEX pattern that emits the two-operand "pmaddubsw" on SSE registers.
;; Takes two V16QI sources and produces a V8HI result.  Operand 1 is tied
;; to the destination ("0"); operand 2 may be an SSE register or memory.
;; Each source is selected twice: once with a selector starting at index
;; 0 and once with a selector starting at index 1.
8897 (define_insn "ssse3_pmaddubsw128"
8898 [(set (match_operand:V8HI 0 "register_operand" "=x")
8903 (match_operand:V16QI 1 "register_operand" "0")
8904 (parallel [(const_int 0)
8914 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8915 (parallel [(const_int 0)
8925 (vec_select:V16QI (match_dup 1)
8926 (parallel [(const_int 1)
8935 (vec_select:V16QI (match_dup 2)
8936 (parallel [(const_int 1)
8943 (const_int 15)]))))))]
8945 "pmaddubsw\t{%2, %0|%0, %2}"
8946 [(set_attr "type" "sseiadd")
8947 (set_attr "atom_unit" "simul")
8948 (set_attr "prefix_data16" "1")
8949 (set_attr "prefix_extra" "1")
8950 (set_attr "mode" "TI")])
;; MMX-register pattern that emits "pmaddubsw".
;; Takes two V8QI sources and produces a V4HI result.  Operand 1 is tied
;; to the destination ("0"); operand 2 may be an MMX register or memory.
;; Each source is selected twice: once with a selector starting at index
;; 0 and once with a selector starting at index 1.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8952 (define_insn "ssse3_pmaddubsw"
8953 [(set (match_operand:V4HI 0 "register_operand" "=y")
8958 (match_operand:V8QI 1 "register_operand" "0")
8959 (parallel [(const_int 0)
8965 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8966 (parallel [(const_int 0)
8972 (vec_select:V8QI (match_dup 1)
8973 (parallel [(const_int 1)
8978 (vec_select:V8QI (match_dup 2)
8979 (parallel [(const_int 1)
8982 (const_int 7)]))))))]
8984 "pmaddubsw\t{%2, %0|%0, %2}"
8985 [(set_attr "type" "sseiadd")
8986 (set_attr "atom_unit" "simul")
8987 (set_attr "prefix_extra" "1")
8988 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8989 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw operation.
;; Both sources are accepted as nonimmediate operands; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy (MULT, ...) on the
;; operands before the RTL is generated.  The template includes a V8HI
;; constant vector of all ones.
8991 (define_expand "ssse3_pmulhrswv8hi3"
8992 [(set (match_operand:V8HI 0 "register_operand" "")
8999 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9001 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9003 (const_vector:V8HI [(const_int 1) (const_int 1)
9004 (const_int 1) (const_int 1)
9005 (const_int 1) (const_int 1)
9006 (const_int 1) (const_int 1)]))
9009 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; VEX-encoded V8HI pattern that emits the three-operand "vpmulhrsw".
;; Enabled when TARGET_AVX holds and ix86_binary_operator_ok accepts the
;; operands.  The '%' in operand 1's constraint marks operands 1 and 2 as
;; commutative.  The template includes a V8HI constant vector of all ones.
;; The leading '*' keeps this pattern from getting a gen_* function.
9011 (define_insn "*avx_pmulhrswv8hi3"
9012 [(set (match_operand:V8HI 0 "register_operand" "=x")
9019 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9021 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9023 (const_vector:V8HI [(const_int 1) (const_int 1)
9024 (const_int 1) (const_int 1)
9025 (const_int 1) (const_int 1)
9026 (const_int 1) (const_int 1)]))
9028 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9029 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9030 [(set_attr "type" "sseimul")
9031 (set_attr "prefix_extra" "1")
9032 (set_attr "prefix" "vex")
9033 (set_attr "mode" "TI")])
;; Non-VEX V8HI pattern that emits the two-operand "pmulhrsw".
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is tied to the destination ("0") and the '%' marks
;; operands 1 and 2 as commutative.  The template includes a V8HI constant
;; vector of all ones.
;; The leading '*' keeps this pattern from getting a gen_* function.
9035 (define_insn "*ssse3_pmulhrswv8hi3"
9036 [(set (match_operand:V8HI 0 "register_operand" "=x")
9043 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9045 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9047 (const_vector:V8HI [(const_int 1) (const_int 1)
9048 (const_int 1) (const_int 1)
9049 (const_int 1) (const_int 1)
9050 (const_int 1) (const_int 1)]))
9052 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9053 "pmulhrsw\t{%2, %0|%0, %2}"
9054 [(set_attr "type" "sseimul")
9055 (set_attr "prefix_data16" "1")
9056 (set_attr "prefix_extra" "1")
9057 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw operation.
;; Both sources are accepted as nonimmediate operands; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy (MULT, ...) on the
;; operands before the RTL is generated.  The template includes a V4HI
;; constant vector of all ones.
9059 (define_expand "ssse3_pmulhrswv4hi3"
9060 [(set (match_operand:V4HI 0 "register_operand" "")
9067 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9069 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9071 (const_vector:V4HI [(const_int 1) (const_int 1)
9072 (const_int 1) (const_int 1)]))
9075 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; MMX-register (V4HI) pattern that emits "pmulhrsw".
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is tied to the destination ("0") and the '%' marks
;; operands 1 and 2 as commutative; operand 2 may be an MMX register or
;; memory ("ym").  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
;; The leading '*' keeps this pattern from getting a gen_* function.
9077 (define_insn "*ssse3_pmulhrswv4hi3"
9078 [(set (match_operand:V4HI 0 "register_operand" "=y")
9085 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9087 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9089 (const_vector:V4HI [(const_int 1) (const_int 1)
9090 (const_int 1) (const_int 1)]))
9092 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9093 "pmulhrsw\t{%2, %0|%0, %2}"
9094 [(set_attr "type" "sseimul")
9095 (set_attr "prefix_extra" "1")
9096 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9097 (set_attr "mode" "DI")])
;; VEX-encoded V16QI pattern that emits the three-operand "vpshufb".
;; The operation is modelled as an unspec over operand 1 (SSE register)
;; and operand 2 (SSE register or memory).
;; The ';' after the output template starts an empty md comment; it is
;; not part of the pattern.
;; The leading '*' keeps this pattern from getting a gen_* function.
9099 (define_insn "*avx_pshufbv16qi3"
9100 [(set (match_operand:V16QI 0 "register_operand" "=x")
9101 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9102 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9105 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9106 [(set_attr "type" "sselog1")
9107 (set_attr "prefix_extra" "1")
9108 (set_attr "prefix" "vex")
9109 (set_attr "mode" "TI")])
;; Non-VEX V16QI pattern that emits the two-operand "pshufb".
;; The operation is modelled as an unspec; operand 1 is tied to the
;; destination ("0") and operand 2 may be an SSE register or memory.
;; The ';' after the output template starts an empty md comment; it is
;; not part of the pattern.
9111 (define_insn "ssse3_pshufbv16qi3"
9112 [(set (match_operand:V16QI 0 "register_operand" "=x")
9113 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9114 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9117 "pshufb\t{%2, %0|%0, %2}";
9118 [(set_attr "type" "sselog1")
9119 (set_attr "prefix_data16" "1")
9120 (set_attr "prefix_extra" "1")
9121 (set_attr "mode" "TI")])
;; MMX-register (V8QI) pattern that emits "pshufb".
;; The operation is modelled as an unspec; operand 1 is tied to the
;; destination ("0") and operand 2 may be an MMX register or memory.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
;; The ';' after the output template starts an empty md comment; it is
;; not part of the pattern.
9123 (define_insn "ssse3_pshufbv8qi3"
9124 [(set (match_operand:V8QI 0 "register_operand" "=y")
9125 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9126 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9129 "pshufb\t{%2, %0|%0, %2}";
9130 [(set_attr "type" "sselog1")
9131 (set_attr "prefix_extra" "1")
9132 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9133 (set_attr "mode" "DI")])
;; VEX-encoded pattern that emits the three-operand "vpsign<ssevecsize>",
;; instantiated once per mode in the SSEMODE124 iterator.
;; Operand 1 must be an SSE register; operand 2 may be an SSE register or
;; memory.
;; The ';' after the output template starts an empty md comment; it is
;; not part of the pattern.
;; The leading '*' keeps this pattern from getting a gen_* function.
9135 (define_insn "*avx_psign<mode>3"
9136 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9138 [(match_operand:SSEMODE124 1 "register_operand" "x")
9139 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9142 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9143 [(set_attr "type" "sselog1")
9144 (set_attr "prefix_extra" "1")
9145 (set_attr "prefix" "vex")
9146 (set_attr "mode" "TI")])
;; Non-VEX pattern that emits the two-operand "psign<ssevecsize>",
;; instantiated once per mode in the SSEMODE124 iterator.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory.
;; The ';' after the output template starts an empty md comment; it is
;; not part of the pattern.
9148 (define_insn "ssse3_psign<mode>3"
9149 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9151 [(match_operand:SSEMODE124 1 "register_operand" "0")
9152 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9155 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9156 [(set_attr "type" "sselog1")
9157 (set_attr "prefix_data16" "1")
9158 (set_attr "prefix_extra" "1")
9159 (set_attr "mode" "TI")])
;; MMX-register pattern that emits "psign<mmxvecsize>", instantiated once
;; per mode in the MMXMODEI iterator.  It shares the name template of the
;; SSEMODE124 pattern but iterates over a different mode iterator.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an MMX
;; register or memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
;; The ';' after the output template starts an empty md comment; it is
;; not part of the pattern.
9161 (define_insn "ssse3_psign<mode>3"
9162 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9164 [(match_operand:MMXMODEI 1 "register_operand" "0")
9165 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9168 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9169 [(set_attr "type" "sselog1")
9170 (set_attr "prefix_extra" "1")
9171 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9172 (set_attr "mode" "DI")])
;; VEX-encoded TImode pattern that emits the four-operand "vpalignr".
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the RTL carries the value scaled
;; by 8 relative to the immediate that is emitted.
;; The leading '*' keeps this pattern from getting a gen_* function.
9174 (define_insn "*avx_palignrti"
9175 [(set (match_operand:TI 0 "register_operand" "=x")
9176 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9177 (match_operand:TI 2 "nonimmediate_operand" "xm")
9178 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9182 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9183 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9185 [(set_attr "type" "sseishft")
9186 (set_attr "prefix_extra" "1")
9187 (set_attr "length_immediate" "1")
9188 (set_attr "prefix" "vex")
9189 (set_attr "mode" "TI")])
;; Non-VEX TImode pattern that emits the three-operand "palignr" on SSE
;; registers.  Operand 1 is tied to the destination ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the RTL carries the value scaled
;; by 8 relative to the immediate that is emitted.
9191 (define_insn "ssse3_palignrti"
9192 [(set (match_operand:TI 0 "register_operand" "=x")
9193 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9194 (match_operand:TI 2 "nonimmediate_operand" "xm")
9195 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9199 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9200 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9202 [(set_attr "type" "sseishft")
9203 (set_attr "atom_unit" "sishuf")
9204 (set_attr "prefix_data16" "1")
9205 (set_attr "prefix_extra" "1")
9206 (set_attr "length_immediate" "1")
9207 (set_attr "mode" "TI")])
;; MMX-register (DImode) pattern that emits "palignr".
;; Operand 1 is tied to the destination ("0"); operand 2 may be an MMX
;; register or memory.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
9209 (define_insn "ssse3_palignrdi"
9210 [(set (match_operand:DI 0 "register_operand" "=y")
9211 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9212 (match_operand:DI 2 "nonimmediate_operand" "ym")
9213 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9217 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9218 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9220 [(set_attr "type" "sseishft")
9221 (set_attr "atom_unit" "sishuf")
9222 (set_attr "prefix_extra" "1")
9223 (set_attr "length_immediate" "1")
9224 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9225 (set_attr "mode" "DI")])
;; Absolute value on SSE registers, instantiated once per mode in the
;; SSEMODE124 iterator; emits "pabs<ssevecsize>".
;; The source may be an SSE register or memory; the destination is a
;; separate register (no tied operand).
;; NOTE(review): the "%v" in the template presumably expands to a "v"
;; mnemonic prefix when VEX encoding is in use, matching the "maybe_vex"
;; prefix attribute -- confirm against the i386 operand printer.
9227 (define_insn "abs<mode>2"
9228 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9229 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9231 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9232 [(set_attr "type" "sselog1")
9233 (set_attr "prefix_data16" "1")
9234 (set_attr "prefix_extra" "1")
9235 (set_attr "prefix" "maybe_vex")
9236 (set_attr "mode" "TI")])
;; Absolute value on MMX registers, instantiated once per mode in the
;; MMXMODEI iterator; emits "pabs<mmxvecsize>".
;; The source may be an MMX register or memory.  prefix_rep is forced to
;; 0 and prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
;; The ';' after the output template starts an empty md comment; it is
;; not part of the pattern.
9238 (define_insn "abs<mode>2"
9239 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9240 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9242 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9243 [(set_attr "type" "sselog1")
9244 (set_attr "prefix_rep" "0")
9245 (set_attr "prefix_extra" "1")
9246 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9247 (set_attr "mode" "DI")])
9249 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9251 ;; AMD SSE4A instructions
9253 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store pattern, instantiated once per mode in the MODEF
;; iterator; emits "movnts<ssemodefsuffix>".
;; Operand 0 must be memory ("=m") and operand 1 an SSE register ("x").
9255 (define_insn "sse4a_movnt<mode>"
9256 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9258 [(match_operand:MODEF 1 "register_operand" "x")]
9261 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9262 [(set_attr "type" "ssemov")
9263 (set_attr "mode" "<MODE>")])
;; SSE4A store pattern taking a vector source, instantiated once per mode
;; in the SSEMODEF2P iterator; emits "movnt<ssescalarmodesuffix>".
;; Element 0 of the vector register (operand 1) is selected and stored to
;; the scalar-mode memory destination (operand 0).
9265 (define_insn "sse4a_vmmovnt<mode>"
9266 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9267 (unspec:<ssescalarmode>
9268 [(vec_select:<ssescalarmode>
9269 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9270 (parallel [(const_int 0)]))]
9273 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9274 [(set_attr "type" "ssemov")
9275 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A "extrq" with two immediate operands.
;; Operand 1 (V2DI) is tied to the destination ("0"); operands 2 and 3
;; are constant integers printed as immediates.  length_immediate is 2
;; because two immediates are emitted.
9277 (define_insn "sse4a_extrqi"
9278 [(set (match_operand:V2DI 0 "register_operand" "=x")
9279 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9280 (match_operand 2 "const_int_operand" "")
9281 (match_operand 3 "const_int_operand" "")]
9284 "extrq\t{%3, %2, %0|%0, %2, %3}"
9285 [(set_attr "type" "sse")
9286 (set_attr "prefix_data16" "1")
9287 (set_attr "length_immediate" "2")
9288 (set_attr "mode" "TI")])
;; SSE4A "extrq" with a register control operand.
;; Operand 1 (V2DI) is tied to the destination ("0"); operand 2 is a
;; V16QI value that must be in an SSE register (no memory alternative).
9290 (define_insn "sse4a_extrq"
9291 [(set (match_operand:V2DI 0 "register_operand" "=x")
9292 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9293 (match_operand:V16QI 2 "register_operand" "x")]
9296 "extrq\t{%2, %0|%0, %2}"
9297 [(set_attr "type" "sse")
9298 (set_attr "prefix_data16" "1")
9299 (set_attr "mode" "TI")])
;; SSE4A "insertq" with two immediate operands.
;; Operand 1 (V2DI) is tied to the destination ("0"); operand 2 must be
;; an SSE register; operands 3 and 4 are constant integers printed as
;; immediates (length_immediate is 2).  The attributes select a rep
;; prefix and no data16 prefix.
9301 (define_insn "sse4a_insertqi"
9302 [(set (match_operand:V2DI 0 "register_operand" "=x")
9303 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9304 (match_operand:V2DI 2 "register_operand" "x")
9305 (match_operand 3 "const_int_operand" "")
9306 (match_operand 4 "const_int_operand" "")]
9309 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9310 [(set_attr "type" "sseins")
9311 (set_attr "prefix_data16" "0")
9312 (set_attr "prefix_rep" "1")
9313 (set_attr "length_immediate" "2")
9314 (set_attr "mode" "TI")])
;; SSE4A "insertq", register-only form.
;; Operand 1 (V2DI) is tied to the destination ("0"); operand 2 must be
;; an SSE register.  The attributes select a rep prefix and no data16
;; prefix.
9316 (define_insn "sse4a_insertq"
9317 [(set (match_operand:V2DI 0 "register_operand" "=x")
9318 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9319 (match_operand:V2DI 2 "register_operand" "x")]
9322 "insertq\t{%2, %0|%0, %2}"
9323 [(set_attr "type" "sseins")
9324 (set_attr "prefix_data16" "0")
9325 (set_attr "prefix_rep" "1")
9326 (set_attr "mode" "TI")])
9328 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9330 ;; Intel SSE4.1 instructions
9332 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded immediate blend, instantiated once per mode in the
;; AVXMODEF2P iterator; emits "vblend<ssemodesuffix>".
;; Expressed as a vec_merge whose first arm is operand 2 (register or
;; memory) and second arm is operand 1 (register); operand 3 is the
;; immediate mask, range-checked by const_0_to_<blendbits>_operand.
9334 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9335 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9336 (vec_merge:AVXMODEF2P
9337 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9338 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9339 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9341 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9342 [(set_attr "type" "ssemov")
9343 (set_attr "prefix_extra" "1")
9344 (set_attr "length_immediate" "1")
9345 (set_attr "prefix" "vex")
9346 (set_attr "mode" "<avxvecmode>")])
;; VEX-encoded variable blend, instantiated once per mode in the
;; AVXMODEF2P iterator; emits the four-operand "vblendv<ssemodesuffix>".
;; Operands 1 and 3 must be SSE registers; operand 2 may be a register or
;; memory.
;; NOTE(review): length_immediate is 1 although operand 3 is a register;
;; presumably the register is encoded in an immediate byte -- confirm
;; against the instruction encoding.
9348 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9349 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9351 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9352 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9353 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9356 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9357 [(set_attr "type" "ssemov")
9358 (set_attr "prefix_extra" "1")
9359 (set_attr "length_immediate" "1")
9360 (set_attr "prefix" "vex")
9361 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX immediate blend, instantiated once per mode in the SSEMODEF2P
;; iterator; emits "blend<ssemodesuffix>".
;; Expressed as a vec_merge whose first arm is operand 2 (register or
;; memory) and second arm is operand 1, which is tied to the destination
;; ("0"); operand 3 is the immediate mask, range-checked by
;; const_0_to_<blendbits>_operand.
9363 (define_insn "sse4_1_blend<ssemodesuffix>"
9364 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9365 (vec_merge:SSEMODEF2P
9366 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9367 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9368 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9370 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9371 [(set_attr "type" "ssemov")
9372 (set_attr "prefix_data16" "1")
9373 (set_attr "prefix_extra" "1")
9374 (set_attr "length_immediate" "1")
9375 (set_attr "mode" "<MODE>")])
;; Non-VEX variable blend, instantiated once per mode in the SSEMODEF2P
;; iterator; emits "blendv<ssemodesuffix>".
;; Operand 1 is tied to the destination ("0").  Operands 0, 1 and 2 use
;; the *_not_xmm0_operand predicates, while operand 3 uses the "Yz"
;; constraint.
;; NOTE(review): "Yz" presumably restricts operand 3 to xmm0, which would
;; explain why the other operands exclude it -- confirm against the i386
;; constraint definitions.
9377 (define_insn "sse4_1_blendv<ssemodesuffix>"
9378 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9380 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9381 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9382 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9385 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9386 [(set_attr "type" "ssemov")
9387 (set_attr "prefix_data16" "1")
9388 (set_attr "prefix_extra" "1")
9389 (set_attr "mode" "<MODE>")])
;; VEX-encoded pattern that emits "vdp<ssemodesuffix>", instantiated once
;; per mode in the AVXMODEF2P iterator.
;; The '%' in operand 1's constraint marks operands 1 and 2 as
;; commutative; operand 3 is an immediate in the range accepted by
;; const_0_to_255_operand.
9391 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9392 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9394 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9395 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9396 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9399 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9400 [(set_attr "type" "ssemul")
9401 (set_attr "prefix" "vex")
9402 (set_attr "prefix_extra" "1")
9403 (set_attr "length_immediate" "1")
9404 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX pattern that emits "dp<ssemodesuffix>", instantiated once per
;; mode in the SSEMODEF2P iterator.
;; Operand 1 is tied to the destination ("0") and the '%' marks operands
;; 1 and 2 as commutative; operand 3 is an immediate in the range
;; accepted by const_0_to_255_operand.
9406 (define_insn "sse4_1_dp<ssemodesuffix>"
9407 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9409 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9410 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9411 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9414 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9415 [(set_attr "type" "ssemul")
9416 (set_attr "prefix_data16" "1")
9417 (set_attr "prefix_extra" "1")
9418 (set_attr "length_immediate" "1")
9419 (set_attr "mode" "<MODE>")])
;; Load pattern that emits "movntdqa": the source (operand 1) must be
;; memory and the V2DI destination an SSE register.  The operation is
;; modelled as an unspec.
;; NOTE(review): the "%v" in the template presumably expands to a "v"
;; mnemonic prefix when VEX encoding is in use, matching the "maybe_vex"
;; prefix attribute -- confirm against the i386 operand printer.
9421 (define_insn "sse4_1_movntdqa"
9422 [(set (match_operand:V2DI 0 "register_operand" "=x")
9423 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9426 "%vmovntdqa\t{%1, %0|%0, %1}"
9427 [(set_attr "type" "ssemov")
9428 (set_attr "prefix_extra" "1")
9429 (set_attr "prefix" "maybe_vex")
9430 (set_attr "mode" "TI")])
;; VEX-encoded V16QI pattern that emits the four-operand "vmpsadbw".
;; Modelled as an unspec over operand 1 (SSE register), operand 2
;; (register or memory) and an immediate operand 3 in the range accepted
;; by const_0_to_255_operand.
;; The leading '*' keeps this pattern from getting a gen_* function.
9432 (define_insn "*avx_mpsadbw"
9433 [(set (match_operand:V16QI 0 "register_operand" "=x")
9434 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9435 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9436 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9439 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9440 [(set_attr "type" "sselog1")
9441 (set_attr "prefix" "vex")
9442 (set_attr "prefix_extra" "1")
9443 (set_attr "length_immediate" "1")
9444 (set_attr "mode" "TI")])
;; Non-VEX V16QI pattern that emits the three-operand "mpsadbw".
;; Modelled as an unspec; operand 1 is tied to the destination ("0"),
;; operand 2 may be a register or memory, and operand 3 is an immediate
;; in the range accepted by const_0_to_255_operand.
9446 (define_insn "sse4_1_mpsadbw"
9447 [(set (match_operand:V16QI 0 "register_operand" "=x")
9448 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9449 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9450 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9453 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9454 [(set_attr "type" "sselog1")
9455 (set_attr "prefix_extra" "1")
9456 (set_attr "length_immediate" "1")
9457 (set_attr "mode" "TI")])
;; VEX-encoded pattern that emits the three-operand "vpackusdw".
;; Takes two V4SI sources (operand 1 in an SSE register, operand 2 in a
;; register or memory) and produces a V8HI result in operand 0.
;; The leading '*' keeps this pattern from getting a gen_* function.
9459 (define_insn "*avx_packusdw"
9460 [(set (match_operand:V8HI 0 "register_operand" "=x")
9463 (match_operand:V4SI 1 "register_operand" "x"))
9465 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9467 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9468 [(set_attr "type" "sselog")
9469 (set_attr "prefix_extra" "1")
9470 (set_attr "prefix" "vex")
9471 (set_attr "mode" "TI")])
;; Non-VEX pattern that emits the two-operand "packusdw".
;; Takes two V4SI sources and produces a V8HI result; operand 1 is tied
;; to the destination ("0") and operand 2 may be a register or memory.
9473 (define_insn "sse4_1_packusdw"
9474 [(set (match_operand:V8HI 0 "register_operand" "=x")
9477 (match_operand:V4SI 1 "register_operand" "0"))
9479 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9481 "packusdw\t{%2, %0|%0, %2}"
9482 [(set_attr "type" "sselog")
9483 (set_attr "prefix_extra" "1")
9484 (set_attr "mode" "TI")])
;; VEX-encoded V16QI pattern that emits the four-operand "vpblendvb".
;; Modelled as an unspec; operands 1 and 3 must be SSE registers and
;; operand 2 may be a register or memory.
;; NOTE(review): length_immediate is 1 although operand 3 is a register;
;; presumably the register is encoded in an immediate byte -- confirm
;; against the instruction encoding.
;; The leading '*' keeps this pattern from getting a gen_* function.
9486 (define_insn "*avx_pblendvb"
9487 [(set (match_operand:V16QI 0 "register_operand" "=x")
9488 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9489 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9490 (match_operand:V16QI 3 "register_operand" "x")]
9493 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9494 [(set_attr "type" "ssemov")
9495 (set_attr "prefix_extra" "1")
9496 (set_attr "length_immediate" "1")
9497 (set_attr "prefix" "vex")
9498 (set_attr "mode" "TI")])
;; Non-VEX V16QI pattern that emits "pblendvb".
;; Operand 1 is tied to the destination ("0").  Operands 0, 1 and 2 use
;; the *_not_xmm0_operand predicates, while operand 3 uses the "Yz"
;; constraint.
;; NOTE(review): "Yz" presumably restricts operand 3 to xmm0, which would
;; explain why the other operands exclude it -- confirm against the i386
;; constraint definitions.
9500 (define_insn "sse4_1_pblendvb"
9501 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9502 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9503 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9504 (match_operand:V16QI 3 "register_operand" "Yz")]
9507 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9508 [(set_attr "type" "ssemov")
9509 (set_attr "prefix_extra" "1")
9510 (set_attr "mode" "TI")])
;; VEX-encoded V8HI pattern that emits the four-operand "vpblendw".
;; Operand 2 (register or memory) is listed before operand 1 (register)
;; in the RTL; operand 3 is an immediate in the range accepted by
;; const_0_to_255_operand.
;; The leading '*' keeps this pattern from getting a gen_* function.
9512 (define_insn "*avx_pblendw"
9513 [(set (match_operand:V8HI 0 "register_operand" "=x")
9515 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9516 (match_operand:V8HI 1 "register_operand" "x")
9517 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9519 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9520 [(set_attr "type" "ssemov")
9521 (set_attr "prefix" "vex")
9522 (set_attr "prefix_extra" "1")
9523 (set_attr "length_immediate" "1")
9524 (set_attr "mode" "TI")])
;; Non-VEX V8HI pattern that emits the three-operand "pblendw".
;; Operand 2 (register or memory) is listed before operand 1 in the RTL;
;; operand 1 is tied to the destination ("0") and operand 3 is an
;; immediate in the range accepted by const_0_to_255_operand.
9526 (define_insn "sse4_1_pblendw"
9527 [(set (match_operand:V8HI 0 "register_operand" "=x")
9529 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9530 (match_operand:V8HI 1 "register_operand" "0")
9531 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9533 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9534 [(set_attr "type" "ssemov")
9535 (set_attr "prefix_extra" "1")
9536 (set_attr "length_immediate" "1")
9537 (set_attr "mode" "TI")])
;; V8HI pattern that emits "phminposuw", modelled as
;; UNSPEC_PHMINPOSUW over a single source that may be an SSE register or
;; memory.
;; NOTE(review): the "%v" in the template presumably expands to a "v"
;; mnemonic prefix when VEX encoding is in use, matching the "maybe_vex"
;; prefix attribute -- confirm against the i386 operand printer.
9539 (define_insn "sse4_1_phminposuw"
9540 [(set (match_operand:V8HI 0 "register_operand" "=x")
9541 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9542 UNSPEC_PHMINPOSUW))]
9544 "%vphminposuw\t{%1, %0|%0, %1}"
9545 [(set_attr "type" "sselog1")
9546 (set_attr "prefix_extra" "1")
9547 (set_attr "prefix" "maybe_vex")
9548 (set_attr "mode" "TI")])
;; Byte-to-word extension pattern; emits "pmov<extsuffix>bw", where
;; <extsuffix> is "sx" for sign_extend and "zx" for zero_extend.
;; The source is a V16QI register or memory operand from which elements
;; are selected starting at index 0; the result is V8HI.
;; NOTE(review): "%v" presumably adds a "v" mnemonic prefix under VEX
;; encoding (prefix attribute is "maybe_vex") -- confirm.
9550 (define_insn "sse4_1_<code>v8qiv8hi2"
9551 [(set (match_operand:V8HI 0 "register_operand" "=x")
9554 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9555 (parallel [(const_int 0)
9564 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9565 [(set_attr "type" "ssemov")
9566 (set_attr "prefix_extra" "1")
9567 (set_attr "prefix" "maybe_vex")
9568 (set_attr "mode" "TI")])
;; Byte-to-doubleword extension pattern; emits "pmov<extsuffix>bd", where
;; <extsuffix> is "sx" for sign_extend and "zx" for zero_extend.
;; The source is a V16QI register or memory operand from which elements
;; are selected starting at index 0; the result is V4SI.
;; NOTE(review): "%v" presumably adds a "v" mnemonic prefix under VEX
;; encoding (prefix attribute is "maybe_vex") -- confirm.
9570 (define_insn "sse4_1_<code>v4qiv4si2"
9571 [(set (match_operand:V4SI 0 "register_operand" "=x")
9574 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9575 (parallel [(const_int 0)
9580 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9581 [(set_attr "type" "ssemov")
9582 (set_attr "prefix_extra" "1")
9583 (set_attr "prefix" "maybe_vex")
9584 (set_attr "mode" "TI")])
;; Word -> doubleword extension (pmovsxwd / pmovzxwd, chosen through
;; <extsuffix>).  Source is a V8HI register or memory operand with an
;; element selection beginning at index 0; result is a V4SI register.
9586 (define_insn "sse4_1_<code>v4hiv4si2"
9587 [(set (match_operand:V4SI 0 "register_operand" "=x")
9590 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9591 (parallel [(const_int 0)
9596 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9597 [(set_attr "type" "ssemov")
9598 (set_attr "prefix_extra" "1")
9599 (set_attr "prefix" "maybe_vex")
9600 (set_attr "mode" "TI")])
;; Byte -> quadword extension (pmovsxbq / pmovzxbq, chosen through
;; <extsuffix>).  Source is a V16QI register or memory operand with an
;; element selection beginning at index 0; result is a V2DI register.
9602 (define_insn "sse4_1_<code>v2qiv2di2"
9603 [(set (match_operand:V2DI 0 "register_operand" "=x")
9606 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9607 (parallel [(const_int 0)
9610 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9611 [(set_attr "type" "ssemov")
9612 (set_attr "prefix_extra" "1")
9613 (set_attr "prefix" "maybe_vex")
9614 (set_attr "mode" "TI")])
;; Word -> quadword extension (pmovsxwq / pmovzxwq, chosen through
;; <extsuffix>).  Source is a V8HI register or memory operand with an
;; element selection beginning at index 0; result is a V2DI register.
9616 (define_insn "sse4_1_<code>v2hiv2di2"
9617 [(set (match_operand:V2DI 0 "register_operand" "=x")
9620 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9621 (parallel [(const_int 0)
9624 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9625 [(set_attr "type" "ssemov")
9626 (set_attr "prefix_extra" "1")
9627 (set_attr "prefix" "maybe_vex")
9628 (set_attr "mode" "TI")])
;; Doubleword -> quadword extension (pmovsxdq / pmovzxdq, chosen
;; through <extsuffix>).  Source is a V4SI register or memory operand
;; with an element selection beginning at index 0; result is V2DI.
9630 (define_insn "sse4_1_<code>v2siv2di2"
9631 [(set (match_operand:V2DI 0 "register_operand" "=x")
9634 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9635 (parallel [(const_int 0)
9638 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9639 [(set_attr "type" "ssemov")
9640 (set_attr "prefix_extra" "1")
9641 (set_attr "prefix" "maybe_vex")
9642 (set_attr "mode" "TI")])
9644 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9645 ;; setting FLAGS_REG, but they are not really compare instructions.
;; The only effect described is a CC-mode write to FLAGS_REG, computed
;; by an unspec over operand 0 (register) and operand 1 (register or
;; memory), both in an AVXMODEF2P mode.  No vector result is produced.
;; The mnemonic suffix comes from <ssemodesuffix>; encoding is VEX only.
9646 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9647 [(set (reg:CC FLAGS_REG)
9648 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9649 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9652 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9653 [(set_attr "type" "ssecomi")
9654 (set_attr "prefix_extra" "1")
9655 (set_attr "prefix" "vex")
9656 (set_attr "mode" "<MODE>")])
9658 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9659 ;; But it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CC mode) from an unspec over two
;; V4DI operands -- operand 0 a register, operand 1 a register or
;; memory.  VEX-only encoding; the insn mode attribute is OI.
9660 (define_insn "avx_ptest256"
9661 [(set (reg:CC FLAGS_REG)
9662 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9663 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9666 "vptest\t{%1, %0|%0, %1}"
9667 [(set_attr "type" "ssecomi")
9668 (set_attr "prefix_extra" "1")
9669 (set_attr "prefix" "vex")
9670 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (CC mode) from an unspec over two V2DI
;; operands -- operand 0 a register, operand 1 a register or memory.
;; "%v" plus prefix=maybe_vex covers both the legacy and VEX spelling.
9672 (define_insn "sse4_1_ptest"
9673 [(set (reg:CC FLAGS_REG)
9674 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9675 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9678 "%vptest\t{%1, %0|%0, %1}"
9679 [(set_attr "type" "ssecomi")
9680 (set_attr "prefix_extra" "1")
9681 (set_attr "prefix" "maybe_vex")
9682 (set_attr "mode" "TI")])
;; 256-bit packed round (vroundps / vroundpd via <ssemodesuffix>).
;;   operand 0: destination register (AVX256MODEF2P)
;;   operand 1: register or memory source, same mode
;;   operand 2: immediate accepted by const_0_to_15_operand
;; Modelled as an unspec; VEX-only encoding with one immediate byte.
9684 (define_insn "avx_round<ssemodesuffix>256"
9685 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9686 (unspec:AVX256MODEF2P
9687 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9688 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9691 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9692 [(set_attr "type" "ssecvt")
9693 (set_attr "prefix_extra" "1")
9694 (set_attr "length_immediate" "1")
9695 (set_attr "prefix" "vex")
9696 (set_attr "mode" "<MODE>")])
;; 128-bit packed round (roundps / roundpd via <ssemodesuffix>, with an
;; optional VEX spelling through "%v" and prefix=maybe_vex).
;;   operand 0: destination register (SSEMODEF2P)
;;   operand 1: register or memory source, same mode
;;   operand 2: immediate accepted by const_0_to_15_operand
9698 (define_insn "sse4_1_round<ssemodesuffix>"
9699 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9701 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9702 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9705 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9706 [(set_attr "type" "ssecvt")
9707 (set_attr "prefix_data16" "1")
9708 (set_attr "prefix_extra" "1")
9709 (set_attr "length_immediate" "1")
9710 (set_attr "prefix" "maybe_vex")
9711 (set_attr "mode" "<MODE>")])
;; VEX scalar round (vroundss / vroundsd via <ssescalarmodesuffix>).
;; The result is a vec_merge of the rounded operand 2 with operand 1;
;; operand 1 has its own "x" constraint, so this is the non-destructive
;; form and the template prints operands 0, 1, 2 and the immediate 3
;; (const_0_to_15_operand).
9713 (define_insn "*avx_round<ssescalarmodesuffix>"
9714 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9715 (vec_merge:SSEMODEF2P
9717 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9718 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9720 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9723 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9724 [(set_attr "type" "ssecvt")
9725 (set_attr "prefix_extra" "1")
9726 (set_attr "length_immediate" "1")
9727 (set_attr "prefix" "vex")
9728 (set_attr "mode" "<MODE>")])
;; Legacy scalar round (roundss / roundsd via <ssescalarmodesuffix>).
;; Same vec_merge shape as the VEX form, but operand 1 is tied to the
;; destination with constraint "0", so the template prints only
;; operands 0, 2 and the immediate 3 (const_0_to_15_operand).
9730 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9731 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9732 (vec_merge:SSEMODEF2P
9734 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9735 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9737 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9740 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9741 [(set_attr "type" "ssecvt")
9742 (set_attr "prefix_data16" "1")
9743 (set_attr "prefix_extra" "1")
9744 (set_attr "length_immediate" "1")
9745 (set_attr "mode" "<MODE>")])
9747 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9749 ;; Intel SSE4.2 string/text processing instructions
9751 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern that is split into one of the concrete
;; pcmpestri / pcmpestrm / cconly insns.
;; Results: operand 0 (SI, constraint "c"), operand 1 (V16QI, constraint
;; "Yz" -- presumably xmm0, matching the local variable name below) and
;; FLAGS_REG.
;; Inputs: operand 2 / operand 4 are the V16QI sources (predicates
;; exclude xmm0), operand 3 / operand 5 are SI registers in "a" / "d",
;; operand 6 is an immediate accepted by const_0_to_255_operand.
;; The split condition requires can_create_pseudo_p ().  The split code
;; derives three "is this result used" flags from the absence of
;; REG_UNUSED notes on operands[0], operands[1] and FLAGS_REG, and picks
;; the insn(s) to emit from them; the cconly form is emitted only when
;; the flags are used and neither register result is.
9753 (define_insn_and_split "sse4_2_pcmpestr"
9754 [(set (match_operand:SI 0 "register_operand" "=c,c")
9756 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9757 (match_operand:SI 3 "register_operand" "a,a")
9758 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9759 (match_operand:SI 5 "register_operand" "d,d")
9760 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9762 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9770 (set (reg:CC FLAGS_REG)
9779 && can_create_pseudo_p ()"
9784 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9785 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9786 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9789 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9790 operands[3], operands[4],
9791 operands[5], operands[6]));
9793 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9794 operands[3], operands[4],
9795 operands[5], operands[6]));
9796 if (flags && !(ecx || xmm0))
9797 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9798 operands[2], operands[3],
9799 operands[4], operands[5],
9803 [(set_attr "type" "sselog")
9804 (set_attr "prefix_data16" "1")
9805 (set_attr "prefix_extra" "1")
9806 (set_attr "length_immediate" "1")
9807 (set_attr "memory" "none,load")
9808 (set_attr "mode" "TI")])
;; pcmpestri: index-producing explicit-length string compare.
;; Writes operand 0 (SI, constraint "c") and FLAGS_REG.
;; Inputs: operand 1 (V16QI register), operand 2 (SI in "a"), operand 3
;; (V16QI register or memory -- two alternatives, reflected in the
;; "memory" attribute), operand 4 (SI in "d"), operand 5 (immediate,
;; const_0_to_255_operand).  Only operands 1, 3 and 5 appear in the
;; assembler template; the others are fixed by their constraints.
9810 (define_insn "sse4_2_pcmpestri"
9811 [(set (match_operand:SI 0 "register_operand" "=c,c")
9813 [(match_operand:V16QI 1 "register_operand" "x,x")
9814 (match_operand:SI 2 "register_operand" "a,a")
9815 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9816 (match_operand:SI 4 "register_operand" "d,d")
9817 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9819 (set (reg:CC FLAGS_REG)
9828 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9829 [(set_attr "type" "sselog")
9830 (set_attr "prefix_data16" "1")
9831 (set_attr "prefix_extra" "1")
9832 (set_attr "prefix" "maybe_vex")
9833 (set_attr "length_immediate" "1")
9834 (set_attr "memory" "none,load")
9835 (set_attr "mode" "TI")])
;; pcmpestrm: mask-producing explicit-length string compare.
;; Writes operand 0 (V16QI, constraint "Yz") and FLAGS_REG.
;; Inputs: operand 1 (V16QI register), operand 2 (SI in "a"), operand 3
;; (V16QI register or memory), operand 4 (SI in "d"), operand 5
;; (immediate, const_0_to_255_operand).  Only operands 1, 3 and 5 are
;; printed; the destination is fixed by its constraint.
9837 (define_insn "sse4_2_pcmpestrm"
9838 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9840 [(match_operand:V16QI 1 "register_operand" "x,x")
9841 (match_operand:SI 2 "register_operand" "a,a")
9842 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9843 (match_operand:SI 4 "register_operand" "d,d")
9844 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9846 (set (reg:CC FLAGS_REG)
9855 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9856 [(set_attr "type" "sselog")
9857 (set_attr "prefix_data16" "1")
9858 (set_attr "prefix_extra" "1")
9859 (set_attr "length_immediate" "1")
9860 (set_attr "prefix" "maybe_vex")
9861 (set_attr "memory" "none,load")
9862 (set_attr "mode" "TI")])
;; Flags-only variant of pcmpestr: the only set is FLAGS_REG; the
;; register results are modelled as clobbered scratches.
;; Four alternatives:
;;   1, 2: V16QI scratch in "Yz", no SI scratch ("X")  -> pcmpestrm
;;   3, 4: SI scratch in "c", no V16QI scratch ("X")   -> pcmpestri
;; Within each pair operand 4 is a register, then memory, which is what
;; the "memory" attribute "none,load,none,load" records.
9864 (define_insn "sse4_2_pcmpestr_cconly"
9865 [(set (reg:CC FLAGS_REG)
9867 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9868 (match_operand:SI 3 "register_operand" "a,a,a,a")
9869 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9870 (match_operand:SI 5 "register_operand" "d,d,d,d")
9871 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9873 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9874 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9877 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9878 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9879 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9880 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9881 [(set_attr "type" "sselog")
9882 (set_attr "prefix_data16" "1")
9883 (set_attr "prefix_extra" "1")
9884 (set_attr "length_immediate" "1")
9885 (set_attr "memory" "none,load,none,load")
9886 (set_attr "prefix" "maybe_vex")
9887 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern that is split into one of the concrete
;; pcmpistri / pcmpistrm / cconly insns.
;; Results: operand 0 (SI, constraint "c"), operand 1 (V16QI, constraint
;; "Yz" -- presumably xmm0, matching the local variable name below) and
;; FLAGS_REG.
;; Inputs: operand 2 / operand 3 are the V16QI sources (predicates
;; exclude xmm0) and operand 4 is an immediate accepted by
;; const_0_to_255_operand.
;; The split condition requires can_create_pseudo_p ().  The split code
;; derives three "is this result used" flags from the absence of
;; REG_UNUSED notes on operands[0], operands[1] and FLAGS_REG; the
;; cconly form is emitted only when the flags are used and neither
;; register result is.
9889 (define_insn_and_split "sse4_2_pcmpistr"
9890 [(set (match_operand:SI 0 "register_operand" "=c,c")
9892 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9893 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9894 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9896 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9902 (set (reg:CC FLAGS_REG)
9909 && can_create_pseudo_p ()"
9914 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9915 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9916 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9919 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9920 operands[3], operands[4]));
9922 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9923 operands[3], operands[4]));
9924 if (flags && !(ecx || xmm0))
9925 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9926 operands[2], operands[3],
9930 [(set_attr "type" "sselog")
9931 (set_attr "prefix_data16" "1")
9932 (set_attr "prefix_extra" "1")
9933 (set_attr "length_immediate" "1")
9934 (set_attr "memory" "none,load")
9935 (set_attr "mode" "TI")])
;; pcmpistri: index-producing implicit-length string compare.
;; Writes operand 0 (SI, constraint "c") and FLAGS_REG from operand 1
;; (V16QI register), operand 2 (V16QI register or memory) and the
;; immediate operand 3 (const_0_to_255_operand).  The destination is
;; fixed by its constraint and is not printed in the template.
9937 (define_insn "sse4_2_pcmpistri"
9938 [(set (match_operand:SI 0 "register_operand" "=c,c")
9940 [(match_operand:V16QI 1 "register_operand" "x,x")
9941 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9942 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9944 (set (reg:CC FLAGS_REG)
9951 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9952 [(set_attr "type" "sselog")
9953 (set_attr "prefix_data16" "1")
9954 (set_attr "prefix_extra" "1")
9955 (set_attr "length_immediate" "1")
9956 (set_attr "prefix" "maybe_vex")
9957 (set_attr "memory" "none,load")
9958 (set_attr "mode" "TI")])
;; pcmpistrm: mask-producing implicit-length string compare.
;; Writes operand 0 (V16QI, constraint "Yz") and FLAGS_REG from operand
;; 1 (V16QI register), operand 2 (V16QI register or memory) and the
;; immediate operand 3 (const_0_to_255_operand).  The destination is
;; fixed by its constraint and is not printed in the template.
9960 (define_insn "sse4_2_pcmpistrm"
9961 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9963 [(match_operand:V16QI 1 "register_operand" "x,x")
9964 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9965 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9967 (set (reg:CC FLAGS_REG)
9974 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9975 [(set_attr "type" "sselog")
9976 (set_attr "prefix_data16" "1")
9977 (set_attr "prefix_extra" "1")
9978 (set_attr "length_immediate" "1")
9979 (set_attr "prefix" "maybe_vex")
9980 (set_attr "memory" "none,load")
9981 (set_attr "mode" "TI")])
;; Flags-only variant of pcmpistr: the only set is FLAGS_REG; the
;; register results are modelled as clobbered scratches.
;; Four alternatives:
;;   1, 2: V16QI scratch in "Yz", no SI scratch ("X")  -> pcmpistrm
;;   3, 4: SI scratch in "c", no V16QI scratch ("X")   -> pcmpistri
;; Within each pair operand 3 is a register, then memory, which is what
;; the "memory" attribute "none,load,none,load" records.
9983 (define_insn "sse4_2_pcmpistr_cconly"
9984 [(set (reg:CC FLAGS_REG)
9986 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9987 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9988 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9990 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9991 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9994 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9995 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9996 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9997 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9998 [(set_attr "type" "sselog")
9999 (set_attr "prefix_data16" "1")
10000 (set_attr "prefix_extra" "1")
10001 (set_attr "length_immediate" "1")
10002 (set_attr "memory" "none,load,none,load")
10003 (set_attr "prefix" "maybe_vex")
10004 (set_attr "mode" "TI")])
10006 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10008 ;; XOP instructions
10010 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10012 ;; XOP parallel integer multiply/add instructions.
10013 ;; Note the XOP multiply/add instructions
10014 ;; a[i] = b[i] * c[i] + d[i];
10015 ;; do not allow the value being added to be a memory operand.
;; vpmacsww: V8HI multiply/accumulate.
;;   operands 1, 2: multiplicands; "%" on operand 1 marks the pair as
;;                  commutative, and only operand 2 may be memory ("xm")
;;   operand 3:     addend; constraint "x" keeps it in a register
;; NOTE(review): operand 3's predicate (nonimmediate_operand) is looser
;; than its "x" constraint -- confirm that this is intentional.
10016 (define_insn "xop_pmacsww"
10017 [(set (match_operand:V8HI 0 "register_operand" "=x")
10020 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10021 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10022 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10024 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10025 [(set_attr "type" "ssemuladd")
10026 (set_attr "mode" "TI")])
;; vpmacssww: V8HI multiply/accumulate (the "ss" mnemonic variant).
;; The product of operands 1 and 2 (commutative pair; only operand 2 may
;; be memory) is combined with the register addend operand 3.
10028 (define_insn "xop_pmacssww"
10029 [(set (match_operand:V8HI 0 "register_operand" "=x")
10031 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10032 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10033 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10035 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10036 [(set_attr "type" "ssemuladd")
10037 (set_attr "mode" "TI")])
;; vpmacsdd: V4SI multiply/accumulate.
;; Operands 1 and 2 are the commutative multiplicands (only operand 2
;; may be memory); operand 3 is the register addend.
10039 (define_insn "xop_pmacsdd"
10040 [(set (match_operand:V4SI 0 "register_operand" "=x")
10043 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10044 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10045 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10047 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10048 [(set_attr "type" "ssemuladd")
10049 (set_attr "mode" "TI")])
;; vpmacssdd: V4SI multiply/accumulate (the "ss" mnemonic variant).
;; The product of operands 1 and 2 (commutative pair; only operand 2 may
;; be memory) is combined with the register addend operand 3.
10051 (define_insn "xop_pmacssdd"
10052 [(set (match_operand:V4SI 0 "register_operand" "=x")
10054 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10055 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10056 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10058 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10059 [(set_attr "type" "ssemuladd")
10060 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI x V4SI -> V2DI multiply/accumulate.
;; Both V4SI multiplicands (operands 1 and 2) go through an element
;; selection whose first index is 1; operand 3 is the V2DI register
;; addend.  Only operand 2 may be memory.
10062 (define_insn "xop_pmacssdql"
10063 [(set (match_operand:V2DI 0 "register_operand" "=x")
10068 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10069 (parallel [(const_int 1)
10072 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10073 (parallel [(const_int 1)
10075 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10077 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10078 [(set_attr "type" "ssemuladd")
10079 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI x V4SI -> V2DI multiply/accumulate.
;; Both V4SI multiplicands (operands 1 and 2) go through an element
;; selection whose first index is 0; operand 3 is the V2DI register
;; addend.  Only operand 2 may be memory.
10081 (define_insn "xop_pmacssdqh"
10082 [(set (match_operand:V2DI 0 "register_operand" "=x")
10087 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10088 (parallel [(const_int 0)
10092 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10093 (parallel [(const_int 0)
10095 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10097 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10098 [(set_attr "type" "ssemuladd")
10099 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI x V4SI -> V2DI multiply/accumulate.
;; Both V4SI multiplicands (operands 1 and 2) go through an element
;; selection whose first index is 1; operand 3 is the V2DI register
;; addend.  Only operand 2 may be memory.
10101 (define_insn "xop_pmacsdql"
10102 [(set (match_operand:V2DI 0 "register_operand" "=x")
10107 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10108 (parallel [(const_int 1)
10112 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10113 (parallel [(const_int 1)
10115 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10117 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10118 [(set_attr "type" "ssemuladd")
10119 (set_attr "mode" "TI")])
10121 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10122 ;; fake it with a multiply/add. In general, we expect the define_split to
10123 ;; occur before register allocation, so we have to handle the corner case where
10124 ;; the target is the same as operands 1/2
;; V4SI x V4SI -> V2DI multiply over the elements selected by indices
;; (1, 3) of each source.  The destination is early-clobber ("=&x"), so
;; it cannot share a register with operand 1 or 2.
;; The split fires once reload_completed holds; its preparation code
;; sets operands[3] to the V2DI zero constant for use by the replacement
;; pattern.
10125 (define_insn_and_split "xop_mulv2div2di3_low"
10126 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10130 (match_operand:V4SI 1 "register_operand" "%x")
10131 (parallel [(const_int 1)
10135 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10136 (parallel [(const_int 1)
10137 (const_int 3)])))))]
10140 "&& reload_completed"
10141 [(set (match_dup 0)
10149 (parallel [(const_int 1)
10154 (parallel [(const_int 1)
10158 operands[3] = CONST0_RTX (V2DImode);
10160 [(set_attr "type" "ssemul")
10161 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI x V4SI -> V2DI multiply/accumulate.
;; Both V4SI multiplicands (operands 1 and 2) go through an element
;; selection whose first index is 0; operand 3 is the V2DI register
;; addend.  Only operand 2 may be memory.
10163 (define_insn "xop_pmacsdqh"
10164 [(set (match_operand:V2DI 0 "register_operand" "=x")
10169 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10170 (parallel [(const_int 0)
10174 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10175 (parallel [(const_int 0)
10177 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10179 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10180 [(set_attr "type" "ssemuladd")
10181 (set_attr "mode" "TI")])
10183 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10184 ;; fake it with a multiply/add. In general, we expect the define_split to
10185 ;; occur before register allocation, so we have to handle the corner case where
10186 ;; the target is the same as either operands[1] or operands[2]
;; V4SI x V4SI -> V2DI multiply over the elements selected by indices
;; (0, 2) of each source.  The destination is early-clobber ("=&x"), so
;; it cannot share a register with operand 1 or 2.
;; The split fires once reload_completed holds; its preparation code
;; sets operands[3] to the V2DI zero constant for use by the replacement
;; pattern.
10187 (define_insn_and_split "xop_mulv2div2di3_high"
10188 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10192 (match_operand:V4SI 1 "register_operand" "%x")
10193 (parallel [(const_int 0)
10197 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10198 (parallel [(const_int 0)
10199 (const_int 2)])))))]
10202 "&& reload_completed"
10203 [(set (match_dup 0)
10211 (parallel [(const_int 0)
10216 (parallel [(const_int 0)
10220 operands[3] = CONST0_RTX (V2DImode);
10222 [(set_attr "type" "ssemul")
10223 (set_attr "mode" "TI")])
10225 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI x V8HI -> V4SI multiply/accumulate.
;; Both V8HI multiplicands (operands 1 and 2) go through an element
;; selection whose first index is 1; operand 3 is the V4SI register
;; addend.  Only operand 2 may be memory.
10226 (define_insn "xop_pmacsswd"
10227 [(set (match_operand:V4SI 0 "register_operand" "=x")
10232 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10233 (parallel [(const_int 1)
10239 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10240 (parallel [(const_int 1)
10244 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10246 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10247 [(set_attr "type" "ssemuladd")
10248 (set_attr "mode" "TI")])
;; vpmacswd: V8HI x V8HI -> V4SI multiply/accumulate.
;; Both V8HI multiplicands (operands 1 and 2) go through an element
;; selection whose first index is 1; operand 3 is the V4SI register
;; addend.  Only operand 2 may be memory.
10250 (define_insn "xop_pmacswd"
10251 [(set (match_operand:V4SI 0 "register_operand" "=x")
10256 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10257 (parallel [(const_int 1)
10263 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10264 (parallel [(const_int 1)
10268 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10270 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10271 [(set_attr "type" "ssemuladd")
10272 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI x V8HI -> V4SI multiply/add/accumulate.
;; The pattern uses two groups of element selections on the V8HI
;; multiplicands -- one starting at index 0 and one starting at index 1
;; (the latter ending at index 7) -- and then the V4SI register addend
;; operand 3.  Only operand 2 may be memory.
10274 (define_insn "xop_pmadcsswd"
10275 [(set (match_operand:V4SI 0 "register_operand" "=x")
10281 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10282 (parallel [(const_int 0)
10288 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10289 (parallel [(const_int 0)
10297 (parallel [(const_int 1)
10304 (parallel [(const_int 1)
10307 (const_int 7)])))))
10308 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10310 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10311 [(set_attr "type" "ssemuladd")
10312 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI x V8HI -> V4SI multiply/add/accumulate.
;; The pattern uses two groups of element selections on the V8HI
;; multiplicands -- one starting at index 0 and one starting at index 1
;; (the latter ending at index 7) -- and then the V4SI register addend
;; operand 3.  Only operand 2 may be memory.
10314 (define_insn "xop_pmadcswd"
10315 [(set (match_operand:V4SI 0 "register_operand" "=x")
10321 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10322 (parallel [(const_int 0)
10328 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10329 (parallel [(const_int 0)
10337 (parallel [(const_int 1)
10344 (parallel [(const_int 1)
10347 (const_int 7)])))))
10348 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10350 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10351 [(set_attr "type" "ssemuladd")
10352 (set_attr "mode" "TI")])
10354 ;; XOP parallel XMM conditional moves
;; vpcmov for every 16-byte SSEMODE vector mode, expressed as
;; if_then_else (operand 3, operand 1, operand 2).
;; Two alternatives let either the selector (operand 3) or operand 2 be
;; a memory operand, never both: ("x","x","xm") and ("m","x","x").
10355 (define_insn "xop_pcmov_<mode>"
10356 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10357 (if_then_else:SSEMODE
10358 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10359 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10360 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10362 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10363 [(set_attr "type" "sse4arg")])
;; vpcmov for every 32-byte AVX256MODE vector mode, expressed as
;; if_then_else (operand 3, operand 1, operand 2).
;; Two alternatives let either the selector (operand 3) or operand 2 be
;; a memory operand, never both: ("x","x","xm") and ("m","x","x").
10365 (define_insn "xop_pcmov_<mode>256"
10366 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10367 (if_then_else:AVX256MODE
10368 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10369 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10370 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10372 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10373 [(set_attr "type" "sse4arg")])
10375 ;; XOP horizontal add/subtract instructions
;; vphaddbw: single-source horizontal add, V16QI -> V8HI.
;; Operand 1 (register or memory) is read through two element
;; selections, one starting at index 0 and one starting at index 1
;; (ending at 15), whose results are combined into operand 0.
10376 (define_insn "xop_phaddbw"
10377 [(set (match_operand:V8HI 0 "register_operand" "=x")
10381 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10382 (parallel [(const_int 0)
10393 (parallel [(const_int 1)
10400 (const_int 15)])))))]
10402 "vphaddbw\t{%1, %0|%0, %1}"
10403 [(set_attr "type" "sseiadd1")])
;; vphaddbd: single-source horizontal add, V16QI -> V4SI.
;; Operand 1 (register or memory) is read through four element
;; selections starting at indices 0, 1, 2 and 3 respectively; their
;; results are combined into operand 0.
10405 (define_insn "xop_phaddbd"
10406 [(set (match_operand:V4SI 0 "register_operand" "=x")
10411 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10412 (parallel [(const_int 0)
10419 (parallel [(const_int 1)
10422 (const_int 13)]))))
10427 (parallel [(const_int 2)
10434 (parallel [(const_int 3)
10437 (const_int 15)]))))))]
10439 "vphaddbd\t{%1, %0|%0, %1}"
10440 [(set_attr "type" "sseiadd1")])
;; vphaddbq: single-source horizontal add, V16QI -> V2DI.
;; Operand 1 (register or memory) is read through eight element
;; selections (starting indices 0, 1, 2, 3 and 8, 9, 10, 11); their
;; results are combined into operand 0.
10442 (define_insn "xop_phaddbq"
10443 [(set (match_operand:V2DI 0 "register_operand" "=x")
10449 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10450 (parallel [(const_int 0)
10455 (parallel [(const_int 1)
10461 (parallel [(const_int 2)
10466 (parallel [(const_int 3)
10467 (const_int 7)])))))
10473 (parallel [(const_int 8)
10478 (parallel [(const_int 9)
10479 (const_int 13)]))))
10484 (parallel [(const_int 10)
10489 (parallel [(const_int 11)
10490 (const_int 15)])))))))]
10492 "vphaddbq\t{%1, %0|%0, %1}"
10493 [(set_attr "type" "sseiadd1")])
;; vphaddwd: single-source horizontal add, V8HI -> V4SI.
;; Operand 1 (register or memory) is read through two element
;; selections, one starting at index 0 and one starting at index 1
;; (ending at 7), whose results are combined into operand 0.
10495 (define_insn "xop_phaddwd"
10496 [(set (match_operand:V4SI 0 "register_operand" "=x")
10500 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10501 (parallel [(const_int 0)
10508 (parallel [(const_int 1)
10511 (const_int 7)])))))]
10513 "vphaddwd\t{%1, %0|%0, %1}"
10514 [(set_attr "type" "sseiadd1")])
;; vphaddwq: single-source horizontal add, V8HI -> V2DI.
;; Operand 1 (register or memory) is read through four element
;; selections starting at indices 0, 1, 2 and 3 respectively; their
;; results are combined into operand 0.
10516 (define_insn "xop_phaddwq"
10517 [(set (match_operand:V2DI 0 "register_operand" "=x")
10522 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10523 (parallel [(const_int 0)
10528 (parallel [(const_int 1)
10534 (parallel [(const_int 2)
10539 (parallel [(const_int 3)
10540 (const_int 7)]))))))]
10542 "vphaddwq\t{%1, %0|%0, %1}"
10543 [(set_attr "type" "sseiadd1")])
;; vphadddq: single-source horizontal add, V4SI -> V2DI.
;; Operand 1 (register or memory) is read through two element
;; selections, one starting at index 0 and one covering indices (1, 3);
;; their results are combined into operand 0.
10545 (define_insn "xop_phadddq"
10546 [(set (match_operand:V2DI 0 "register_operand" "=x")
10550 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10551 (parallel [(const_int 0)
10556 (parallel [(const_int 1)
10557 (const_int 3)])))))]
10559 "vphadddq\t{%1, %0|%0, %1}"
10560 [(set_attr "type" "sseiadd1")])
;; vphaddubw: the "u" mnemonic variant of the V16QI -> V8HI horizontal
;; add.  Operand 1 (register or memory) is read through two element
;; selections, one starting at index 0 and one starting at index 1
;; (ending at 15), whose results are combined into operand 0.
10562 (define_insn "xop_phaddubw"
10563 [(set (match_operand:V8HI 0 "register_operand" "=x")
10567 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10568 (parallel [(const_int 0)
10579 (parallel [(const_int 1)
10586 (const_int 15)])))))]
10588 "vphaddubw\t{%1, %0|%0, %1}"
10589 [(set_attr "type" "sseiadd1")])
;; vphaddubd: the "u" mnemonic variant of the V16QI -> V4SI horizontal
;; add.  Operand 1 (register or memory) is read through four element
;; selections starting at indices 0, 1, 2 and 3 respectively; their
;; results are combined into operand 0.
10591 (define_insn "xop_phaddubd"
10592 [(set (match_operand:V4SI 0 "register_operand" "=x")
10597 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10598 (parallel [(const_int 0)
10605 (parallel [(const_int 1)
10608 (const_int 13)]))))
10613 (parallel [(const_int 2)
10620 (parallel [(const_int 3)
10623 (const_int 15)]))))))]
10625 "vphaddubd\t{%1, %0|%0, %1}"
10626 [(set_attr "type" "sseiadd1")])
;; vphaddubq: the "u" mnemonic variant of the V16QI -> V2DI horizontal
;; add.  Operand 1 (register or memory) is read through eight element
;; selections (starting indices 0, 1, 2, 3 and 8, 9, 10, 11); their
;; results are combined into operand 0.
10628 (define_insn "xop_phaddubq"
10629 [(set (match_operand:V2DI 0 "register_operand" "=x")
10635 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10636 (parallel [(const_int 0)
10641 (parallel [(const_int 1)
10647 (parallel [(const_int 2)
10652 (parallel [(const_int 3)
10653 (const_int 7)])))))
10659 (parallel [(const_int 8)
10664 (parallel [(const_int 9)
10665 (const_int 13)]))))
10670 (parallel [(const_int 10)
10675 (parallel [(const_int 11)
10676 (const_int 15)])))))))]
10678 "vphaddubq\t{%1, %0|%0, %1}"
10679 [(set_attr "type" "sseiadd1")])
;; vphadduwd: the "u" mnemonic variant of the V8HI -> V4SI horizontal
;; add.  Operand 1 (register or memory) is read through two element
;; selections, one starting at index 0 and one starting at index 1
;; (ending at 7), whose results are combined into operand 0.
10681 (define_insn "xop_phadduwd"
10682 [(set (match_operand:V4SI 0 "register_operand" "=x")
10686 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10687 (parallel [(const_int 0)
10694 (parallel [(const_int 1)
10697 (const_int 7)])))))]
10699 "vphadduwd\t{%1, %0|%0, %1}"
10700 [(set_attr "type" "sseiadd1")])
;; vphadduwq: the "u" mnemonic variant of the V8HI -> V2DI horizontal
;; add.  Operand 1 (register or memory) is read through four element
;; selections starting at indices 0, 1, 2 and 3 respectively; their
;; results are combined into operand 0.
10702 (define_insn "xop_phadduwq"
10703 [(set (match_operand:V2DI 0 "register_operand" "=x")
10708 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10709 (parallel [(const_int 0)
10714 (parallel [(const_int 1)
10720 (parallel [(const_int 2)
10725 (parallel [(const_int 3)
10726 (const_int 7)]))))))]
10728 "vphadduwq\t{%1, %0|%0, %1}"
10729 [(set_attr "type" "sseiadd1")])
;; vphaddudq: the "u" mnemonic variant of the V4SI -> V2DI horizontal
;; add.  Operand 1 (register or memory) is read through two element
;; selections, one starting at index 0 and one covering indices (1, 3);
;; their results are combined into operand 0.
10731 (define_insn "xop_phaddudq"
10732 [(set (match_operand:V2DI 0 "register_operand" "=x")
10736 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10737 (parallel [(const_int 0)
10742 (parallel [(const_int 1)
10743 (const_int 3)])))))]
10745 "vphaddudq\t{%1, %0|%0, %1}"
10746 [(set_attr "type" "sseiadd1")])
;; vphsubbw: single-source horizontal subtract, V16QI -> V8HI.
;; Operand 1 (register or memory) is read through two element
;; selections, one starting at index 0 and one starting at index 1
;; (ending at 15), whose results are combined into operand 0.
10748 (define_insn "xop_phsubbw"
10749 [(set (match_operand:V8HI 0 "register_operand" "=x")
10753 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10754 (parallel [(const_int 0)
10765 (parallel [(const_int 1)
10772 (const_int 15)])))))]
10774 "vphsubbw\t{%1, %0|%0, %1}"
10775 [(set_attr "type" "sseiadd1")])
;; vphsubwd: single-source horizontal subtract, V8HI -> V4SI.
;; Operand 1 (register or memory) is read through two element
;; selections, one starting at index 0 and one starting at index 1
;; (ending at 7), whose results are combined into operand 0.
10777 (define_insn "xop_phsubwd"
10778 [(set (match_operand:V4SI 0 "register_operand" "=x")
10782 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10783 (parallel [(const_int 0)
10790 (parallel [(const_int 1)
10793 (const_int 7)])))))]
10795 "vphsubwd\t{%1, %0|%0, %1}"
10796 [(set_attr "type" "sseiadd1")])
;; vphsubdq: single-source horizontal subtract, V4SI -> V2DI.
;; Operand 1 (register or memory) is read through two element
;; selections, one starting at index 0 and one covering indices (1, 3);
;; their results are combined into operand 0.
10798 (define_insn "xop_phsubdq"
10799 [(set (match_operand:V2DI 0 "register_operand" "=x")
10803 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10804 (parallel [(const_int 0)
10809 (parallel [(const_int 1)
10810 (const_int 3)])))))]
10812 "vphsubdq\t{%1, %0|%0, %1}"
10813 [(set_attr "type" "sseiadd1")])
10815 ;; XOP permute instructions
;; vpperm, modelled as UNSPEC_XOP_PERMUTE over three V16QI operands.
;; Operand 1 must be a register; operands 2 and 3 may each be memory,
;; but the insn condition rejects the case where both are memory at
;; once, and the two alternatives ("x","x","xm" / "x","m","x") mirror
;; that restriction.
10816 (define_insn "xop_pperm"
10817 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10819 [(match_operand:V16QI 1 "register_operand" "x,x")
10820 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10821 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10822 UNSPEC_XOP_PERMUTE))]
10823 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10824 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10825 [(set_attr "type" "sse4arg")
10826 (set_attr "mode" "TI")])
10828 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors (operands 1 and 2) into one V4SI result using
;; vpperm.  Operand 3 is the V16QI selector; it appears in the RTL only
;; as a (use ...), so the pattern does not describe how it steers the
;; result.  At most one of operands 2 and 3 may be memory.
10829 (define_insn "xop_pperm_pack_v2di_v4si"
10830 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10833 (match_operand:V2DI 1 "register_operand" "x,x"))
10835 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10836 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10837 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10838 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10839 [(set_attr "type" "sse4arg")
10840 (set_attr "mode" "TI")])
;; Pack two V4SI vectors (operands 1 and 2) into one V8HI result using
;; vpperm.  Operand 3 is the V16QI selector; it appears in the RTL only
;; as a (use ...), so the pattern does not describe how it steers the
;; result.  At most one of operands 2 and 3 may be memory.
10842 (define_insn "xop_pperm_pack_v4si_v8hi"
10843 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10846 (match_operand:V4SI 1 "register_operand" "x,x"))
10848 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10849 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10850 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10851 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10852 [(set_attr "type" "sse4arg")
10853 (set_attr "mode" "TI")])
;; Pack two V8HI vectors (operands 1 and 2) into one V16QI result using
;; vpperm.  Operand 3 is the V16QI selector; it appears in the RTL only
;; as a (use ...), so the pattern does not describe how it steers the
;; result.  At most one of operands 2 and 3 may be memory.
10855 (define_insn "xop_pperm_pack_v8hi_v16qi"
10856 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10859 (match_operand:V8HI 1 "register_operand" "x,x"))
10861 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10862 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10863 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10864 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10865 [(set_attr "type" "sse4arg")
10866 (set_attr "mode" "TI")])
10868 ;; XOP packed rotate instructions
;; Expander for a vector rotate-left whose count (operand 2) is an SI
;; general operand.
;; When the count is NOT an immediate accepted by
;; const_0_to_<sserotatemax>_operand, the C code builds a per-element
;; count vector: the count is converted to the element mode if its mode
;; differs, replicated into all <ssescalarnum> slots of a PARALLEL,
;; materialised with vec_init, and passed to xop_vrotl<mode>3.
10869 (define_expand "rotl<mode>3"
10870 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10871 (rotate:SSEMODE1248
10872 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10873 (match_operand:SI 2 "general_operand")))]
10876 /* If we were given a scalar, convert it to parallel */
10877 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10879 rtvec vs = rtvec_alloc (<ssescalarnum>);
10880 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10881 rtx reg = gen_reg_rtx (<MODE>mode);
10882 rtx op2 = operands[2];
10885 if (GET_MODE (op2) != <ssescalarmode>mode)
10887 op2 = gen_reg_rtx (<ssescalarmode>mode);
10888 convert_move (op2, operands[2], false);
10891 for (i = 0; i < <ssescalarnum>; i++)
10892 RTVEC_ELT (vs, i) = op2;
10894 emit_insn (gen_vec_init<mode> (reg, par));
10895 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right whose count (operand 2) is an SI
;; general operand.
;; When the count is NOT an immediate accepted by
;; const_0_to_<sserotatemax>_operand, the C code replicates the count
;; into a vector (converting it to the element mode first if needed),
;; negates that vector, and emits xop_vrotl<mode>3 with the negated
;; counts -- i.e. the right rotate is expressed through the variable
;; rotate-left pattern.
10900 (define_expand "rotr<mode>3"
10901 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10902 (rotatert:SSEMODE1248
10903 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10904 (match_operand:SI 2 "general_operand")))]
10907 /* If we were given a scalar, convert it to parallel */
10908 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10910 rtvec vs = rtvec_alloc (<ssescalarnum>);
10911 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10912 rtx neg = gen_reg_rtx (<MODE>mode);
10913 rtx reg = gen_reg_rtx (<MODE>mode);
10914 rtx op2 = operands[2];
10917 if (GET_MODE (op2) != <ssescalarmode>mode)
10919 op2 = gen_reg_rtx (<ssescalarmode>mode);
10920 convert_move (op2, operands[2], false);
10923 for (i = 0; i < <ssescalarnum>; i++)
10924 RTVEC_ELT (vs, i) = op2;
10926 emit_insn (gen_vec_init<mode> (reg, par));
10927 emit_insn (gen_neg<mode>2 (neg, reg));
10928 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left by an immediate count (vprot<ssevecsize>).
;;   operand 1: vector source, register or memory
;;   operand 2: immediate accepted by const_0_to_<sserotatemax>_operand,
;;              printed unchanged as the rotate count
10933 (define_insn "xop_rotl<mode>3"
10934 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10935 (rotate:SSEMODE1248
10936 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10937 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10939 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10940 [(set_attr "type" "sseishft")
10941 (set_attr "length_immediate" "1")
10942 (set_attr "mode" "TI")])
;; Rotate-right by an immediate count, emitted as a rotate-left
;; (vprot<ssevecsize>) by the complementary count.
;;   operand 1: vector source, register or memory
;;   operand 2: immediate accepted by const_0_to_<sserotatemax>_operand
;;   operand 3: computed in the output code, not matched by the pattern
;; The complementary count is (element width in bits) - operand 2.  The
;; width is taken from the element mode via GET_MODE_BITSIZE;
;; <ssescalarnum> is the number of elements, so <ssescalarnum> * 8 only
;; equals the element width for V4SI and is wrong for the other modes.
10944 (define_insn "xop_rotr<mode>3"
10945 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10946 (rotatert:SSEMODE1248
10947 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10948 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10951 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10952 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
10954 [(set_attr "type" "sseishft")
10955 (set_attr "length_immediate" "1")
10956 (set_attr "mode" "TI")])
;; Per-element variable rotate-right: all three operands are vector
;; registers of the same SSEMODE1248 mode.  Implemented by negating the
;; count vector (operand 2) into a fresh register and emitting
;; xop_vrotl<mode>3 with the negated counts.
10958 (define_expand "vrotr<mode>3"
10959 [(match_operand:SSEMODE1248 0 "register_operand" "")
10960 (match_operand:SSEMODE1248 1 "register_operand" "")
10961 (match_operand:SSEMODE1248 2 "register_operand" "")]
10964 rtx reg = gen_reg_rtx (<MODE>mode);
10965 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10966 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate-left: all three operands are vector
;; registers of the same SSEMODE1248 mode.  Forwards them unchanged to
;; xop_vrotl<mode>3.
10970 (define_expand "vrotl<mode>3"
10971 [(match_operand:SSEMODE1248 0 "register_operand" "")
10972 (match_operand:SSEMODE1248 1 "register_operand" "")
10973 (match_operand:SSEMODE1248 2 "register_operand" "")]
10976 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element rotate (vprot<ssevecsize>) with a vector count.
;; The RTL is an if_then_else on the count (operand 2) that selects
;; between a rotate of operand 1 and a rotatert by the negated count.
;; Either operand 1 or operand 2 may be memory, but the insn condition
;; rejects both being memory; the two alternatives ("xm","x" / "x","m")
;; mirror that restriction.
10980 (define_insn "xop_vrotl<mode>3"
10981 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10982 (if_then_else:SSEMODE1248
10984 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
10986 (rotate:SSEMODE1248
10987 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
10989 (rotatert:SSEMODE1248
10991 (neg:SSEMODE1248 (match_dup 2)))))]
10992 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10993 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10994 [(set_attr "type" "sseishft")
10995 (set_attr "prefix_data16" "0")
10996 (set_attr "prefix_extra" "2")
10997 (set_attr "mode" "TI")])
10999 ;; XOP packed shift instructions.
11000 ;; FIXME: add V2DI back in
;; Expander for a logical shift right with a vector count (operand 2).
;; The count vector is negated with neg<mode>2 and handed to
;; xop_lshl<mode>3.
11001 (define_expand "vlshr<mode>3"
11002 [(match_operand:SSEMODE124 0 "register_operand" "")
11003 (match_operand:SSEMODE124 1 "register_operand" "")
11004 (match_operand:SSEMODE124 2 "register_operand" "")]
11007 rtx neg = gen_reg_rtx (<MODE>mode);
11008 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11009 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for an arithmetic shift right with a vector count (operand 2).
;; The count vector is negated with neg<mode>2 and handed to
;; xop_ashl<mode>3.
11013 (define_expand "vashr<mode>3"
11014 [(match_operand:SSEMODE124 0 "register_operand" "")
11015 (match_operand:SSEMODE124 1 "register_operand" "")
11016 (match_operand:SSEMODE124 2 "register_operand" "")]
11019 rtx neg = gen_reg_rtx (<MODE>mode);
11020 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11021 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a shift left with a vector count (operand 2).  The operands
;; are passed unchanged to xop_ashl<mode>3.
11025 (define_expand "vashl<mode>3"
11026 [(match_operand:SSEMODE124 0 "register_operand" "")
11027 (match_operand:SSEMODE124 1 "register_operand" "")
11028 (match_operand:SSEMODE124 2 "register_operand" "")]
11031 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; vpsha with a vector count in operand 2.  The RTL is an if_then_else whose
;; arms are an ashift of operand 1 and an ashiftrt by the negated operand 2.
;; Requires TARGET_XOP; operands 1 and 2 may not both be memory.
11035 (define_insn "xop_ashl<mode>3"
11036 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11037 (if_then_else:SSEMODE1248
11039 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11041 (ashift:SSEMODE1248
11042 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11044 (ashiftrt:SSEMODE1248
11046 (neg:SSEMODE1248 (match_dup 2)))))]
11047 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11048 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11049 [(set_attr "type" "sseishft")
11050 (set_attr "prefix_data16" "0")
11051 (set_attr "prefix_extra" "2")
11052 (set_attr "mode" "TI")])
;; vpshl with a vector count in operand 2.  The RTL is an if_then_else whose
;; arms are an ashift of operand 1 and an lshiftrt by the negated operand 2.
;; Requires TARGET_XOP; operands 1 and 2 may not both be memory.
11054 (define_insn "xop_lshl<mode>3"
11055 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11056 (if_then_else:SSEMODE1248
11058 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11060 (ashift:SSEMODE1248
11061 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11063 (lshiftrt:SSEMODE1248
11065 (neg:SSEMODE1248 (match_dup 2)))))]
11066 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11067 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11068 [(set_attr "type" "sseishft")
11069 (set_attr "prefix_data16" "0")
11070 (set_attr "prefix_extra" "2")
11071 (set_attr "mode" "TI")])
11073 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Expander for a V16QI shift left by the scalar SImode operand 2.
;; Operand 2 is placed in all 16 slots of a PARALLEL, a V16QI register is
;; initialized from it with vec_initv16qi, and xop_ashlv16qi3 is emitted
;; with that register as the count vector.
11074 (define_expand "ashlv16qi3"
11075 [(match_operand:V16QI 0 "register_operand" "")
11076 (match_operand:V16QI 1 "register_operand" "")
11077 (match_operand:SI 2 "nonmemory_operand" "")]
11080 rtvec vs = rtvec_alloc (16);
11081 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11082 rtx reg = gen_reg_rtx (V16QImode);
11084 for (i = 0; i < 16; i++)
11085 RTVEC_ELT (vs, i) = operands[2];
11087 emit_insn (gen_vec_initv16qi (reg, par));
11088 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI shift by the scalar SImode operand 2 through
;; xop_lshlv16qi3.  Operand 2 is placed in all 16 slots of a PARALLEL and a
;; V16QI count register is initialized from it with vec_initv16qi.
11092 (define_expand "lshlv16qi3"
11093 [(match_operand:V16QI 0 "register_operand" "")
11094 (match_operand:V16QI 1 "register_operand" "")
11095 (match_operand:SI 2 "nonmemory_operand" "")]
11098 rtvec vs = rtvec_alloc (16);
11099 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11100 rtx reg = gen_reg_rtx (V16QImode);
11102 for (i = 0; i < 16; i++)
11103 RTVEC_ELT (vs, i) = operands[2];
11105 emit_insn (gen_vec_initv16qi (reg, par));
11106 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI arithmetic shift right by the scalar operand 2,
;; implemented with xop_ashlv16qi3 and a negated count.  For a CONST_INT
;; count the negated constant is what gets placed in all 16 slots, and the
;; resulting vector is used directly.  Otherwise the vector built by
;; vec_initv16qi is negated with negv16qi2 before the shift is emitted.
11110 (define_expand "ashrv16qi3"
11111 [(match_operand:V16QI 0 "register_operand" "")
11112 (match_operand:V16QI 1 "register_operand" "")
11113 (match_operand:SI 2 "nonmemory_operand" "")]
11116 rtvec vs = rtvec_alloc (16);
11117 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11118 rtx reg = gen_reg_rtx (V16QImode);
11120 rtx ele = ((CONST_INT_P (operands[2]))
11121 ? GEN_INT (- INTVAL (operands[2]))
11124 for (i = 0; i < 16; i++)
11125 RTVEC_ELT (vs, i) = ele;
11127 emit_insn (gen_vec_initv16qi (reg, par));
11129 if (!CONST_INT_P (operands[2]))
11131 rtx neg = gen_reg_rtx (V16QImode);
11132 emit_insn (gen_negv16qi2 (neg, reg));
11133 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11136 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V2DI arithmetic shift right by the scalar operand 2,
;; implemented with xop_ashlv2di3 and a negated count.  The negated count
;; "ele" is a folded constant for a CONST_INT, otherwise the result of
;; negdi2; when operand 2 is not already DImode it is first converted with
;; convert_move.  "ele" is then placed in both lanes of the count vector.
11141 (define_expand "ashrv2di3"
11142 [(match_operand:V2DI 0 "register_operand" "")
11143 (match_operand:V2DI 1 "register_operand" "")
11144 (match_operand:DI 2 "nonmemory_operand" "")]
11147 rtvec vs = rtvec_alloc (2);
11148 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11149 rtx reg = gen_reg_rtx (V2DImode);
11152 if (CONST_INT_P (operands[2]))
11153 ele = GEN_INT (- INTVAL (operands[2]));
11154 else if (GET_MODE (operands[2]) != DImode)
11156 rtx move = gen_reg_rtx (DImode);
11157 ele = gen_reg_rtx (DImode);
11158 convert_move (move, operands[2], false);
11159 emit_insn (gen_negdi2 (ele, move));
11163 ele = gen_reg_rtx (DImode);
11164 emit_insn (gen_negdi2 (ele, operands[2]));
11167 RTVEC_ELT (vs, 0) = ele;
11168 RTVEC_ELT (vs, 1) = ele;
11169 emit_insn (gen_vec_initv2di (reg, par));
11170 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11174 ;; XOP FRCZ support
;; vfrcz over the FMAMODE modes: operand 0 is a register destination and
;; operand 1 a register or memory source.
11175 (define_insn "xop_frcz<mode>2"
11176 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11178 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11181 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11182 [(set_attr "type" "ssecvt1")
11183 (set_attr "mode" "<MODE>")])
;; Expander for the vec_merge form of vfrcz.  Its preparation code sets
;; operands[3] to the all-zero constant of <MODE>mode.
11186 (define_expand "xop_vmfrcz<mode>2"
11187 [(set (match_operand:SSEMODEF2P 0 "register_operand")
11188 (vec_merge:SSEMODEF2P
11190 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")]
11196 operands[3] = CONST0_RTX (<MODE>mode);
;; Matcher for the vec_merge form of vfrcz; operand 2 must satisfy
;; const0_operand.  Printed with the scalar mode suffix
;; (<ssescalarmodesuffix>).
11199 (define_insn "*xop_vmfrcz_<mode>"
11200 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11201 (vec_merge:SSEMODEF2P
11203 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11205 (match_operand:SSEMODEF2P 2 "const0_operand")
11208 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11209 [(set_attr "type" "ssecvt1")
11210 (set_attr "mode" "<MODE>")])
;; vpcom comparison of operands 2 and 3.  Operand 1 is the comparison
;; operator itself, accepted by ix86_comparison_int_operator and printed
;; into the mnemonic through %Y1.
11212 (define_insn "xop_maskcmp<mode>3"
11213 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11214 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11215 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11216 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11218 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11219 [(set_attr "type" "sse4arg")
11220 (set_attr "prefix_data16" "0")
11221 (set_attr "prefix_rep" "0")
11222 (set_attr "prefix_extra" "2")
11223 (set_attr "length_immediate" "1")
11224 (set_attr "mode" "TI")])
;; Unsigned counterpart of the vpcom comparison: operand 1 must satisfy
;; ix86_comparison_uns_operator and the mnemonic gets a "u" before the
;; element-size suffix.
11226 (define_insn "xop_maskcmp_uns<mode>3"
11227 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11228 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11229 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11230 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11232 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11233 [(set_attr "type" "ssecmp")
11234 (set_attr "prefix_data16" "0")
11235 (set_attr "prefix_rep" "0")
11236 (set_attr "prefix_extra" "2")
11237 (set_attr "length_immediate" "1")
11238 (set_attr "mode" "TI")])
11240 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11241 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11242 ;; the exact instruction generated for the intrinsic.
;; Same unsigned vpcom output as xop_maskcmp_uns<mode>3, but the comparison
;; is wrapped in UNSPEC_XOP_UNSIGNED_CMP instead of being the bare
;; comparison operator.
11243 (define_insn "xop_maskcmp_uns2<mode>3"
11244 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11245 (unspec:SSEMODE1248
11246 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11247 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11248 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11249 UNSPEC_XOP_UNSIGNED_CMP))]
11251 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11252 [(set_attr "type" "ssecmp")
11253 (set_attr "prefix_data16" "0")
11254 (set_attr "prefix_extra" "2")
11255 (set_attr "length_immediate" "1")
11256 (set_attr "mode" "TI")])
11258 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11259 ;; being added here to be complete.
;; Emits vpcomtrue when the constant operand 3 is nonzero and vpcomfalse
;; when it is zero.  Operands 1 and 2 are the two sources; the whole
;; operation is an UNSPEC_XOP_TRUEFALSE.
11260 (define_insn "xop_pcom_tf<mode>3"
11261 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11262 (unspec:SSEMODE1248
11263 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11264 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11265 (match_operand:SI 3 "const_int_operand" "n")]
11266 UNSPEC_XOP_TRUEFALSE))]
11269 return ((INTVAL (operands[3]) != 0)
11270 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11271 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11273 [(set_attr "type" "ssecmp")
11274 (set_attr "prefix_data16" "0")
11275 (set_attr "prefix_extra" "2")
11276 (set_attr "length_immediate" "1")
11277 (set_attr "mode" "TI")])
;; vpermil2 with two vector sources (operands 1 and 2), a selector vector in
;; <avxpermvecmode> (operand 3) and an immediate in the range 0..3
;; (operand 4).
11279 (define_insn "xop_vpermil2<mode>3"
11280 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11282 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11283 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11284 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11285 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11288 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11289 [(set_attr "type" "sse4arg")
11290 (set_attr "length_immediate" "1")
11291 (set_attr "mode" "<MODE>")])
11293 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded, three-operand vaesenc; enabled when both TARGET_AES and
;; TARGET_AVX hold.  The destination is not tied to a source.
11294 (define_insn "*avx_aesenc"
11295 [(set (match_operand:V2DI 0 "register_operand" "=x")
11296 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11297 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11299 "TARGET_AES && TARGET_AVX"
11300 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11301 [(set_attr "type" "sselog1")
11302 (set_attr "prefix_extra" "1")
11303 (set_attr "prefix" "vex")
11304 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination (constraint
;; "0") and operand 2 is a register or memory source.
11306 (define_insn "aesenc"
11307 [(set (match_operand:V2DI 0 "register_operand" "=x")
11308 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11309 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11312 "aesenc\t{%2, %0|%0, %2}"
11313 [(set_attr "type" "sselog1")
11314 (set_attr "prefix_extra" "1")
11315 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vaesenclast (UNSPEC_AESENCLAST); enabled when
;; both TARGET_AES and TARGET_AVX hold.
11317 (define_insn "*avx_aesenclast"
11318 [(set (match_operand:V2DI 0 "register_operand" "=x")
11319 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11320 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11321 UNSPEC_AESENCLAST))]
11322 "TARGET_AES && TARGET_AVX"
11323 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11324 [(set_attr "type" "sselog1")
11325 (set_attr "prefix_extra" "1")
11326 (set_attr "prefix" "vex")
11327 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination and operand 2 is a register or memory source.
11329 (define_insn "aesenclast"
11330 [(set (match_operand:V2DI 0 "register_operand" "=x")
11331 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11332 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11333 UNSPEC_AESENCLAST))]
11335 "aesenclast\t{%2, %0|%0, %2}"
11336 [(set_attr "type" "sselog1")
11337 (set_attr "prefix_extra" "1")
11338 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vaesdec; enabled when both TARGET_AES and
;; TARGET_AVX hold.
11340 (define_insn "*avx_aesdec"
11341 [(set (match_operand:V2DI 0 "register_operand" "=x")
11342 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11343 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11345 "TARGET_AES && TARGET_AVX"
11346 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11347 [(set_attr "type" "sselog1")
11348 (set_attr "prefix_extra" "1")
11349 (set_attr "prefix" "vex")
11350 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination and operand 2
;; is a register or memory source.
11352 (define_insn "aesdec"
11353 [(set (match_operand:V2DI 0 "register_operand" "=x")
11354 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11355 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11358 "aesdec\t{%2, %0|%0, %2}"
11359 [(set_attr "type" "sselog1")
11360 (set_attr "prefix_extra" "1")
11361 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vaesdeclast (UNSPEC_AESDECLAST); enabled when
;; both TARGET_AES and TARGET_AVX hold.
11363 (define_insn "*avx_aesdeclast"
11364 [(set (match_operand:V2DI 0 "register_operand" "=x")
11365 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11366 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11367 UNSPEC_AESDECLAST))]
11368 "TARGET_AES && TARGET_AVX"
11369 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11370 [(set_attr "type" "sselog1")
11371 (set_attr "prefix_extra" "1")
11372 (set_attr "prefix" "vex")
11373 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination and operand 2 is a register or memory source.
11375 (define_insn "aesdeclast"
11376 [(set (match_operand:V2DI 0 "register_operand" "=x")
11377 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11378 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11379 UNSPEC_AESDECLAST))]
11381 "aesdeclast\t{%2, %0|%0, %2}"
11382 [(set_attr "type" "sselog1")
11383 (set_attr "prefix_extra" "1")
11384 (set_attr "mode" "TI")])
;; aesimc with a single register-or-memory source.  The template starts
;; with %v and the prefix attribute is maybe_vex, so one pattern covers both
;; encodings.
11386 (define_insn "aesimc"
11387 [(set (match_operand:V2DI 0 "register_operand" "=x")
11388 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11391 "%vaesimc\t{%1, %0|%0, %1}"
11392 [(set_attr "type" "sselog1")
11393 (set_attr "prefix_extra" "1")
11394 (set_attr "prefix" "maybe_vex")
11395 (set_attr "mode" "TI")])
;; aeskeygenassist with a register-or-memory source (operand 1) and an
;; immediate in 0..255 (operand 2).  Uses the %v / maybe_vex scheme, so one
;; pattern covers both encodings.
11397 (define_insn "aeskeygenassist"
11398 [(set (match_operand:V2DI 0 "register_operand" "=x")
11399 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11400 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11401 UNSPEC_AESKEYGENASSIST))]
11403 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11404 [(set_attr "type" "sselog1")
11405 (set_attr "prefix_extra" "1")
11406 (set_attr "length_immediate" "1")
11407 (set_attr "prefix" "maybe_vex")
11408 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vpclmulqdq with an immediate in 0..255
;; (operand 3); enabled when both TARGET_PCLMUL and TARGET_AVX hold.
11410 (define_insn "*vpclmulqdq"
11411 [(set (match_operand:V2DI 0 "register_operand" "=x")
11412 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11413 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11414 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11416 "TARGET_PCLMUL && TARGET_AVX"
11417 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11418 [(set_attr "type" "sselog1")
11419 (set_attr "prefix_extra" "1")
11420 (set_attr "length_immediate" "1")
11421 (set_attr "prefix" "vex")
11422 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination, operand 2
;; is a register or memory source, operand 3 is an immediate in 0..255.
11424 (define_insn "pclmulqdq"
11425 [(set (match_operand:V2DI 0 "register_operand" "=x")
11426 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11427 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11428 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11431 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11432 [(set_attr "type" "sselog1")
11433 (set_attr "prefix_extra" "1")
11434 (set_attr "length_immediate" "1")
11435 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall.  Element 0 is an
;; UNSPEC_VOLATILE; elements 1..nregs each SET one SSE register, viewed as
;; V8SImode, to the zero vector.  nregs is 16 for TARGET_64BIT and 8
;; otherwise.
11437 (define_expand "avx_vzeroall"
11438 [(match_par_dup 0 [(const_int 0)])]
11441 int nregs = TARGET_64BIT ? 16 : 8;
11444 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11446 XVECEXP (operands[0], 0, 0)
11447 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11450 for (regno = 0; regno < nregs; regno++)
11451 XVECEXP (operands[0], 0, regno + 1)
11452 = gen_rtx_SET (VOIDmode,
11453 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11454 CONST0_RTX (V8SImode));
;; Matcher for a PARALLEL accepted by the vzeroall_operation predicate,
;; whose first element is the UNSPECV_VZEROALL unspec_volatile.  No modrm
;; byte and no memory access.
11457 (define_insn "*avx_vzeroall"
11458 [(match_parallel 0 "vzeroall_operation"
11459 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11462 [(set_attr "type" "sse")
11463 (set_attr "modrm" "0")
11464 (set_attr "memory" "none")
11465 (set_attr "prefix" "vex")
11466 (set_attr "mode" "OI")])
11468 ;; vzeroupper clobbers the upper 128 bits of AVX registers.
;; Expander that builds the PARALLEL for vzeroupper.  Element 0 is the
;; UNSPECV_VZEROUPPER unspec_volatile; elements 1..nregs each CLOBBER one
;; SSE register viewed as V8SImode.  nregs is 16 for TARGET_64BIT and 8
;; otherwise.
11469 (define_expand "avx_vzeroupper"
11470 [(match_par_dup 0 [(const_int 0)])]
11473 int nregs = TARGET_64BIT ? 16 : 8;
11476 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11478 XVECEXP (operands[0], 0, 0)
11479 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11480 UNSPECV_VZEROUPPER);
11482 for (regno = 0; regno < nregs; regno++)
11483 XVECEXP (operands[0], 0, regno + 1)
11484 = gen_rtx_CLOBBER (VOIDmode,
11485 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; Matcher for a PARALLEL accepted by the vzeroupper_operation predicate,
;; whose first element is the UNSPECV_VZEROUPPER unspec_volatile.  No modrm
;; byte and no memory access.
11488 (define_insn "*avx_vzeroupper"
11489 [(match_parallel 0 "vzeroupper_operation"
11490 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11493 [(set_attr "type" "sse")
11494 (set_attr "modrm" "0")
11495 (set_attr "memory" "none")
11496 (set_attr "prefix" "vex")
11497 (set_attr "mode" "OI")])
;; Broadcast a scalar into a 256-bit vector.  Alternative 0 takes the scalar
;; from memory; alternative 1 takes it from a register and is disparaged
;; ("?x").  After reload, a register source is split into a vec_duplicate
;; into the half-width view of the destination register (operand 2)
;; followed by a vec_concat of that half with itself.
11499 (define_insn_and_split "vec_dup<mode>"
11500 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11501 (vec_duplicate:AVX256MODE24P
11502 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11505 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11507 "&& reload_completed && REG_P (operands[1])"
11508 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11509 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11510 "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
11511 [(set_attr "type" "ssemov")
11512 (set_attr "prefix_extra" "1")
11513 (set_attr "prefix" "vex")
11514 (set_attr "mode" "V8SF")])
;; Broadcast a half-width (<avxhalfvecmode>) value into a 256-bit vector.
;; The three alternatives are: memory source -> vbroadcastf128; source tied
;; to the destination ("0") -> vinsertf128 with immediate 1; any other
;; register source -> vperm2f128 with immediate 0.
11516 (define_insn "avx_vbroadcastf128_<mode>"
11517 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11518 (vec_concat:AVX256MODE
11519 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11523 vbroadcastf128\t{%1, %0|%0, %1}
11524 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11525 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11526 [(set_attr "type" "ssemov,sselog1,sselog1")
11527 (set_attr "prefix_extra" "1")
11528 (set_attr "length_immediate" "0,1,1")
11529 (set_attr "prefix" "vex")
11530 (set_attr "mode" "V4SF,V8SF,V8SF")])
11532 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11533 ;; If it so happens that the input is in memory, use vbroadcast.
11534 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element "elt" (the constant operand 3).  For a memory
;; source the address is advanced by elt * 4 bytes, re-typed as SFmode, and
;; vbroadcastss is emitted.  For a register source vpermilps is emitted with
;; immediate elt * 0x55, i.e. elt repeated in all four 2-bit fields.
11535 (define_insn "*avx_vperm_broadcast_v4sf"
11536 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11538 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11539 (match_parallel 2 "avx_vbroadcast_operand"
11540 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11543 int elt = INTVAL (operands[3]);
11544 switch (which_alternative)
11548 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11549 return "vbroadcastss\t{%1, %0|%0, %1}";
11551 operands[2] = GEN_INT (elt * 0x55);
11552 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11554 gcc_unreachable ();
11557 [(set_attr "type" "ssemov,ssemov,sselog1")
11558 (set_attr "prefix_extra" "1")
11559 (set_attr "length_immediate" "0,0,1")
11560 (set_attr "prefix" "vex")
11561 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast of element "elt" (the constant operand 3), split after
;; reload.  One path emits avx_vpermil<mode> to replicate the element within
;; its 128-bit lane and then avx_vperm2f128<mode>3 to copy that lane to both
;; halves.  The other path re-addresses operand 1 to the selected scalar so
;; that the replacement vec_duplicate pattern applies.
11563 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11564 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11565 (vec_select:AVX256MODEF2P
11566 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11567 (match_parallel 2 "avx_vbroadcast_operand"
11568 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11571 "&& reload_completed"
11572 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11574 rtx op0 = operands[0], op1 = operands[1];
11575 int elt = INTVAL (operands[3]);
11581 /* Shuffle element we care about into all elements of the 128-bit lane.
11582 The other lane gets shuffled too, but we don't care. */
11583 if (<MODE>mode == V4DFmode)
11584 mask = (elt & 1 ? 15 : 0);
11586 mask = (elt & 3) * 0x55;
11587 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11589 /* Shuffle the lane we care about into both lanes of the dest. */
11590 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11591 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11595 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11596 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; Expander for vpermil with an immediate on the AVXMODEFDP modes.  The
;; immediate is decoded one bit per element into a PARALLEL of element
;; indices: bits 0 and 1 select within elements 0..1, and for V4DFmode bits
;; 2 and 3 select within elements 2..3 (hence the "+ 2").
11599 (define_expand "avx_vpermil<mode>"
11600 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11601 (vec_select:AVXMODEFDP
11602 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11603 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11606 int mask = INTVAL (operands[2]);
11607 rtx perm[<ssescalarnum>];
11609 perm[0] = GEN_INT (mask & 1);
11610 perm[1] = GEN_INT ((mask >> 1) & 1);
11611 if (<MODE>mode == V4DFmode)
11613 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11614 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11618 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for vpermil with an immediate on the AVXMODEFSP modes.  The
;; immediate is decoded as four 2-bit fields into a PARALLEL of element
;; indices 0..3.  For V8SFmode the same four fields are reused for elements
;; 4..7, offset by 4.
11621 (define_expand "avx_vpermil<mode>"
11622 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11623 (vec_select:AVXMODEFSP
11624 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11625 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11628 int mask = INTVAL (operands[2]);
11629 rtx perm[<ssescalarnum>];
11631 perm[0] = GEN_INT (mask & 3);
11632 perm[1] = GEN_INT ((mask >> 2) & 3);
11633 perm[2] = GEN_INT ((mask >> 4) & 3);
11634 perm[3] = GEN_INT ((mask >> 6) & 3);
11635 if (<MODE>mode == V8SFmode)
11637 perm[4] = GEN_INT ((mask & 3) + 4);
11638 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11639 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11640 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11644 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for vpermil written as a vec_select with a PARALLEL accepted by
;; avx_vpermilp_<mode>_operand.  The immediate to print is recovered as
;; avx_vpermilp_parallel (...) - 1 and stored back into operands[2].
11647 (define_insn "*avx_vpermilp<mode>"
11648 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11649 (vec_select:AVXMODEF2P
11650 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11651 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11652 [(match_operand 3 "const_int_operand" "")])))]
11655 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11656 operands[2] = GEN_INT (mask);
11657 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11659 [(set_attr "type" "sselog")
11660 (set_attr "prefix_extra" "1")
11661 (set_attr "length_immediate" "1")
11662 (set_attr "prefix" "vex")
11663 (set_attr "mode" "<MODE>")])
;; vpermil with a variable selector: operand 1 is the register source and
;; operand 2 the selector vector in <avxpermvecmode>, in a register or in
;; memory.
11665 (define_insn "avx_vpermilvar<mode>3"
11666 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11668 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11669 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11672 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11673 [(set_attr "type" "sselog")
11674 (set_attr "prefix_extra" "1")
11675 (set_attr "prefix" "vex")
11676 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  The default RTL is an UNSPEC_VPERMIL2F128.
;; When the immediate has neither bit 3 nor bit 7 set ((mask & 0x88) == 0)
;; a SET of a vec_select over the vec_concat of operands 1 and 2 is built
;; instead.  The low half of the result is the half-vector chosen by bits
;; 0..1 of the mask; the high half is chosen by bits 4..5.
11678 (define_expand "avx_vperm2f128<mode>3"
11679 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11680 (unspec:AVX256MODE2P
11681 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11682 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11683 (match_operand:SI 3 "const_0_to_255_operand" "")]
11684 UNSPEC_VPERMIL2F128))]
11687 int mask = INTVAL (operands[3]);
11688 if ((mask & 0x88) == 0)
11690 rtx perm[<ssescalarnum>], t1, t2;
11691 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11693 base = (mask & 3) * nelt2;
11694 for (i = 0; i < nelt2; ++i)
11695 perm[i] = GEN_INT (base + i);
11697 base = ((mask >> 4) & 3) * nelt2;
11698 for (i = 0; i < nelt2; ++i)
11699 perm[i + nelt2] = GEN_INT (base + i);
11701 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11702 operands[1], operands[2]);
11703 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11704 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11705 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11711 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11712 ;; means that in order to represent this properly in rtl we'd have to
11713 ;; nest *another* vec_concat with a zero operand and do the select from
11714 ;; a 4x wide vector. That doesn't seem very nice.
;; Matcher for the UNSPEC_VPERMIL2F128 form of vperm2f128, accepting any
;; immediate in 0..255 (operand 3).
11715 (define_insn "*avx_vperm2f128<mode>_full"
11716 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11717 (unspec:AVX256MODE2P
11718 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11719 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11720 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11721 UNSPEC_VPERMIL2F128))]
11723 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11724 [(set_attr "type" "sselog")
11725 (set_attr "prefix_extra" "1")
11726 (set_attr "length_immediate" "1")
11727 (set_attr "prefix" "vex")
11728 (set_attr "mode" "V8SF")])
;; Matcher for vperm2f128 written as a vec_select over the vec_concat of
;; operands 1 and 2.  The immediate to print is recovered as
;; avx_vperm2f128_parallel (...) - 1 and stored back into operands[3].
11730 (define_insn "*avx_vperm2f128<mode>_nozero"
11731 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11732 (vec_select:AVX256MODE2P
11733 (vec_concat:<ssedoublesizemode>
11734 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11735 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11736 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
11737 [(match_operand 4 "const_int_operand" "")])))]
11740 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11741 operands[3] = GEN_INT (mask);
11742 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11744 [(set_attr "type" "sselog")
11745 (set_attr "prefix_extra" "1")
11746 (set_attr "length_immediate" "1")
11747 (set_attr "prefix" "vex")
11748 (set_attr "mode" "V8SF")])
;; Expander for vinsertf128.  It switches on the 0/1 immediate (operand 3)
;; and emits either vec_set_lo_<mode> or vec_set_hi_<mode>; any other value
;; is unreachable.
11750 (define_expand "avx_vinsertf128<mode>"
11751 [(match_operand:AVX256MODE 0 "register_operand" "")
11752 (match_operand:AVX256MODE 1 "register_operand" "")
11753 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11754 (match_operand:SI 3 "const_0_to_1_operand" "")]
11757 switch (INTVAL (operands[3]))
11760 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11764 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11768 gcc_unreachable ();
;; Replace the low half of a 4-element 256-bit vector: the result is
;; operand 2 concatenated with elements 2..3 of operand 1.  Emitted as
;; vinsertf128 with immediate 0.
11773 (define_insn "vec_set_lo_<mode>"
11774 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11775 (vec_concat:AVX256MODE4P
11776 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11777 (vec_select:<avxhalfvecmode>
11778 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11779 (parallel [(const_int 2) (const_int 3)]))))]
11781 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11782 [(set_attr "type" "sselog")
11783 (set_attr "prefix_extra" "1")
11784 (set_attr "length_immediate" "1")
11785 (set_attr "prefix" "vex")
11786 (set_attr "mode" "V8SF")])
;; Replace the high half of a 4-element 256-bit vector: the result is
;; elements 0..1 of operand 1 concatenated with operand 2.  Emitted as
;; vinsertf128 with immediate 1.
11788 (define_insn "vec_set_hi_<mode>"
11789 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11790 (vec_concat:AVX256MODE4P
11791 (vec_select:<avxhalfvecmode>
11792 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11793 (parallel [(const_int 0) (const_int 1)]))
11794 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11796 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11797 [(set_attr "type" "sselog")
11798 (set_attr "prefix_extra" "1")
11799 (set_attr "length_immediate" "1")
11800 (set_attr "prefix" "vex")
11801 (set_attr "mode" "V8SF")])
;; Replace the low half of an 8-element 256-bit vector: the result is
;; operand 2 concatenated with elements 4..7 of operand 1.  Emitted as
;; vinsertf128 with immediate 0.
11803 (define_insn "vec_set_lo_<mode>"
11804 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11805 (vec_concat:AVX256MODE8P
11806 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11807 (vec_select:<avxhalfvecmode>
11808 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11809 (parallel [(const_int 4) (const_int 5)
11810 (const_int 6) (const_int 7)]))))]
11812 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11813 [(set_attr "type" "sselog")
11814 (set_attr "prefix_extra" "1")
11815 (set_attr "length_immediate" "1")
11816 (set_attr "prefix" "vex")
11817 (set_attr "mode" "V8SF")])
;; Replace the high half of an 8-element 256-bit vector: the result is
;; elements 0..3 of operand 1 concatenated with operand 2.  Emitted as
;; vinsertf128 with immediate 1.
11819 (define_insn "vec_set_hi_<mode>"
11820 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11821 (vec_concat:AVX256MODE8P
11822 (vec_select:<avxhalfvecmode>
11823 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11824 (parallel [(const_int 0) (const_int 1)
11825 (const_int 2) (const_int 3)]))
11826 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11828 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11829 [(set_attr "type" "sselog")
11830 (set_attr "prefix_extra" "1")
11831 (set_attr "length_immediate" "1")
11832 (set_attr "prefix" "vex")
11833 (set_attr "mode" "V8SF")])
;; V16HI low-half replacement: the V8HI operand 2 is combined with elements
;; 8..15 of operand 1.  Emitted as vinsertf128 with immediate 0.
11835 (define_insn "vec_set_lo_v16hi"
11836 [(set (match_operand:V16HI 0 "register_operand" "=x")
11838 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11840 (match_operand:V16HI 1 "register_operand" "x")
11841 (parallel [(const_int 8) (const_int 9)
11842 (const_int 10) (const_int 11)
11843 (const_int 12) (const_int 13)
11844 (const_int 14) (const_int 15)]))))]
11846 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11847 [(set_attr "type" "sselog")
11848 (set_attr "prefix_extra" "1")
11849 (set_attr "length_immediate" "1")
11850 (set_attr "prefix" "vex")
11851 (set_attr "mode" "V8SF")])
;; V16HI high-half replacement: elements 0..7 of operand 1 are combined
;; with the V8HI operand 2.  Emitted as vinsertf128 with immediate 1.
11853 (define_insn "vec_set_hi_v16hi"
11854 [(set (match_operand:V16HI 0 "register_operand" "=x")
11857 (match_operand:V16HI 1 "register_operand" "x")
11858 (parallel [(const_int 0) (const_int 1)
11859 (const_int 2) (const_int 3)
11860 (const_int 4) (const_int 5)
11861 (const_int 6) (const_int 7)]))
11862 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11864 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11865 [(set_attr "type" "sselog")
11866 (set_attr "prefix_extra" "1")
11867 (set_attr "length_immediate" "1")
11868 (set_attr "prefix" "vex")
11869 (set_attr "mode" "V8SF")])
;; V32QI low-half replacement: the V16QI operand 2 is combined with
;; elements 16..31 of operand 1.  Emitted as vinsertf128 with immediate 0.
11871 (define_insn "vec_set_lo_v32qi"
11872 [(set (match_operand:V32QI 0 "register_operand" "=x")
11874 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11876 (match_operand:V32QI 1 "register_operand" "x")
11877 (parallel [(const_int 16) (const_int 17)
11878 (const_int 18) (const_int 19)
11879 (const_int 20) (const_int 21)
11880 (const_int 22) (const_int 23)
11881 (const_int 24) (const_int 25)
11882 (const_int 26) (const_int 27)
11883 (const_int 28) (const_int 29)
11884 (const_int 30) (const_int 31)]))))]
11886 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11887 [(set_attr "type" "sselog")
11888 (set_attr "prefix_extra" "1")
11889 (set_attr "length_immediate" "1")
11890 (set_attr "prefix" "vex")
11891 (set_attr "mode" "V8SF")])
;; V32QI high-half replacement: elements 0..15 of operand 1 are combined
;; with the V16QI operand 2.  Emitted as vinsertf128 with immediate 1.
11893 (define_insn "vec_set_hi_v32qi"
11894 [(set (match_operand:V32QI 0 "register_operand" "=x")
11897 (match_operand:V32QI 1 "register_operand" "x")
11898 (parallel [(const_int 0) (const_int 1)
11899 (const_int 2) (const_int 3)
11900 (const_int 4) (const_int 5)
11901 (const_int 6) (const_int 7)
11902 (const_int 8) (const_int 9)
11903 (const_int 10) (const_int 11)
11904 (const_int 12) (const_int 13)
11905 (const_int 14) (const_int 15)]))
11906 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11908 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11909 [(set_attr "type" "sselog")
11910 (set_attr "prefix_extra" "1")
11911 (set_attr "length_immediate" "1")
11912 (set_attr "prefix" "vex")
11913 (set_attr "mode" "<MODE>")])
;; vmaskmov load form: operand 1 is the memory source and operand 2 a
;; register operand; the destination is a register.
11915 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
11916 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11918 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11919 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11923 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11924 [(set_attr "type" "sselog1")
11925 (set_attr "prefix_extra" "1")
11926 (set_attr "prefix" "vex")
11927 (set_attr "mode" "<MODE>")])
;; vmaskmov store form (UNSPEC_MASKSTORE): the destination is memory and
;; operands 1 and 2 are both registers.
11929 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
11930 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11932 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11933 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11935 UNSPEC_MASKSTORE))]
11937 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11938 [(set_attr "type" "sselog1")
11939 (set_attr "prefix_extra" "1")
11940 (set_attr "prefix" "vex")
11941 (set_attr "mode" "<MODE>")])
;; Unspec that takes a half-width (<avxhalfvecmode>) operand 1 and yields a
;; 256-bit value.  After reload it is split into a plain move: operand 1 is
;; re-expressed in <MODE>mode, either as the same hard register or through
;; gen_lowpart, and moved into operand 0.
11943 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11944 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11945 (unspec:AVX256MODE2P
11946 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11950 "&& reload_completed"
11953 rtx op1 = operands[1];
11955 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
11957 op1 = gen_lowpart (<MODE>mode, op1);
11958 emit_move_insn (operands[0], op1);
;; Expander that initializes a 256-bit vector register (operand 0) from
;; operand 1 by calling ix86_expand_vector_init.
11962 (define_expand "vec_init<mode>"
11963 [(match_operand:AVX256MODE 0 "register_operand" "")
11964 (match_operand 1 "" "")]
11967 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two half-width vectors into a 256-bit register.  Alternative
;; 0 (operand 2 in a register or memory) emits vinsertf128 with immediate 1.
;; Alternative 1 (operand 2 matching constraint "C") emits a 128-bit move of
;; operand 1 into the low (%x0) view of the destination, choosing vmovaps,
;; vmovapd or vmovdqa by the insn's mode attribute.
11971 (define_insn "*vec_concat<mode>_avx"
11972 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11973 (vec_concat:AVX256MODE
11974 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11975 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11978 switch (which_alternative)
11981 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11983 switch (get_attr_mode (insn))
11986 return "vmovaps\t{%1, %x0|%x0, %1}";
11988 return "vmovapd\t{%1, %x0|%x0, %1}";
11990 return "vmovdqa\t{%1, %x0|%x0, %1}";
11993 gcc_unreachable ();
11996 [(set_attr "type" "sselog,ssemov")
11997 (set_attr "prefix_extra" "1,*")
11998 (set_attr "length_immediate" "1,*")
11999 (set_attr "prefix" "vex")
12000 (set_attr "mode" "<avxvecmode>")])
;; Register form of the 128-bit half-float to single-float conversion.
;; Operand 1 is a V8HI register; the conversion is modelled as a V8SF
;; unspec, of which four lanes are selected to form the V4SF result in
;; operand 0.
;; The selector must name the four distinct low lanes 0, 1, 2, 3.  The
;; previous list (0 1 1 2) duplicated lane 1 and dropped lane 3, so the
;; RTL did not describe the four results vcvtph2ps writes to the
;; destination.
12002 (define_insn "vcvtph2ps"
12003 [(set (match_operand:V4SF 0 "register_operand" "=x")
12005 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12007 (parallel [(const_int 0) (const_int 1)
12008 (const_int 2) (const_int 3)])))]
12010 "vcvtph2ps\t{%1, %0|%0, %1}"
12011 [(set_attr "type" "ssecvt")
12012 (set_attr "prefix" "vex")
12013 (set_attr "mode" "V4SF")])
;; Memory-source form of the 128-bit half-float to single-float
;; conversion: operand 1 is a V4HI memory operand and operand 0 the V4SF
;; destination register, expressed directly as UNSPEC_VCVTPH2PS.
;; NOTE(review): the "mode" attribute here is V8SF, whereas the register
;; form "vcvtph2ps" uses V4SF for the same 128-bit result -- confirm
;; whether the difference is intentional.
12015 (define_insn "*vcvtph2ps_load"
12016 [(set (match_operand:V4SF 0 "register_operand" "=x")
12017 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12018 UNSPEC_VCVTPH2PS))]
12020 "vcvtph2ps\t{%1, %0|%0, %1}"
12021 [(set_attr "type" "ssecvt")
12022 (set_attr "prefix" "vex")
12023 (set_attr "mode" "V8SF")])
;; 256-bit half-float to single-float conversion: operand 1 is a V8HI
;; register or memory operand ("xm") and operand 0 the V8SF destination
;; register, expressed as UNSPEC_VCVTPH2PS over the whole vector.
12025 (define_insn "vcvtph2ps256"
12026 [(set (match_operand:V8SF 0 "register_operand" "=x")
12027 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12028 UNSPEC_VCVTPH2PS))]
12030 "vcvtph2ps\t{%1, %0|%0, %1}"
12031 [(set_attr "type" "ssecvt")
12032 (set_attr "prefix" "vex")
12033 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit single-float to half-float conversion.
;; Operand 1 is the V4SF source register and operand 2 an SImode
;; immediate; together they form a V4HI unspec, while the destination
;; (operand 0) is a full V8HI register.
;; The preparation statement sets operand 3 to the V4HI zero vector;
;; presumably it supplies the other half of the V8HI result -- the rtx
;; that combines the two halves is not part of the lines shown here.
12035 (define_expand "vcvtps2ph"
12036 [(set (match_operand:V8HI 0 "register_operand" "")
12038 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12039 (match_operand:SI 2 "immediate_operand" "")]
12043 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination form of the 128-bit single-float to half-float
;; conversion.  Operand 1 is the V4SF source register, operand 2 an
;; SImode immediate restricted by constraint "N", and operand 3 must be
;; the V4HI zero constant (const0_operand).  The destination is a V8HI
;; register.  Operand 3 never appears in the output template, which
;; prints only the immediate, the source and the destination.
12045 (define_insn "*vcvtps2ph"
12046 [(set (match_operand:V8HI 0 "register_operand" "=x")
12048 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12049 (match_operand:SI 2 "immediate_operand" "N")]
12051 (match_operand:V4HI 3 "const0_operand" "")))]
12053 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12054 [(set_attr "type" "ssecvt")
12055 (set_attr "prefix" "vex")
12056 (set_attr "mode" "V4SF")])
;; Memory-destination form of the 128-bit single-float to half-float
;; conversion: the V4HI result of UNSPEC_VCVTPS2PH over the V4SF register
;; (operand 1) and the SImode immediate (operand 2, constraint "N") is
;; written straight to the V4HI memory operand 0.
12058 (define_insn "*vcvtps2ph_store"
12059 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12060 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12061 (match_operand:SI 2 "immediate_operand" "N")]
12062 UNSPEC_VCVTPS2PH))]
12064 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12065 [(set_attr "type" "ssecvt")
12066 (set_attr "prefix" "vex")
12067 (set_attr "mode" "V4SF")])
;; 256-bit single-float to half-float conversion: the V8SF register
;; (operand 1) and the SImode immediate (operand 2, constraint "N") form
;; a V8HI UNSPEC_VCVTPS2PH result, stored in operand 0, which may be a
;; register or memory ("=xm").
12069 (define_insn "vcvtps2ph256"
12070 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12071 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12072 (match_operand:SI 2 "immediate_operand" "N")]
12073 UNSPEC_VCVTPS2PH))]
12075 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12076 [(set_attr "type" "ssecvt")
12077 (set_attr "prefix" "vex")
12078 (set_attr "mode" "V8SF")])