1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Instruction suffix for sign and zero extensions.
;; Code attribute: maps the sign_extend rtx code to "sx" and the
;; zero_extend rtx code to "zx".
23 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
25 ;; 16 byte integral modes handled by SSE
;; Iterates over the four integer vector modes V16QI, V8HI, V4SI and V2DI.
26 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
28 ;; All 16-byte vector modes handled by SSE
;; SSEMODE: the four integer vector modes plus the float modes V4SF and V2DF.
29 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; SSEMODE16: the same list as SSEMODE with V1TI added.
30 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
32 ;; 32 byte integral vector modes handled by AVX
;; Iterates over the integer vector modes V32QI, V16HI, V8SI and V4DI.
33 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
35 ;; All 32-byte vector modes handled by AVX
;; The AVX256MODEI integer modes plus the float modes V8SF and V4DF.
36 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
38 ;; All QI vector modes handled by AVX
;; Byte-element vectors: V32QI and V16QI.
39 (define_mode_iterator AVXMODEQI [V32QI V16QI])
41 ;; All DI vector modes handled by AVX
;; DImode-element vectors: V4DI and V2DI.
42 (define_mode_iterator AVXMODEDI [V4DI V2DI])
44 ;; All vector modes handled by AVX
;; AVXMODE: the six SSEMODE modes followed by the six AVX256MODE modes.
45 (define_mode_iterator AVXMODE
46 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; AVXMODE16: the same list as AVXMODE with V1TI added.
47 (define_mode_iterator AVXMODE16
48 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the integer vector modes.  The digits in each name match the
;; element sizes in bytes of the listed modes (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI); e.g. SSEMODE248 is V8HI, V4SI and V2DI.
51 (define_mode_iterator SSEMODE12 [V16QI V8HI])
52 (define_mode_iterator SSEMODE24 [V8HI V4SI])
53 (define_mode_iterator SSEMODE14 [V16QI V4SI])
54 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
55 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
56 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; SSEMODEF4: scalar SF/DF plus the float vectors V4SF/V2DF.
57 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; SSEMODEF2P: only the two float vector modes V4SF and V2DF.
58 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Subsets of the AVX vector modes.  AVX256* iterators list only modes that
;; also appear in AVX256MODE; AVXMODE* iterators mix modes from SSEMODE and
;; AVX256MODE.
;; AVX256MODEF2P: the two AVX256MODE float modes.
60 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
61 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
62 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
;; Four-element and eight-element members of AVX256MODE, respectively.
63 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
64 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; AVXMODEF2P: all four float vector modes.
65 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
;; Four-element float vectors.
66 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
;; Double-precision (DF element) and single-precision (SF element) vectors.
67 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
68 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; Same mode list as AVXMODEFSP; the name suggests the destination modes
;; of cvtdq2ps -- confirm at the use sites.
69 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
;; SImode-element vectors; the name suggests the destination modes of
;; cvtps2dq -- confirm at the use sites.
70 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
;; Scalar SF/DF plus every float vector mode (V4SF, V2DF, V8SF, V4DF).
72 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
74 ;; Int-float size matches
;; SSEMODE4S pairs the float and integer vectors of four elements;
;; SSEMODE2D pairs the float and integer vectors of two elements.
75 (define_mode_iterator SSEMODE4S [V4SF V4SI])
76 (define_mode_iterator SSEMODE2D [V2DF V2DI])
78 ;; Modes handled by integer vcond pattern
;; V16QI, V8HI and V4SI are unconditional; V2DI is included only when
;; TARGET_SSE4_2 holds.
79 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
80 (V2DI "TARGET_SSE4_2")])
82 ;; Modes handled by vec_extract_even/odd pattern.
;; Every mode carries its own enabling condition: V4SF needs only SSE,
;; the other 128-bit modes need SSE2, and the 256-bit float modes need AVX.
83 (define_mode_iterator SSEMODE_EO
84 [(V4SF "TARGET_SSE")
85 (V2DF "TARGET_SSE2")
86 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
87 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
88 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
90 ;; Modes handled by storent patterns.
;; Each mode is gated on the ISA extension attached to it.  V4SF needs only
;; plain SSE: the <sse>_movnt<mode> insn iterates over SSEMODEF2P, which
;; contains V4SF, and emits movnt<ssemodesuffix> for it.
91 (define_mode_iterator STORENT_MODE
92 [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
93 (SI "TARGET_SSE2") (V2DI "TARGET_SSE2") (V2DF "TARGET_SSE2")
94 (V4SF "TARGET_SSE")
95 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
97 ;; Modes handled by vector float patterns.
;; All four float vector modes, each gated on its ISA level: V4SF on SSE,
;; V2DF on SSE2, and the 256-bit V4DF/V8SF on AVX.
98 (define_mode_iterator VEC_FLOAT_MODE
99 [(V2DF "TARGET_SSE2") (V4SF "TARGET_SSE")
100 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
102 ;; Modes handled by vector extract patterns.
;; Every 128-bit mode here is enabled by TARGET_SSE alone (including the
;; integer modes and V2DF); the 256-bit float modes require TARGET_AVX.
103 (define_mode_iterator VEC_EXTRACT_MODE
104 [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE")
105 (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")
106 (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
107 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
109 ;; Mapping from float mode to required SSE level
;; Single-precision modes (SF, V4SF) map to "sse"; double-precision modes
;; (DF, V2DF) map to "sse2".  Used below as a pattern-name prefix, e.g.
;; "<sse>_movnt<mode>".
110 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
112 ;; Mapping from integer vector mode to mnemonic suffix
;; Element-size letter per integer vector mode: b (QI), w (HI), d (SI),
;; q (DI).
113 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
115 ;; Mapping of the insn mnemonic suffix
;; Scalar modes map to "ss"/"sd", vector modes to "ps"/"pd".  Note that the
;; integer modes V8SI and V4DI also map to "ps" and "pd" respectively.
116 (define_mode_attr ssemodesuffix
117 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
118 (V8SI "ps") (V4DI "pd")])
;; Suffix of the scalar instruction form for each mode: "ss" for
;; single-precision, "sd" for double-precision.  V8SI and V4DI reuse
;; "ss"/"sd"; V4SI maps to "d".
119 (define_mode_attr ssescalarmodesuffix
120 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V8SI "ss")
121 (V4DF "sd") (V4SI "d") (V4DI "sd")])
123 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (8-1, 16-1, 32-1, 64-1).
124 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
126 ;; Mapping of vector modes back to the scalar modes
;; Element mode of each 128-bit vector mode.  Used below as the "mode"
;; attribute of the scalar (vm*) insns.
127 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
128 (V16QI "QI") (V8HI "HI")
129 (V4SI "SI") (V2DI "DI")])
131 ;; Mapping of vector modes to a vector mode of double size
;; Same element type with twice the element count.  The first two rows
;; map 128-bit modes to 256-bit modes; the last two map 256-bit modes to
;; 512-bit modes.
132 (define_mode_attr ssedoublesizemode
133 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
134 (V8HI "V16HI") (V16QI "V32QI")
135 (V4DF "V8DF") (V8SF "V16SF")
136 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
138 ;; Number of scalar elements in each vector type
;; Element count of each vector mode as a decimal string; it equals the
;; number embedded in the mode name.
139 (define_mode_attr ssescalarnum
140 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
141 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value for the insn "mode" attribute: 128-bit integer vectors (and V1TI)
;; map to TI, 256-bit integer vectors map to OI, and float vectors map to
;; themselves.
144 (define_mode_attr avxvecmode
145 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
146 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
147 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Single-precision float vector of the same total size as each integer
;; vector mode: V4SF for the 128-bit modes, V8SF for the 256-bit modes.
148 (define_mode_attr avxvecpsmode
149 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
150 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Same element type with half the element count.  The first two rows map
;; 256-bit modes to 128-bit modes; the last row maps 128-bit modes to
;; 64-bit modes (V2DI and V2DF have no entry).
151 (define_mode_attr avxhalfvecmode
152 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
153 (V8SF "V4SF") (V4DF "V2DF")
154 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Element mode of every 128-bit and 256-bit vector mode.
155 (define_mode_attr avxscalarmode
156 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
157 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; Swaps between the SF-element and SI-element vector of the same element
;; count, in both directions.
158 (define_mode_attr avxcvtvecmode
159 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Integer vector with the same element count and element width as each
;; float vector mode.
160 (define_mode_attr avxpermvecmode
161 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Mnemonic suffix by element type: "pd" for DF-element vectors, "ps" for
;; SF-element vectors and "si" for SI-element vectors.  V4DF is listed so
;; that every member of AVX256MODE2P has a mapping.
162 (define_mode_attr avxmodesuffixp
163 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
164 (V4DF "pd")])
;; Empty string for the listed 128-bit modes and "256" for the listed
;; 256-bit modes; used below to build pattern names such as
;; "avx_movdqu<avxmodesuffix>".
165 (define_mode_attr avxmodesuffix
166 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
167 (V8SI "256") (V8SF "256") (V4DF "256")])
169 ;; Mapping of immediate bits for blend instructions
;; Each value is (1 << number-of-elements) - 1, i.e. one bit per element:
;; 8 elements -> 255, 4 elements -> 15, 2 elements -> 3.
170 (define_mode_attr blendbits
171 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
173 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 1 << (number-of-elements - 1): 1<<15, 1<<7 and 1<<3.
174 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
176 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for every mode in AVX256MODE.  The work is delegated to
;; ix86_expand_vector_move with the current mode and the operand array.
;; NOTE(review): the condition string and the braces/DONE around the C
;; body are not present in this copy -- confirm against upstream.
184 (define_expand "mov<mode>"
185 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
186 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
189 ix86_expand_vector_move (<MODE>mode, operands);
;; VEX-encoded move insn for every mode in AVXMODE16.
;; Alternatives: 0 = SSE constant (C) -> x, 1 = x or m -> x, 2 = x -> m.
;; The visible part of the condition requires at least one operand to be
;; a register.  The output code switches on which_alternative: one path
;; returns standard_sse_constant_opcode, the others switch on the insn's
;; mode attribute and return vmovaps, vmovapd or vmovdqa; the latter two
;; fall back to vmovaps when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;; NOTE(review): the case labels, braces and first condition line are not
;; present in this copy -- confirm against upstream.
193 (define_insn "*avx_mov<mode>_internal"
194 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
195 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
197 && (register_operand (operands[0], <MODE>mode)
198 || register_operand (operands[1], <MODE>mode))"
200 switch (which_alternative)
203 return standard_sse_constant_opcode (insn, operands[1]);
206 switch (get_attr_mode (insn))
210 return "vmovaps\t{%1, %0|%0, %1}";
213 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
214 return "vmovaps\t{%1, %0|%0, %1}";
216 return "vmovapd\t{%1, %0|%0, %1}";
218 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
219 return "vmovaps\t{%1, %0|%0, %1}";
221 return "vmovdqa\t{%1, %0|%0, %1}";
227 [(set_attr "type" "sselog1,ssemov,ssemov")
228 (set_attr "prefix" "vex")
229 (set_attr "mode" "<avxvecmode>")])
231 ;; All of these patterns are enabled for SSE1 as well as SSE2.
232 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode in SSEMODE16; delegates to
;; ix86_expand_vector_move.
;; NOTE(review): the condition string and the braces/DONE around the C
;; body are not present in this copy -- confirm against upstream.
234 (define_expand "mov<mode>"
235 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
236 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
239 ix86_expand_vector_move (<MODE>mode, operands);
;; Non-VEX move insn for every mode in SSEMODE16.
;; Alternatives: 0 = SSE constant (C) -> x, 1 = x or m -> x, 2 = x -> m.
;; The visible part of the condition requires at least one register
;; operand.  The output code mirrors the AVX variant but returns movaps,
;; movapd or movdqa, again preferring movaps when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;; The "mode" attribute is computed by a cond: V4SF when optimizing the
;; function for size, when SSE2 is unavailable, for the store alternative
;; combined with TARGET_SSE_TYPELESS_STORES, or when the mode itself is
;; V4SF; V2DF when the mode is V2DF; TI otherwise.
;; NOTE(review): several lines (case labels, braces, parts of the cond)
;; are not present in this copy -- confirm against upstream.
243 (define_insn "*mov<mode>_internal"
244 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
245 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
247 && (register_operand (operands[0], <MODE>mode)
248 || register_operand (operands[1], <MODE>mode))"
250 switch (which_alternative)
253 return standard_sse_constant_opcode (insn, operands[1]);
256 switch (get_attr_mode (insn))
259 return "movaps\t{%1, %0|%0, %1}";
261 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
262 return "movaps\t{%1, %0|%0, %1}";
264 return "movapd\t{%1, %0|%0, %1}";
266 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
267 return "movaps\t{%1, %0|%0, %1}";
269 return "movdqa\t{%1, %0|%0, %1}";
275 [(set_attr "type" "sselog1,ssemov,ssemov")
277 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
278 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
279 (and (eq_attr "alternative" "2")
280 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
282 (const_string "V4SF")
283 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
284 (const_string "V4SF")
285 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
286 (const_string "V2DF")
288 (const_string "TI")))])
290 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
291 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
292 ;; from memory, we'd prefer to load the memory directly into the %xmm
293 ;; register. To facilitate this happy circumstance, this pattern won't
294 ;; split until after register allocation. If the 64-bit value didn't
295 ;; come from memory, this is the best we can do. This is much better
296 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
297 ;; from there.
;; Enabled only for 32-bit targets with SSE2 and TARGET_INTER_UNIT_MOVES;
;; split after reload.  Operand 1 is a DImode value in a general register
;; (alternative 0, with a V4SI xmm scratch as operand 2) or in memory
;; (alternative 1, no scratch needed).
;; Split code: for a register source, the SImode subreg at byte 0 is
;; loaded into operand 0, the SImode subreg at byte 4 into the scratch,
;; and the two are combined with vec_interleave_lowv4si.  For a memory
;; source, vec_concatv2di builds the V2DI view of operand 0 from the
;; memory operand and const0_rtx.
;; NOTE(review): some lines of this pattern (braces, last argument of the
;; interleave call, DONE) are not present in this copy -- confirm.
299 (define_insn_and_split "movdi_to_sse"
301 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
302 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
303 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
304 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
306 "&& reload_completed"
309 if (register_operand (operands[1], DImode))
311 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
312 Assemble the 64-bit DImode value in an xmm register. */
313 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
314 gen_rtx_SUBREG (SImode, operands[1], 0)));
315 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
316 gen_rtx_SUBREG (SImode, operands[1], 4)));
317 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
320 else if (memory_operand (operands[1], DImode))
321 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
322 operands[1], const0_rtx));
;; After reload, with SSE enabled: rewrites a V4SF register load whose
;; source satisfies zero_extended_scalar_load_operand.  The preparation
;; code narrows operand 1 to SFmode with simplify_gen_subreg and sets
;; operand 2 to the V4SF zero vector; the replacement uses a vec_duplicate
;; of operand 1.
;; NOTE(review): the defining keyword line and most of the replacement
;; template are not present in this copy -- confirm against upstream.
328 [(set (match_operand:V4SF 0 "register_operand" "")
329 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
330 "TARGET_SSE && reload_completed"
333 (vec_duplicate:V4SF (match_dup 1))
337 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
338 operands[2] = CONST0_RTX (V4SFmode);
;; After reload, with SSE2 enabled: rewrites a V2DF register load whose
;; source satisfies zero_extended_scalar_load_operand into a vec_concat of
;; operand 1 (narrowed to DFmode with simplify_gen_subreg) and operand 2
;; (set to the DFmode zero constant).
;; NOTE(review): the defining keyword line and the braces are not present
;; in this copy -- confirm against upstream.
342 [(set (match_operand:V2DF 0 "register_operand" "")
343 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
344 "TARGET_SSE2 && reload_completed"
345 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
347 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
348 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register operand in any AVX256MODE mode; delegates
;; to ix86_expand_push.
;; NOTE(review): condition string and braces/DONE are not present in this
;; copy -- confirm against upstream.
351 (define_expand "push<mode>1"
352 [(match_operand:AVX256MODE 0 "register_operand" "")]
355 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a register operand in any SSEMODE16 mode; delegates
;; to ix86_expand_push.
;; NOTE(review): condition string and braces/DONE are not present in this
;; copy -- confirm against upstream.
359 (define_expand "push<mode>1"
360 [(match_operand:SSEMODE16 0 "register_operand" "")]
363 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every AVX256MODE mode; delegates to
;; ix86_expand_vector_move_misalign.
;; NOTE(review): condition string and braces/DONE are not present in this
;; copy -- confirm against upstream.
367 (define_expand "movmisalign<mode>"
368 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
369 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
372 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for every SSEMODE16 mode; delegates to
;; ix86_expand_vector_move_misalign.
;; NOTE(review): condition string and braces/DONE are not present in this
;; copy -- confirm against upstream.
376 (define_expand "movmisalign<mode>"
377 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
378 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
381 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the AVX unaligned float-vector move over AVXMODEF2P.
;; When both operands are memory, the source is first forced into a
;; register so that the matching insn (which rejects mem-to-mem) applies.
;; NOTE(review): the wrapping operation around operand 1 and the braces
;; are not present in this copy -- confirm against upstream.
385 (define_expand "avx_movu<ssemodesuffix><avxmodesuffix>"
386 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "")
388 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "")]
390 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
392 if (MEM_P (operands[0]) && MEM_P (operands[1]))
393 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Emits vmovu<ssemodesuffix> (vmovups / vmovupd) for AVXMODEF2P modes.
;; Alternatives: x or m -> x, and x -> m; memory-to-memory is rejected by
;; the condition.  Marked with the "movu" attribute and the VEX prefix.
396 (define_insn "*avx_movu<ssemodesuffix><avxmodesuffix>"
397 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
399 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
401 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
402 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
403 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
404 [(set_attr "type" "ssemov")
405 (set_attr "movu" "1")
406 (set_attr "prefix" "vex")
407 (set_attr "mode" "<MODE>")])
;; Emits (v)movq from an x-or-memory V2DI source into an xmm register.
;; The visible RTL selects element 0 of operand 1; the "%v" prefix in the
;; template together with prefix "maybe_vex" selects the VEX form when
;; applicable.
;; NOTE(review): parts of the RTL template and the condition string are
;; not present in this copy -- confirm against upstream.
409 (define_insn "sse2_movq128"
410 [(set (match_operand:V2DI 0 "register_operand" "=x")
413 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
414 (parallel [(const_int 0)]))
417 "%vmovq\t{%1, %0|%0, %1}"
418 [(set_attr "type" "ssemov")
419 (set_attr "prefix" "maybe_vex")
420 (set_attr "mode" "TI")])
;; Expander for the SSE unaligned float-vector move over SSEMODEF2P
;; (named sse_movups / sse2_movupd through the <sse> attribute).  When
;; both operands are memory, the source is first forced into a register.
;; NOTE(review): the wrapping operation around operand 1 and the braces
;; are not present in this copy -- confirm against upstream.
422 (define_expand "<sse>_movu<ssemodesuffix>"
423 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
425 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")]
427 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
429 if (MEM_P (operands[0]) && MEM_P (operands[1]))
430 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Emits movu<ssemodesuffix> (movups / movupd) for SSEMODEF2P modes.
;; Alternatives: x or m -> x, and x -> m; memory-to-memory is rejected by
;; the condition.  Marked with the "movu" attribute.
433 (define_insn "*<sse>_movu<ssemodesuffix>"
434 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
436 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
438 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
439 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
440 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssemov")
442 (set_attr "movu" "1")
443 (set_attr "mode" "<MODE>")])
;; Expander for the AVX unaligned integer-vector move over AVXMODEQI
;; (V32QI, V16QI).  When both operands are memory, the source is first
;; forced into a register.
;; NOTE(review): the wrapping operation, condition string and braces are
;; not present in this copy -- confirm against upstream.
445 (define_expand "avx_movdqu<avxmodesuffix>"
446 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "")
448 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "")]
452 if (MEM_P (operands[0]) && MEM_P (operands[1]))
453 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Emits vmovdqu for AVXMODEQI modes under TARGET_AVX.  Alternatives:
;; x or m -> x, and x -> m; memory-to-memory is rejected by the condition.
456 (define_insn "*avx_movdqu<avxmodesuffix>"
457 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
459 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
461 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
462 "vmovdqu\t{%1, %0|%0, %1}"
463 [(set_attr "type" "ssemov")
464 (set_attr "movu" "1")
465 (set_attr "prefix" "vex")
466 (set_attr "mode" "<avxvecmode>")])
;; Expander for the SSE2 unaligned V16QI move, wrapped in an unspec.  When
;; both operands are memory, the source is first forced into a register.
;; NOTE(review): the unspec name, condition string and braces are not
;; present in this copy -- confirm against upstream.
468 (define_expand "sse2_movdqu"
469 [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
470 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "")]
474 if (MEM_P (operands[0]) && MEM_P (operands[1]))
475 operands[1] = force_reg (V16QImode, operands[1]);
;; Emits movdqu for V16QI under TARGET_SSE2.  Alternatives: x or m -> x,
;; and x -> m; memory-to-memory is rejected by the condition.
;; The prefix_data16 attribute is set to 1 for this insn.
478 (define_insn "*sse2_movdqu"
479 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
480 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
482 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
483 "movdqu\t{%1, %0|%0, %1}"
484 [(set_attr "type" "ssemov")
485 (set_attr "movu" "1")
486 (set_attr "prefix_data16" "1")
487 (set_attr "mode" "TI")])
;; Emits vmovnt<ssemodesuffix> (vmovntps / vmovntpd): stores an xmm
;; register of an AVXMODEF2P mode to memory.  Register -> memory is the
;; only alternative.
489 (define_insn "avx_movnt<mode>"
490 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
492 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
494 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
495 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
496 [(set_attr "type" "ssemov")
497 (set_attr "prefix" "vex")
498 (set_attr "mode" "<MODE>")])
;; Emits movnt<ssemodesuffix> (movntps / movntpd): stores an xmm register
;; of an SSEMODEF2P mode to memory.  Through the <sse> attribute the
;; generated names are sse_movntv4sf and sse2_movntv2df.
500 (define_insn "<sse>_movnt<mode>"
501 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
503 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
505 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
506 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
507 [(set_attr "type" "ssemov")
508 (set_attr "mode" "<MODE>")])
;; Emits vmovntdq: stores an xmm register of an AVXMODEDI mode (V4DI,
;; V2DI) to memory.  Note the "type" attribute here is "ssecvt", unlike
;; the "ssemov" used by the other movnt insns in this file.
;; NOTE(review): the condition string is not present in this copy.
510 (define_insn "avx_movnt<mode>"
511 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
513 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
516 "vmovntdq\t{%1, %0|%0, %1}"
517 [(set_attr "type" "ssecvt")
518 (set_attr "prefix" "vex")
519 (set_attr "mode" "<avxvecmode>")])
;; Emits movntdq: stores a V2DI xmm register to memory.
;; NOTE(review): the unspec name and condition string are not present in
;; this copy -- confirm against upstream.
521 (define_insn "sse2_movntv2di"
522 [(set (match_operand:V2DI 0 "memory_operand" "=m")
523 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
526 "movntdq\t{%1, %0|%0, %1}"
527 [(set_attr "type" "ssemov")
528 (set_attr "prefix_data16" "1")
529 (set_attr "mode" "TI")])
;; Emits movnti: stores an SImode general register (constraint "r") to
;; memory.  prefix_data16 is explicitly 0 and the "mode" attribute is
;; V2DF even though the operands are SImode.
;; NOTE(review): the unspec name and condition string are not present in
;; this copy -- confirm against upstream.
531 (define_insn "sse2_movntsi"
532 [(set (match_operand:SI 0 "memory_operand" "=m")
533 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
536 "movnti\t{%1, %0|%0, %1}"
537 [(set_attr "type" "ssemov")
538 (set_attr "prefix_data16" "0")
539 (set_attr "mode" "V2DF")])
;; Emits vlddqu: loads an AVXMODEQI vector from memory (memory source
;; only) into an xmm register.  The "type" attribute here is "ssecvt".
;; NOTE(review): the condition string is not present in this copy.
541 (define_insn "avx_lddqu<avxmodesuffix>"
542 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
544 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
547 "vlddqu\t{%1, %0|%0, %1}"
548 [(set_attr "type" "ssecvt")
549 (set_attr "movu" "1")
550 (set_attr "prefix" "vex")
551 (set_attr "mode" "<avxvecmode>")])
;; Emits lddqu: loads a V16QI vector from memory (memory source only) into
;; an xmm register.  The encoding attributes set prefix_data16 to 0 and
;; prefix_rep to 1.
;; NOTE(review): the unspec name and condition string are not present in
;; this copy -- confirm against upstream.
553 (define_insn "sse3_lddqu"
554 [(set (match_operand:V16QI 0 "register_operand" "=x")
555 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
558 "lddqu\t{%1, %0|%0, %1}"
559 [(set_attr "type" "ssemov")
560 (set_attr "movu" "1")
561 (set_attr "prefix_data16" "0")
562 (set_attr "prefix_rep" "1")
563 (set_attr "mode" "TI")])
565 ; Expand patterns for non-temporal stores. At the moment, only those
566 ; that directly map to insns are defined; it would be possible to
567 ; define patterns for other modes that would expand to several insns.
;; One expander per STORENT_MODE mode: a register source is stored to a
;; memory destination.
;; NOTE(review): the wrapping operation, its name and the closing lines of
;; this pattern are not present in this copy -- confirm against upstream.
569 (define_expand "storent<mode>"
570 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
572 [(match_operand:STORENT_MODE 1 "register_operand" "")]
575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
577 ;; Parallel floating point arithmetic
579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; abs/neg expander, iterated over the absneg codes and VEC_FLOAT_MODE.
;; All work is done by ix86_expand_fp_absneg_operator, after which the
;; expander finishes with DONE.
;; NOTE(review): the condition string is not present in this copy.
581 (define_expand "<code><mode>2"
582 [(set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
583 (absneg:VEC_FLOAT_MODE
584 (match_operand:VEC_FLOAT_MODE 1 "register_operand" "")))]
586 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; AVX abs/neg on AVXMODEF2P, split after reload into a bitwise operation
;; with operand 2 (the "use" operand): XOR when the operator is NEG, AND
;; otherwise.  The constraints allow at most one of operands 1 and 2 to be
;; in memory; the split places a memory operand 1 second in the logical
;; operation and a non-memory operand 1 first.
;; NOTE(review): braces, the else keyword and the final emit are not
;; present in this copy -- confirm against upstream.
588 (define_insn_and_split "*avx_absneg<mode>2"
589 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
590 (match_operator:AVXMODEF2P 3 "absneg_operator"
591 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "x,m")]))
592 (use (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm,x"))]
593 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
595 "&& reload_completed"
600 if (MEM_P (operands[1]))
601 t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
602 <MODE>mode, operands[2], operands[1]);
604 t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
605 <MODE>mode, operands[1], operands[2]);
606 t = gen_rtx_SET (VOIDmode, operands[0], t);
;; SSE abs/neg on SSEMODEF2P, split after reload into a two-operand
;; bitwise operation: XOR when the operator is NEG, AND otherwise.
;; The constraints tie either operand 1 or operand 2 to the destination
;; ("0").  The split picks whichever of the two is NOT the destination as
;; the second source: operand 2 when operand 0 equals operand 1, else
;; operand 1.
;; NOTE(review): braces and the final emit are not present in this copy.
611 (define_insn_and_split "*sse_absneg<mode>2"
612 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
613 (match_operator:SSEMODEF2P 3 "absneg_operator"
614 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,xm")]))
615 (use (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm,0"))]
616 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
618 "&& reload_completed"
623 t = operands[rtx_equal_p (operands[0], operands[1]) ? 2 : 1];
624 t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
625 <MODE>mode, operands[0], t);
626 t = gen_rtx_SET (VOIDmode, operands[0], t);
;; Add/subtract expander for the 256-bit float modes (AVX256MODEF2P).
;; The preparation statement calls ix86_fixup_binary_operands_no_copy on
;; the operands; with no DONE, the RTL template above is then emitted.
631 (define_expand "<plusminus_insn><mode>3"
632 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
633 (plusminus:AVX256MODEF2P
634 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
635 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
636 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
637 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Three-operand VEX add/subtract for all AVXMODEF2P modes:
;; v<plusminus_mnemonic><ssemodesuffix> dest, src1, src2.
;; Operand 1 must end up in a register ("x"; the <comm> attribute supplies
;; the constraint modifier for the current code); operand 2 may be memory.
639 (define_insn "*avx_<plusminus_insn><mode>3"
640 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
641 (plusminus:AVXMODEF2P
642 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
643 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
644 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
645 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
646 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
647 [(set_attr "type" "sseadd")
648 (set_attr "prefix" "vex")
649 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for the 128-bit float modes (SSEMODEF2P).
;; The preparation statement calls ix86_fixup_binary_operands_no_copy on
;; the operands; with no DONE, the RTL template above is then emitted.
651 (define_expand "<plusminus_insn><mode>3"
652 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
653 (plusminus:SSEMODEF2P
654 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
655 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
656 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
657 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Two-operand SSE add/subtract for SSEMODEF2P:
;; <plusminus_mnemonic><ssemodesuffix> dest, src2.  Operand 1 is tied to
;; the destination ("0"), so only operands 0 and 2 appear in the template.
659 (define_insn "*<plusminus_insn><mode>3"
660 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
661 (plusminus:SSEMODEF2P
662 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
663 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
664 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
665 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
666 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
667 [(set_attr "type" "sseadd")
668 (set_attr "mode" "<MODE>")])
;; Scalar-form VEX add/subtract on a 128-bit float vector: the arithmetic
;; result is wrapped in a vec_merge and the insn emits
;; v<plusminus_mnemonic>ss / ...sd with three operands.  The "mode"
;; attribute is the element mode (SF or DF).
;; NOTE(review): the remaining vec_merge operands are not present in this
;; copy -- confirm against upstream.
670 (define_insn "*avx_vm<plusminus_insn><mode>3"
671 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
672 (vec_merge:SSEMODEF2P
673 (plusminus:SSEMODEF2P
674 (match_operand:SSEMODEF2P 1 "register_operand" "x")
675 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
678 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
679 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
680 [(set_attr "type" "sseadd")
681 (set_attr "prefix" "vex")
682 (set_attr "mode" "<ssescalarmode>")])
;; Scalar-form SSE add/subtract on a 128-bit float vector: operand 1 is
;; tied to the destination and the insn emits <plusminus_mnemonic>ss /
;; ...sd with two operands.  The "mode" attribute is the element mode.
;; NOTE(review): the remaining vec_merge operands are not present in this
;; copy -- confirm against upstream.
684 (define_insn "<sse>_vm<plusminus_insn><mode>3"
685 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
686 (vec_merge:SSEMODEF2P
687 (plusminus:SSEMODEF2P
688 (match_operand:SSEMODEF2P 1 "register_operand" "0")
689 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
692 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
693 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
694 [(set_attr "type" "sseadd")
695 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 256-bit float modes (AVX256MODEF2P); the
;; preparation statement calls ix86_fixup_binary_operands_no_copy with
;; MULT.
;; NOTE(review): the line carrying the rtx code of the operation is not
;; present in this copy -- confirm against upstream.
697 (define_expand "mul<mode>3"
698 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
700 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
701 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
702 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
703 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Three-operand VEX multiply for all AVXMODEF2P modes:
;; vmul<ssemodesuffix> dest, src1, src2.  The "%" modifier on operand 1
;; marks operands 1 and 2 as commutative.
;; NOTE(review): the line carrying the rtx code of the operation is not
;; present in this copy -- confirm against upstream.
705 (define_insn "*avx_mul<mode>3"
706 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
708 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
709 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
710 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
711 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
712 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
713 [(set_attr "type" "ssemul")
714 (set_attr "prefix" "vex")
715 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 128-bit float modes (SSEMODEF2P); the
;; preparation statement calls ix86_fixup_binary_operands_no_copy with
;; MULT.
;; NOTE(review): the line carrying the rtx code of the operation is not
;; present in this copy -- confirm against upstream.
717 (define_expand "mul<mode>3"
718 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
720 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
721 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
722 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
723 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Two-operand SSE multiply for SSEMODEF2P: mul<ssemodesuffix> dest, src2.
;; Operand 1 is tied to the destination and is commutative with operand 2
;; ("%0").
;; NOTE(review): the line carrying the rtx code of the operation is not
;; present in this copy -- confirm against upstream.
725 (define_insn "*mul<mode>3"
726 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
728 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
729 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
730 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
731 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
732 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssemul")
734 (set_attr "mode" "<MODE>")])
;; Scalar-form VEX multiply on a 128-bit float vector, wrapped in a
;; vec_merge; emits vmulss / vmulsd with three operands.  The "mode"
;; attribute is the element mode.  This pattern tests
;; AVX_VEC_FLOAT_MODE_P, whereas the scalar add/sub and div patterns test
;; AVX128_VEC_FLOAT_MODE_P.
;; NOTE(review): the operation line and the remaining vec_merge operands
;; are not present in this copy -- confirm against upstream.
736 (define_insn "*avx_vmmul<mode>3"
737 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
738 (vec_merge:SSEMODEF2P
740 (match_operand:SSEMODEF2P 1 "register_operand" "x")
741 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
744 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
745 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
746 [(set_attr "type" "ssemul")
747 (set_attr "prefix" "vex")
748 (set_attr "mode" "<ssescalarmode>")])
;; Scalar-form SSE multiply on a 128-bit float vector, wrapped in a
;; vec_merge; operand 1 is tied to the destination and the insn emits
;; mulss / mulsd with two operands.
;; NOTE(review): the operation line and the remaining vec_merge operands
;; are not present in this copy -- confirm against upstream.
750 (define_insn "<sse>_vmmul<mode>3"
751 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
752 (vec_merge:SSEMODEF2P
754 (match_operand:SSEMODEF2P 1 "register_operand" "0")
755 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
758 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
759 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
760 [(set_attr "type" "ssemul")
761 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  Operands are first normalised with
;; ix86_fixup_binary_operands_no_copy.  ix86_emit_swdivsf is called
;; instead of using the div template only when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
;; NOTE(review): condition string, braces and DONE are not present in
;; this copy -- confirm against upstream.
763 (define_expand "divv8sf3"
764 [(set (match_operand:V8SF 0 "register_operand" "")
765 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
766 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
769 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
771 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
772 && flag_finite_math_only && !flag_trapping_math
773 && flag_unsafe_math_optimizations)
775 ix86_emit_swdivsf (operands[0], operands[1],
776 operands[2], V8SFmode);
;; V4DF division expander: normalises the operands with
;; ix86_fixup_binary_operands_no_copy, then emits the div template.
;; NOTE(review): the condition string is not present in this copy.
781 (define_expand "divv4df3"
782 [(set (match_operand:V4DF 0 "register_operand" "")
783 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
784 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
786 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; Three-operand VEX divide for all AVXMODEF2P modes:
;; vdiv<ssemodesuffix> dest, src1, src2.  Operand 1 must be a register;
;; operand 2 may be memory.
;; NOTE(review): the line carrying the rtx code of the operation is not
;; present in this copy -- confirm against upstream.
788 (define_insn "avx_div<mode>3"
789 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
791 (match_operand:AVXMODEF2P 1 "register_operand" "x")
792 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
793 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
794 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
795 [(set_attr "type" "ssediv")
796 (set_attr "prefix" "vex")
797 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  ix86_emit_swdivsf is called instead of using
;; the div template only when all of these hold: TARGET_SSE_MATH,
;; TARGET_RECIP, optimizing the insn for speed, -ffinite-math-only, no
;; trapping math, and unsafe math optimizations.
;; NOTE(review): condition string, braces and DONE are not present in
;; this copy -- confirm against upstream.
799 (define_expand "divv4sf3"
800 [(set (match_operand:V4SF 0 "register_operand" "")
801 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
802 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
805 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
806 && flag_finite_math_only && !flag_trapping_math
807 && flag_unsafe_math_optimizations)
809 ix86_emit_swdivsf (operands[0], operands[1],
810 operands[2], V4SFmode);
;; V2DF division expander: a plain div of a register by a register or
;; memory operand.
;; NOTE(review): the condition string and closing lines are not present
;; in this copy -- confirm against upstream.
815 (define_expand "divv2df3"
816 [(set (match_operand:V2DF 0 "register_operand" "")
817 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
818 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Three-operand VEX divide restricted to the 128-bit float modes
;; (SSEMODEF2P under AVX128_VEC_FLOAT_MODE_P): vdiv<ssemodesuffix>.
;; NOTE(review): the line carrying the rtx code of the operation is not
;; present in this copy -- confirm against upstream.
821 (define_insn "*avx_div<mode>3"
822 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
824 (match_operand:SSEMODEF2P 1 "register_operand" "x")
825 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
826 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
827 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
828 [(set_attr "type" "ssediv")
829 (set_attr "prefix" "vex")
830 (set_attr "mode" "<MODE>")])
;; Two-operand SSE divide for SSEMODEF2P: div<ssemodesuffix> dest, src2,
;; with operand 1 tied to the destination.
;; NOTE(review): the line carrying the rtx code of the operation is not
;; present in this copy -- confirm against upstream.
832 (define_insn "<sse>_div<mode>3"
833 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
835 (match_operand:SSEMODEF2P 1 "register_operand" "0")
836 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
837 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
838 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
839 [(set_attr "type" "ssediv")
840 (set_attr "mode" "<MODE>")])
;; Scalar-form VEX divide on a 128-bit float vector, wrapped in a
;; vec_merge; emits vdivss / vdivsd with three operands.  The "mode"
;; attribute is the element mode.
;; NOTE(review): the operation line and the remaining vec_merge operands
;; are not present in this copy -- confirm against upstream.
842 (define_insn "*avx_vmdiv<mode>3"
843 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
844 (vec_merge:SSEMODEF2P
846 (match_operand:SSEMODEF2P 1 "register_operand" "x")
847 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
850 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
851 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
852 [(set_attr "type" "ssediv")
853 (set_attr "prefix" "vex")
854 (set_attr "mode" "<ssescalarmode>")])
;; Scalar-form SSE divide on a 128-bit float vector, wrapped in a
;; vec_merge; operand 1 is tied to the destination and the insn emits
;; divss / divsd with two operands.
;; NOTE(review): the operation line and the remaining vec_merge operands
;; are not present in this copy -- confirm against upstream.
856 (define_insn "<sse>_vmdiv<mode>3"
857 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
858 (vec_merge:SSEMODEF2P
860 (match_operand:SSEMODEF2P 1 "register_operand" "0")
861 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
864 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
865 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
866 [(set_attr "type" "ssediv")
867 (set_attr "mode" "<ssescalarmode>")])
;; Emits vrcpps on a V8SF operand; the operation is represented as an
;; UNSPEC_RCP unspec.
;; NOTE(review): the condition string is not present in this copy.
869 (define_insn "avx_rcpv8sf2"
870 [(set (match_operand:V8SF 0 "register_operand" "=x")
872 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
874 "vrcpps\t{%1, %0|%0, %1}"
875 [(set_attr "type" "sse")
876 (set_attr "prefix" "vex")
877 (set_attr "mode" "V8SF")])
;; Emits (v)rcpps on a V4SF operand, represented as an UNSPEC_RCP unspec.
;; "%v" plus prefix "maybe_vex" selects the VEX form when applicable; the
;; atom_sse_attr attribute is "rcp".
;; NOTE(review): the condition string is not present in this copy.
879 (define_insn "sse_rcpv4sf2"
880 [(set (match_operand:V4SF 0 "register_operand" "=x")
882 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
884 "%vrcpps\t{%1, %0|%0, %1}"
885 [(set_attr "type" "sse")
886 (set_attr "atom_sse_attr" "rcp")
887 (set_attr "prefix" "maybe_vex")
888 (set_attr "mode" "V4SF")])
;; Scalar-form VEX reciprocal: emits vrcpss with operand 1 (register or
;; memory) and operand 2 (register) as sources.  The "mode" attribute is
;; SF.
;; NOTE(review): the enclosing rtx, unspec name, merge mask and condition
;; string are not present in this copy -- confirm against upstream.
890 (define_insn "*avx_vmrcpv4sf2"
891 [(set (match_operand:V4SF 0 "register_operand" "=x")
893 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
895 (match_operand:V4SF 2 "register_operand" "x")
898 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
899 [(set_attr "type" "sse")
900 (set_attr "prefix" "vex")
901 (set_attr "mode" "SF")])
;; Scalar-form SSE reciprocal: emits rcpss with operand 1 as the source;
;; operand 2 is tied to the destination ("0").  The "mode" attribute is
;; SF and atom_sse_attr is "rcp".
;; NOTE(review): the enclosing rtx, unspec name, merge mask and condition
;; string are not present in this copy -- confirm against upstream.
903 (define_insn "sse_vmrcpv4sf2"
904 [(set (match_operand:V4SF 0 "register_operand" "=x")
906 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
908 (match_operand:V4SF 2 "register_operand" "0")
911 "rcpss\t{%1, %0|%0, %1}"
912 [(set_attr "type" "sse")
913 (set_attr "atom_sse_attr" "rcp")
914 (set_attr "mode" "SF")])
;; V8SF square-root expander.  ix86_emit_swsqrtsf (last argument 0) is
;; called instead of using the sqrt template only when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
;; NOTE(review): condition string, braces and DONE are not present in
;; this copy -- confirm against upstream.
916 (define_expand "sqrtv8sf2"
917 [(set (match_operand:V8SF 0 "register_operand" "")
918 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
921 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
922 && flag_finite_math_only && !flag_trapping_math
923 && flag_unsafe_math_optimizations)
925 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; Emits vsqrtps for a V8SF sqrt; the source may be a register or memory.
;; NOTE(review): the condition string is not present in this copy.
930 (define_insn "avx_sqrtv8sf2"
931 [(set (match_operand:V8SF 0 "register_operand" "=x")
932 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
934 "vsqrtps\t{%1, %0|%0, %1}"
935 [(set_attr "type" "sse")
936 (set_attr "prefix" "vex")
937 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  ix86_emit_swsqrtsf (last argument 0) is
;; called instead of using the sqrt template only when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, optimizing the insn for speed,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
;; NOTE(review): condition string, braces and DONE are not present in
;; this copy -- confirm against upstream.
939 (define_expand "sqrtv4sf2"
940 [(set (match_operand:V4SF 0 "register_operand" "")
941 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
944 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
945 && flag_finite_math_only && !flag_trapping_math
946 && flag_unsafe_math_optimizations)
948 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Emits (v)sqrtps for a V4SF sqrt; "%v" plus prefix "maybe_vex" selects
;; the VEX form when applicable.  atom_sse_attr is "sqrt".
;; NOTE(review): the condition string is not present in this copy.
953 (define_insn "sse_sqrtv4sf2"
954 [(set (match_operand:V4SF 0 "register_operand" "=x")
955 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
957 "%vsqrtps\t{%1, %0|%0, %1}"
958 [(set_attr "type" "sse")
959 (set_attr "atom_sse_attr" "sqrt")
960 (set_attr "prefix" "maybe_vex")
961 (set_attr "mode" "V4SF")])
;; Emits vsqrtpd for a V4DF sqrt.  This is a named insn, so no separate
;; expander is needed for this mode.
;; NOTE(review): the condition string is not present in this copy.
963 (define_insn "sqrtv4df2"
964 [(set (match_operand:V4DF 0 "register_operand" "=x")
965 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
967 "vsqrtpd\t{%1, %0|%0, %1}"
968 [(set_attr "type" "sse")
969 (set_attr "prefix" "vex")
970 (set_attr "mode" "V4DF")])
;; Emits (v)sqrtpd for a V2DF sqrt; "%v" plus prefix "maybe_vex" selects
;; the VEX form when applicable.
;; NOTE(review): the condition string is not present in this copy.
972 (define_insn "sqrtv2df2"
973 [(set (match_operand:V2DF 0 "register_operand" "=x")
974 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
976 "%vsqrtpd\t{%1, %0|%0, %1}"
977 [(set_attr "type" "sse")
978 (set_attr "prefix" "maybe_vex")
979 (set_attr "mode" "V2DF")])
;; Scalar-form VEX square root on a 128-bit float vector, wrapped in a
;; vec_merge with register operand 2; emits vsqrtss / vsqrtsd with
;; operand 1 and operand 2 as sources.  The "mode" attribute is the
;; element mode.
;; NOTE(review): the operation line and the merge mask are not present in
;; this copy -- confirm against upstream.
981 (define_insn "*avx_vmsqrt<mode>2"
982 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
983 (vec_merge:SSEMODEF2P
985 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
986 (match_operand:SSEMODEF2P 2 "register_operand" "x")
988 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
989 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
990 [(set_attr "type" "sse")
991 (set_attr "prefix" "vex")
992 (set_attr "mode" "<ssescalarmode>")])
;; Legacy-SSE scalar ("vm") square root, iterated over SSEMODEF2P.
;; Same vec_merge shape as the AVX form, but operand 2 carries the matching
;; constraint "0": it must live in the destination register, which is why the
;; two-operand template only mentions %0 and %1.
994 (define_insn "<sse>_vmsqrt<mode>2"
995 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
996 (vec_merge:SSEMODEF2P
998 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
999 (match_operand:SSEMODEF2P 2 "register_operand" "0")
1001 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1002 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
1003 [(set_attr "type" "sse")
1004 (set_attr "atom_sse_attr" "sqrt")
1005 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the V8SF reciprocal square root (UNSPEC_RSQRT).
;; Available only with AVX and SSE math.  The preparation code calls
;; ix86_emit_swsqrtsf on operands 0 and 1 with V8SFmode and a final
;; argument of 1 (the sqrt expanders in this file pass 0 there).
1007 (define_expand "rsqrtv8sf2"
1008 [(set (match_operand:V8SF 0 "register_operand" "")
1010 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
1011 "TARGET_AVX && TARGET_SSE_MATH"
1013 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; 256-bit packed reciprocal square root estimate, expressed as
;; UNSPEC_RSQRT on a V8SF operand (register or memory).  Emits vrsqrtps,
;; VEX-encoded.
1017 (define_insn "avx_rsqrtv8sf2"
1018 [(set (match_operand:V8SF 0 "register_operand" "=x")
1020 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1022 "vrsqrtps\t{%1, %0|%0, %1}"
1023 [(set_attr "type" "sse")
1024 (set_attr "prefix" "vex")
1025 (set_attr "mode" "V8SF")])
;; Expander for the V4SF reciprocal square root (UNSPEC_RSQRT).
;; The preparation code calls ix86_emit_swsqrtsf on operands 0 and 1 with
;; V4SFmode and a final argument of 1.
1027 (define_expand "rsqrtv4sf2"
1028 [(set (match_operand:V4SF 0 "register_operand" "")
1030 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
1033 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; 128-bit packed reciprocal square root estimate (UNSPEC_RSQRT on V4SF).
;; Source may be an SSE register or memory.  %v / maybe_vex let the pattern
;; emit either rsqrtps or its VEX spelling.
1037 (define_insn "sse_rsqrtv4sf2"
1038 [(set (match_operand:V4SF 0 "register_operand" "=x")
1040 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1042 "%vrsqrtps\t{%1, %0|%0, %1}"
1043 [(set_attr "type" "sse")
1044 (set_attr "prefix" "maybe_vex")
1045 (set_attr "mode" "V4SF")])
;; AVX scalar reciprocal square root estimate on a V4SF container.
;; The unspec is applied to operand 1 (register or memory); operand 2 is a
;; separate register source, so the three-operand vrsqrtss form is emitted.
;; The mode attribute is SF because only one element is computed.
1047 (define_insn "*avx_vmrsqrtv4sf2"
1048 [(set (match_operand:V4SF 0 "register_operand" "=x")
1050 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1052 (match_operand:V4SF 2 "register_operand" "x")
1055 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1056 [(set_attr "type" "sse")
1057 (set_attr "prefix" "vex")
1058 (set_attr "mode" "SF")])
;; Legacy-SSE scalar reciprocal square root estimate on a V4SF container.
;; Operand 2 is tied to the destination via the matching constraint "0", so
;; the two-operand rsqrtss template only names %0 and %1.
1060 (define_insn "sse_vmrsqrtv4sf2"
1061 [(set (match_operand:V4SF 0 "register_operand" "=x")
1063 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1065 (match_operand:V4SF 2 "register_operand" "0")
1068 "rsqrtss\t{%1, %0|%0, %1}"
1069 [(set_attr "type" "sse")
1070 (set_attr "mode" "SF")])
1072 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1073 ;; isn't really correct, as those rtl operators aren't defined when
1074 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for signed min/max (smaxmin code iterator) on 256-bit float
;; vectors.  Unless -ffinite-math-only is in effect, operand 1 is forced
;; into a register first; the operands are then canonicalized by
;; ix86_fixup_binary_operands_no_copy.
1076 (define_expand "<code><mode>3"
1077 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1078 (smaxmin:AVX256MODEF2P
1079 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1080 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1081 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1083 if (!flag_finite_math_only)
1084 operands[1] = force_reg (<MODE>mode, operands[1]);
1085 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander for min/max on 128-bit SSE float vectors (SSEMODEF2P).
;; Unless -ffinite-math-only is in effect, operand 1 is forced into a
;; register first; the operands are then canonicalized by
;; ix86_fixup_binary_operands_no_copy.
1088 (define_expand "<code><mode>3"
1089 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1091 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1092 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1093 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1095 if (!flag_finite_math_only)
1096 operands[1] = force_reg (<MODE>mode, operands[1]);
1097 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max, finite-math-only variant.
;; Only matches when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 carries the "%"
;; commutative marker, so the two sources may be exchanged to satisfy the
;; constraints.
1100 (define_insn "*avx_<code><mode>3_finite"
1101 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1103 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1104 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1105 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1106 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1107 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1108 [(set_attr "type" "sseadd")
1109 (set_attr "prefix" "vex")
1110 (set_attr "mode" "<MODE>")])
;; Legacy-SSE packed min/max, finite-math-only variant.
;; Only matches when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is commutative
;; ("%") and tied to the destination ("0"), matching the two-operand
;; instruction form.
1112 (define_insn "*<code><mode>3_finite"
1113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1115 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1116 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1117 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1118 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1119 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1120 [(set_attr "type" "sseadd")
1121 (set_attr "mode" "<MODE>")])
;; AVX packed min/max for the general case (no finite-math-only requirement).
;; The hardware min/max instructions are not commutative once NaNs or signed
;; zeros are involved, so operand 1 must NOT carry the "%" commutative marker
;; here: it is a plain register operand, exactly like the legacy-SSE
;; counterpart of this pattern.  Commutation is only allowed in the separate
;; *_finite variant, which is guarded by flag_finite_math_only.
;; Operand 2 may be an SSE register or memory.
1123 (define_insn "*avx_<code><mode>3"
1124 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1126 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1127 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1128 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1129 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1130 [(set_attr "type" "sseadd")
1131 (set_attr "prefix" "vex")
1132 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE packed min/max for the general case.
;; Operand 1 is a register tied to the destination ("0") and has no
;; commutative marker, so the operand order is preserved.  Operand 2 may be
;; an SSE register or memory.
1134 (define_insn "*<code><mode>3"
1135 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1137 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1138 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1139 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1140 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1141 [(set_attr "type" "sseadd")
1142 (set_attr "mode" "<MODE>")])
;; AVX scalar ("vm") min/max on a 128-bit float vector container.
;; The RTL is a vec_merge around the operation on operand 1 (register) and
;; operand 2 (register or memory).  Emits the scalar-suffixed VEX mnemonic;
;; the mode attribute is the scalar element mode.
;; NOTE(review): "type" is "sse" here but "sseadd" in the legacy-SSE
;; counterpart -- confirm whether the difference is intended.
1144 (define_insn "*avx_vm<code><mode>3"
1145 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1146 (vec_merge:SSEMODEF2P
1148 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1149 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1152 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1153 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1154 [(set_attr "type" "sse")
1155 (set_attr "prefix" "vex")
1156 (set_attr "mode" "<ssescalarmode>")])
;; Legacy-SSE scalar ("vm") min/max on a 128-bit float vector container.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory.  The mode attribute is the scalar element mode.
1158 (define_insn "<sse>_vm<code><mode>3"
1159 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1160 (vec_merge:SSEMODEF2P
1162 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1163 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1166 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1167 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1168 [(set_attr "type" "sseadd")
1169 (set_attr "mode" "<ssescalarmode>")])
1171 ;; These versions of the min/max patterns implement exactly the operations
1172 ;; min = (op1 < op2 ? op1 : op2)
1173 ;; max = (!(op1 < op2) ? op1 : op2)
1174 ;; Their operands are not commutative, and thus they may be used in the
1175 ;; presence of -0.0 and NaN.
;; AVX IEEE-exact packed minimum.
;; The two sources are wrapped in an operand vector rather than a
;; commutative RTL code, so their order is fixed: operand 1 is a register,
;; operand 2 a register or memory.  Emits vmin with the mode suffix.
1177 (define_insn "*avx_ieee_smin<mode>3"
1178 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1180 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1181 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1183 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1184 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1185 [(set_attr "type" "sseadd")
1186 (set_attr "prefix" "vex")
1187 (set_attr "mode" "<avxvecmode>")])
;; AVX IEEE-exact packed maximum.
;; Same shape as the ieee_smin pattern: fixed operand order, operand 1 in a
;; register, operand 2 in a register or memory.  Emits vmax with the mode
;; suffix.
1189 (define_insn "*avx_ieee_smax<mode>3"
1190 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1192 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1193 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1195 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1196 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1197 [(set_attr "type" "sseadd")
1198 (set_attr "prefix" "vex")
1199 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE IEEE-exact packed minimum.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory.  No commutative marker: operand order is significant.
1201 (define_insn "*ieee_smin<mode>3"
1202 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1204 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1205 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1207 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1208 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1209 [(set_attr "type" "sseadd")
1210 (set_attr "mode" "<MODE>")])
;; Legacy-SSE IEEE-exact packed maximum.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory.  No commutative marker: operand order is significant.
1212 (define_insn "*ieee_smax<mode>3"
1213 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1215 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1216 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1218 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1219 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1220 [(set_attr "type" "sseadd")
1221 (set_attr "mode" "<MODE>")])
;; 256-bit single-precision add/subtract (vaddsubps) on V8SF.
;; The RTL combines an arithmetic result on operands 1 and 2 with
;; (minus op1 op2) on the same operands (match_dup).  Operand 1 is a
;; register, operand 2 a register or memory.
1223 (define_insn "avx_addsubv8sf3"
1224 [(set (match_operand:V8SF 0 "register_operand" "=x")
1227 (match_operand:V8SF 1 "register_operand" "x")
1228 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1229 (minus:V8SF (match_dup 1) (match_dup 2))
1232 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1233 [(set_attr "type" "sseadd")
1234 (set_attr "prefix" "vex")
1235 (set_attr "mode" "V8SF")])
;; 256-bit double-precision add/subtract (vaddsubpd) on V4DF.
;; Same structure as the V8SF variant: an arithmetic result on operands 1
;; and 2 is combined with (minus op1 op2) on the same operands.
1237 (define_insn "avx_addsubv4df3"
1238 [(set (match_operand:V4DF 0 "register_operand" "=x")
1241 (match_operand:V4DF 1 "register_operand" "x")
1242 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1243 (minus:V4DF (match_dup 1) (match_dup 2))
1246 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1247 [(set_attr "type" "sseadd")
1248 (set_attr "prefix" "vex")
1249 (set_attr "mode" "V4DF")])
;; 128-bit single-precision add/subtract, VEX-encoded three-operand form
;; (vaddsubps) on V4SF.  Operand 1 is a register, operand 2 a register or
;; memory; the destination is independent of both.
1251 (define_insn "*avx_addsubv4sf3"
1252 [(set (match_operand:V4SF 0 "register_operand" "=x")
1255 (match_operand:V4SF 1 "register_operand" "x")
1256 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1257 (minus:V4SF (match_dup 1) (match_dup 2))
1260 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1261 [(set_attr "type" "sseadd")
1262 (set_attr "prefix" "vex")
1263 (set_attr "mode" "V4SF")])
;; 128-bit single-precision add/subtract, legacy two-operand form
;; (addsubps) on V4SF.  Operand 1 is tied to the destination ("0").
;; prefix_rep is set to "1" for the length computation.
1265 (define_insn "sse3_addsubv4sf3"
1266 [(set (match_operand:V4SF 0 "register_operand" "=x")
1269 (match_operand:V4SF 1 "register_operand" "0")
1270 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1271 (minus:V4SF (match_dup 1) (match_dup 2))
1274 "addsubps\t{%2, %0|%0, %2}"
1275 [(set_attr "type" "sseadd")
1276 (set_attr "prefix_rep" "1")
1277 (set_attr "mode" "V4SF")])
;; 128-bit double-precision add/subtract, VEX-encoded three-operand form
;; (vaddsubpd) on V2DF.  Operand 1 is a register, operand 2 a register or
;; memory.
1279 (define_insn "*avx_addsubv2df3"
1280 [(set (match_operand:V2DF 0 "register_operand" "=x")
1283 (match_operand:V2DF 1 "register_operand" "x")
1284 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1285 (minus:V2DF (match_dup 1) (match_dup 2))
1288 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1289 [(set_attr "type" "sseadd")
1290 (set_attr "prefix" "vex")
1291 (set_attr "mode" "V2DF")])
;; 128-bit double-precision add/subtract, legacy two-operand form
;; (addsubpd) on V2DF.  Operand 1 is tied to the destination ("0").
;; Marked atom_unit "complex" for Atom scheduling.
1293 (define_insn "sse3_addsubv2df3"
1294 [(set (match_operand:V2DF 0 "register_operand" "=x")
1297 (match_operand:V2DF 1 "register_operand" "0")
1298 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1299 (minus:V2DF (match_dup 1) (match_dup 2))
1302 "addsubpd\t{%2, %0|%0, %2}"
1303 [(set_attr "type" "sseadd")
1304 (set_attr "atom_unit" "complex")
1305 (set_attr "mode" "V2DF")])
;; 256-bit double-precision horizontal add/subtract (vhaddpd / vhsubpd),
;; generated for each code of the plusminus iterator.
;; The RTL selects DF elements 0..3 of operand 1 and then elements 0..3 of
;; operand 2, grouped as the adjacent pairs (0,1) and (2,3).
;; Operand 1 is a register, operand 2 a register or memory.
1307 (define_insn "avx_h<plusminus_insn>v4df3"
1308 [(set (match_operand:V4DF 0 "register_operand" "=x")
1313 (match_operand:V4DF 1 "register_operand" "x")
1314 (parallel [(const_int 0)]))
1315 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1317 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1318 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1322 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1323 (parallel [(const_int 0)]))
1324 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1326 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1327 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1329 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1330 [(set_attr "type" "sseadd")
1331 (set_attr "prefix" "vex")
1332 (set_attr "mode" "V4DF")])
;; 256-bit single-precision horizontal add/subtract (vhaddps / vhsubps),
;; generated for each code of the plusminus iterator.
;; The RTL selects SF elements in adjacent pairs: first elements 0..3 of
;; operand 1, then 0..3 of operand 2, then 4..7 of operand 1, then 4..7 of
;; operand 2.  Operand 1 is a register, operand 2 a register or memory.
1334 (define_insn "avx_h<plusminus_insn>v8sf3"
1335 [(set (match_operand:V8SF 0 "register_operand" "=x")
1341 (match_operand:V8SF 1 "register_operand" "x")
1342 (parallel [(const_int 0)]))
1343 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1345 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1346 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1350 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1351 (parallel [(const_int 0)]))
1352 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1354 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1355 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1359 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1360 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1362 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1363 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1366 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1367 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1369 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1370 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1372 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1373 [(set_attr "type" "sseadd")
1374 (set_attr "prefix" "vex")
1375 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/subtract, VEX three-operand form
;; (vhaddps / vhsubps) on V4SF.
;; The RTL selects the adjacent SF pairs (0,1) and (2,3) of operand 1 and
;; then of operand 2.  Operand 1 is a register, operand 2 a register or
;; memory.
1377 (define_insn "*avx_h<plusminus_insn>v4sf3"
1378 [(set (match_operand:V4SF 0 "register_operand" "=x")
1383 (match_operand:V4SF 1 "register_operand" "x")
1384 (parallel [(const_int 0)]))
1385 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1387 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1388 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1392 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1393 (parallel [(const_int 0)]))
1394 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1396 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1397 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1399 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1400 [(set_attr "type" "sseadd")
1401 (set_attr "prefix" "vex")
1402 (set_attr "mode" "V4SF")])
;; 128-bit single-precision horizontal add/subtract, legacy two-operand
;; form (haddps / hsubps) on V4SF.
;; Same element selection as the AVX pattern, but operand 1 is tied to the
;; destination ("0").  Carries atom_unit "complex" and prefix_rep "1".
1404 (define_insn "sse3_h<plusminus_insn>v4sf3"
1405 [(set (match_operand:V4SF 0 "register_operand" "=x")
1410 (match_operand:V4SF 1 "register_operand" "0")
1411 (parallel [(const_int 0)]))
1412 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1414 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1415 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1419 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1420 (parallel [(const_int 0)]))
1421 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1423 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1424 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1426 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1427 [(set_attr "type" "sseadd")
1428 (set_attr "atom_unit" "complex")
1429 (set_attr "prefix_rep" "1")
1430 (set_attr "mode" "V4SF")])
;; 128-bit double-precision horizontal add/subtract, VEX three-operand form
;; (vhaddpd / vhsubpd) on V2DF.
;; The RTL selects DF elements 0 and 1 of operand 1 and then elements 0 and
;; 1 of operand 2.  Operand 1 is a register, operand 2 a register or memory.
1432 (define_insn "*avx_h<plusminus_insn>v2df3"
1433 [(set (match_operand:V2DF 0 "register_operand" "=x")
1437 (match_operand:V2DF 1 "register_operand" "x")
1438 (parallel [(const_int 0)]))
1439 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1442 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1443 (parallel [(const_int 0)]))
1444 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1446 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1447 [(set_attr "type" "sseadd")
1448 (set_attr "prefix" "vex")
1449 (set_attr "mode" "V2DF")])
;; 128-bit double-precision horizontal add/subtract, legacy two-operand
;; form (haddpd / hsubpd) on V2DF.
;; Same element selection as the AVX pattern, with operand 1 tied to the
;; destination ("0").
1451 (define_insn "sse3_h<plusminus_insn>v2df3"
1452 [(set (match_operand:V2DF 0 "register_operand" "=x")
1456 (match_operand:V2DF 1 "register_operand" "0")
1457 (parallel [(const_int 0)]))
1458 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1461 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1462 (parallel [(const_int 0)]))
1463 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1465 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1466 [(set_attr "type" "sseadd")
1467 (set_attr "mode" "V2DF")])
;; Sum reduction of a V8SF vector (operand 1) into operand 0.
;; Sequence emitted:
;;   tmp  = hadd (op1, op1)
;;   tmp2 = hadd (tmp, tmp)
;;   tmp  = vperm2f128 (tmp2, tmp2, imm 1)
;;   op0  = tmp + tmp2
;; Note that tmp is reused as the destination of the permute.
1469 (define_expand "reduc_splus_v8sf"
1470 [(match_operand:V8SF 0 "register_operand" "")
1471 (match_operand:V8SF 1 "register_operand" "")]
1474 rtx tmp = gen_reg_rtx (V8SFmode);
1475 rtx tmp2 = gen_reg_rtx (V8SFmode);
1476 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1477 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1478 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1479 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector (operand 1) into operand 0.
;; Two strategies are present in the preparation code:
;;   - two chained sse3_haddv4sf3 insns through a temporary, or
;;   - the generic helper ix86_expand_reduc_v4sf driven by gen_addv4sf3.
;; NOTE(review): the test selecting between them is not visible in this
;; excerpt; presumably it depends on SSE3 availability -- confirm.
1483 (define_expand "reduc_splus_v4sf"
1484 [(match_operand:V4SF 0 "register_operand" "")
1485 (match_operand:V4SF 1 "register_operand" "")]
1490 rtx tmp = gen_reg_rtx (V4SFmode);
1491 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1492 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1495 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V4DF vector (operand 1) into operand 0.
;; Sequence emitted:
;;   tmp  = hadd (op1, op1)
;;   tmp2 = vperm2f128 (tmp, tmp, imm 1)
;;   op0  = tmp + tmp2
1499 (define_expand "reduc_splus_v4df"
1500 [(match_operand:V4DF 0 "register_operand" "")
1501 (match_operand:V4DF 1 "register_operand" "")]
1504 rtx tmp = gen_reg_rtx (V4DFmode);
1505 rtx tmp2 = gen_reg_rtx (V4DFmode);
1506 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1507 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1508 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single sse3_haddv2df3 with operand 1
;; supplied as both sources, written directly to operand 0.
1512 (define_expand "reduc_splus_v2df"
1513 [(match_operand:V2DF 0 "register_operand" "")
1514 (match_operand:V2DF 1 "register_operand" "")]
1517 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining operation.
1521 (define_expand "reduc_smax_v4sf"
1522 [(match_operand:V4SF 0 "register_operand" "")
1523 (match_operand:V4SF 1 "register_operand" "")]
1526 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining operation.
1530 (define_expand "reduc_smin_v4sf"
1531 [(match_operand:V4SF 0 "register_operand" "")
1532 (match_operand:V4SF 1 "register_operand" "")]
1535 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1539 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1541 ;; Parallel floating point comparisons
1543 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit predicate immediate.
;; Operand 1 is a register, operand 2 a register or memory, and operand 3 is
;; a constant in the range 0..31 selecting the comparison.  The immediate
;; is accounted for via length_immediate "1".
1545 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1546 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1548 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1549 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1550 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1553 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1554 [(set_attr "type" "ssecmp")
1555 (set_attr "length_immediate" "1")
1556 (set_attr "prefix" "vex")
1557 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an explicit predicate immediate.
;; The compare of operand 1 (register) against operand 2 (register or
;; memory) under predicate operand 3 (constant 0..31) is wrapped in a
;; vec_merge.
;; Operand 0 must carry the "=x" constraint: a define_insn output with an
;; empty constraint string is neither marked as written nor restricted to
;; SSE registers, unlike every sibling compare pattern.
1559 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1560 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1561 (vec_merge:SSEMODEF2P
1563 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1564 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1565 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1570 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1571 [(set_attr "type" "ssecmp")
1572 (set_attr "length_immediate" "1")
1573 (set_attr "prefix" "vex")
1574 (set_attr "mode" "<ssescalarmode>")])
1576 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1577 ;; may generate 256bit vector compare instructions.
;; AVX packed mask compare expressed with a real comparison operator
;; (operand 3, accepted by avx_comparison_float_operator) instead of an
;; immediate.  %D3 in the template prints the predicate derived from that
;; operator.  Operand 1 is a register, operand 2 a register or memory.
1578 (define_insn "*avx_maskcmp<mode>3"
1579 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1580 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1581 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1582 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1583 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1584 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1585 [(set_attr "type" "ssecmp")
1586 (set_attr "prefix" "vex")
1587 (set_attr "length_immediate" "1")
1588 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE mask compare for the SSEMODEF4 modes, using a comparison
;; operator accepted by sse_comparison_operator.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.  The visible
;; part of the condition accepts both scalar and vector SSE float modes.
1590 (define_insn "<sse>_maskcmp<mode>3"
1591 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1592 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1593 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1594 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1596 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1597 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1598 [(set_attr "type" "ssecmp")
1599 (set_attr "length_immediate" "1")
1600 (set_attr "mode" "<MODE>")])
;; AVX scalar ("vm") mask compare using a comparison operator accepted by
;; sse_comparison_operator, wrapped in a vec_merge.  Operand 1 is a
;; register, operand 2 a register or memory.
;; The comparison predicate printed by %D3 is encoded as an immediate byte,
;; so length_immediate is set to "1" like in the other compare patterns;
;; without it the computed insn length is one byte short.
1602 (define_insn "*avx_vmmaskcmp<mode>3"
1603 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1604 (vec_merge:SSEMODEF2P
1605 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1606 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1607 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1610 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1611 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1612 [(set_attr "type" "ssecmp")
1613 (set_attr "prefix" "vex")
(set_attr "length_immediate" "1")
1614 (set_attr "mode" "<ssescalarmode>")])
;; Legacy-SSE scalar ("vm") mask compare using a comparison operator
;; accepted by sse_comparison_operator, wrapped in a vec_merge.  Operand 1
;; is tied to the destination ("0"); operand 2 may be a register or memory.
1616 (define_insn "<sse>_vmmaskcmp<mode>3"
1617 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1618 (vec_merge:SSEMODEF2P
1619 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1620 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1621 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1624 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1625 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1626 [(set_attr "type" "ssecmp")
1627 (set_attr "length_immediate" "1")
1628 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets the flags register in CCFP mode (comis).
;; Element 0 is selected from each of the two vector operands; operand 0
;; must be a register, operand 1 a register or memory.
;; prefix_rep is forced to "0" and prefix_data16 is chosen by an
;; if_then_else that tests for DF mode.
1630 (define_insn "<sse>_comi"
1631 [(set (reg:CCFP FLAGS_REG)
1634 (match_operand:<ssevecmode> 0 "register_operand" "x")
1635 (parallel [(const_int 0)]))
1637 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1638 (parallel [(const_int 0)]))))]
1639 "SSE_FLOAT_MODE_P (<MODE>mode)"
1640 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1641 [(set_attr "type" "ssecomi")
1642 (set_attr "prefix" "maybe_vex")
1643 (set_attr "prefix_rep" "0")
1644 (set (attr "prefix_data16")
1645 (if_then_else (eq_attr "mode" "DF")
1647 (const_string "0")))
1648 (set_attr "mode" "<MODE>")])
;; Scalar compare that sets the flags register in CCFPU mode (ucomis).
;; Structurally identical to the comi pattern: element 0 of each vector
;; operand is compared; operand 0 is a register, operand 1 a register or
;; memory.  prefix_data16 is chosen by an if_then_else that tests for DF
;; mode.
1650 (define_insn "<sse>_ucomi"
1651 [(set (reg:CCFPU FLAGS_REG)
1654 (match_operand:<ssevecmode> 0 "register_operand" "x")
1655 (parallel [(const_int 0)]))
1657 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1658 (parallel [(const_int 0)]))))]
1659 "SSE_FLOAT_MODE_P (<MODE>mode)"
1660 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1661 [(set_attr "type" "ssecomi")
1662 (set_attr "prefix" "maybe_vex")
1663 (set_attr "prefix_rep" "0")
1664 (set (attr "prefix_data16")
1665 (if_then_else (eq_attr "mode" "DF")
1667 (const_string "0")))
1668 (set_attr "mode" "<MODE>")])
;; Vector conditional select for float vectors:
;;   op0 = (op4 <cmp3> op5) ? op1 : op2
;; Operand 3 is matched with an empty predicate, i.e. any comparison
;; operator is accepted at this level.  The actual expansion is delegated
;; to ix86_expand_fp_vcond, whose result is stored in `ok'.
1670 (define_expand "vcond<mode>"
1671 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1672 (if_then_else:AVXMODEF2P
1673 (match_operator 3 ""
1674 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1675 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1676 (match_operand:AVXMODEF2P 1 "general_operand" "")
1677 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1678 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1679 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1681 bool ok = ix86_expand_fp_vcond (operands);
1686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1688 ;; Parallel floating point logical operations
1690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed and-not on float vectors (vandn + mode suffix).
;; Operand 1 is a register, operand 2 a register or memory; the
;; three-operand VEX form leaves both sources intact.
1692 (define_insn "avx_andnot<mode>3"
1693 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1696 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1697 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1698 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1699 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1700 [(set_attr "type" "sselog")
1701 (set_attr "prefix" "vex")
1702 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE packed and-not on float vectors (andn + mode suffix).
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory.
1704 (define_insn "<sse>_andnot<mode>3"
1705 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1708 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1709 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1710 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1711 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1712 [(set_attr "type" "sselog")
1713 (set_attr "mode" "<MODE>")])
;; Expander for the bitwise logic codes (any_logic iterator) on 256-bit
;; float vectors.  The only preparation step is operand canonicalization
;; through ix86_fixup_binary_operands_no_copy.
1715 (define_expand "<code><mode>3"
1716 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1717 (any_logic:AVX256MODEF2P
1718 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1719 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1720 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1721 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed bitwise logic on float vectors.
;; Operand 1 is commutative ("%x"); operand 2 may be a register or memory.
;; When TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" spelling is
;; emitted regardless of the element type; otherwise the mode-specific
;; suffix is used.
1723 (define_insn "*avx_<code><mode>3"
1724 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1725 (any_logic:AVXMODEF2P
1726 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1727 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1728 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1729 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1731 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1732 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1734 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1736 [(set_attr "type" "sselog")
1737 (set_attr "prefix" "vex")
1738 (set_attr "mode" "<avxvecmode>")])
;; Expander for the bitwise logic codes (any_logic iterator) on 128-bit SSE
;; float vectors.  The only preparation step is operand canonicalization
;; through ix86_fixup_binary_operands_no_copy.
1740 (define_expand "<code><mode>3"
1741 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1742 (any_logic:SSEMODEF2P
1743 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1744 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1745 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1746 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Legacy-SSE packed bitwise logic on float vectors.
;; Operand 1 is commutative and tied to the destination ("%0"); operand 2
;; may be a register or memory.  When
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" spelling is
;; emitted regardless of the element type.
1748 (define_insn "*<code><mode>3"
1749 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1750 (any_logic:SSEMODEF2P
1751 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1752 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1753 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1754 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1756 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1757 return "<logic>ps\t{%2, %0|%0, %2}";
1759 return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
1761 [(set_attr "type" "sselog")
1762 (set_attr "mode" "<MODE>")])
;; Vector copysign expander: operand 0 receives operand 1 with the sign of
;; operand 2.
;; operands[3] is a sign-bit mask built by ix86_build_signbit_mask;
;; operands[4] and operands[5] are fresh pseudo registers.  One temporary
;; is formed from the complemented mask and operand 1, the other from
;; (and mask operand 2); the final insn IORs the two temporaries.
;; NOTE(review): the set destinations of the two temporaries are not visible
;; in this excerpt -- presumably match_dup 4 and match_dup 5; confirm.
1764 (define_expand "copysign<mode>3"
1767 (not:VEC_FLOAT_MODE (match_dup 3))
1768 (match_operand:VEC_FLOAT_MODE 1 "nonimmediate_operand" "")))
1770 (and:VEC_FLOAT_MODE (match_dup 3)
1771 (match_operand:VEC_FLOAT_MODE 2 "nonimmediate_operand" "")))
1772 (set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
1773 (ior:VEC_FLOAT_MODE (match_dup 4) (match_dup 5)))]
1776 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1778 operands[4] = gen_reg_rtx (<MODE>mode);
1779 operands[5] = gen_reg_rtx (<MODE>mode);
1782 ;; Also define scalar versions. These are used for abs, neg, and
1783 ;; conditional move. Using subregs into vector modes causes register
1784 ;; allocation lossage. These patterns do not allow memory operands
1785 ;; because the native instructions read the full 128-bits.
;; AVX and-not on scalar float modes (MODEF).
;; All three operands are registers; the packed vandnp instruction is used,
;; so the mode attribute is the containing vector mode.
1787 (define_insn "*avx_andnot<mode>3"
1788 [(set (match_operand:MODEF 0 "register_operand" "=x")
1791 (match_operand:MODEF 1 "register_operand" "x"))
1792 (match_operand:MODEF 2 "register_operand" "x")))]
1793 "AVX_FLOAT_MODE_P (<MODE>mode)"
1794 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1795 [(set_attr "type" "sselog")
1796 (set_attr "prefix" "vex")
1797 (set_attr "mode" "<ssevecmode>")])
;; Legacy-SSE and-not on scalar float modes (MODEF).
;; Operand 1 is tied to the destination ("0"); operand 2 must be a
;; register.  The packed andnp instruction is used, so the mode attribute
;; is the containing vector mode.
1799 (define_insn "*andnot<mode>3"
1800 [(set (match_operand:MODEF 0 "register_operand" "=x")
1803 (match_operand:MODEF 1 "register_operand" "0"))
1804 (match_operand:MODEF 2 "register_operand" "x")))]
1805 "SSE_FLOAT_MODE_P (<MODE>mode)"
1806 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1807 [(set_attr "type" "sselog")
1808 (set_attr "mode" "<ssevecmode>")])
;; AVX bitwise logic on scalar float modes (MODEF), register operands only.
;; The packed instruction is emitted; with
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL the "ps" spelling is always used,
;; otherwise "p" plus the scalar mode suffix.
1810 (define_insn "*avx_<code><mode>3"
1811 [(set (match_operand:MODEF 0 "register_operand" "=x")
1813 (match_operand:MODEF 1 "register_operand" "x")
1814 (match_operand:MODEF 2 "register_operand" "x")))]
1815 "AVX_FLOAT_MODE_P (<MODE>mode)"
1817 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1818 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1820 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1822 [(set_attr "type" "sselog")
1823 (set_attr "prefix" "vex")
1824 (set_attr "mode" "<ssevecmode>")])
;; Legacy-SSE bitwise logic on scalar float modes (MODEF), register
;; operands only, with operand 1 tied to the destination ("0").
;; With TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL the "ps" spelling is always
;; used, otherwise "p" plus the scalar mode suffix.
1826 (define_insn "*<code><mode>3"
1827 [(set (match_operand:MODEF 0 "register_operand" "=x")
1829 (match_operand:MODEF 1 "register_operand" "0")
1830 (match_operand:MODEF 2 "register_operand" "x")))]
1831 "SSE_FLOAT_MODE_P (<MODE>mode)"
1833 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1834 return "<logic>ps\t{%2, %0|%0, %2}";
1836 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1838 [(set_attr "type" "sselog")
1839 (set_attr "mode" "<ssevecmode>")])
1841 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1843 ;; FMA4 floating point multiply/accumulate instructions. This
1844 ;; includes the scalar version of the instructions as well as the vector version.
1847 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1849 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1850 ;; combine to generate a multiply/add with two memory references. We then
1851 ;; split this insn, into loading up the destination register with one of the
1852 ;; memory operations. If we don't manage to split the insn, reload will
1853 ;; generate the appropriate moves. The reason this is needed, is that combine
1854 ;; has already folded one of the memory references into both the multiply and
1855 ;; add insns, and it can't generate a new pseudo. I.e.:
1856 ;; (set (reg1) (mem (addr1)))
1857 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1858 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1860 ;; Intrinsic FMA operations.
1862 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name expander for fused multiply-add over FMAMODE.
;; Operands 1, 2 and 3 are taken unmodified (no negation).  Enabled when
;; either FMA or FMA4 is available and SSE math is in use.
1863 (define_expand "fma<mode>4"
1864 [(set (match_operand:FMAMODE 0 "register_operand")
1866 (match_operand:FMAMODE 1 "nonimmediate_operand")
1867 (match_operand:FMAMODE 2 "nonimmediate_operand")
1868 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1869 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
;; Standard-name expander for fused multiply-subtract over FMAMODE:
;; operand 3 (the addend) is negated.  Enabled when either FMA or FMA4 is
;; available and SSE math is in use.
1872 (define_expand "fms<mode>4"
1873 [(set (match_operand:FMAMODE 0 "register_operand")
1875 (match_operand:FMAMODE 1 "nonimmediate_operand")
1876 (match_operand:FMAMODE 2 "nonimmediate_operand")
1877 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1878 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
;; Standard-name expander for negated fused multiply-add over FMAMODE:
;; operand 1 (a multiplicand) is negated, operand 3 is not.  Enabled when
;; either FMA or FMA4 is available and SSE math is in use.
1881 (define_expand "fnma<mode>4"
1882 [(set (match_operand:FMAMODE 0 "register_operand")
1884 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1885 (match_operand:FMAMODE 2 "nonimmediate_operand")
1886 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1887 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
;; Standard-name expander for negated fused multiply-subtract over
;; FMAMODE: both operand 1 (a multiplicand) and operand 3 (the addend) are
;; negated.  Enabled when either FMA or FMA4 is available and SSE math is
;; in use.
1890 (define_expand "fnms<mode>4"
1891 [(set (match_operand:FMAMODE 0 "register_operand")
1893 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1894 (match_operand:FMAMODE 2 "nonimmediate_operand")
1895 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1896 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
1899 ;; The builtins for fma4intrin.h are not conditional on SSE math being enabled.
;; Intrinsic expander for fused multiply-add over FMAMODE.
;; Same operand shape as the standard-name fma expander, but the condition
;; only requires FMA or FMA4 and does not test TARGET_SSE_MATH.
1900 (define_expand "fma4i_fmadd_<mode>"
1901 [(set (match_operand:FMAMODE 0 "register_operand")
1903 (match_operand:FMAMODE 1 "nonimmediate_operand")
1904 (match_operand:FMAMODE 2 "nonimmediate_operand")
1905 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1906 "TARGET_FMA || TARGET_FMA4"
;; Four-operand fused multiply-add insn (vfmadd + mode suffix).
;; Two constraint alternatives allow at most one memory source:
;;   alt 0: op1 reg, op2 reg, op3 reg-or-mem
;;   alt 1: op1 reg, op2 mem, op3 reg
;; Operand 1 carries "%", so operands 1 and 2 may be exchanged.
1909 (define_insn "*fma4i_fmadd_<mode>"
1910 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1912 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1913 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1914 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1916 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1917 [(set_attr "type" "ssemuladd")
1918 (set_attr "mode" "<MODE>")])
;; Four-operand fused multiply-subtract insn (vfmsub + mode suffix).
;; Constraint alternatives are the same as for the fmadd insn: at most one
;; of the three sources may be in memory, and operands 1 and 2 are
;; commutative.
1920 (define_insn "*fma4i_fmsub_<mode>"
1921 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1923 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1924 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1926 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1928 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1929 [(set_attr "type" "ssemuladd")
1930 (set_attr "mode" "<MODE>")])
;; Four-operand negated fused multiply-add insn (vfnmadd + mode suffix).
;; Operand 1 sits inside an extra RTL wrapper (note the additional closing
;; parenthesis on its line).  Constraint alternatives match the fmadd insn.
1932 (define_insn "*fma4i_fnmadd_<mode>"
1933 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1936 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1937 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1938 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1940 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1941 [(set_attr "type" "ssemuladd")
1942 (set_attr "mode" "<MODE>")])
;; Four-operand negated fused multiply-subtract insn (vfnmsub + mode
;; suffix).  Both operand 1 and operand 3 sit inside an extra RTL wrapper.
;; Constraint alternatives match the fmadd insn.
1944 (define_insn "*fma4i_fnmsub_<mode>"
1945 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1948 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1949 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1951 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1953 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1954 [(set_attr "type" "ssemuladd")
1955 (set_attr "mode" "<MODE>")])
1957 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1958 ;; entire destination register, with the high-order elements zeroed.
;; The expander merges the multiply-add result (vec_merge) and supplies
;; operands[4] itself as the all-zero vector of the pattern's mode, which the
;; matching insns below require via const0_operand.
1960 (define_expand "fma4i_vmfmadd_<mode>"
1961 [(set (match_operand:SSEMODEF2P 0 "register_operand")
1962 (vec_merge:SSEMODEF2P
1964 (match_operand:SSEMODEF2P 1 "nonimmediate_operand")
1965 (match_operand:SSEMODEF2P 2 "nonimmediate_operand")
1966 (match_operand:SSEMODEF2P 3 "nonimmediate_operand"))
1971 operands[4] = CONST0_RTX (<MODE>mode);
;; Scalar multiply-add: emits vfmadd<ssescalarmodesuffix>.  The result is
;; vec_merge'd with operand 4, which must be a constant zero vector.
;; At most one memory operand (operand 3 or operand 2).
1974 (define_insn "*fma4i_vmfmadd_<mode>"
1975 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1976 (vec_merge:SSEMODEF2P
1978 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1979 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1980 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1981 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1984 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1985 [(set_attr "type" "ssemuladd")
1986 (set_attr "mode" "<MODE>")])
;; Scalar multiply-subtract: emits vfmsub<ssescalarmodesuffix>.  The result
;; is vec_merge'd with operand 4, which must be a constant zero vector.
1988 (define_insn "*fma4i_vmfmsub_<mode>"
1989 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1990 (vec_merge:SSEMODEF2P
1992 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1993 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1995 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
1996 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1999 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2000 [(set_attr "type" "ssemuladd")
2001 (set_attr "mode" "<MODE>")])
;; Scalar negated-multiply-add: emits vfnmadd<ssescalarmodesuffix>.  The
;; result is vec_merge'd with operand 4, which must be a constant zero vector.
2003 (define_insn "*fma4i_vmfnmadd_<mode>"
2004 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2005 (vec_merge:SSEMODEF2P
2008 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2009 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
2010 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2011 (match_operand:SSEMODEF2P 4 "const0_operand" "")
2014 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2015 [(set_attr "type" "ssemuladd")
2016 (set_attr "mode" "<MODE>")])
;; Scalar negated-multiply-subtract: emits vfnmsub<ssescalarmodesuffix>.  The
;; result is vec_merge'd with operand 4, which must be a constant zero vector.
2018 (define_insn "*fma4i_vmfnmsub_<mode>"
2019 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2020 (vec_merge:SSEMODEF2P
2023 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2024 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
2026 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
2027 (match_operand:SSEMODEF2P 4 "const0_operand" "")
2030 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2031 [(set_attr "type" "ssemuladd")
2032 (set_attr "mode" "<MODE>")])
2034 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2036 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2038 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2040 ;; It would be possible to represent these without the UNSPEC as
2043 ;; (fma op1 op2 op3)
2044 ;; (fma op1 op2 (neg op3))
2047 ;; But this doesn't seem useful in practice.
;; Named expander for the alternating add/subtract multiply-accumulate.
;; The three nonimmediate AVXMODEF2P sources are passed as a bracketed operand
;; vector; the expander is enabled for either FMA or FMA4.
2049 (define_expand "fmaddsub_<mode>"
2050 [(set (match_operand:AVXMODEF2P 0 "register_operand")
2052 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand")
2053 (match_operand:AVXMODEF2P 2 "nonimmediate_operand")
2054 (match_operand:AVXMODEF2P 3 "nonimmediate_operand")]
2056 "TARGET_FMA || TARGET_FMA4"
;; Four-operand fmaddsub: untied destination, at most one memory operand
;; (operand 3 or operand 2).  The mnemonic suffix must follow the iterator
;; mode: AVXMODEF2P is not limited to single precision, so a hard-coded "ps"
;; would emit the wrong instruction for the double-precision modes.
2059 (define_insn "*fma4_fmaddsub_<mode>"
2060 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
2062 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
2063 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
2064 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")]
2067 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2068 [(set_attr "type" "ssemuladd")
2069 (set_attr "mode" "<MODE>")])
;; Four-operand fmsubadd: untied destination, at most one memory operand
;; (operand 3 or operand 2).  The mnemonic suffix must follow the iterator
;; mode: AVXMODEF2P is not limited to single precision, so a hard-coded "ps"
;; would emit the wrong instruction for the double-precision modes.
2071 (define_insn "*fma4_fmsubadd_<mode>"
2072 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
2074 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
2075 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
2077 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2080 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2081 [(set_attr "type" "ssemuladd")
2082 (set_attr "mode" "<MODE>")])
2084 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2086 ;; FMA3 floating point multiply/accumulate instructions.
2088 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 multiply-add.  The destination is tied to one source, giving three
;; alternatives:
;;   0: dest = op1, op2 reg/mem, op3 reg  -> 132 form
;;   1: dest = op1, op2 reg, op3 reg/mem  -> 213 form
;;   2: dest = op3, op1 reg, op2 reg/mem  -> 231 form
;; FMA3 defines only the 132, 213 and 231 encodings, so the middle
;; alternative must use 213 (there is no 312 mnemonic).
2090 (define_insn "*fma_fmadd_<mode>"
2091 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2093 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2094 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2095 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2098 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2099 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2100 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2101 [(set_attr "type" "ssemuladd")
2102 (set_attr "mode" "<MODE>")])
;; FMA3 multiply-subtract.  The destination is tied to operand 1
;; (alternatives 0 and 1) or operand 3 (alternative 2).  FMA3 defines only
;; the 132, 213 and 231 encodings, so the middle alternative must use 213
;; (there is no 312 mnemonic).
2104 (define_insn "*fma_fmsub_<mode>"
2105 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2107 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2108 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2110 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2113 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2114 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2115 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2116 [(set_attr "type" "ssemuladd")
2117 (set_attr "mode" "<MODE>")])
;; FMA3 negated-multiply-add (emits vfnmadd*).  The pattern is named
;; *fma_fnmadd so that it no longer shares a name with the plain multiply-add
;; pattern in RTL dumps.  The destination is tied to operand 1 (alternatives
;; 0 and 1) or operand 3 (alternative 2).  FMA3 defines only the 132, 213 and
;; 231 encodings, so the middle alternative must use 213.
2119 (define_insn "*fma_fnmadd_<mode>"
2120 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2123 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2124 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2125 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2128 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2129 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2130 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2131 [(set_attr "type" "ssemuladd")
2132 (set_attr "mode" "<MODE>")])
;; FMA3 negated-multiply-subtract (emits vfnmsub*).  The pattern is named
;; *fma_fnmsub so that it no longer shares a name with the plain
;; multiply-subtract pattern in RTL dumps.  The destination is tied to
;; operand 1 (alternatives 0 and 1) or operand 3 (alternative 2).  FMA3
;; defines only the 132, 213 and 231 encodings, so the middle alternative
;; must use 213.
2134 (define_insn "*fma_fnmsub_<mode>"
2135 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2138 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2139 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2141 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2144 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2145 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2146 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2147 [(set_attr "type" "ssemuladd")
2148 (set_attr "mode" "<MODE>")])
;; FMA3 fmaddsub.  The destination is tied to operand 1 (alternatives 0 and
;; 1) or operand 3 (alternative 2); the 132, 213 or 231 form is chosen to
;; match.  The suffix follows the iterator mode.
2150 (define_insn "*fma_fmaddsub_<mode>"
2151 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
2153 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
2154 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
2155 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0")]
2159 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2160 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2161 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2162 [(set_attr "type" "ssemuladd")
2163 (set_attr "mode" "<MODE>")])
;; FMA3 fmsubadd.  The destination is tied to operand 1 (alternatives 0 and
;; 1) or operand 3 (alternative 2); the 132, 213 or 231 form is chosen to
;; match.  The suffix follows the iterator mode.
2165 (define_insn "*fma_fmsubadd_<mode>"
2166 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
2168 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
2169 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
2171 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0"))]
2175 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2176 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2177 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2178 [(set_attr "type" "ssemuladd")
2179 (set_attr "mode" "<MODE>")])
2181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2183 ;; Non-intrinsic versions, matched when fused-multiply-add is allowed.
2185 ;; ??? If fused-madd were a generic flag, combine could do this without
2186 ;; needing splitters here in the backend. Irritatingly, combine won't
2187 ;; recognize many of these with mere splits, since only 3 or more insns
2188 ;; are allowed to split during combine. Thankfully, there's always a
2189 ;; split_all_insns pass that runs before reload.
2191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating multiply and add.
;; Matched only before reload, with SSE math and fused-madd enabled and
;; either FMA or FMA4 available.  The output code is gcc_unreachable (): the
;; insn must always be split before assembly output and has no template of
;; its own.
2193 (define_insn_and_split "*split_fma"
2194 [(set (match_operand:FMAMODE 0 "register_operand")
2197 (match_operand:FMAMODE 1 "nonimmediate_operand")
2198 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2199 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2200 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2201 && (TARGET_FMA || TARGET_FMA4)
2202 && !(reload_in_progress || reload_completed)"
2203 { gcc_unreachable (); }
2212 ;; Floating multiply and subtract.
;; Same enabling condition as the other split patterns (pre-reload, SSE math,
;; fused-madd, FMA or FMA4).  The replacement negates operand 3; the insn
;; itself is never output (gcc_unreachable).
2213 (define_insn_and_split "*split_fms"
2214 [(set (match_operand:FMAMODE 0 "register_operand")
2217 (match_operand:FMAMODE 1 "nonimmediate_operand")
2218 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2219 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2220 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2221 && (TARGET_FMA || TARGET_FMA4)
2222 && !(reload_in_progress || reload_completed)"
2223 { gcc_unreachable (); }
2229 (neg:FMAMODE (match_dup 3))))]
2232 ;; Floating point negative multiply and add.
2233 ;; Recognize (-a * b + c) via the canonical form: c - (a * b).
;; Operand 3 (the addend c) therefore comes first in the matched pattern.
;; The replacement negates operand 1; the insn itself is never output
;; (gcc_unreachable) and is only matched before reload.
2234 (define_insn_and_split "*split_fnma"
2235 [(set (match_operand:FMAMODE 0 "register_operand")
2237 (match_operand:FMAMODE 3 "nonimmediate_operand")
2239 (match_operand:FMAMODE 1 "nonimmediate_operand")
2240 (match_operand:FMAMODE 2 "nonimmediate_operand"))))]
2241 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2242 && (TARGET_FMA || TARGET_FMA4)
2243 && !(reload_in_progress || reload_completed)"
2244 { gcc_unreachable (); }
2248 (neg:FMAMODE (match_dup 1))
2253 ;; Floating point negative multiply and subtract.
2254 ;; Recognize (-a * b - c) via the canonical form: (-a * b) - c.
;; The replacement negates both operand 1 and operand 3; the insn itself is
;; never output (gcc_unreachable) and is only matched before reload.
2255 (define_insn_and_split "*split_fnms"
2256 [(set (match_operand:FMAMODE 0 "register_operand")
2260 (match_operand:FMAMODE 1 "nonimmediate_operand"))
2261 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2262 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2263 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2264 && (TARGET_FMA || TARGET_FMA4)
2265 && !(reload_in_progress || reload_completed)"
2266 { gcc_unreachable (); }
2270 (neg:FMAMODE (match_dup 1))
2272 (neg:FMAMODE (match_dup 3))))]
2275 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2277 ;; Parallel single-precision floating point conversion operations
2279 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts a V2SI operand (MMX register or memory, "ym") to V2SF.
;; Operand 1 is tied to the V4SF destination ("0"), so the instruction
;; updates the destination register in place.
2281 (define_insn "sse_cvtpi2ps"
2282 [(set (match_operand:V4SF 0 "register_operand" "=x")
2285 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2286 (match_operand:V4SF 1 "register_operand" "0")
2289 "cvtpi2ps\t{%2, %0|%0, %2}"
2290 [(set_attr "type" "ssecvt")
2291 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts a V4SF operand (as an unspec, not the truncating `fix`
;; code) and keeps elements 0 and 1 as a V2SI result in an MMX register ("=y").
2293 (define_insn "sse_cvtps2pi"
2294 [(set (match_operand:V2SI 0 "register_operand" "=y")
2296 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2298 (parallel [(const_int 0) (const_int 1)])))]
2300 "cvtps2pi\t{%1, %0|%0, %1}"
2301 [(set_attr "type" "ssecvt")
2302 (set_attr "unit" "mmx")
2303 (set_attr "mode" "DI")])
;; cvttps2pi: truncating (`fix`) conversion of a V4SF operand; elements 0 and
;; 1 are kept as a V2SI result in an MMX register ("=y").
2305 (define_insn "sse_cvttps2pi"
2306 [(set (match_operand:V2SI 0 "register_operand" "=y")
2308 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2309 (parallel [(const_int 0) (const_int 1)])))]
2311 "cvttps2pi\t{%1, %0|%0, %1}"
2312 [(set_attr "type" "ssecvt")
2313 (set_attr "unit" "mmx")
2314 (set_attr "prefix_rep" "0")
2315 (set_attr "mode" "SF")])
;; VEX form of cvtsi2ss: converts an SImode integer (register or memory) to
;; SF.  Three-operand encoding, so operand 1 may be any SSE register and need
;; not match the destination.
2317 (define_insn "*avx_cvtsi2ss"
2318 [(set (match_operand:V4SF 0 "register_operand" "=x")
2321 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2322 (match_operand:V4SF 1 "register_operand" "x")
2325 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2326 [(set_attr "type" "sseicvt")
2327 (set_attr "prefix" "vex")
2328 (set_attr "mode" "SF")])
;; cvtsi2ss: converts an SImode integer to SF.  Operand 1 is tied to the
;; destination.  The register and memory sources are separate alternatives so
;; that the per-CPU decode attributes can differ between them.
2330 (define_insn "sse_cvtsi2ss"
2331 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2334 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2335 (match_operand:V4SF 1 "register_operand" "0,0")
2338 "cvtsi2ss\t{%2, %0|%0, %2}"
2339 [(set_attr "type" "sseicvt")
2340 (set_attr "athlon_decode" "vector,double")
2341 (set_attr "amdfam10_decode" "vector,double")
2342 (set_attr "bdver1_decode" "double,direct")
2343 (set_attr "mode" "SF")])
;; VEX form of the 64-bit integer to SF conversion (DImode source).
;; Only available with AVX on 64-bit targets.
2345 (define_insn "*avx_cvtsi2ssq"
2346 [(set (match_operand:V4SF 0 "register_operand" "=x")
2349 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2350 (match_operand:V4SF 1 "register_operand" "x")
2352 "TARGET_AVX && TARGET_64BIT"
2353 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2354 [(set_attr "type" "sseicvt")
2355 (set_attr "length_vex" "4")
2356 (set_attr "prefix" "vex")
2357 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit integer (DImode) to SF conversion, 64-bit targets only.
;; Operand 1 is tied to the destination; the encoding needs a REX prefix.
2359 (define_insn "sse_cvtsi2ssq"
2360 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2363 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2364 (match_operand:V4SF 1 "register_operand" "0,0")
2366 "TARGET_SSE && TARGET_64BIT"
2367 "cvtsi2ssq\t{%2, %0|%0, %2}"
2368 [(set_attr "type" "sseicvt")
2369 (set_attr "prefix_rex" "1")
2370 (set_attr "athlon_decode" "vector,double")
2371 (set_attr "amdfam10_decode" "vector,double")
2372 (set_attr "bdver1_decode" "double,direct")
2373 (set_attr "mode" "SF")])
;; cvtss2si on a vector operand: converts element 0 of a V4SF to SImode.
;; The conversion is expressed as UNSPEC_FIX_NOTRUNC.  The %v template prefix
;; together with prefix "maybe_vex" lets one pattern serve SSE and AVX.
2375 (define_insn "sse_cvtss2si"
2376 [(set (match_operand:SI 0 "register_operand" "=r,r")
2379 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2380 (parallel [(const_int 0)]))]
2381 UNSPEC_FIX_NOTRUNC))]
2383 "%vcvtss2si\t{%1, %0|%0, %1}"
2384 [(set_attr "type" "sseicvt")
2385 (set_attr "athlon_decode" "double,vector")
2386 (set_attr "bdver1_decode" "double,double")
2387 (set_attr "prefix_rep" "1")
2388 (set_attr "prefix" "maybe_vex")
2389 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si that takes a scalar SF operand directly instead of
;; selecting element 0 of a vector; emits the same instruction.
2391 (define_insn "sse_cvtss2si_2"
2392 [(set (match_operand:SI 0 "register_operand" "=r,r")
2393 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2394 UNSPEC_FIX_NOTRUNC))]
2396 "%vcvtss2si\t{%1, %0|%0, %1}"
2397 [(set_attr "type" "sseicvt")
2398 (set_attr "athlon_decode" "double,vector")
2399 (set_attr "amdfam10_decode" "double,double")
2400 (set_attr "bdver1_decode" "double,double")
2401 (set_attr "prefix_rep" "1")
2402 (set_attr "prefix" "maybe_vex")
2403 (set_attr "mode" "SI")])
;; 64-bit result form of cvtss2si: converts element 0 of a V4SF to DImode.
;; 64-bit targets only.  The "{q}" suffix is emitted only in AT&T syntax.
2405 (define_insn "sse_cvtss2siq"
2406 [(set (match_operand:DI 0 "register_operand" "=r,r")
2409 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2410 (parallel [(const_int 0)]))]
2411 UNSPEC_FIX_NOTRUNC))]
2412 "TARGET_SSE && TARGET_64BIT"
2413 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2414 [(set_attr "type" "sseicvt")
2415 (set_attr "athlon_decode" "double,vector")
2416 (set_attr "bdver1_decode" "double,double")
2417 (set_attr "prefix_rep" "1")
2418 (set_attr "prefix" "maybe_vex")
2419 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq that takes a scalar SF operand directly.
;; 64-bit targets only.
2421 (define_insn "sse_cvtss2siq_2"
2422 [(set (match_operand:DI 0 "register_operand" "=r,r")
2423 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2424 UNSPEC_FIX_NOTRUNC))]
2425 "TARGET_SSE && TARGET_64BIT"
2426 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2427 [(set_attr "type" "sseicvt")
2428 (set_attr "athlon_decode" "double,vector")
2429 (set_attr "amdfam10_decode" "double,double")
2430 (set_attr "bdver1_decode" "double,double")
2431 (set_attr "prefix_rep" "1")
2432 (set_attr "prefix" "maybe_vex")
2433 (set_attr "mode" "DI")])
;; cvttss2si: the truncating counterpart of sse_cvtss2si.  Converts element 0
;; of a V4SF to SImode; serves both SSE and AVX via %v / "maybe_vex".
2435 (define_insn "sse_cvttss2si"
2436 [(set (match_operand:SI 0 "register_operand" "=r,r")
2439 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2440 (parallel [(const_int 0)]))))]
2442 "%vcvttss2si\t{%1, %0|%0, %1}"
2443 [(set_attr "type" "sseicvt")
2444 (set_attr "athlon_decode" "double,vector")
2445 (set_attr "amdfam10_decode" "double,double")
2446 (set_attr "bdver1_decode" "double,double")
2447 (set_attr "prefix_rep" "1")
2448 (set_attr "prefix" "maybe_vex")
2449 (set_attr "mode" "SI")])
;; 64-bit result form of cvttss2si: converts element 0 of a V4SF to DImode.
;; 64-bit targets only.  The "{q}" suffix is emitted only in AT&T syntax.
2451 (define_insn "sse_cvttss2siq"
2452 [(set (match_operand:DI 0 "register_operand" "=r,r")
2455 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2456 (parallel [(const_int 0)]))))]
2457 "TARGET_SSE && TARGET_64BIT"
2458 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2459 [(set_attr "type" "sseicvt")
2460 (set_attr "athlon_decode" "double,vector")
2461 (set_attr "amdfam10_decode" "double,double")
2462 (set_attr "bdver1_decode" "double,double")
2463 (set_attr "prefix_rep" "1")
2464 (set_attr "prefix" "maybe_vex")
2465 (set_attr "mode" "DI")])
;; vcvtdq2ps over the AVXMODEDCVTDQ2PS iterator: signed integer vector
;; (<avxcvtvecmode>) to float vector, element-wise.
2467 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2468 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2469 (float:AVXMODEDCVTDQ2PS
2470 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2472 "vcvtdq2ps\t{%1, %0|%0, %1}"
2473 [(set_attr "type" "ssecvt")
2474 (set_attr "prefix" "vex")
2475 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: element-wise signed conversion of V4SI to V4SF.
2477 (define_insn "sse2_cvtdq2ps"
2478 [(set (match_operand:V4SF 0 "register_operand" "=x")
2479 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2481 "cvtdq2ps\t{%1, %0|%0, %1}"
2482 [(set_attr "type" "ssecvt")
2483 (set_attr "mode" "V4SF")])
;; Unsigned V4SI -> V4SF conversion built from the signed conversion:
;;   1. convert operand 1 as if it were signed;
;;   2. compare that result against zero (operands[3]) with `lt` to get a
;;      mask of the lanes that came out negative;
;;   3. AND the mask with a vector of 2**32 (operands[4]);
;;   4. add the masked correction to the signed result.
;; operands[5..7] are fresh V4SF temporaries for the intermediate values.
2485 (define_expand "sse2_cvtudq2ps"
2487 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2489 (lt:V4SF (match_dup 5) (match_dup 3)))
2491 (and:V4SF (match_dup 6) (match_dup 4)))
2492 (set (match_operand:V4SF 0 "register_operand" "")
2493 (plus:V4SF (match_dup 5) (match_dup 7)))]
2496 REAL_VALUE_TYPE TWO32r;
2500 real_ldexp (&TWO32r, &dconst1, 32);
2501 x = const_double_from_real_value (TWO32r, SFmode);
2503 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2504 operands[4] = force_reg (V4SFmode,
2505 ix86_build_const_vector (V4SFmode, 1, x));
2507 for (i = 5; i < 8; i++)
2508 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq over the AVXMODEDCVTPS2DQ iterator: float vector to integer
;; vector, expressed as UNSPEC_FIX_NOTRUNC rather than the truncating `fix`.
2511 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2512 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2513 (unspec:AVXMODEDCVTPS2DQ
2514 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2515 UNSPEC_FIX_NOTRUNC))]
2517 "vcvtps2dq\t{%1, %0|%0, %1}"
2518 [(set_attr "type" "ssecvt")
2519 (set_attr "prefix" "vex")
2520 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF to V4SI, expressed as UNSPEC_FIX_NOTRUNC rather than the
;; truncating `fix`.  Encoded with the 0x66 data16 prefix.
2522 (define_insn "sse2_cvtps2dq"
2523 [(set (match_operand:V4SI 0 "register_operand" "=x")
2524 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2525 UNSPEC_FIX_NOTRUNC))]
2527 "cvtps2dq\t{%1, %0|%0, %1}"
2528 [(set_attr "type" "ssecvt")
2529 (set_attr "prefix_data16" "1")
2530 (set_attr "mode" "TI")])
;; vcvttps2dq over the AVXMODEDCVTPS2DQ iterator: truncating (`fix`)
;; float-vector to integer-vector conversion.
2532 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2533 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2534 (fix:AVXMODEDCVTPS2DQ
2535 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2537 "vcvttps2dq\t{%1, %0|%0, %1}"
2538 [(set_attr "type" "ssecvt")
2539 (set_attr "prefix" "vex")
2540 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: truncating (`fix`) V4SF to V4SI conversion.  Encoded with a REP
;; prefix and explicitly without the data16 prefix.
2542 (define_insn "sse2_cvttps2dq"
2543 [(set (match_operand:V4SI 0 "register_operand" "=x")
2544 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2546 "cvttps2dq\t{%1, %0|%0, %1}"
2547 [(set_attr "type" "ssecvt")
2548 (set_attr "prefix_rep" "1")
2549 (set_attr "prefix_data16" "0")
2550 (set_attr "mode" "TI")])
2552 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2554 ;; Parallel double-precision floating point conversion operations
2556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI to V2DF.  The source is either an MMX register or memory;
;; the two are separate alternatives because only the MMX-register form is
;; given unit "mmx" and the data16 prefix attribute.
2558 (define_insn "sse2_cvtpi2pd"
2559 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2560 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2562 "cvtpi2pd\t{%1, %0|%0, %1}"
2563 [(set_attr "type" "ssecvt")
2564 (set_attr "unit" "mmx,*")
2565 (set_attr "prefix_data16" "1,*")
2566 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF to V2SI in an MMX register ("=y"), expressed as
;; UNSPEC_FIX_NOTRUNC rather than the truncating `fix`.
2568 (define_insn "sse2_cvtpd2pi"
2569 [(set (match_operand:V2SI 0 "register_operand" "=y")
2570 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2571 UNSPEC_FIX_NOTRUNC))]
2573 "cvtpd2pi\t{%1, %0|%0, %1}"
2574 [(set_attr "type" "ssecvt")
2575 (set_attr "unit" "mmx")
2576 (set_attr "prefix_data16" "1")
2577 (set_attr "mode" "DI")
2578 (set_attr "bdver1_decode" "double")])
;; cvttpd2pi: truncating (`fix`) V2DF to V2SI conversion into an MMX
;; register ("=y").
2580 (define_insn "sse2_cvttpd2pi"
2581 [(set (match_operand:V2SI 0 "register_operand" "=y")
2582 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2584 "cvttpd2pi\t{%1, %0|%0, %1}"
2585 [(set_attr "type" "ssecvt")
2586 (set_attr "unit" "mmx")
2587 (set_attr "prefix_data16" "1")
2588 (set_attr "mode" "TI")
2589 (set_attr "bdver1_decode" "double")])
;; VEX form of cvtsi2sd: converts an SImode integer (register or memory) to
;; DF.  Three-operand encoding, so operand 1 may be any SSE register.
2591 (define_insn "*avx_cvtsi2sd"
2592 [(set (match_operand:V2DF 0 "register_operand" "=x")
2595 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2596 (match_operand:V2DF 1 "register_operand" "x")
2599 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2600 [(set_attr "type" "sseicvt")
2601 (set_attr "prefix" "vex")
2602 (set_attr "mode" "DF")])
;; cvtsi2sd: converts an SImode integer to DF.  Operand 1 is tied to the
;; destination.  The register and memory sources are separate alternatives so
;; that the per-CPU decode attributes can differ between them.
2604 (define_insn "sse2_cvtsi2sd"
2605 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2608 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2609 (match_operand:V2DF 1 "register_operand" "0,0")
2612 "cvtsi2sd\t{%2, %0|%0, %2}"
2613 [(set_attr "type" "sseicvt")
2614 (set_attr "mode" "DF")
2615 (set_attr "athlon_decode" "double,direct")
2616 (set_attr "amdfam10_decode" "vector,double")
2617 (set_attr "bdver1_decode" "double,direct")])
;; VEX form of the 64-bit integer to DF conversion (DImode source).
;; Only available with AVX on 64-bit targets.
2619 (define_insn "*avx_cvtsi2sdq"
2620 [(set (match_operand:V2DF 0 "register_operand" "=x")
2623 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2624 (match_operand:V2DF 1 "register_operand" "x")
2626 "TARGET_AVX && TARGET_64BIT"
2627 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2628 [(set_attr "type" "sseicvt")
2629 (set_attr "length_vex" "4")
2630 (set_attr "prefix" "vex")
2631 (set_attr "mode" "DF")])
;; cvtsi2sdq: 64-bit integer (DImode) to DF conversion, 64-bit targets only.
;; Operand 1 is tied to the destination; the encoding needs a REX prefix.
2633 (define_insn "sse2_cvtsi2sdq"
2634 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2637 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2638 (match_operand:V2DF 1 "register_operand" "0,0")
2640 "TARGET_SSE2 && TARGET_64BIT"
2641 "cvtsi2sdq\t{%2, %0|%0, %2}"
2642 [(set_attr "type" "sseicvt")
2643 (set_attr "prefix_rex" "1")
2644 (set_attr "mode" "DF")
2645 (set_attr "athlon_decode" "double,direct")
2646 (set_attr "amdfam10_decode" "vector,double")
2647 (set_attr "bdver1_decode" "double,direct")])
;; cvtsd2si on a vector operand: converts element 0 of a V2DF to SImode.
;; The conversion is expressed as UNSPEC_FIX_NOTRUNC.  The %v template prefix
;; together with prefix "maybe_vex" lets one pattern serve SSE2 and AVX.
2649 (define_insn "sse2_cvtsd2si"
2650 [(set (match_operand:SI 0 "register_operand" "=r,r")
2653 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2654 (parallel [(const_int 0)]))]
2655 UNSPEC_FIX_NOTRUNC))]
2657 "%vcvtsd2si\t{%1, %0|%0, %1}"
2658 [(set_attr "type" "sseicvt")
2659 (set_attr "athlon_decode" "double,vector")
2660 (set_attr "bdver1_decode" "double,double")
2661 (set_attr "prefix_rep" "1")
2662 (set_attr "prefix" "maybe_vex")
2663 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si that takes a scalar DF operand directly instead
;; of selecting element 0 of a vector; emits the same instruction.
2665 (define_insn "sse2_cvtsd2si_2"
2666 [(set (match_operand:SI 0 "register_operand" "=r,r")
2667 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2668 UNSPEC_FIX_NOTRUNC))]
2670 "%vcvtsd2si\t{%1, %0|%0, %1}"
2671 [(set_attr "type" "sseicvt")
2672 (set_attr "athlon_decode" "double,vector")
2673 (set_attr "amdfam10_decode" "double,double")
2674 (set_attr "bdver1_decode" "double,double")
2675 (set_attr "prefix_rep" "1")
2676 (set_attr "prefix" "maybe_vex")
2677 (set_attr "mode" "SI")])
;; 64-bit result form of cvtsd2si: converts element 0 of a V2DF to DImode.
;; 64-bit targets only.
2679 (define_insn "sse2_cvtsd2siq"
2680 [(set (match_operand:DI 0 "register_operand" "=r,r")
2683 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2684 (parallel [(const_int 0)]))]
2685 UNSPEC_FIX_NOTRUNC))]
2686 "TARGET_SSE2 && TARGET_64BIT"
2687 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2688 [(set_attr "type" "sseicvt")
2689 (set_attr "athlon_decode" "double,vector")
2690 (set_attr "bdver1_decode" "double,double")
2691 (set_attr "prefix_rep" "1")
2692 (set_attr "prefix" "maybe_vex")
2693 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq that takes a scalar DF operand directly.
;; 64-bit targets only.
2695 (define_insn "sse2_cvtsd2siq_2"
2696 [(set (match_operand:DI 0 "register_operand" "=r,r")
2697 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2698 UNSPEC_FIX_NOTRUNC))]
2699 "TARGET_SSE2 && TARGET_64BIT"
2700 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2701 [(set_attr "type" "sseicvt")
2702 (set_attr "athlon_decode" "double,vector")
2703 (set_attr "amdfam10_decode" "double,double")
2704 (set_attr "bdver1_decode" "double,double")
2705 (set_attr "prefix_rep" "1")
2706 (set_attr "prefix" "maybe_vex")
2707 (set_attr "mode" "DI")])
;; cvttsd2si: the truncating counterpart of sse2_cvtsd2si.  Converts element
;; 0 of a V2DF to SImode; serves both SSE2 and AVX via %v / "maybe_vex".
2709 (define_insn "sse2_cvttsd2si"
2710 [(set (match_operand:SI 0 "register_operand" "=r,r")
2713 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2714 (parallel [(const_int 0)]))))]
2716 "%vcvttsd2si\t{%1, %0|%0, %1}"
2717 [(set_attr "type" "sseicvt")
2718 (set_attr "prefix_rep" "1")
2719 (set_attr "prefix" "maybe_vex")
2720 (set_attr "mode" "SI")
2721 (set_attr "athlon_decode" "double,vector")
2722 (set_attr "amdfam10_decode" "double,double")
2723 (set_attr "bdver1_decode" "double,double")])
;; 64-bit result form of cvttsd2si: converts element 0 of a V2DF to DImode.
;; 64-bit targets only.
2725 (define_insn "sse2_cvttsd2siq"
2726 [(set (match_operand:DI 0 "register_operand" "=r,r")
2729 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2730 (parallel [(const_int 0)]))))]
2731 "TARGET_SSE2 && TARGET_64BIT"
2732 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2733 [(set_attr "type" "sseicvt")
2734 (set_attr "prefix_rep" "1")
2735 (set_attr "prefix" "maybe_vex")
2736 (set_attr "mode" "DI")
2737 (set_attr "athlon_decode" "double,vector")
2738 (set_attr "amdfam10_decode" "double,double")
2739 (set_attr "bdver1_decode" "double,double")])
;; 256-bit vcvtdq2pd: element-wise signed conversion of V4SI to V4DF.
2741 (define_insn "avx_cvtdq2pd256"
2742 [(set (match_operand:V4DF 0 "register_operand" "=x")
2743 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2745 "vcvtdq2pd\t{%1, %0|%0, %1}"
2746 [(set_attr "type" "ssecvt")
2747 (set_attr "prefix" "vex")
2748 (set_attr "mode" "V4DF")])
;; vcvtdq2pd applied to elements 0..3 of a V8SI operand.  The template uses
;; the %x operand modifier on operand 1.
2750 (define_insn "*avx_cvtdq2pd256_2"
2751 [(set (match_operand:V4DF 0 "register_operand" "=x")
2754 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2755 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2757 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2758 [(set_attr "type" "ssecvt")
2759 (set_attr "prefix" "vex")
2760 (set_attr "mode" "V4DF")])
;; cvtdq2pd: converts elements 0 and 1 of a V4SI operand to V2DF.
;; Serves both SSE2 and AVX via %v / "maybe_vex".
2762 (define_insn "sse2_cvtdq2pd"
2763 [(set (match_operand:V2DF 0 "register_operand" "=x")
2766 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2767 (parallel [(const_int 0) (const_int 1)]))))]
2769 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2770 [(set_attr "type" "ssecvt")
2771 (set_attr "prefix" "maybe_vex")
2772 (set_attr "mode" "V2DF")])
;; 256-bit vcvtpd2dq: V4DF to V4SI, expressed as UNSPEC_FIX_NOTRUNC.
;; The "{y}" suffix is emitted only in AT&T syntax.
2774 (define_insn "avx_cvtpd2dq256"
2775 [(set (match_operand:V4SI 0 "register_operand" "=x")
2776 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2777 UNSPEC_FIX_NOTRUNC))]
2779 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2780 [(set_attr "type" "ssecvt")
2781 (set_attr "prefix" "vex")
2782 (set_attr "mode" "OI")])
;; Named expander for cvtpd2dq.  It supplies operands[2] itself as the
;; all-zero V2SI constant that the matching insn requires.
2784 (define_expand "sse2_cvtpd2dq"
2785 [(set (match_operand:V4SI 0 "register_operand" "")
2787 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2791 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V2DF to V2SI conversion (as an unspec), combined with a
;; constant-zero V2SI operand into a V4SI destination.  The template is C
;; code: with AVX it returns the VEX mnemonic with an AT&T-only "{x}" suffix,
;; otherwise the plain SSE2 mnemonic.
2793 (define_insn "*sse2_cvtpd2dq"
2794 [(set (match_operand:V4SI 0 "register_operand" "=x")
2796 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2798 (match_operand:V2SI 2 "const0_operand" "")))]
2800 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2801 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2802 [(set_attr "type" "ssecvt")
2803 (set_attr "prefix_rep" "1")
2804 (set_attr "prefix_data16" "0")
2805 (set_attr "prefix" "maybe_vex")
2806 (set_attr "mode" "TI")
2807 (set_attr "amdfam10_decode" "double")
2808 (set_attr "bdver1_decode" "double")])
;; 256-bit vcvttpd2dq: truncating (`fix`) V4DF to V4SI conversion.
;; The "{y}" suffix is emitted only in AT&T syntax.
2810 (define_insn "avx_cvttpd2dq256"
2811 [(set (match_operand:V4SI 0 "register_operand" "=x")
2812 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2814 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2815 [(set_attr "type" "ssecvt")
2816 (set_attr "prefix" "vex")
2817 (set_attr "mode" "OI")])
;; Named expander for cvttpd2dq.  It supplies operands[2] itself as the
;; all-zero V2SI constant that the matching insn requires.
2819 (define_expand "sse2_cvttpd2dq"
2820 [(set (match_operand:V4SI 0 "register_operand" "")
2822 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2825 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: truncating (`fix`) V2DF to V2SI conversion, combined with a
;; constant-zero V2SI operand into a V4SI destination.  The template is C
;; code: with AVX it returns the VEX mnemonic with an AT&T-only "{x}" suffix,
;; otherwise the plain SSE2 mnemonic.
2827 (define_insn "*sse2_cvttpd2dq"
2828 [(set (match_operand:V4SI 0 "register_operand" "=x")
2830 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2831 (match_operand:V2SI 2 "const0_operand" "")))]
2833 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2834 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2835 [(set_attr "type" "ssecvt")
2836 (set_attr "prefix" "maybe_vex")
2837 (set_attr "mode" "TI")
2838 (set_attr "amdfam10_decode" "double")
2839 (set_attr "bdver1_decode" "double")])
;; VEX form of cvtsd2ss: narrows a V2DF operand (register or memory) with
;; float_truncate.  Three-operand encoding, so operand 1 may be any SSE
;; register and need not match the destination.
2841 (define_insn "*avx_cvtsd2ss"
2842 [(set (match_operand:V4SF 0 "register_operand" "=x")
2845 (float_truncate:V2SF
2846 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2847 (match_operand:V4SF 1 "register_operand" "x")
2850 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2851 [(set_attr "type" "ssecvt")
2852 (set_attr "prefix" "vex")
2853 (set_attr "mode" "SF")])
;; cvtsd2ss: narrows a V2DF operand with float_truncate.  Operand 1 is tied
;; to the destination.  The register and memory sources are separate
;; alternatives so that the per-CPU decode attributes can differ.
2855 (define_insn "sse2_cvtsd2ss"
2856 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2859 (float_truncate:V2SF
2860 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2861 (match_operand:V4SF 1 "register_operand" "0,0")
2864 "cvtsd2ss\t{%2, %0|%0, %2}"
2865 [(set_attr "type" "ssecvt")
2866 (set_attr "athlon_decode" "vector,double")
2867 (set_attr "amdfam10_decode" "vector,double")
2868 (set_attr "bdver1_decode" "direct,direct")
2869 (set_attr "mode" "SF")])
;; VEX form of cvtss2sd: takes elements 0 and 1 of a V4SF operand (register
;; or memory).  Three-operand encoding, so operand 1 may be any SSE register.
2871 (define_insn "*avx_cvtss2sd"
2872 [(set (match_operand:V2DF 0 "register_operand" "=x")
2876 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2877 (parallel [(const_int 0) (const_int 1)])))
2878 (match_operand:V2DF 1 "register_operand" "x")
2881 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2882 [(set_attr "type" "ssecvt")
2883 (set_attr "prefix" "vex")
2884 (set_attr "mode" "DF")])
;; cvtss2sd: takes elements 0 and 1 of a V4SF operand.  Operand 1 is tied to
;; the destination.  The register and memory sources are separate
;; alternatives so that the per-CPU decode attributes can differ.
2886 (define_insn "sse2_cvtss2sd"
2887 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2891 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2892 (parallel [(const_int 0) (const_int 1)])))
2893 (match_operand:V2DF 1 "register_operand" "0,0")
2896 "cvtss2sd\t{%2, %0|%0, %2}"
2897 [(set_attr "type" "ssecvt")
2898 (set_attr "amdfam10_decode" "vector,double")
2899 (set_attr "bdver1_decode" "direct,direct")
2900 (set_attr "mode" "DF")])
;; 256-bit vcvtpd2ps: narrows V4DF to V4SF with float_truncate.
;; The "{y}" suffix is emitted only in AT&T syntax.
2902 (define_insn "avx_cvtpd2ps256"
2903 [(set (match_operand:V4SF 0 "register_operand" "=x")
2904 (float_truncate:V4SF
2905 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2907 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2908 [(set_attr "type" "ssecvt")
2909 (set_attr "prefix" "vex")
2910 (set_attr "mode" "V4SF")])
;; Named expander for cvtpd2ps.  It supplies operands[2] itself as the
;; all-zero V2SF constant that the matching insn requires.
2912 (define_expand "sse2_cvtpd2ps"
2913 [(set (match_operand:V4SF 0 "register_operand" "")
2915 (float_truncate:V2SF
2916 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2919 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: narrows V2DF to V2SF with float_truncate, combined with a
;; constant-zero V2SF operand into a V4SF destination.  The template is C
;; code: with AVX it returns the VEX mnemonic with an AT&T-only "{x}" suffix,
;; otherwise the plain SSE2 mnemonic.
2921 (define_insn "*sse2_cvtpd2ps"
2922 [(set (match_operand:V4SF 0 "register_operand" "=x")
2924 (float_truncate:V2SF
2925 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2926 (match_operand:V2SF 2 "const0_operand" "")))]
2928 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2929 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2930 [(set_attr "type" "ssecvt")
2931 (set_attr "prefix_data16" "1")
2932 (set_attr "prefix" "maybe_vex")
2933 (set_attr "mode" "V4SF")
2934 (set_attr "amdfam10_decode" "double")
2935 (set_attr "bdver1_decode" "double")])
;; 256-bit vcvtps2pd: V4SF source (register or memory) to V4DF destination.
2937 (define_insn "avx_cvtps2pd256"
2938 [(set (match_operand:V4DF 0 "register_operand" "=x")
2940 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2942 "vcvtps2pd\t{%1, %0|%0, %1}"
2943 [(set_attr "type" "ssecvt")
2944 (set_attr "prefix" "vex")
2945 (set_attr "mode" "V4DF")])
;; vcvtps2pd applied to elements 0..3 of a V8SF operand.  The template uses
;; the %x operand modifier on operand 1.
2947 (define_insn "*avx_cvtps2pd256_2"
2948 [(set (match_operand:V4DF 0 "register_operand" "=x")
2951 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2952 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2954 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2955 [(set_attr "type" "ssecvt")
2956 (set_attr "prefix" "vex")
2957 (set_attr "mode" "V4DF")])
;; cvtps2pd: converts elements 0 and 1 of a V4SF operand to V2DF.
;; Serves both SSE2 and AVX via %v / "maybe_vex".
2959 (define_insn "sse2_cvtps2pd"
2960 [(set (match_operand:V2DF 0 "register_operand" "=x")
2963 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2964 (parallel [(const_int 0) (const_int 1)]))))]
2966 "%vcvtps2pd\t{%1, %0|%0, %1}"
2967 [(set_attr "type" "ssecvt")
2968 (set_attr "prefix" "maybe_vex")
2969 (set_attr "mode" "V2DF")
2970 (set_attr "prefix_data16" "0")
2971 (set_attr "amdfam10_decode" "direct")
2972 (set_attr "bdver1_decode" "double")])
;; Two-step expander giving a V2DF result from V4SF operand 1.  A V4SF
;; scratch register is allocated into operands[2]; the final step reads
;; elements 0 and 1 (presumably of that scratch -- the intermediate set is
;; not fully visible here).
2974 (define_expand "vec_unpacks_hi_v4sf"
2979 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2980 (parallel [(const_int 6)
2984 (set (match_operand:V2DF 0 "register_operand" "")
2988 (parallel [(const_int 0) (const_int 1)]))))]
2990 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander giving a V4DF result from V8SF operand 1: a selection
;; starting at element 4 is involved, and operands[2] is a fresh V4SF
;; scratch register.
2992 (define_expand "vec_unpacks_hi_v8sf"
2995 (match_operand:V8SF 1 "nonimmediate_operand" "")
2996 (parallel [(const_int 4)
3000 (set (match_operand:V4DF 0 "register_operand" "")
3005 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander giving a V2DF result from elements 0 and 1 of V4SF operand 1.
3008 (define_expand "vec_unpacks_lo_v4sf"
3009 [(set (match_operand:V2DF 0 "register_operand" "")
3012 (match_operand:V4SF 1 "nonimmediate_operand" "")
3013 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander giving a V4DF result from elements 0..3 of V8SF operand 1.
3016 (define_expand "vec_unpacks_lo_v8sf"
3017 [(set (match_operand:V4DF 0 "register_operand" "")
3020 (match_operand:V8SF 1 "nonimmediate_operand" "")
3021 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
;; V8HI -> V4SF: unpack with gen_vec_unpacks_hi_v8hi into a V4SI
;; temporary, then convert that temporary with gen_sse2_cvtdq2ps.
3024 (define_expand "vec_unpacks_float_hi_v8hi"
3025 [(match_operand:V4SF 0 "register_operand" "")
3026 (match_operand:V8HI 1 "register_operand" "")]
3029 rtx tmp = gen_reg_rtx (V4SImode);
3031 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3032 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack with gen_vec_unpacks_lo_v8hi into a V4SI
;; temporary, then convert that temporary with gen_sse2_cvtdq2ps.
3036 (define_expand "vec_unpacks_float_lo_v8hi"
3037 [(match_operand:V4SF 0 "register_operand" "")
3038 (match_operand:V8HI 1 "register_operand" "")]
3041 rtx tmp = gen_reg_rtx (V4SImode);
3043 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3044 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack with gen_vec_unpacku_hi_v8hi into a V4SI
;; temporary, then convert that temporary with gen_sse2_cvtdq2ps.
3048 (define_expand "vec_unpacku_float_hi_v8hi"
3049 [(match_operand:V4SF 0 "register_operand" "")
3050 (match_operand:V8HI 1 "register_operand" "")]
3053 rtx tmp = gen_reg_rtx (V4SImode);
3055 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3056 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack with gen_vec_unpacku_lo_v8hi into a V4SI
;; temporary, then convert that temporary with gen_sse2_cvtdq2ps.
3060 (define_expand "vec_unpacku_float_lo_v8hi"
3061 [(match_operand:V4SF 0 "register_operand" "")
3062 (match_operand:V8HI 1 "register_operand" "")]
3065 rtx tmp = gen_reg_rtx (V4SImode);
3067 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3068 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander giving a V2DF result from V4SI operand 1.  A V4SI
;; scratch register is allocated into operands[2]; the final step reads
;; elements 0 and 1 (presumably of that scratch).
3072 (define_expand "vec_unpacks_float_hi_v4si"
3075 (match_operand:V4SI 1 "nonimmediate_operand" "")
3076 (parallel [(const_int 2)
3080 (set (match_operand:V2DF 0 "register_operand" "")
3084 (parallel [(const_int 0) (const_int 1)]))))]
3086 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander giving a V2DF result from elements 0 and 1 of V4SI operand 1.
3088 (define_expand "vec_unpacks_float_lo_v4si"
3089 [(set (match_operand:V2DF 0 "register_operand" "")
3092 (match_operand:V4SI 1 "nonimmediate_operand" "")
3093 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander giving a V4DF result from V8SI operand 1: a selection
;; starting at element 4 is involved, and operands[2] is a fresh V4SI
;; scratch register.
3096 (define_expand "vec_unpacks_float_hi_v8si"
3099 (match_operand:V8SI 1 "nonimmediate_operand" "")
3100 (parallel [(const_int 4)
3104 (set (match_operand:V4DF 0 "register_operand" "")
3108 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander giving a V4DF result from elements 0..3 of V8SI operand 1.
3110 (define_expand "vec_unpacks_float_lo_v8si"
3111 [(set (match_operand:V4DF 0 "register_operand" "")
3114 (match_operand:V8SI 1 "nonimmediate_operand" "")
3115 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
;; V4SI -> V2DF expander (the "unpacku" counterpart of the expander above).
;; Visible steps: compare operand 6 with operand 3 (lt), AND operand 7 with
;; operand 4, and add operand 8 to operand 6 to form the result.
;; The preparation code sets:
;;   operands[3] = V2DF zero vector in a register,
;;   operands[4] = V2DF vector built from the constant 2^32
;;                 (real_ldexp of dconst1 by 32),
;;   operands[5] = V4SI scratch, operands[6..8] = V2DF scratch registers.
;; Presumably this adds 2^32 back to lanes that converted as negative --
;; confirm against the full pattern.
3118 (define_expand "vec_unpacku_float_hi_v4si"
3121 (match_operand:V4SI 1 "nonimmediate_operand" "")
3122 (parallel [(const_int 2)
3130 (parallel [(const_int 0) (const_int 1)]))))
3132 (lt:V2DF (match_dup 6) (match_dup 3)))
3134 (and:V2DF (match_dup 7) (match_dup 4)))
3135 (set (match_operand:V2DF 0 "register_operand" "")
3136 (plus:V2DF (match_dup 6) (match_dup 8)))]
3139 REAL_VALUE_TYPE TWO32r;
3143 real_ldexp (&TWO32r, &dconst1, 32);
3144 x = const_double_from_real_value (TWO32r, DFmode);
3146 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3147 operands[4] = force_reg (V2DFmode,
3148 ix86_build_const_vector (V2DFmode, 1, x));
3150 operands[5] = gen_reg_rtx (V4SImode);
3152 for (i = 6; i < 9; i++)
3153 operands[i] = gen_reg_rtx (V2DFmode);
;; V4SI -> V2DF expander working on elements 0 and 1 of operand 1.
;; Visible steps: compare operand 5 with operand 3 (lt), AND operand 6 with
;; operand 4, and add operand 7 to operand 5 to form the result.
;; The preparation code sets:
;;   operands[3] = V2DF zero vector in a register,
;;   operands[4] = V2DF vector built from the constant 2^32
;;                 (real_ldexp of dconst1 by 32),
;;   operands[5..7] = V2DF scratch registers.
;; Presumably this adds 2^32 back to lanes that converted as negative --
;; confirm against the full pattern.
3156 (define_expand "vec_unpacku_float_lo_v4si"
3160 (match_operand:V4SI 1 "nonimmediate_operand" "")
3161 (parallel [(const_int 0) (const_int 1)]))))
3163 (lt:V2DF (match_dup 5) (match_dup 3)))
3165 (and:V2DF (match_dup 6) (match_dup 4)))
3166 (set (match_operand:V2DF 0 "register_operand" "")
3167 (plus:V2DF (match_dup 5) (match_dup 7)))]
3170 REAL_VALUE_TYPE TWO32r;
3174 real_ldexp (&TWO32r, &dconst1, 32);
3175 x = const_double_from_real_value (TWO32r, DFmode);
3177 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3178 operands[4] = force_reg (V2DFmode,
3179 ix86_build_const_vector (V2DFmode, 1, x));
3181 for (i = 5; i < 8; i++)
3182 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V4DF operands into one V8SF register: each input is narrowed to
;; V4SF with float_truncate, using V4SF scratch registers allocated into
;; operands[3] and operands[4], before the V8SF destination is set.
3185 (define_expand "vec_pack_trunc_v4df"
3187 (float_truncate:V4SF
3188 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3190 (float_truncate:V4SF
3191 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3192 (set (match_operand:V8SF 0 "register_operand" "")
3198 operands[3] = gen_reg_rtx (V4SFmode);
3199 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF operands into one V4SF register: each input goes through
;; gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and
;; gen_sse_movlhps combines r1 and r2 into operand 0.
3202 (define_expand "vec_pack_trunc_v2df"
3203 [(match_operand:V4SF 0 "register_operand" "")
3204 (match_operand:V2DF 1 "nonimmediate_operand" "")
3205 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3210 r1 = gen_reg_rtx (V4SFmode);
3211 r2 = gen_reg_rtx (V4SFmode);
3213 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3214 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3215 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI register: each input goes through
;; gen_sse2_cvttpd2dq into its own V4SI temporary, and the two temporaries
;; are combined with gen_vec_interleave_lowv2di, viewing all three
;; registers in V2DImode via gen_lowpart.
3219 (define_expand "vec_pack_sfix_trunc_v2df"
3220 [(match_operand:V4SI 0 "register_operand" "")
3221 (match_operand:V2DF 1 "nonimmediate_operand" "")
3222 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3227 r1 = gen_reg_rtx (V4SImode);
3228 r2 = gen_reg_rtx (V4SImode);
3230 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3231 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3232 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3233 gen_lowpart (V2DImode, r1),
3234 gen_lowpart (V2DImode, r2)));
;; Same shape as vec_pack_sfix_trunc_v2df, but the per-input conversion is
;; gen_sse2_cvtpd2dq instead of gen_sse2_cvttpd2dq.  The two V4SI
;; temporaries are combined with gen_vec_interleave_lowv2di in V2DImode.
3238 (define_expand "vec_pack_sfix_v2df"
3239 [(match_operand:V4SI 0 "register_operand" "")
3240 (match_operand:V2DF 1 "nonimmediate_operand" "")
3241 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3246 r1 = gen_reg_rtx (V4SImode);
3247 r2 = gen_reg_rtx (V4SImode);
3249 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3250 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3251 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3252 gen_lowpart (V2DImode, r1),
3253 gen_lowpart (V2DImode, r2)));
3257 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3259 ;; Parallel single-precision floating point element swizzling
3261 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Front end for sse_movhlps that accepts any nonimmediate operands.
;; ix86_fixup_binary_operands legalizes the operands and returns the
;; destination to use; the real insn is emitted on that destination and the
;; value is copied to operands[0] if a different destination was returned.
3263 (define_expand "sse_movhlps_exp"
3264 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3267 (match_operand:V4SF 1 "nonimmediate_operand" "")
3268 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3269 (parallel [(const_int 6)
3275 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3277 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3279 /* Fix up the destination if needed. */
3280 if (dst != operands[0])
3281 emit_move_insn (operands[0], dst);
;; AVX form of movhlps.  Not valid when operands 1 and 2 are both memory.
;; Alternatives: register/register vmovhlps; vmovlps reading %H2 of an
;; offsettable memory operand 2; vmovhps storing register operand 2 to a
;; memory destination (operand 1 tied to operand 0).
3286 (define_insn "*avx_movhlps"
3287 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3290 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3291 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3292 (parallel [(const_int 6)
3296 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3298 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3299 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3300 vmovhps\t{%2, %0|%0, %2}"
3301 [(set_attr "type" "ssemov")
3302 (set_attr "prefix" "vex")
3303 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE form of movhlps; operand 1 is tied to the destination in every
;; alternative.  Not valid when operands 1 and 2 are both memory.
;; Alternatives: movhlps reg/reg; movlps from %H2 of an offsettable memory
;; operand; movhps store to a memory destination.
3305 (define_insn "sse_movhlps"
3306 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3309 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3310 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3311 (parallel [(const_int 6)
3315 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3317 movhlps\t{%2, %0|%0, %2}
3318 movlps\t{%H2, %0|%0, %H2}
3319 movhps\t{%2, %0|%0, %2}"
3320 [(set_attr "type" "ssemov")
3321 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Front end for sse_movlhps that accepts any nonimmediate operands.
;; ix86_fixup_binary_operands legalizes the operands and returns the
;; destination to use; the real insn is emitted on that destination and the
;; value is copied to operands[0] if a different destination was returned.
3323 (define_expand "sse_movlhps_exp"
3324 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3327 (match_operand:V4SF 1 "nonimmediate_operand" "")
3328 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3329 (parallel [(const_int 0)
3335 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3337 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3339 /* Fix up the destination if needed. */
3340 if (dst != operands[0])
3341 emit_move_insn (operands[0], dst);
;; AVX form of movlhps, gated on ix86_binary_operator_ok.
;; Alternatives: register/register vmovlhps; vmovhps with a memory
;; operand 2; vmovlps storing register operand 2 to %H0 of an offsettable
;; memory destination (operand 1 tied to operand 0).
3346 (define_insn "*avx_movlhps"
3347 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3350 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3351 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3352 (parallel [(const_int 0)
3356 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3358 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3359 vmovhps\t{%2, %1, %0|%0, %1, %2}
3360 vmovlps\t{%2, %H0|%H0, %2}"
3361 [(set_attr "type" "ssemov")
3362 (set_attr "prefix" "vex")
3363 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE form of movlhps, gated on ix86_binary_operator_ok; operand 1 is tied
;; to the destination in every alternative.
;; Alternatives: movlhps reg/reg; movhps with a memory operand 2; movlps
;; storing register operand 2 to %H0 of an offsettable memory destination.
3365 (define_insn "sse_movlhps"
3366 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3369 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3370 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3371 (parallel [(const_int 0)
3375 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3377 movlhps\t{%2, %0|%0, %2}
3378 movhps\t{%2, %0|%0, %2}
3379 movlps\t{%2, %H0|%H0, %2}"
3380 [(set_attr "type" "ssemov")
3381 (set_attr "mode" "V4SF,V2SF,V2SF")])
3383 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selects elements 2,10,3,11 and 6,14,7,15 from the pair of V8SF operands
;; (operand 2's elements are numbered from 8); emits vunpckhps.
3384 (define_insn "avx_unpckhps256"
3385 [(set (match_operand:V8SF 0 "register_operand" "=x")
3388 (match_operand:V8SF 1 "register_operand" "x")
3389 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3390 (parallel [(const_int 2) (const_int 10)
3391 (const_int 3) (const_int 11)
3392 (const_int 6) (const_int 14)
3393 (const_int 7) (const_int 15)])))]
3395 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3396 [(set_attr "type" "sselog")
3397 (set_attr "prefix" "vex")
3398 (set_attr "mode" "V8SF")])
;; AVX three-operand high interleave of two V4SF values: selects elements
;; 2,6,3,7 of the operand pair and emits vunpckhps.
3400 (define_insn "*avx_interleave_highv4sf"
3401 [(set (match_operand:V4SF 0 "register_operand" "=x")
3404 (match_operand:V4SF 1 "register_operand" "x")
3405 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3406 (parallel [(const_int 2) (const_int 6)
3407 (const_int 3) (const_int 7)])))]
3409 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3410 [(set_attr "type" "sselog")
3411 (set_attr "prefix" "vex")
3412 (set_attr "mode" "V8SF")])
;; Expander for a V8SF high interleave.  It uses both selection vectors
;; seen in avx_unpcklps256 (0,8,1,9,4,12,5,13) and avx_unpckhps256
;; (2,10,3,11,6,14,7,15), then a final selection of elements 4..7 and
;; 12..15 into operand 0.  operands[3] and operands[4] are V8SF scratch
;; registers (presumably the destinations of the first two steps).
3414 (define_expand "vec_interleave_highv8sf"
3418 (match_operand:V8SF 1 "register_operand" "x")
3419 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3420 (parallel [(const_int 0) (const_int 8)
3421 (const_int 1) (const_int 9)
3422 (const_int 4) (const_int 12)
3423 (const_int 5) (const_int 13)])))
3429 (parallel [(const_int 2) (const_int 10)
3430 (const_int 3) (const_int 11)
3431 (const_int 6) (const_int 14)
3432 (const_int 7) (const_int 15)])))
3433 (set (match_operand:V8SF 0 "register_operand" "")
3438 (parallel [(const_int 4) (const_int 5)
3439 (const_int 6) (const_int 7)
3440 (const_int 12) (const_int 13)
3441 (const_int 14) (const_int 15)])))]
3444 operands[3] = gen_reg_rtx (V8SFmode);
3445 operands[4] = gen_reg_rtx (V8SFmode);
;; Two-operand SSE high interleave of two V4SF values (operand 1 tied to
;; the destination): selects elements 2,6,3,7 and emits unpckhps.
3448 (define_insn "vec_interleave_highv4sf"
3449 [(set (match_operand:V4SF 0 "register_operand" "=x")
3452 (match_operand:V4SF 1 "register_operand" "0")
3453 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3454 (parallel [(const_int 2) (const_int 6)
3455 (const_int 3) (const_int 7)])))]
3457 "unpckhps\t{%2, %0|%0, %2}"
3458 [(set_attr "type" "sselog")
3459 (set_attr "mode" "V4SF")])
3461 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selects elements 0,8,1,9 and 4,12,5,13 from the pair of V8SF operands
;; (operand 2's elements are numbered from 8); emits vunpcklps.
3462 (define_insn "avx_unpcklps256"
3463 [(set (match_operand:V8SF 0 "register_operand" "=x")
3466 (match_operand:V8SF 1 "register_operand" "x")
3467 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3468 (parallel [(const_int 0) (const_int 8)
3469 (const_int 1) (const_int 9)
3470 (const_int 4) (const_int 12)
3471 (const_int 5) (const_int 13)])))]
3473 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3474 [(set_attr "type" "sselog")
3475 (set_attr "prefix" "vex")
3476 (set_attr "mode" "V8SF")])
;; AVX three-operand low interleave of two V4SF values: selects elements
;; 0,4,1,5 of the operand pair and emits vunpcklps.
3478 (define_insn "*avx_interleave_lowv4sf"
3479 [(set (match_operand:V4SF 0 "register_operand" "=x")
3482 (match_operand:V4SF 1 "register_operand" "x")
3483 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3484 (parallel [(const_int 0) (const_int 4)
3485 (const_int 1) (const_int 5)])))]
3487 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3488 [(set_attr "type" "sselog")
3489 (set_attr "prefix" "vex")
3490 (set_attr "mode" "V4SF")])
;; Expander for a V8SF low interleave.  It uses both selection vectors
;; seen in avx_unpcklps256 (0,8,1,9,4,12,5,13) and avx_unpckhps256
;; (2,10,3,11,6,14,7,15), then a final selection of elements 0..3 and
;; 8..11 into operand 0.  operands[3] and operands[4] are V8SF scratch
;; registers (presumably the destinations of the first two steps).
3492 (define_expand "vec_interleave_lowv8sf"
3496 (match_operand:V8SF 1 "register_operand" "x")
3497 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3498 (parallel [(const_int 0) (const_int 8)
3499 (const_int 1) (const_int 9)
3500 (const_int 4) (const_int 12)
3501 (const_int 5) (const_int 13)])))
3507 (parallel [(const_int 2) (const_int 10)
3508 (const_int 3) (const_int 11)
3509 (const_int 6) (const_int 14)
3510 (const_int 7) (const_int 15)])))
3511 (set (match_operand:V8SF 0 "register_operand" "")
3516 (parallel [(const_int 0) (const_int 1)
3517 (const_int 2) (const_int 3)
3518 (const_int 8) (const_int 9)
3519 (const_int 10) (const_int 11)])))]
3522 operands[3] = gen_reg_rtx (V8SFmode);
3523 operands[4] = gen_reg_rtx (V8SFmode);
;; Two-operand SSE low interleave of two V4SF values (operand 1 tied to
;; the destination): selects elements 0,4,1,5 and emits unpcklps.
3526 (define_insn "vec_interleave_lowv4sf"
3527 [(set (match_operand:V4SF 0 "register_operand" "=x")
3530 (match_operand:V4SF 1 "register_operand" "0")
3531 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3532 (parallel [(const_int 0) (const_int 4)
3533 (const_int 1) (const_int 5)])))]
3535 "unpcklps\t{%2, %0|%0, %2}"
3536 [(set_attr "type" "sselog")
3537 (set_attr "mode" "V4SF")])
3539 ;; These are modeled with the same vec_concat as the others so that we
3540 ;; capture users of shufps that can use the new instructions
;; Duplicate the odd-numbered elements of a V8SF operand: the selection is
;; 1,1,3,3,5,5,7,7.  Emits vmovshdup.
3541 (define_insn "avx_movshdup256"
3542 [(set (match_operand:V8SF 0 "register_operand" "=x")
3545 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3547 (parallel [(const_int 1) (const_int 1)
3548 (const_int 3) (const_int 3)
3549 (const_int 5) (const_int 5)
3550 (const_int 7) (const_int 7)])))]
3552 "vmovshdup\t{%1, %0|%0, %1}"
3553 [(set_attr "type" "sse")
3554 (set_attr "prefix" "vex")
3555 (set_attr "mode" "V8SF")])
;; V4SF counterpart of avx_movshdup256; the selection starts at element 1.
;; The %v prefix yields movshdup or vmovshdup ("maybe_vex").
3557 (define_insn "sse3_movshdup"
3558 [(set (match_operand:V4SF 0 "register_operand" "=x")
3561 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3563 (parallel [(const_int 1)
3568 "%vmovshdup\t{%1, %0|%0, %1}"
3569 [(set_attr "type" "sse")
3570 (set_attr "prefix_rep" "1")
3571 (set_attr "prefix" "maybe_vex")
3572 (set_attr "mode" "V4SF")])
;; Duplicate the even-numbered elements of a V8SF operand: the selection is
;; 0,0,2,2,4,4,6,6.  Emits vmovsldup.
3574 (define_insn "avx_movsldup256"
3575 [(set (match_operand:V8SF 0 "register_operand" "=x")
3578 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3580 (parallel [(const_int 0) (const_int 0)
3581 (const_int 2) (const_int 2)
3582 (const_int 4) (const_int 4)
3583 (const_int 6) (const_int 6)])))]
3585 "vmovsldup\t{%1, %0|%0, %1}"
3586 [(set_attr "type" "sse")
3587 (set_attr "prefix" "vex")
3588 (set_attr "mode" "V8SF")])
;; V4SF counterpart of avx_movsldup256; the selection starts at element 0.
;; The %v prefix yields movsldup or vmovsldup ("maybe_vex").
3590 (define_insn "sse3_movsldup"
3591 [(set (match_operand:V4SF 0 "register_operand" "=x")
3594 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3596 (parallel [(const_int 0)
3601 "%vmovsldup\t{%1, %0|%0, %1}"
3602 [(set_attr "type" "sse")
3603 (set_attr "prefix_rep" "1")
3604 (set_attr "prefix" "maybe_vex")
3605 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle immediate as one integer (operand 3) and
;; unpacking it into the eight element selectors avx_shufps256_1 expects.
;; Each 2-bit field of the mask is used twice: once as-is (or +8 for the
;; fields taken from operand 2) and once with a further +4 offset for the
;; second half of the vector.
3607 (define_expand "avx_shufps256"
3608 [(match_operand:V8SF 0 "register_operand" "")
3609 (match_operand:V8SF 1 "register_operand" "")
3610 (match_operand:V8SF 2 "nonimmediate_operand" "")
3611 (match_operand:SI 3 "const_int_operand" "")]
3614 int mask = INTVAL (operands[3]);
3615 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3616 GEN_INT ((mask >> 0) & 3),
3617 GEN_INT ((mask >> 2) & 3),
3618 GEN_INT (((mask >> 4) & 3) + 8),
3619 GEN_INT (((mask >> 6) & 3) + 8),
3620 GEN_INT (((mask >> 0) & 3) + 4),
3621 GEN_INT (((mask >> 2) & 3) + 4),
3622 GEN_INT (((mask >> 4) & 3) + 12),
3623 GEN_INT (((mask >> 6) & 3) + 12)));
3627 ;; Each 2-bit field of the mask selects 2 elements, one in each half.
;; Operands 3..10 are the eight explicit element selectors.  The insn
;; condition requires selectors 7..10 to equal selectors 3..6 plus 4, i.e.
;; both halves must use the same selection.  The output code rebuilds the
;; 8-bit immediate from selectors 3..6 (removing the +8 bias on 5 and 6),
;; stores it in operands[3] and emits vshufps.
3628 (define_insn "avx_shufps256_1"
3629 [(set (match_operand:V8SF 0 "register_operand" "=x")
3632 (match_operand:V8SF 1 "register_operand" "x")
3633 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3634 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3635 (match_operand 4 "const_0_to_3_operand" "")
3636 (match_operand 5 "const_8_to_11_operand" "")
3637 (match_operand 6 "const_8_to_11_operand" "")
3638 (match_operand 7 "const_4_to_7_operand" "")
3639 (match_operand 8 "const_4_to_7_operand" "")
3640 (match_operand 9 "const_12_to_15_operand" "")
3641 (match_operand 10 "const_12_to_15_operand" "")])))]
3643 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3644 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3645 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3646 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3649 mask = INTVAL (operands[3]);
3650 mask |= INTVAL (operands[4]) << 2;
3651 mask |= (INTVAL (operands[5]) - 8) << 4;
3652 mask |= (INTVAL (operands[6]) - 8) << 6;
3653 operands[3] = GEN_INT (mask);
3655 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3657 [(set_attr "type" "sselog")
3658 (set_attr "length_immediate" "1")
3659 (set_attr "prefix" "vex")
3660 (set_attr "mode" "V8SF")])
;; Expander taking the shufps immediate as one integer (operand 3) and
;; unpacking its four 2-bit fields into explicit element selectors for
;; gen_sse_shufps_v4sf; the two upper fields are biased by +4 because they
;; select from operand 2.
3662 (define_expand "sse_shufps"
3663 [(match_operand:V4SF 0 "register_operand" "")
3664 (match_operand:V4SF 1 "register_operand" "")
3665 (match_operand:V4SF 2 "nonimmediate_operand" "")
3666 (match_operand:SI 3 "const_int_operand" "")]
3669 int mask = INTVAL (operands[3]);
3670 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3671 GEN_INT ((mask >> 0) & 3),
3672 GEN_INT ((mask >> 2) & 3),
3673 GEN_INT (((mask >> 4) & 3) + 4),
3674 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX three-operand shuffle over the SSEMODE4S modes.  Operands 3..6 are
;; explicit element selectors (0..3 pick from operand 1, 4..7 from
;; operand 2).  The output code folds them back into the 8-bit immediate,
;; stores it in operands[3] and emits vshufps.
3678 (define_insn "*avx_shufps_<mode>"
3679 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3680 (vec_select:SSEMODE4S
3681 (vec_concat:<ssedoublesizemode>
3682 (match_operand:SSEMODE4S 1 "register_operand" "x")
3683 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3684 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3685 (match_operand 4 "const_0_to_3_operand" "")
3686 (match_operand 5 "const_4_to_7_operand" "")
3687 (match_operand 6 "const_4_to_7_operand" "")])))]
3691 mask |= INTVAL (operands[3]) << 0;
3692 mask |= INTVAL (operands[4]) << 2;
3693 mask |= (INTVAL (operands[5]) - 4) << 4;
3694 mask |= (INTVAL (operands[6]) - 4) << 6;
3695 operands[3] = GEN_INT (mask);
3697 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3699 [(set_attr "type" "sselog")
3700 (set_attr "length_immediate" "1")
3701 (set_attr "prefix" "vex")
3702 (set_attr "mode" "V4SF")])
;; Two-operand SSE shuffle over the SSEMODE4S modes (operand 1 tied to the
;; destination).  Operands 3..6 are explicit element selectors (0..3 pick
;; from operand 1, 4..7 from operand 2).  The output code folds them back
;; into the 8-bit immediate, stores it in operands[3] and emits shufps.
3704 (define_insn "sse_shufps_<mode>"
3705 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3706 (vec_select:SSEMODE4S
3707 (vec_concat:<ssedoublesizemode>
3708 (match_operand:SSEMODE4S 1 "register_operand" "0")
3709 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3710 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3711 (match_operand 4 "const_0_to_3_operand" "")
3712 (match_operand 5 "const_4_to_7_operand" "")
3713 (match_operand 6 "const_4_to_7_operand" "")])))]
3717 mask |= INTVAL (operands[3]) << 0;
3718 mask |= INTVAL (operands[4]) << 2;
3719 mask |= (INTVAL (operands[5]) - 4) << 4;
3720 mask |= (INTVAL (operands[6]) - 4) << 6;
3721 operands[3] = GEN_INT (mask);
3723 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3725 [(set_attr "type" "sselog")
3726 (set_attr "length_immediate" "1")
3727 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF operand as a V2SF value.
;; Alternatives: movhps store to memory; movhlps register to register;
;; movlps load from %H1 of an offsettable memory source.  All templates use
;; the %v prefix ("maybe_vex"), and the register forms print the
;; destination with %d0.
3729 (define_insn "sse_storehps"
3730 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3732 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3733 (parallel [(const_int 2) (const_int 3)])))]
3736 %vmovhps\t{%1, %0|%0, %1}
3737 %vmovhlps\t{%1, %d0|%d0, %1}
3738 %vmovlps\t{%H1, %d0|%d0, %H1}"
3739 [(set_attr "type" "ssemov")
3740 (set_attr "prefix" "maybe_vex")
3741 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Front end for sse_loadhps that accepts any nonimmediate operands: keeps
;; elements 0 and 1 of V4SF operand 1 and combines them with V2SF
;; operand 2.  ix86_fixup_binary_operands legalizes the operands; the
;; result is copied to operands[0] if a different destination was returned.
3743 (define_expand "sse_loadhps_exp"
3744 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3747 (match_operand:V4SF 1 "nonimmediate_operand" "")
3748 (parallel [(const_int 0) (const_int 1)]))
3749 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3752 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3754 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3756 /* Fix up the destination if needed. */
3757 if (dst != operands[0])
3758 emit_move_insn (operands[0], dst);
;; AVX form of loadhps: elements 0 and 1 of operand 1 combined with V2SF
;; operand 2.  Alternatives: vmovhps with a memory operand 2; vmovlhps
;; register/register; vmovlps storing register operand 2 to %H0 of an
;; offsettable memory destination (operand 1 tied to operand 0).
3763 (define_insn "*avx_loadhps"
3764 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3767 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3768 (parallel [(const_int 0) (const_int 1)]))
3769 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3772 vmovhps\t{%2, %1, %0|%0, %1, %2}
3773 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3774 vmovlps\t{%2, %H0|%H0, %2}"
3775 [(set_attr "type" "ssemov")
3776 (set_attr "prefix" "vex")
3777 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE form of loadhps; operand 1 is tied to the destination in every
;; alternative.  Alternatives: movhps with a memory operand 2; movlhps
;; register/register; movlps storing register operand 2 to %H0 of an
;; offsettable memory destination.
3779 (define_insn "sse_loadhps"
3780 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3783 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3784 (parallel [(const_int 0) (const_int 1)]))
3785 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3788 movhps\t{%2, %0|%0, %2}
3789 movlhps\t{%2, %0|%0, %2}
3790 movlps\t{%2, %H0|%H0, %2}"
3791 [(set_attr "type" "ssemov")
3792 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX form of storelps: extract elements 0 and 1 of a V4SF operand as a
;; V2SF value.  Alternatives: vmovlps store to memory; vmovaps register to
;; register; vmovlps load from memory, using the destination as the merge
;; source as well.
;; The "mode" attribute of the vmovaps alternative is V4SF, matching the
;; same alternative of sse_storelps.
3794 (define_insn "*avx_storelps"
3795 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3797 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3798 (parallel [(const_int 0) (const_int 1)])))]
3801 vmovlps\t{%1, %0|%0, %1}
3802 vmovaps\t{%1, %0|%0, %1}
3803 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3804 [(set_attr "type" "ssemov")
3805 (set_attr "prefix" "vex")
3806 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE form of storelps: extract elements 0 and 1 of a V4SF operand as a
;; V2SF value.  Alternatives: movlps store to memory; movaps register to
;; register; movlps load from memory.
3808 (define_insn "sse_storelps"
3809 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3811 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3812 (parallel [(const_int 0) (const_int 1)])))]
3815 movlps\t{%1, %0|%0, %1}
3816 movaps\t{%1, %0|%0, %1}
3817 movlps\t{%1, %0|%0, %1}"
3818 [(set_attr "type" "ssemov")
3819 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Front end for sse_loadlps that accepts any nonimmediate operands:
;; combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; ix86_fixup_binary_operands legalizes the operands; the result is copied
;; to operands[0] if a different destination was returned.
3821 (define_expand "sse_loadlps_exp"
3822 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3824 (match_operand:V2SF 2 "nonimmediate_operand" "")
3826 (match_operand:V4SF 1 "nonimmediate_operand" "")
3827 (parallel [(const_int 2) (const_int 3)]))))]
3830 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3832 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3834 /* Fix up the destination if needed. */
3835 if (dst != operands[0])
3836 emit_move_insn (operands[0], dst);
;; AVX form of loadlps: V2SF operand 2 combined with elements 2 and 3 of
;; V4SF operand 1.  Alternatives: register/register shuffle with immediate
;; 0xe4; vmovlps with a memory operand 2; vmovlps storing register
;; operand 2 to a memory destination (operand 1 tied to operand 0).
;; The register/register alternative has three register operands and the
;; pattern is marked prefix "vex", so it must use the VEX mnemonic vshufps.
3841 (define_insn "*avx_loadlps"
3842 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3844 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3846 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3847 (parallel [(const_int 2) (const_int 3)]))))]
3850 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3851 vmovlps\t{%2, %1, %0|%0, %1, %2}
3852 vmovlps\t{%2, %0|%0, %2}"
3853 [(set_attr "type" "sselog,ssemov,ssemov")
3854 (set_attr "length_immediate" "1,*,*")
3855 (set_attr "prefix" "vex")
3856 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE form of loadlps: V2SF operand 2 combined with elements 2 and 3 of
;; V4SF operand 1.  Alternatives: shufps with immediate 0xe4 where
;; operand 2 is tied to the destination; movlps with a memory operand 2;
;; movlps storing register operand 2 to a memory destination.  In the two
;; movlps alternatives operand 1 is tied to the destination.
3858 (define_insn "sse_loadlps"
3859 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3861 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3863 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3864 (parallel [(const_int 2) (const_int 3)]))))]
3867 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3868 movlps\t{%2, %0|%0, %2}
3869 movlps\t{%2, %0|%0, %2}"
3870 [(set_attr "type" "sselog,ssemov,ssemov")
3871 (set_attr "length_immediate" "1,*,*")
3872 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX three-operand register form of movss on V4SF registers; note that
;; operand 2 is listed before operand 1 in the RTL.  Emits vmovss.
3874 (define_insn "*avx_movss"
3875 [(set (match_operand:V4SF 0 "register_operand" "=x")
3877 (match_operand:V4SF 2 "register_operand" "x")
3878 (match_operand:V4SF 1 "register_operand" "x")
3881 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3882 [(set_attr "type" "ssemov")
3883 (set_attr "prefix" "vex")
3884 (set_attr "mode" "SF")])
;; SSE two-operand register form of movss on V4SF registers; operand 1 is
;; tied to the destination and operand 2 is listed first in the RTL.
3886 (define_insn "sse_movss"
3887 [(set (match_operand:V4SF 0 "register_operand" "=x")
3889 (match_operand:V4SF 2 "register_operand" "x")
3890 (match_operand:V4SF 1 "register_operand" "0")
3893 "movss\t{%2, %0|%0, %2}"
3894 [(set_attr "type" "ssemov")
3895 (set_attr "mode" "SF")])
;; Expander that broadcasts SF operand 1 into a V4SF register.
;; The preparation code forces operand 1 into a register.  Operand 1 is a
;; scalar, so the mode handed to force_reg must be SFmode: asking for a
;; V4SF register would mismatch the mode of the value being copied.
3897 (define_expand "vec_dupv4sf"
3898 [(set (match_operand:V4SF 0 "register_operand" "")
3900 (match_operand:SF 1 "nonimmediate_operand" "")))]
3904 operands[1] = force_reg (SFmode, operands[1]);
;; AVX broadcast of an SF operand into a V4SF register.  Alternatives:
;; vshufps with immediate 0 when the source is a register; vbroadcastss
;; when the source is memory.
3907 (define_insn "*vec_dupv4sf_avx"
3908 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3910 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3913 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3914 vbroadcastss\t{%1, %0|%0, %1}"
3915 [(set_attr "type" "sselog1,ssemov")
3916 (set_attr "length_immediate" "1,0")
3917 (set_attr "prefix_extra" "0,1")
3918 (set_attr "prefix" "vex")
3919 (set_attr "mode" "V4SF")])
;; SSE broadcast of an SF register into a V4SF register.  The source is
;; tied to the destination, and the shuffle uses %0 for both inputs with
;; immediate 0.
3921 (define_insn "*vec_dupv4sf"
3922 [(set (match_operand:V4SF 0 "register_operand" "=x")
3924 (match_operand:SF 1 "register_operand" "0")))]
3926 "shufps\t{$0, %0, %0|%0, %0, 0}"
3927 [(set_attr "type" "sselog1")
3928 (set_attr "length_immediate" "1")
3929 (set_attr "mode" "V4SF")])
;; Build a V2SF value from two SF operands (AVX variant).  Alternatives:
;;   0: vunpcklps, both inputs in SSE registers
;;   1: vinsertps $0x10 with operand 2 in memory
;;   2: vmovss load of operand 1 when operand 2 is the constant "C"
;;   3: MMX punpckldq
;;   4: MMX movd load of operand 1 when operand 2 is the constant "C"
;; punpckldq is a two-operand instruction whose template only names %0 and
;; %2, so in alternative 3 operand 1 must be tied to operand 0 ("0"), as in
;; the non-AVX variants of this pattern.  The MMX alternatives keep the
;; "orig" prefix; the SSE ones use "vex".
3931 (define_insn "*vec_concatv2sf_avx"
3932 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3934 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, 0 , m")
3935 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3938 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3939 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3940 vmovss\t{%1, %0|%0, %1}
3941 punpckldq\t{%2, %0|%0, %2}
3942 movd\t{%1, %0|%0, %1}"
3943 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3944 (set_attr "length_immediate" "*,1,*,*,*")
3945 (set_attr "prefix_extra" "*,1,*,*,*")
3946 (set (attr "prefix")
3947 (if_then_else (eq_attr "alternative" "3,4")
3948 (const_string "orig")
3949 (const_string "vex")))
3950 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3952 ;; Although insertps takes register source, we prefer
3953 ;; unpcklps with register source since it is shorter.
;; Build a V2SF value from two SF operands (SSE4.1 variant).  Operand 1 is
;; tied to the destination in the unpcklps, insertps and punpckldq
;; alternatives; the movss and movd alternatives load operand 1 from memory
;; when operand 2 is the constant "C".
3954 (define_insn "*vec_concatv2sf_sse4_1"
3955 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3957 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3958 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3961 unpcklps\t{%2, %0|%0, %2}
3962 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3963 movss\t{%1, %0|%0, %1}
3964 punpckldq\t{%2, %0|%0, %2}
3965 movd\t{%1, %0|%0, %1}"
3966 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3967 (set_attr "prefix_data16" "*,1,*,*,*")
3968 (set_attr "prefix_extra" "*,1,*,*,*")
3969 (set_attr "length_immediate" "*,1,*,*,*")
3970 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3972 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3973 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3974 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF value from two SF operands (plain SSE variant).  Operand 2
;; is a register or the constant "C"; alternatives are unpcklps, movss
;; load, MMX punpckldq and MMX movd load.
3975 (define_insn "*vec_concatv2sf_sse"
3976 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3978 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3979 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3982 unpcklps\t{%2, %0|%0, %2}
3983 movss\t{%1, %0|%0, %1}
3984 punpckldq\t{%2, %0|%0, %2}
3985 movd\t{%1, %0|%0, %1}"
3986 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3987 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF register from two V2SF operands (AVX variant): vmovlhps
;; when operand 2 is a register, vmovhps when it is in memory.
3989 (define_insn "*vec_concatv4sf_avx"
3990 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3992 (match_operand:V2SF 1 "register_operand" " x,x")
3993 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3996 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3997 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3998 [(set_attr "type" "ssemov")
3999 (set_attr "prefix" "vex")
4000 (set_attr "mode" "V4SF,V2SF")])
;; Build a V4SF register from two V2SF operands (SSE variant, operand 1
;; tied to the destination): movlhps when operand 2 is a register, movhps
;; when it is in memory.
4002 (define_insn "*vec_concatv4sf_sse"
4003 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4005 (match_operand:V2SF 1 "register_operand" " 0,0")
4006 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4009 movlhps\t{%2, %0|%0, %2}
4010 movhps\t{%2, %0|%0, %2}"
4011 [(set_attr "type" "ssemov")
4012 (set_attr "mode" "V4SF,V2SF")])
;; vec_init expander for every SSEMODE mode; all the work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
4014 (define_expand "vec_init<mode>"
4015 [(match_operand:SSEMODE 0 "register_operand" "")
4016 (match_operand 1 "" "")]
4019 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX variant: merge a duplicated scalar (operand 2) into vector
;; operand 1 for the SSEMODE4S modes.  Six alternatives; the first five
;; have the templates listed below (vinsertps, vmov<suffix>, vmovd, vmovss,
;; vpinsrd).  The sixth (memory destination, "*" type and mode) has no
;; template visible here -- presumably it is handled by a split.
;; Operand 1 is the constant "C" in alternatives 0..2.
4023 (define_insn "*vec_set<mode>_0_avx"
4024 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
4025 (vec_merge:SSEMODE4S
4026 (vec_duplicate:SSEMODE4S
4027 (match_operand:<ssescalarmode> 2
4028 "general_operand" " x,m,*r,x,*rm,x*rfF"))
4029 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
4033 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
4034 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4035 vmovd\t{%2, %0|%0, %2}
4036 vmovss\t{%2, %1, %0|%0, %1, %2}
4037 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
4039 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4040 (set_attr "prefix_extra" "*,*,*,*,1,*")
4041 (set_attr "length_immediate" "*,*,*,*,1,*")
4042 (set_attr "prefix" "vex")
4043 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE4.1 variant of the pattern above, with two-operand templates
;; (insertps, mov<suffix>, movd, movss, pinsrd); operand 1 is tied to the
;; destination wherever it is not the constant "C".
;; NOTE(review): the memory alternative accepts "*rfF" for operand 2 here,
;; while the AVX and SSE2 variants accept "x*rfF" -- confirm whether the
;; missing "x" is intentional.
4045 (define_insn "*vec_set<mode>_0_sse4_1"
4046 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
4047 (vec_merge:SSEMODE4S
4048 (vec_duplicate:SSEMODE4S
4049 (match_operand:<ssescalarmode> 2
4050 "general_operand" " x,m,*r,x,*rm,*rfF"))
4051 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
4055 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
4056 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4057 movd\t{%2, %0|%0, %2}
4058 movss\t{%2, %0|%0, %2}
4059 pinsrd\t{$0, %2, %0|%0, %2, 0}
4061 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4062 (set_attr "prefix_extra" "*,*,*,*,1,*")
4063 (set_attr "length_immediate" "*,*,*,*,1,*")
4064 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE2 variant with four alternatives: mov<suffix> load from memory, movd
;; from a general register, movss from an SSE register (operand 1 tied to
;; the destination), and a memory-destination alternative with no template
;; visible here.
4066 (define_insn "*vec_set<mode>_0_sse2"
4067 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
4068 (vec_merge:SSEMODE4S
4069 (vec_duplicate:SSEMODE4S
4070 (match_operand:<ssescalarmode> 2
4071 "general_operand" " m,*r,x,x*rfF"))
4072 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4076 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4077 movd\t{%2, %0|%0, %2}
4078 movss\t{%2, %0|%0, %2}
4080 [(set_attr "type" "ssemov")
4081 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Baseline variant with three alternatives: movss load from memory, movss
;; from an SSE register (operand 1 tied to the destination), and a
;; memory-destination alternative with no template visible here.
4083 (define_insn "vec_set<mode>_0"
4084 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4085 (vec_merge:SSEMODE4S
4086 (vec_duplicate:SSEMODE4S
4087 (match_operand:<ssescalarmode> 2
4088 "general_operand" " m,x,x*rfF"))
4089 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4093 movss\t{%2, %0|%0, %2}
4094 movss\t{%2, %0|%0, %2}
4096 [(set_attr "type" "ssemov")
4097 (set_attr "mode" "SF,SF,*")])
4099 ;; A subset is vec_setv4sf.
;; Insert SF operand 2 into V4SF operand 1 (AVX variant).  Operand 3 is a
;; power of two in 1..8; the output code replaces it with
;; exact_log2 (operand 3) << 4 and uses that as the vinsertps immediate.
4100 (define_insn "*vec_setv4sf_avx"
4101 [(set (match_operand:V4SF 0 "register_operand" "=x")
4104 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4105 (match_operand:V4SF 1 "register_operand" "x")
4106 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4109 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4110 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4112 [(set_attr "type" "sselog")
4113 (set_attr "prefix_extra" "1")
4114 (set_attr "length_immediate" "1")
4115 (set_attr "prefix" "vex")
4116 (set_attr "mode" "V4SF")])
;; Insert SF operand 2 into V4SF operand 1 (SSE4.1 variant, operand 1 tied
;; to the destination).  Operand 3 is a power of two in 1..8; the output
;; code replaces it with exact_log2 (operand 3) << 4 and uses that as the
;; insertps immediate.
4118 (define_insn "*vec_setv4sf_sse4_1"
4119 [(set (match_operand:V4SF 0 "register_operand" "=x")
4122 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4123 (match_operand:V4SF 1 "register_operand" "0")
4124 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4127 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4128 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4130 [(set_attr "type" "sselog")
4131 (set_attr "prefix_data16" "1")
4132 (set_attr "prefix_extra" "1")
4133 (set_attr "length_immediate" "1")
4134 (set_attr "mode" "V4SF")])
;; vinsertps expressed as an unspec over operand 2, operand 1 and the
;; 8-bit immediate operand 3, which is passed to the instruction as is.
;; Operand 2 may be a register or memory; operand 1 is a separate source
;; register (three-operand form).
4136 (define_insn "*avx_insertps"
4137 [(set (match_operand:V4SF 0 "register_operand" "=x")
4138 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4139 (match_operand:V4SF 1 "register_operand" "x")
4140 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4143 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4144 [(set_attr "type" "sselog")
4145 (set_attr "prefix" "vex")
4146 (set_attr "prefix_extra" "1")
4147 (set_attr "length_immediate" "1")
4148 (set_attr "mode" "V4SF")])
;; insertps expressed as an unspec over operand 2, operand 1 and the 8-bit
;; immediate operand 3, which is passed to the instruction as is.  In this
;; two-operand form operand 1 is tied to the destination and operand 2 must
;; be a register.
4150 (define_insn "sse4_1_insertps"
4151 [(set (match_operand:V4SF 0 "register_operand" "=x")
4152 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4153 (match_operand:V4SF 1 "register_operand" "0")
4154 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4157 "insertps\t{%3, %2, %0|%0, %2, %3}";
4158 [(set_attr "type" "sselog")
4159 (set_attr "prefix_data16" "1")
4160 (set_attr "prefix_extra" "1")
4161 (set_attr "length_immediate" "1")
4162 (set_attr "mode" "V4SF")])
;; After reload (TARGET_SSE), an element-0 set whose destination is in
;; memory is replaced by a scalar move to byte offset 0 of that memory
;; location, re-addressed in <ssescalarmode>.
4165 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
4166 (vec_merge:SSEMODE4S
4167 (vec_duplicate:SSEMODE4S
4168 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4171 "TARGET_SSE && reload_completed"
4174 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Expander for vec_set<mode>: operand 0 is the vector register, operand 1
;; the scalar register and operand 2 the constant element index.  The
;; expansion itself is delegated to ix86_expand_vector_set.
4179 (define_expand "vec_set<mode>"
4180 [(match_operand:SSEMODE 0 "register_operand" "")
4181 (match_operand:<ssescalarmode> 1 "register_operand" "")
4182 (match_operand 2 "const_int_operand" "")]
4185 ix86_expand_vector_set (false, operands[0], operands[1],
4186 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 into SF operand 0; the condition
;; rejects the memory-to-memory case.  After reload the insn is split into
;; an ordinary SF move whose source is operand 1 viewed in SFmode (a REG
;; with the same register number, or gen_lowpart of the operand).
4190 (define_insn_and_split "*vec_extractv4sf_0"
4191 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4193 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4194 (parallel [(const_int 0)])))]
4195 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4197 "&& reload_completed"
4200 rtx op1 = operands[1];
4202 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4204 op1 = gen_lowpart (SFmode, op1);
4205 emit_move_insn (operands[0], op1);
;; Expander that dispatches on the constant operand 2 (0 or 1) to
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>, passing the
;; half-width destination (operand 0) and the 256-bit source (operand 1).
4209 (define_expand "avx_vextractf128<mode>"
4210 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4211 (match_operand:AVX256MODE 1 "register_operand" "")
4212 (match_operand:SI 2 "const_0_to_1_operand" "")]
4215 switch (INTVAL (operands[2]))
4218 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4221 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low half (elements 0 and 1) of an AVX256MODE4P vector.  After reload the
;; insn is split into a plain move from operand 1 viewed in
;; <avxhalfvecmode> (a REG with the same register number, or gen_lowpart).
4229 (define_insn_and_split "vec_extract_lo_<mode>"
4230 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4231 (vec_select:<avxhalfvecmode>
4232 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
4233 (parallel [(const_int 0) (const_int 1)])))]
4236 "&& reload_completed"
4239 rtx op1 = operands[1];
4241 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4243 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4244 emit_move_insn (operands[0], op1);
;; High half (elements 2 and 3) of an AVX256MODE4P register, extracted with
;; vextractf128 and immediate 1 into either a register or memory (the
;; "memory" attribute is "none" and "store" respectively).
4248 (define_insn "vec_extract_hi_<mode>"
4249 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4250 (vec_select:<avxhalfvecmode>
4251 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4252 (parallel [(const_int 2) (const_int 3)])))]
4254 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4255 [(set_attr "type" "sselog")
4256 (set_attr "prefix_extra" "1")
4257 (set_attr "length_immediate" "1")
4258 (set_attr "memory" "none,store")
4259 (set_attr "prefix" "vex")
4260 (set_attr "mode" "V8SF")])
;; Low half (elements 0..3) of an AVX256MODE8P vector.  After reload the
;; insn is split into a plain move from operand 1 viewed in
;; <avxhalfvecmode> (a REG with the same register number, or gen_lowpart).
4262 (define_insn_and_split "vec_extract_lo_<mode>"
4263 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4264 (vec_select:<avxhalfvecmode>
4265 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
4266 (parallel [(const_int 0) (const_int 1)
4267 (const_int 2) (const_int 3)])))]
4270 "&& reload_completed"
4273 rtx op1 = operands[1];
4275 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4277 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4278 emit_move_insn (operands[0], op1);
;; High half (elements 4..7) of an AVX256MODE8P register, extracted with
;; vextractf128 and immediate 1 into either a register or memory.
4282 (define_insn "vec_extract_hi_<mode>"
4283 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4284 (vec_select:<avxhalfvecmode>
4285 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4286 (parallel [(const_int 4) (const_int 5)
4287 (const_int 6) (const_int 7)])))]
4289 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4290 [(set_attr "type" "sselog")
4291 (set_attr "prefix_extra" "1")
4292 (set_attr "length_immediate" "1")
4293 (set_attr "memory" "none,store")
4294 (set_attr "prefix" "vex")
4295 (set_attr "mode" "V8SF")])
;; Low half (elements 0..7) of a V16HI vector as a V8HI.  After reload the
;; insn is split into a plain move from operand 1 viewed in V8HImode (a REG
;; with the same register number, or gen_lowpart).
4297 (define_insn_and_split "vec_extract_lo_v16hi"
4298 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4300 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4301 (parallel [(const_int 0) (const_int 1)
4302 (const_int 2) (const_int 3)
4303 (const_int 4) (const_int 5)
4304 (const_int 6) (const_int 7)])))]
4307 "&& reload_completed"
4310 rtx op1 = operands[1];
4312 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4314 op1 = gen_lowpart (V8HImode, op1);
4315 emit_move_insn (operands[0], op1);
;; High half (elements 8..15) of a V16HI register as a V8HI, extracted with
;; vextractf128 and immediate 1 into either a register or memory.
4319 (define_insn "vec_extract_hi_v16hi"
4320 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4322 (match_operand:V16HI 1 "register_operand" "x,x")
4323 (parallel [(const_int 8) (const_int 9)
4324 (const_int 10) (const_int 11)
4325 (const_int 12) (const_int 13)
4326 (const_int 14) (const_int 15)])))]
4328 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4329 [(set_attr "type" "sselog")
4330 (set_attr "prefix_extra" "1")
4331 (set_attr "length_immediate" "1")
4332 (set_attr "memory" "none,store")
4333 (set_attr "prefix" "vex")
4334 (set_attr "mode" "V8SF")])
;; Low half (elements 0..15) of a V32QI vector as a V16QI.  After reload
;; the insn is split into a plain move from operand 1 viewed in V16QImode
;; (a REG with the same register number, or gen_lowpart).
4336 (define_insn_and_split "vec_extract_lo_v32qi"
4337 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4339 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4340 (parallel [(const_int 0) (const_int 1)
4341 (const_int 2) (const_int 3)
4342 (const_int 4) (const_int 5)
4343 (const_int 6) (const_int 7)
4344 (const_int 8) (const_int 9)
4345 (const_int 10) (const_int 11)
4346 (const_int 12) (const_int 13)
4347 (const_int 14) (const_int 15)])))]
4350 "&& reload_completed"
4353 rtx op1 = operands[1];
4355 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4357 op1 = gen_lowpart (V16QImode, op1);
4358 emit_move_insn (operands[0], op1);
;; High half (elements 16..31) of a V32QI register as a V16QI, extracted
;; with vextractf128 and immediate 1 into either a register or memory.
4362 (define_insn "vec_extract_hi_v32qi"
4363 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4365 (match_operand:V32QI 1 "register_operand" "x,x")
4366 (parallel [(const_int 16) (const_int 17)
4367 (const_int 18) (const_int 19)
4368 (const_int 20) (const_int 21)
4369 (const_int 22) (const_int 23)
4370 (const_int 24) (const_int 25)
4371 (const_int 26) (const_int 27)
4372 (const_int 28) (const_int 29)
4373 (const_int 30) (const_int 31)])))]
4375 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4376 [(set_attr "type" "sselog")
4377 (set_attr "prefix_extra" "1")
4378 (set_attr "length_immediate" "1")
4379 (set_attr "memory" "none,store")
4380 (set_attr "prefix" "vex")
4381 (set_attr "mode" "V8SF")])
;; Extract the V4SF element selected by constant operand 2 (0..3) into a
;; general register or memory ("=rm") with extractps.  The "%v" template
;; prefix together with the "maybe_vex" prefix attribute presumably lets
;; the same pattern emit the VEX-encoded form -- confirm against i386.c.
4383 (define_insn "*sse4_1_extractps"
4384 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4386 (match_operand:V4SF 1 "register_operand" "x")
4387 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4389 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4390 [(set_attr "type" "sselog")
4391 (set_attr "prefix_data16" "1")
4392 (set_attr "prefix_extra" "1")
4393 (set_attr "length_immediate" "1")
4394 (set_attr "prefix" "maybe_vex")
4395 (set_attr "mode" "V4SF")])
;; Element extract from a V4SF held in offsettable memory ("o"): with i the
;; constant element index in operand 2, the split emits an SF move from
;; byte offset i*4 of operand 1.
4397 (define_insn_and_split "*vec_extract_v4sf_mem"
4398 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4400 (match_operand:V4SF 1 "memory_operand" "o")
4401 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4407 int i = INTVAL (operands[2]);
4409 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for vec_extract<mode>: operand 0 is the scalar destination,
;; operand 1 the vector register and operand 2 the constant element index.
;; The expansion itself is delegated to ix86_expand_vector_extract.
4413 (define_expand "vec_extract<mode>"
4414 [(match_operand:<avxscalarmode> 0 "register_operand" "")
4415 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4416 (match_operand 2 "const_int_operand" "")]
4419 ix86_expand_vector_extract (false, operands[0], operands[1],
4420 INTVAL (operands[2]));
4424 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4426 ;; Parallel double-precision floating point element swizzling
4428 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4430 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selects elements 1, 5, 3 and 7 from V4DF operands 1 and 2 and emits the
;; three-operand vunpckhpd; operand 2 may be in memory.
4431 (define_insn "avx_unpckhpd256"
4432 [(set (match_operand:V4DF 0 "register_operand" "=x")
4435 (match_operand:V4DF 1 "register_operand" "x")
4436 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4437 (parallel [(const_int 1) (const_int 5)
4438 (const_int 3) (const_int 7)])))]
4440 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4441 [(set_attr "type" "sselog")
4442 (set_attr "prefix" "vex")
4443 (set_attr "mode" "V4DF")])
;; Expander for the high interleave of two V4DF vectors, built in three
;; steps: the (0 4 2 6) and (1 5 3 7) selections are formed in two fresh
;; V4DF pseudos (operands 3 and 4, allocated in the preparation code), and
;; operand 0 is then produced by a final (2 3 6 7) selection.
4445 (define_expand "vec_interleave_highv4df"
4449 (match_operand:V4DF 1 "register_operand" "x")
4450 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4451 (parallel [(const_int 0) (const_int 4)
4452 (const_int 2) (const_int 6)])))
4458 (parallel [(const_int 1) (const_int 5)
4459 (const_int 3) (const_int 7)])))
4460 (set (match_operand:V4DF 0 "register_operand" "")
4465 (parallel [(const_int 2) (const_int 3)
4466 (const_int 6) (const_int 7)])))]
4469 operands[3] = gen_reg_rtx (V4DFmode);
4470 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the high interleave of two V2DF vectors.  When
;; ix86_vec_interleave_v2df_operator_ok (called with 1) rejects the operand
;; combination, operand 2 is forced into a register.
4474 (define_expand "vec_interleave_highv2df"
4475 [(set (match_operand:V2DF 0 "register_operand" "")
4478 (match_operand:V2DF 1 "nonimmediate_operand" "")
4479 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4480 (parallel [(const_int 1)
4484 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4485 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX high interleave of two V2DF vectors.  Alternatives, in order:
;; vunpckhpd on registers; vmovddup from %H1 when operand 2 matches the
;; memory operand 1; vmovlpd from %H1 combined with register operand 2;
;; and a vmovhpd store of register operand 1 into a memory destination
;; that is also operand 2.
4488 (define_insn "*avx_interleave_highv2df"
4489 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4492 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4493 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4494 (parallel [(const_int 1)
4496 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4498 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4499 vmovddup\t{%H1, %0|%0, %H1}
4500 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4501 vmovhpd\t{%1, %0|%0, %1}"
4502 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4503 (set_attr "prefix" "vex")
4504 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 high interleave of two V2DF vectors using two-operand forms.
;; Alternatives, in order: unpckhpd with operand 1 tied to the destination;
;; movddup from %H1 when operand 2 matches the memory operand 1; movlpd
;; from %H1 with operand 2 tied to the destination; and a movhpd store of
;; register operand 1 into a memory destination that is also operand 2.
4506 (define_insn "*sse3_interleave_highv2df"
4507 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4510 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4511 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4512 (parallel [(const_int 1)
4514 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4516 unpckhpd\t{%2, %0|%0, %2}
4517 movddup\t{%H1, %0|%0, %H1}
4518 movlpd\t{%H1, %0|%0, %H1}
4519 movhpd\t{%1, %0|%0, %1}"
4520 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4521 (set_attr "prefix_data16" "*,*,1,1")
4522 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 high interleave of two V2DF vectors; same as the SSE3 form but
;; without a movddup alternative.  Alternatives, in order: unpckhpd with
;; operand 1 tied to the destination; movlpd from %H1 with operand 2 tied
;; to the destination; and a movhpd store of register operand 1 into a
;; memory destination that is also operand 2.
4524 (define_insn "*sse2_interleave_highv2df"
4525 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4528 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4529 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4530 (parallel [(const_int 1)
4532 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4534 unpckhpd\t{%2, %0|%0, %2}
4535 movlpd\t{%H1, %0|%0, %H1}
4536 movhpd\t{%1, %0|%0, %1}"
4537 [(set_attr "type" "sselog,ssemov,ssemov")
4538 (set_attr "prefix_data16" "*,1,1")
4539 (set_attr "mode" "V2DF,V1DF,V1DF")])
4541 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the (0 4 2 6) selection applied to a V4DF source that may
;; be a register or memory; the destination is a register.
4542 (define_expand "avx_movddup256"
4543 [(set (match_operand:V4DF 0 "register_operand" "")
4546 (match_operand:V4DF 1 "nonimmediate_operand" "")
4548 (parallel [(const_int 0) (const_int 4)
4549 (const_int 2) (const_int 6)])))]
;; Expander for the (0 4 2 6) selection of register operand 1 and
;; register-or-memory operand 2 into a V4DF register.
4552 (define_expand "avx_unpcklpd256"
4553 [(set (match_operand:V4DF 0 "register_operand" "")
4556 (match_operand:V4DF 1 "register_operand" "")
4557 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4558 (parallel [(const_int 0) (const_int 4)
4559 (const_int 2) (const_int 6)])))]
;; Low unpack of V4DF operands with selection (0 4 2 6).  When operand 2
;; matches operand 1 (constraint "1") the insn is emitted as vmovddup of
;; operand 1; otherwise as three-operand vunpcklpd.  The condition accepts
;; a memory operand 1 only if it is rtx_equal_p to operand 2.
4562 (define_insn "*avx_unpcklpd256"
4563 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4566 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4567 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4568 (parallel [(const_int 0) (const_int 4)
4569 (const_int 2) (const_int 6)])))]
4571 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4573 vmovddup\t{%1, %0|%0, %1}
4574 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4575 [(set_attr "type" "sselog")
4576 (set_attr "prefix" "vex")
4577 (set_attr "mode" "V4DF")])
;; Expander for the low interleave of two V4DF vectors, built in three
;; steps: the (0 4 2 6) and (1 5 3 7) selections are formed in two fresh
;; V4DF pseudos (operands 3 and 4, allocated in the preparation code), and
;; operand 0 is then produced by a final (0 1 4 5) selection.
4579 (define_expand "vec_interleave_lowv4df"
4583 (match_operand:V4DF 1 "register_operand" "x")
4584 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4585 (parallel [(const_int 0) (const_int 4)
4586 (const_int 2) (const_int 6)])))
4592 (parallel [(const_int 1) (const_int 5)
4593 (const_int 3) (const_int 7)])))
4594 (set (match_operand:V4DF 0 "register_operand" "")
4599 (parallel [(const_int 0) (const_int 1)
4600 (const_int 4) (const_int 5)])))]
4603 operands[3] = gen_reg_rtx (V4DFmode);
4604 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the low interleave of two V2DF vectors.  When
;; ix86_vec_interleave_v2df_operator_ok (called with 0) rejects the operand
;; combination, operand 1 is forced into a register.
4607 (define_expand "vec_interleave_lowv2df"
4608 [(set (match_operand:V2DF 0 "register_operand" "")
4611 (match_operand:V2DF 1 "nonimmediate_operand" "")
4612 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4613 (parallel [(const_int 0)
4617 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4618 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX low interleave of two V2DF vectors.  Alternatives, in order:
;; vunpcklpd on registers; vmovddup when operand 2 matches operand 1;
;; vmovhpd taking memory operand 2 together with register operand 1; and a
;; vmovlpd store of register operand 2 to %H0 of an offsettable memory
;; destination that is also operand 1.
4621 (define_insn "*avx_interleave_lowv2df"
4622 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4625 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4626 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4627 (parallel [(const_int 0)
4629 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4631 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4632 vmovddup\t{%1, %0|%0, %1}
4633 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4634 vmovlpd\t{%2, %H0|%H0, %2}"
4635 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4636 (set_attr "prefix" "vex")
4637 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 low interleave of two V2DF vectors using two-operand forms.
;; Alternatives, in order: unpcklpd with operand 1 tied to the destination;
;; movddup when operand 2 matches operand 1; movhpd from memory operand 2;
;; and a movlpd store of register operand 2 to %H0 of an offsettable
;; memory destination that is also operand 1.
4639 (define_insn "*sse3_interleave_lowv2df"
4640 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4643 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4644 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4645 (parallel [(const_int 0)
4647 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4649 unpcklpd\t{%2, %0|%0, %2}
4650 movddup\t{%1, %0|%0, %1}
4651 movhpd\t{%2, %0|%0, %2}
4652 movlpd\t{%2, %H0|%H0, %2}"
4653 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4654 (set_attr "prefix_data16" "*,*,1,1")
4655 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 low interleave of two V2DF vectors; operand 1 is tied to the
;; destination in every alternative.  Alternatives, in order: unpcklpd with
;; a register operand 2; movhpd from memory operand 2; and a movlpd store
;; of register operand 2 to %H0 of an offsettable memory destination.
4657 (define_insn "*sse2_interleave_lowv2df"
4658 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4661 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4662 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4663 (parallel [(const_int 0)
4665 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4667 unpcklpd\t{%2, %0|%0, %2}
4668 movhpd\t{%2, %0|%0, %2}
4669 movlpd\t{%2, %H0|%H0, %2}"
4670 [(set_attr "type" "sselog,ssemov,ssemov")
4671 (set_attr "prefix_data16" "*,1,1")
4672 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; After reload (TARGET_SSE3), with a memory destination and a register
;; source, the low DF of operand 1 is stored twice: once at byte offset 0
;; and once at byte offset 8 of operand 0.
4675 [(set (match_operand:V2DF 0 "memory_operand" "")
4678 (match_operand:V2DF 1 "register_operand" "")
4680 (parallel [(const_int 0)
4682 "TARGET_SSE3 && reload_completed"
4685 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4686 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4687 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; With a memory source and selection indices satisfying
;; operands[2] + 2 == operands[3] (TARGET_SSE3), the selection is rewritten
;; as a vec_duplicate of the single DF located at byte offset
;; operands[2] * 8 of operand 1.
4692 [(set (match_operand:V2DF 0 "register_operand" "")
4695 (match_operand:V2DF 1 "memory_operand" "")
4697 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4698 (match_operand:SI 3 "const_int_operand" "")])))]
4699 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4700 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4702 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander that turns the immediate mask in operand 3 into explicit
;; element indices for gen_avx_shufpd256_1: mask bit 1 selects index 5 or
;; 4, bit 2 selects 3 or 2, and bit 3 selects 7 or 6.
4705 (define_expand "avx_shufpd256"
4706 [(match_operand:V4DF 0 "register_operand" "")
4707 (match_operand:V4DF 1 "register_operand" "")
4708 (match_operand:V4DF 2 "nonimmediate_operand" "")
4709 (match_operand:SI 3 "const_int_operand" "")]
4712 int mask = INTVAL (operands[3]);
4713 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4715 GEN_INT (mask & 2 ? 5 : 4),
4716 GEN_INT (mask & 4 ? 3 : 2),
4717 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the selection spelled out as operands 3..6.  The
;; output code rebuilds the instruction immediate: operand 3 supplies bit
;; 0, and operands 4, 5 and 6 (after subtracting 4, 2 and 6 respectively)
;; supply bits 1, 2 and 3.  The result overwrites operands[3] for printing.
4721 (define_insn "avx_shufpd256_1"
4722 [(set (match_operand:V4DF 0 "register_operand" "=x")
4725 (match_operand:V4DF 1 "register_operand" "x")
4726 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4727 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4728 (match_operand 4 "const_4_to_5_operand" "")
4729 (match_operand 5 "const_2_to_3_operand" "")
4730 (match_operand 6 "const_6_to_7_operand" "")])))]
4734 mask = INTVAL (operands[3]);
4735 mask |= (INTVAL (operands[4]) - 4) << 1;
4736 mask |= (INTVAL (operands[5]) - 2) << 2;
4737 mask |= (INTVAL (operands[6]) - 6) << 3;
4738 operands[3] = GEN_INT (mask);
4740 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4742 [(set_attr "type" "sselog")
4743 (set_attr "length_immediate" "1")
4744 (set_attr "prefix" "vex")
4745 (set_attr "mode" "V4DF")])
;; Expander that turns the immediate mask in operand 3 into explicit
;; element indices for gen_sse2_shufpd_v2df; mask bit 1 selects index 3 or
;; 2 for the second selector.
4747 (define_expand "sse2_shufpd"
4748 [(match_operand:V2DF 0 "register_operand" "")
4749 (match_operand:V2DF 1 "register_operand" "")
4750 (match_operand:V2DF 2 "nonimmediate_operand" "")
4751 (match_operand:SI 3 "const_int_operand" "")]
4754 int mask = INTVAL (operands[3]);
4755 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4757 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander for vec_extract_even<mode> on SSEMODE_EO register operands;
;; delegates to ix86_expand_vec_extract_even_odd with a final argument of 0.
4761 (define_expand "vec_extract_even<mode>"
4762 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4763 (match_operand:SSEMODE_EO 1 "register_operand" "")
4764 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4767 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander for vec_extract_odd<mode> on SSEMODE_EO register operands;
;; delegates to ix86_expand_vec_extract_even_odd with a final argument of 1.
4771 (define_expand "vec_extract_odd<mode>"
4772 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4773 (match_operand:SSEMODE_EO 1 "register_operand" "")
4774 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4777 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4781 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; High interleave of V2DI operands 1 and 2, emitted as the three-operand
;; vpunpckhqdq; operand 2 may be in memory.
4782 (define_insn "*avx_interleave_highv2di"
4783 [(set (match_operand:V2DI 0 "register_operand" "=x")
4786 (match_operand:V2DI 1 "register_operand" "x")
4787 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4788 (parallel [(const_int 1)
4791 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4792 [(set_attr "type" "sselog")
4793 (set_attr "prefix" "vex")
4794 (set_attr "mode" "TI")])
;; High interleave of V2DI operands, emitted as the two-operand punpckhqdq
;; with operand 1 tied to the destination; operand 2 may be in memory.
4796 (define_insn "vec_interleave_highv2di"
4797 [(set (match_operand:V2DI 0 "register_operand" "=x")
4800 (match_operand:V2DI 1 "register_operand" "0")
4801 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4802 (parallel [(const_int 1)
4805 "punpckhqdq\t{%2, %0|%0, %2}"
4806 [(set_attr "type" "sselog")
4807 (set_attr "prefix_data16" "1")
4808 (set_attr "mode" "TI")])
;; Low interleave of V2DI operands 1 and 2, emitted as the three-operand
;; vpunpcklqdq; operand 2 may be in memory.
4810 (define_insn "*avx_interleave_lowv2di"
4811 [(set (match_operand:V2DI 0 "register_operand" "=x")
4814 (match_operand:V2DI 1 "register_operand" "x")
4815 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4816 (parallel [(const_int 0)
4819 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4820 [(set_attr "type" "sselog")
4821 (set_attr "prefix" "vex")
4822 (set_attr "mode" "TI")])
;; Low interleave of V2DI operands, emitted as the two-operand punpcklqdq
;; with operand 1 tied to the destination; operand 2 may be in memory.
4824 (define_insn "vec_interleave_lowv2di"
4825 [(set (match_operand:V2DI 0 "register_operand" "=x")
4828 (match_operand:V2DI 1 "register_operand" "0")
4829 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4830 (parallel [(const_int 0)
4833 "punpcklqdq\t{%2, %0|%0, %2}"
4834 [(set_attr "type" "sselog")
4835 (set_attr "prefix_data16" "1")
4836 (set_attr "mode" "TI")])
;; Three-operand vshufpd on SSEMODE2D vectors, with the selection given as
;; operands 3 and 4.  The output code rebuilds the immediate: operand 3
;; supplies bit 0 and operand 4 minus 2 supplies bit 1; the result
;; overwrites operands[3] for printing.
4838 (define_insn "*avx_shufpd_<mode>"
4839 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4840 (vec_select:SSEMODE2D
4841 (vec_concat:<ssedoublesizemode>
4842 (match_operand:SSEMODE2D 1 "register_operand" "x")
4843 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4844 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4845 (match_operand 4 "const_2_to_3_operand" "")])))]
4849 mask = INTVAL (operands[3]);
4850 mask |= (INTVAL (operands[4]) - 2) << 1;
4851 operands[3] = GEN_INT (mask);
4853 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4855 [(set_attr "type" "sselog")
4856 (set_attr "length_immediate" "1")
4857 (set_attr "prefix" "vex")
4858 (set_attr "mode" "V2DF")])
;; Two-operand shufpd on SSEMODE2D vectors with operand 1 tied to the
;; destination and the selection given as operands 3 and 4.  The output
;; code rebuilds the immediate: operand 3 supplies bit 0 and operand 4
;; minus 2 supplies bit 1; the result overwrites operands[3] for printing.
4860 (define_insn "sse2_shufpd_<mode>"
4861 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4862 (vec_select:SSEMODE2D
4863 (vec_concat:<ssedoublesizemode>
4864 (match_operand:SSEMODE2D 1 "register_operand" "0")
4865 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4866 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4867 (match_operand 4 "const_2_to_3_operand" "")])))]
4871 mask = INTVAL (operands[3]);
4872 mask |= (INTVAL (operands[4]) - 2) << 1;
4873 operands[3] = GEN_INT (mask);
4875 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4877 [(set_attr "type" "sselog")
4878 (set_attr "length_immediate" "1")
4879 (set_attr "mode" "V2DF")])
4881 ;; Avoid combining registers from different units in a single alternative,
4882 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of V2DF operand 1 as a DF (AVX).  The first two
;; alternatives emit vmovhpd for a store to memory and vunpckhpd of operand
;; 1 with itself for the register case; the remaining three alternatives
;; read operand 1 from offsettable memory ("o").
4883 (define_insn "*avx_storehpd"
4884 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4886 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4887 (parallel [(const_int 1)])))]
4888 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4890 vmovhpd\t{%1, %0|%0, %1}
4891 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4895 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4896 (set_attr "prefix" "vex")
4897 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Extract element 1 of V2DF operand 1 as a DF (SSE2).  The first
;; alternative is a movhpd store to memory; in the second, operand 1 is
;; tied to the destination register; the remaining three alternatives read
;; operand 1 from offsettable memory ("o").
4899 (define_insn "sse2_storehpd"
4900 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4902 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4903 (parallel [(const_int 1)])))]
4904 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4906 movhpd\t{%1, %0|%0, %1}
4911 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4912 (set_attr "prefix_data16" "1,*,*,*,*")
4913 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload (TARGET_SSE2), extracting element 1 from a V2DF in memory
;; becomes a plain DF load from byte offset 8 of that memory operand.
4916 [(set (match_operand:DF 0 "register_operand" "")
4918 (match_operand:V2DF 1 "memory_operand" "")
4919 (parallel [(const_int 1)])))]
4920 "TARGET_SSE2 && reload_completed"
4921 [(set (match_dup 0) (match_dup 1))]
4922 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4924 ;; Avoid combining registers from different units in a single alternative,
4925 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of V2DF operand 1 as a DF.  The first alternative is a
;; %vmovlpd store to memory; the last three alternatives read operand 1
;; from memory.  The condition rejects the memory-to-memory case.
4926 (define_insn "sse2_storelpd"
4927 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4929 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4930 (parallel [(const_int 0)])))]
4931 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4933 %vmovlpd\t{%1, %0|%0, %1}
4938 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4939 (set_attr "prefix_data16" "1,*,*,*,*")
4940 (set_attr "prefix" "maybe_vex")
4941 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload (TARGET_SSE2), extracting element 0 into a register becomes
;; a DF move from operand 1 viewed in DFmode (a REG with the same register
;; number, or gen_lowpart of the operand).
4944 [(set (match_operand:DF 0 "register_operand" "")
4946 (match_operand:V2DF 1 "nonimmediate_operand" "")
4947 (parallel [(const_int 0)])))]
4948 "TARGET_SSE2 && reload_completed"
4951 rtx op1 = operands[1];
4953 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4955 op1 = gen_lowpart (DFmode, op1);
4956 emit_move_insn (operands[0], op1);
;; Expander wrapper around sse2_loadhpd: ix86_fixup_binary_operands
;; returns the destination to use, gen_sse2_loadhpd is emitted into it, and
;; the result is copied to operand 0 when the two differ.
4960 (define_expand "sse2_loadhpd_exp"
4961 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4964 (match_operand:V2DF 1 "nonimmediate_operand" "")
4965 (parallel [(const_int 0)]))
4966 (match_operand:DF 2 "nonimmediate_operand" "")))]
4969 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4971 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4973 /* Fix up the destination if needed. */
4974 if (dst != operands[0])
4975 emit_move_insn (operands[0], dst);
4980 ;; Avoid combining registers from different units in a single alternative,
4981 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Keep element 0 of V2DF operand 1 and take DF operand 2 as the new high
;; element (AVX).  The register alternatives emit vmovhpd (operand 2 in
;; memory) or vunpcklpd (operand 2 in a register); the remaining three
;; alternatives have an offsettable memory destination tied to operand 1.
4982 (define_insn "*avx_loadhpd"
4983 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4986 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4987 (parallel [(const_int 0)]))
4988 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4989 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4991 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4992 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4996 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4997 (set_attr "prefix" "vex")
4998 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Keep element 0 of V2DF operand 1 and take DF operand 2 as the new high
;; element (SSE2).  The register alternatives emit movhpd (operand 2 in
;; memory) or unpcklpd (operand 2 in a register), both with operand 1 tied
;; to the destination; the remaining three alternatives have an
;; offsettable memory destination tied to operand 1.
;;
;; There is deliberately no alternative with operand 2 tied to the
;; destination.  A former "shufpd $1, %1, %0" alternative of that shape
;; produced { op2[1], op1[0] } instead of { op1[0], op2[0] }: shufpd always
;; takes the low result element from the destination register, so no
;; single SSE2 instruction can implement that register assignment.
5000 (define_insn "sse2_loadhpd"
5001 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
5004 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
5005 (parallel [(const_int 0)]))
5006 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
5007 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5009 movhpd\t{%2, %0|%0, %2}
5010 unpcklpd\t{%2, %0|%0, %2}
5015 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
5016 (set_attr "prefix_data16" "1,*,*,*,*")
5018 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload (TARGET_SSE2), replacing the high element of a V2DF in
;; memory (element 0 is kept) with DF register operand 1 becomes a plain
;; DF store to byte offset 8 of that memory operand.
5021 [(set (match_operand:V2DF 0 "memory_operand" "")
5023 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
5024 (match_operand:DF 1 "register_operand" "")))]
5025 "TARGET_SSE2 && reload_completed"
5026 [(set (match_dup 0) (match_dup 1))]
5027 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd: ix86_fixup_binary_operands
;; returns the destination to use, gen_sse2_loadlpd is emitted into it, and
;; the result is copied to operand 0 when the two differ.
5029 (define_expand "sse2_loadlpd_exp"
5030 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
5032 (match_operand:DF 2 "nonimmediate_operand" "")
5034 (match_operand:V2DF 1 "nonimmediate_operand" "")
5035 (parallel [(const_int 1)]))))]
5038 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5040 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
5042 /* Fix up the destination if needed. */
5043 if (dst != operands[0])
5044 emit_move_insn (operands[0], dst);
5049 ;; Avoid combining registers from different units in a single alternative,
5050 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Build a V2DF from DF operand 2 (new low element) and element 1 of
;; operand 1 (AVX).  Register alternatives, in order: vmovsd load with
;; operand 1 constrained to "C"; vmovlpd from memory; three-operand vmovsd
;; on registers; vmovhpd from %H1 when operand 1 is in offsettable memory.
;; The last three alternatives have a memory destination tied to operand 1.
5051 (define_insn "*avx_loadlpd"
5052 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
5054 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
5056 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
5057 (parallel [(const_int 1)]))))]
5058 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5060 vmovsd\t{%2, %0|%0, %2}
5061 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5062 vmovsd\t{%2, %1, %0|%0, %1, %2}
5063 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5067 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
5068 (set_attr "prefix" "vex")
5069 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; Build a V2DF from DF operand 2 (new low element) and element 1 of
;; operand 1 (SSE2).  Register alternatives, in order: movsd load with
;; operand 1 constrained to "C"; movlpd from memory; movsd on registers
;; (these with operand 1 tied to the destination where it is a register);
;; shufpd and movhpd with operand 2 tied to the destination.  The last
;; three alternatives have a memory destination tied to operand 1.
;;
;; In the shufpd alternative the destination already holds operand 2 in
;; its low element, so the source must be operand 1: with immediate 2 the
;; low element is kept from the destination and the high element is taken
;; from the high element of the source.  Using %2 there would read the
;; destination itself and never pick up operand 1.
5071 (define_insn "sse2_loadlpd"
5072 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
5074 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
5076 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
5077 (parallel [(const_int 1)]))))]
5078 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5080 movsd\t{%2, %0|%0, %2}
5081 movlpd\t{%2, %0|%0, %2}
5082 movsd\t{%2, %0|%0, %2}
5083 shufpd\t{$2, %1, %0|%0, %1, 2}
5084 movhpd\t{%H1, %0|%0, %H1}
5088 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
5089 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
5090 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
5091 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; After reload (TARGET_SSE2), replacing the low element of a V2DF in
;; memory with DF register operand 1 (element 1 of the memory vector is
;; kept) becomes a plain DF store.  The new value is the low element, so
;; it is stored at byte offset 0; offset 8 would overwrite the element
;; that the pattern preserves.
5094 [(set (match_operand:V2DF 0 "memory_operand" "")
5096 (match_operand:DF 1 "register_operand" "")
5097 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5098 "TARGET_SSE2 && reload_completed"
5099 [(set (match_dup 0) (match_dup 1))]
5100 "operands[0] = adjust_address (operands[0], DFmode, 0);")
5102 ;; Not sure these two are ever used, but it doesn't hurt to have
;; Extract element 1 of a V2DF as a DF when SSE is available but SSE2 is
;; not.  Alternatives, in order: movhps store to memory; movhlps between
;; registers; movlps load from %H1 of an offsettable memory operand.
5104 (define_insn "*vec_extractv2df_1_sse"
5105 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5107 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
5108 (parallel [(const_int 1)])))]
5109 "!TARGET_SSE2 && TARGET_SSE
5110 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5112 movhps\t{%1, %0|%0, %1}
5113 movhlps\t{%1, %0|%0, %1}
5114 movlps\t{%H1, %0|%0, %H1}"
5115 [(set_attr "type" "ssemov")
5116 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract element 0 of a V2DF as a DF when SSE is available but SSE2 is
;; not.  Alternatives, in order: movlps store to memory; movaps between
;; registers; movlps load from memory.
5118 (define_insn "*vec_extractv2df_0_sse"
5119 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5121 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5122 (parallel [(const_int 0)])))]
5123 "!TARGET_SSE2 && TARGET_SSE
5124 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5126 movlps\t{%1, %0|%0, %1}
5127 movaps\t{%1, %0|%0, %1}
5128 movlps\t{%1, %0|%0, %1}"
5129 [(set_attr "type" "ssemov")
5130 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX combination of V2DF operands 2 and 1.  Alternatives, in order:
;; three-operand vmovsd on registers; vmovlpd with operand 2 in memory; a
;; vmovlpd store of operand 2 into a memory destination tied to operand 1;
;; vmovhps from %H1 when operand 1 is in offsettable memory; and a vmovhps
;; store of operand 1 to %H0 of a memory destination tied to operand 2.
5132 (define_insn "*avx_movsd"
5133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5135 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5136 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5140 vmovsd\t{%2, %1, %0|%0, %1, %2}
5141 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5142 vmovlpd\t{%2, %0|%0, %2}
5143 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5144 vmovhps\t{%1, %H0|%H0, %1}"
5145 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5146 (set_attr "prefix" "vex")
5147 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 combination of V2DF operands 2 and 1: as the movsd alternative
;; shows (operand 1 tied to the destination, operand 2 the movsd source),
;; the result takes its low element from operand 2 and its high element
;; from operand 1.  Alternatives, in order: movsd on registers; movlpd
;; with operand 2 in memory; a movlpd store into a memory destination tied
;; to operand 1; shufpd and a movhps load from %H1 with operand 2 tied to
;; the destination; and a movhps store of operand 1 to %H0 of a memory
;; destination tied to operand 2.
;;
;; In the shufpd alternative the destination already holds operand 2, so
;; the source must be operand 1: with immediate 2 the low element is kept
;; from the destination and the high element is taken from the high
;; element of the source.  Using %2 there would read the destination
;; itself and never pick up operand 1.
5149 (define_insn "sse2_movsd"
5150 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5152 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5153 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5157 movsd\t{%2, %0|%0, %2}
5158 movlpd\t{%2, %0|%0, %2}
5159 movlpd\t{%2, %0|%0, %2}
5160 shufpd\t{$2, %1, %0|%0, %1, 2}
5161 movhps\t{%H1, %0|%0, %H1}
5162 movhps\t{%1, %H0|%H0, %1}"
5163 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5164 (set_attr "prefix_data16" "*,1,1,*,*,*")
5165 (set_attr "length_immediate" "*,*,*,1,*,*")
5166 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result produced from a single DF operand (register or memory) with
;; %vmovddup; as the pattern name says, this is the SSE3 vec_dup form.
5168 (define_insn "*vec_dupv2df_sse3"
5169 [(set (match_operand:V2DF 0 "register_operand" "=x")
5171 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5173 "%vmovddup\t{%1, %0|%0, %1}"
5174 [(set_attr "type" "sselog1")
5175 (set_attr "prefix" "maybe_vex")
5176 (set_attr "mode" "DF")])
;; Register-only vec_dup form for V2DF: DF operand 1 is tied to the
;; destination register ("0").
5178 (define_insn "vec_dupv2df"
5179 [(set (match_operand:V2DF 0 "register_operand" "=x")
5181 (match_operand:DF 1 "register_operand" "0")))]
5184 [(set_attr "type" "sselog1")
5185 (set_attr "mode" "V2DF")])
;; V2DF concatenation form that is emitted as %vmovddup of the single DF
;; operand 1, which may be a register or memory.
5187 (define_insn "*vec_concatv2df_sse3"
5188 [(set (match_operand:V2DF 0 "register_operand" "=x")
5190 (match_operand:DF 1 "nonimmediate_operand" "xm")
5193 "%vmovddup\t{%1, %0|%0, %1}"
5194 [(set_attr "type" "sselog1")
5195 (set_attr "prefix" "maybe_vex")
5196 (set_attr "mode" "DF")])
;; AVX concatenation of DF operands 1 and 2 into a V2DF.  Alternatives, in
;; order: vunpcklpd on two registers; vmovhpd with operand 2 in memory; and
;; a vmovsd load of memory operand 1 with operand 2 constrained to "C".
5198 (define_insn "*vec_concatv2df_avx"
5199 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5201 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5202 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5205 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5206 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5207 vmovsd\t{%1, %0|%0, %1}"
5208 [(set_attr "type" "ssemov")
5209 (set_attr "prefix" "vex")
5210 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX concatenation of DF operands 1 and 2 into a V2DF.  The first
;; three alternatives use the "Y2" register constraint and emit unpcklpd,
;; movhpd (operand 2 in memory) and a movsd load of memory operand 1 with
;; operand 2 constrained to "C".  The last two use plain "x" registers and
;; emit movlhps or movhps (operand 2 in memory).  Operand 1 is tied to the
;; destination except in the movsd load.
5212 (define_insn "*vec_concatv2df"
5213 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5215 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5216 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5219 unpcklpd\t{%2, %0|%0, %2}
5220 movhpd\t{%2, %0|%0, %2}
5221 movsd\t{%1, %0|%0, %1}
5222 movlhps\t{%2, %0|%0, %2}
5223 movhps\t{%2, %0|%0, %2}"
5224 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5225 (set_attr "prefix_data16" "*,1,*,*,*")
5226 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5228 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5230 ;; Parallel integral arithmetic
5232 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the 16-byte integer vector modes.  The
;; preparation statement forces an all-zero vector of the same mode into
;; a register and hands it to the pattern as operands[2].
5234 (define_expand "neg<mode>2"
5235 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5238 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5240 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named expander for integer vector add and subtract on the SSEMODEI
;; modes.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the insn patterns match.
5242 (define_expand "<plusminus_insn><mode>3"
5243 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5245 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5246 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5248 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer vector add and subtract: separate
;; destination, register source 1, register-or-memory source 2.
5250 (define_insn "*avx_<plusminus_insn><mode>3"
5251 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5253 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5254 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5255 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5256 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5257 [(set_attr "type" "sseiadd")
5258 (set_attr "prefix" "vex")
5259 (set_attr "mode" "TI")])
;; SSE2 two-operand integer vector add and subtract; operand 1 is tied
;; to the destination register.
5261 (define_insn "*<plusminus_insn><mode>3"
5262 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5264 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5265 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5266 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5267 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5268 [(set_attr "type" "sseiadd")
5269 (set_attr "prefix_data16" "1")
5270 (set_attr "mode" "TI")])
;; Expander for the saturating add and subtract codes (sat_plusminus)
;; on byte and word vectors (SSEMODE12).
5272 (define_expand "sse2_<plusminus_insn><mode>3"
5273 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5274 (sat_plusminus:SSEMODE12
5275 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5276 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5278 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the saturating byte and word add and
;; subtract.
5280 (define_insn "*avx_<plusminus_insn><mode>3"
5281 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5282 (sat_plusminus:SSEMODE12
5283 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5284 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5285 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5286 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5287 [(set_attr "type" "sseiadd")
5288 (set_attr "prefix" "vex")
5289 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the saturating byte and word add and
;; subtract; operand 1 is tied to the destination.
5291 (define_insn "*sse2_<plusminus_insn><mode>3"
5292 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5293 (sat_plusminus:SSEMODE12
5294 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5295 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5296 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5297 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5298 [(set_attr "type" "sseiadd")
5299 (set_attr "prefix_data16" "1")
5300 (set_attr "mode" "TI")])
;; V16QI multiply, implemented by splitting into word multiplies:
;;   1. interleave each input with itself so every word carries a source
;;      byte in its low half (high and low halves of the vector handled
;;      separately),
;;   2. multiply the widened vectors with mulv8hi3,
;;   3. extract the even bytes of both products into operand 0.
;; The split needs six fresh pseudos, hence can_create_pseudo_p.
5302 (define_insn_and_split "mulv16qi3"
5303 [(set (match_operand:V16QI 0 "register_operand" "")
5304 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5305 (match_operand:V16QI 2 "register_operand" "")))]
5307 && can_create_pseudo_p ()"
5315 for (i = 0; i < 6; ++i)
5316 t[i] = gen_reg_rtx (V16QImode);
5318 /* Unpack data such that we've got a source byte in each low byte of
5319 each word. We don't care what goes into the high byte of each word.
5320 Rather than trying to get zero in there, most convenient is to let
5321 it be a copy of the low byte. */
5322 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5323 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5324 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5325 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5327 /* Multiply words. The end-of-line annotations here give a picture of what
5328 the output of that instruction looks like. Dot means don't care; the
5329 letters are the bytes of the result with A being the most significant. */
5330 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5331 gen_lowpart (V8HImode, t[0]),
5332 gen_lowpart (V8HImode, t[1])));
5333 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5334 gen_lowpart (V8HImode, t[2]),
5335 gen_lowpart (V8HImode, t[3])));
5337 /* Extract the even bytes and merge them back together. */
5338 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Named expander for the V8HI element-wise multiply; operands are fixed
;; up for the MULT binary-operator patterns that follow.
5342 (define_expand "mulv8hi3"
5343 [(set (match_operand:V8HI 0 "register_operand" "")
5344 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5345 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5347 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI multiply (vpmullw); commutative in operands 1
;; and 2, operand 2 may be memory.
5349 (define_insn "*avx_mulv8hi3"
5350 [(set (match_operand:V8HI 0 "register_operand" "=x")
5351 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5352 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5353 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5354 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5355 [(set_attr "type" "sseimul")
5356 (set_attr "prefix" "vex")
5357 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI multiply (pmullw); operand 1 is tied to the
;; destination and may be swapped with operand 2.
5359 (define_insn "*mulv8hi3"
5360 [(set (match_operand:V8HI 0 "register_operand" "=x")
5361 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5362 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5363 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5364 "pmullw\t{%2, %0|%0, %2}"
5365 [(set_attr "type" "sseimul")
5366 (set_attr "prefix_data16" "1")
5367 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply; the <s> prefix in the name
;; selects between the signed and unsigned variants.
5369 (define_expand "<s>mulv8hi3_highpart"
5370 [(set (match_operand:V8HI 0 "register_operand" "")
5375 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5377 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5380 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand high-part V8HI multiply (vpmulhw or vpmulhuw,
;; chosen by the <u> attribute).
5382 (define_insn "*avx_<s>mulv8hi3_highpart"
5383 [(set (match_operand:V8HI 0 "register_operand" "=x")
5388 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5390 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5392 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5393 "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5394 [(set_attr "type" "sseimul")
5395 (set_attr "prefix" "vex")
5396 (set_attr "mode" "TI")])
;; SSE2 two-operand high-part V8HI multiply (pmulhw or pmulhuw);
;; operand 1 is tied to the destination.
5398 (define_insn "*<s>mulv8hi3_highpart"
5399 [(set (match_operand:V8HI 0 "register_operand" "=x")
5404 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5406 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5408 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5409 "pmulh<u>w\t{%2, %0|%0, %2}"
5410 [(set_attr "type" "sseimul")
5411 (set_attr "prefix_data16" "1")
5412 (set_attr "mode" "TI")])
;; Expander for the widening multiply that takes elements 0 and 2 of
;; each V4SI input and produces a V2DI result (unsigned variant, per the
;; pattern name).
5414 (define_expand "sse2_umulv2siv2di3"
5415 [(set (match_operand:V2DI 0 "register_operand" "")
5419 (match_operand:V4SI 1 "nonimmediate_operand" "")
5420 (parallel [(const_int 0) (const_int 2)])))
5423 (match_operand:V4SI 2 "nonimmediate_operand" "")
5424 (parallel [(const_int 0) (const_int 2)])))))]
5426 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand form of the even-element widening multiply
;; (vpmuludq): elements 0 and 2 of each V4SI source, V2DI result.
5428 (define_insn "*avx_umulv2siv2di3"
5429 [(set (match_operand:V2DI 0 "register_operand" "=x")
5433 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5434 (parallel [(const_int 0) (const_int 2)])))
5437 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5438 (parallel [(const_int 0) (const_int 2)])))))]
5439 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5440 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5441 [(set_attr "type" "sseimul")
5442 (set_attr "prefix" "vex")
5443 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the even-element widening multiply
;; (pmuludq); operand 1 is tied to the destination.
5445 (define_insn "*sse2_umulv2siv2di3"
5446 [(set (match_operand:V2DI 0 "register_operand" "=x")
5450 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5451 (parallel [(const_int 0) (const_int 2)])))
5454 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5455 (parallel [(const_int 0) (const_int 2)])))))]
5456 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5457 "pmuludq\t{%2, %0|%0, %2}"
5458 [(set_attr "type" "sseimul")
5459 (set_attr "prefix_data16" "1")
5460 (set_attr "mode" "TI")])
;; Expander for the SSE4.1 counterpart of the even-element widening
;; multiply: elements 0 and 2 of each V4SI input, V2DI result.
5462 (define_expand "sse4_1_mulv2siv2di3"
5463 [(set (match_operand:V2DI 0 "register_operand" "")
5467 (match_operand:V4SI 1 "nonimmediate_operand" "")
5468 (parallel [(const_int 0) (const_int 2)])))
5471 (match_operand:V4SI 2 "nonimmediate_operand" "")
5472 (parallel [(const_int 0) (const_int 2)])))))]
5474 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand even-element widening multiply emitted as vpmuldq.
5476 (define_insn "*avx_mulv2siv2di3"
5477 [(set (match_operand:V2DI 0 "register_operand" "=x")
5481 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5482 (parallel [(const_int 0) (const_int 2)])))
5485 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5486 (parallel [(const_int 0) (const_int 2)])))))]
5487 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5488 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5489 [(set_attr "type" "sseimul")
5490 (set_attr "prefix_extra" "1")
5491 (set_attr "prefix" "vex")
5492 (set_attr "mode" "TI")])
;; SSE4.1 two-operand even-element widening multiply emitted as pmuldq;
;; operand 1 is tied to the destination.
5494 (define_insn "*sse4_1_mulv2siv2di3"
5495 [(set (match_operand:V2DI 0 "register_operand" "=x")
5499 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5500 (parallel [(const_int 0) (const_int 2)])))
5503 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5504 (parallel [(const_int 0) (const_int 2)])))))]
5505 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5506 "pmuldq\t{%2, %0|%0, %2}"
5507 [(set_attr "type" "sseimul")
5508 (set_attr "prefix_extra" "1")
5509 (set_attr "mode" "TI")])
;; Expander for the V8HI to V4SI multiply-add.  The pattern combines two
;; groups of V4HI selections from operands 1 and 2: one group whose
;; index list starts at element 0 and one whose list starts at element 1.
5511 (define_expand "sse2_pmaddwd"
5512 [(set (match_operand:V4SI 0 "register_operand" "")
5517 (match_operand:V8HI 1 "nonimmediate_operand" "")
5518 (parallel [(const_int 0)
5524 (match_operand:V8HI 2 "nonimmediate_operand" "")
5525 (parallel [(const_int 0)
5531 (vec_select:V4HI (match_dup 1)
5532 (parallel [(const_int 1)
5537 (vec_select:V4HI (match_dup 2)
5538 (parallel [(const_int 1)
5541 (const_int 7)]))))))]
5543 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand form of the V8HI to V4SI multiply-add (vpmaddwd).
5545 (define_insn "*avx_pmaddwd"
5546 [(set (match_operand:V4SI 0 "register_operand" "=x")
5551 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5552 (parallel [(const_int 0)
5558 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5559 (parallel [(const_int 0)
5565 (vec_select:V4HI (match_dup 1)
5566 (parallel [(const_int 1)
5571 (vec_select:V4HI (match_dup 2)
5572 (parallel [(const_int 1)
5575 (const_int 7)]))))))]
5576 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5577 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5578 [(set_attr "type" "sseiadd")
5579 (set_attr "prefix" "vex")
5580 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the V8HI to V4SI multiply-add (pmaddwd);
;; operand 1 is tied to the destination.  Scheduled on the Atom simul
;; unit via the atom_unit attribute.
5582 (define_insn "*sse2_pmaddwd"
5583 [(set (match_operand:V4SI 0 "register_operand" "=x")
5588 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5589 (parallel [(const_int 0)
5595 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5596 (parallel [(const_int 0)
5602 (vec_select:V4HI (match_dup 1)
5603 (parallel [(const_int 1)
5608 (vec_select:V4HI (match_dup 2)
5609 (parallel [(const_int 1)
5612 (const_int 7)]))))))]
5613 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5614 "pmaddwd\t{%2, %0|%0, %2}"
5615 [(set_attr "type" "sseiadd")
5616 (set_attr "atom_unit" "simul")
5617 (set_attr "prefix_data16" "1")
5618 (set_attr "mode" "TI")])
;; Named expander for the V4SI multiply.  Operand fixup is applied only
;; when SSE4.1 or AVX is enabled, i.e. when one of the direct pmulld
;; patterns below can match.
5620 (define_expand "mulv4si3"
5621 [(set (match_operand:V4SI 0 "register_operand" "")
5622 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5623 (match_operand:V4SI 2 "register_operand" "")))]
5626 if (TARGET_SSE4_1 || TARGET_AVX)
5627 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX three-operand V4SI multiply (vpmulld).
5630 (define_insn "*avx_mulv4si3"
5631 [(set (match_operand:V4SI 0 "register_operand" "=x")
5632 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5633 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5634 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5635 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5636 [(set_attr "type" "sseimul")
5637 (set_attr "prefix_extra" "1")
5638 (set_attr "prefix" "vex")
5639 (set_attr "mode" "TI")])
;; SSE4.1 two-operand V4SI multiply (pmulld); operand 1 is tied to the
;; destination.
5641 (define_insn "*sse4_1_mulv4si3"
5642 [(set (match_operand:V4SI 0 "register_operand" "=x")
5643 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5644 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5645 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5646 "pmulld\t{%2, %0|%0, %2}"
5647 [(set_attr "type" "sseimul")
5648 (set_attr "prefix_extra" "1")
5649 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX.
;; Split into: a widening multiply of elements 0 and 2, a whole-register
;; right shift of both inputs so elements 1 and 3 take their place, a
;; second widening multiply, then pshufd plus a low interleave to gather
;; the four low result words into operand 0.
5651 (define_insn_and_split "*sse2_mulv4si3"
5652 [(set (match_operand:V4SI 0 "register_operand" "")
5653 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5654 (match_operand:V4SI 2 "register_operand" "")))]
5655 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5656 && can_create_pseudo_p ()"
5661 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5667 t1 = gen_reg_rtx (V4SImode);
5668 t2 = gen_reg_rtx (V4SImode);
5669 t3 = gen_reg_rtx (V4SImode);
5670 t4 = gen_reg_rtx (V4SImode);
5671 t5 = gen_reg_rtx (V4SImode);
5672 t6 = gen_reg_rtx (V4SImode);
5673 thirtytwo = GEN_INT (32);
5675 /* Multiply elements 2 and 0. */
5676 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5679 /* Shift both input vectors down one element, so that elements 3
5680 and 1 are now in the slots for elements 2 and 0. For K8, at
5681 least, this is faster than using a shuffle. */
5682 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5683 gen_lowpart (V1TImode, op1),
5685 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5686 gen_lowpart (V1TImode, op2),
5688 /* Multiply elements 3 and 1. */
5689 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5692 /* Move the results in element 2 down to element 1; we don't care
5693 what goes in elements 2 and 3. */
5694 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5695 const0_rtx, const0_rtx));
5696 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5697 const0_rtx, const0_rtx));
5699 /* Merge the parts back together. */
5700 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, split into 32-bit pieces.  Two instruction sequences
;; appear below:
;;   - one built from XOP helpers (pshufd, mulv4si3, phadddq, a left
;;     shift by 32 and pmacsdql), which forms the cross products first
;;     and then accumulates the low product on top of them;
;;   - a generic one using three even-element widening multiplies
;;     (low*low and the two high*low cross terms), shifting the cross
;;     terms left by 32 and adding the three parts.
5704 (define_insn_and_split "mulv2di3"
5705 [(set (match_operand:V2DI 0 "register_operand" "")
5706 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5707 (match_operand:V2DI 2 "register_operand" "")))]
5709 && can_create_pseudo_p ()"
5714 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5723 /* op1: A,B,C,D, op2: E,F,G,H */
5724 op1 = gen_lowpart (V4SImode, op1);
5725 op2 = gen_lowpart (V4SImode, op2);
5727 t1 = gen_reg_rtx (V4SImode);
5728 t2 = gen_reg_rtx (V4SImode);
5729 t3 = gen_reg_rtx (V2DImode);
5730 t4 = gen_reg_rtx (V2DImode);
5733 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5739 /* t2: (B*E),(A*F),(D*G),(C*H) */
5740 emit_insn (gen_mulv4si3 (t2, t1, op2));
5742 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5743 emit_insn (gen_xop_phadddq (t3, t2));
5745 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5746 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5748 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5749 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5753 t1 = gen_reg_rtx (V2DImode);
5754 t2 = gen_reg_rtx (V2DImode);
5755 t3 = gen_reg_rtx (V2DImode);
5756 t4 = gen_reg_rtx (V2DImode);
5757 t5 = gen_reg_rtx (V2DImode);
5758 t6 = gen_reg_rtx (V2DImode);
5759 thirtytwo = GEN_INT (32);
5761 /* Multiply low parts. */
5762 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5763 gen_lowpart (V4SImode, op2)));
5765 /* Shift input vectors right 32 bits so we can multiply high parts. */
5766 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5767 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5769 /* Multiply high parts by low parts. */
5770 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5771 gen_lowpart (V4SImode, t3)));
5772 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5773 gen_lowpart (V4SImode, t2)));
5775 /* Shift them back. */
5776 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5777 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5779 /* Add the three parts together. */
5780 emit_insn (gen_addv2di3 (t6, t1, t4));
5781 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.  Computes
;; the low words (mulv8hi3) and the signed high words
;; (smulv8hi3_highpart) of every product, then interleaves the high
;; halves of those two vectors into operand 0 viewed as V8HI.
5786 (define_expand "vec_widen_smult_hi_v8hi"
5787 [(match_operand:V4SI 0 "register_operand" "")
5788 (match_operand:V8HI 1 "register_operand" "")
5789 (match_operand:V8HI 2 "register_operand" "")]
5792 rtx op1, op2, t1, t2, dest;
5796 t1 = gen_reg_rtx (V8HImode);
5797 t2 = gen_reg_rtx (V8HImode);
5798 dest = gen_lowpart (V8HImode, operands[0]);
5800 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5801 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5802 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half: same recipe as the high-half
;; expander but the final step interleaves the low halves of the
;; low-word and high-word product vectors.
5806 (define_expand "vec_widen_smult_lo_v8hi"
5807 [(match_operand:V4SI 0 "register_operand" "")
5808 (match_operand:V8HI 1 "register_operand" "")
5809 (match_operand:V8HI 2 "register_operand" "")]
5812 rtx op1, op2, t1, t2, dest;
5816 t1 = gen_reg_rtx (V8HImode);
5817 t2 = gen_reg_rtx (V8HImode);
5818 dest = gen_lowpart (V8HImode, operands[0]);
5820 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5821 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5822 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.  Low
;; words come from mulv8hi3, high words from umulv8hi3_highpart, and the
;; high halves of the two vectors are interleaved into the result.
5826 (define_expand "vec_widen_umult_hi_v8hi"
5827 [(match_operand:V4SI 0 "register_operand" "")
5828 (match_operand:V8HI 1 "register_operand" "")
5829 (match_operand:V8HI 2 "register_operand" "")]
5832 rtx op1, op2, t1, t2, dest;
5836 t1 = gen_reg_rtx (V8HImode);
5837 t2 = gen_reg_rtx (V8HImode);
5838 dest = gen_lowpart (V8HImode, operands[0]);
5840 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5841 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5842 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half: low words from mulv8hi3, high
;; words from umulv8hi3_highpart, low halves interleaved into the
;; result.
5846 (define_expand "vec_widen_umult_lo_v8hi"
5847 [(match_operand:V4SI 0 "register_operand" "")
5848 (match_operand:V8HI 1 "register_operand" "")
5849 (match_operand:V8HI 2 "register_operand" "")]
5852 rtx op1, op2, t1, t2, dest;
5856 t1 = gen_reg_rtx (V8HImode);
5857 t2 = gen_reg_rtx (V8HImode);
5858 dest = gen_lowpart (V8HImode, operands[0]);
5860 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5861 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5862 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, high half: V4SI x V4SI -> V2DI.  Both
;; inputs are first rearranged with pshufd into fresh V4SI temporaries,
;; then multiplied by the XOP helper gen_xop_mulv2div2di3_high.
5866 (define_expand "vec_widen_smult_hi_v4si"
5867 [(match_operand:V2DI 0 "register_operand" "")
5868 (match_operand:V4SI 1 "register_operand" "")
5869 (match_operand:V4SI 2 "register_operand" "")]
5874 t1 = gen_reg_rtx (V4SImode);
5875 t2 = gen_reg_rtx (V4SImode);
5877 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5882 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5887 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply, low half: V4SI x V4SI -> V2DI.  Same shape
;; as the high-half expander, finishing with gen_xop_mulv2div2di3_low.
5891 (define_expand "vec_widen_smult_lo_v4si"
5892 [(match_operand:V2DI 0 "register_operand" "")
5893 (match_operand:V4SI 1 "register_operand" "")
5894 (match_operand:V4SI 2 "register_operand" "")]
5899 t1 = gen_reg_rtx (V4SImode);
5900 t2 = gen_reg_rtx (V4SImode);
5902 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5907 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5912 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.  Each
;; input is high-interleaved with itself, which places its two upper
;; elements in the even slots read by the widening multiply that
;; follows.
5916 (define_expand "vec_widen_umult_hi_v4si"
5917 [(match_operand:V2DI 0 "register_operand" "")
5918 (match_operand:V4SI 1 "register_operand" "")
5919 (match_operand:V4SI 2 "register_operand" "")]
5922 rtx op1, op2, t1, t2;
5926 t1 = gen_reg_rtx (V4SImode);
5927 t2 = gen_reg_rtx (V4SImode);
5929 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5930 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5931 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.  Each
;; input is low-interleaved with itself so its two lower elements sit in
;; the even slots read by the widening multiply that follows.
5935 (define_expand "vec_widen_umult_lo_v4si"
5936 [(match_operand:V2DI 0 "register_operand" "")
5937 (match_operand:V4SI 1 "register_operand" "")
5938 (match_operand:V4SI 2 "register_operand" "")]
5941 rtx op1, op2, t1, t2;
5945 t1 = gen_reg_rtx (V4SImode);
5946 t2 = gen_reg_rtx (V4SImode);
5948 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5949 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5950 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The multiply-add goes to a fresh V4SI temporary which is
;; then added to the V4SI accumulator.
5954 (define_expand "sdot_prodv8hi"
5955 [(match_operand:V4SI 0 "register_operand" "")
5956 (match_operand:V8HI 1 "register_operand" "")
5957 (match_operand:V8HI 2 "register_operand" "")
5958 (match_operand:V4SI 3 "register_operand" "")]
5961 rtx t = gen_reg_rtx (V4SImode);
5962 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5963 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product of V4SI inputs into a V2DI accumulator.
;; Sequence: widening multiply of the even elements, add the accumulator
;; (operand 3), shift both inputs right as whole V1TI values so the
;; remaining elements reach the even slots, multiply again, and add the
;; two partial sums into operand 0.
5967 (define_expand "udot_prodv4si"
5968 [(match_operand:V2DI 0 "register_operand" "")
5969 (match_operand:V4SI 1 "register_operand" "")
5970 (match_operand:V4SI 2 "register_operand" "")
5971 (match_operand:V2DI 3 "register_operand" "")]
5976 t1 = gen_reg_rtx (V2DImode);
5977 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5978 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5980 t2 = gen_reg_rtx (V4SImode);
5981 t3 = gen_reg_rtx (V4SImode);
5982 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5983 gen_lowpart (V1TImode, operands[1]),
5985 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5986 gen_lowpart (V1TImode, operands[2]),
5989 t4 = gen_reg_rtx (V2DImode);
5990 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5992 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX three-operand arithmetic right shift (vpsraw or vpsrad) of word
;; and doubleword vectors.  The count is an SI value in an xmm register
;; or an N-constraint immediate; length_immediate is derived from
;; whether operand 2 is a const_int.
5996 (define_insn "*avx_ashr<mode>3"
5997 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5999 (match_operand:SSEMODE24 1 "register_operand" "x")
6000 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6002 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6003 [(set_attr "type" "sseishft")
6004 (set_attr "prefix" "vex")
6005 (set (attr "length_immediate")
6006 (if_then_else (match_operand 2 "const_int_operand" "")
6008 (const_string "0")))
6009 (set_attr "mode" "TI")])
;; Two-operand arithmetic right shift (psraw or psrad); the shifted
;; vector is tied to the destination, the count is a register or an
;; N-constraint immediate.
6011 (define_insn "ashr<mode>3"
6012 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6014 (match_operand:SSEMODE24 1 "register_operand" "0")
6015 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6017 "psra<ssevecsize>\t{%2, %0|%0, %2}"
6018 [(set_attr "type" "sseishft")
6019 (set_attr "prefix_data16" "1")
6020 (set (attr "length_immediate")
6021 (if_then_else (match_operand 2 "const_int_operand" "")
6023 (const_string "0")))
6024 (set_attr "mode" "TI")])
;; AVX whole-register logical right shift (vpsrldq).  Operand 2 is a
;; constant multiple of 8 in the range accepted by
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction immediate.
6026 (define_insn "*avx_lshrv1ti3"
6027 [(set (match_operand:V1TI 0 "register_operand" "=x")
6029 (match_operand:V1TI 1 "register_operand" "x")
6030 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6033 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6034 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
6036 [(set_attr "type" "sseishft")
6037 (set_attr "prefix" "vex")
6038 (set_attr "length_immediate" "1")
6039 (set_attr "mode" "TI")])
;; AVX three-operand per-element logical right shift (vpsrlw, vpsrld or
;; vpsrlq) for the SSEMODE248 modes; count in a register or as an
;; N-constraint immediate.
6041 (define_insn "*avx_lshr<mode>3"
6042 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6043 (lshiftrt:SSEMODE248
6044 (match_operand:SSEMODE248 1 "register_operand" "x")
6045 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6047 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6048 [(set_attr "type" "sseishft")
6049 (set_attr "prefix" "vex")
6050 (set (attr "length_immediate")
6051 (if_then_else (match_operand 2 "const_int_operand" "")
6053 (const_string "0")))
6054 (set_attr "mode" "TI")])
;; SSE2 whole-register logical right shift (psrldq); source tied to the
;; destination.  The constant in operand 2 is divided by 8 in the output
;; code before being printed as the immediate.
6056 (define_insn "sse2_lshrv1ti3"
6057 [(set (match_operand:V1TI 0 "register_operand" "=x")
6059 (match_operand:V1TI 1 "register_operand" "0")
6060 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6063 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6064 return "psrldq\t{%2, %0|%0, %2}";
6066 [(set_attr "type" "sseishft")
6067 (set_attr "prefix_data16" "1")
6068 (set_attr "length_immediate" "1")
6069 (set_attr "atom_unit" "sishuf")
6070 (set_attr "mode" "TI")])
;; Two-operand per-element logical right shift (psrlw, psrld or psrlq);
;; the shifted vector is tied to the destination.
6072 (define_insn "lshr<mode>3"
6073 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6074 (lshiftrt:SSEMODE248
6075 (match_operand:SSEMODE248 1 "register_operand" "0")
6076 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6078 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6079 [(set_attr "type" "sseishft")
6080 (set_attr "prefix_data16" "1")
6081 (set (attr "length_immediate")
6082 (if_then_else (match_operand 2 "const_int_operand" "")
6084 (const_string "0")))
6085 (set_attr "mode" "TI")])
;; AVX whole-register left shift (vpslldq).  The constant in operand 2
;; is divided by 8 in the output code before being printed as the
;; immediate.
6087 (define_insn "*avx_ashlv1ti3"
6088 [(set (match_operand:V1TI 0 "register_operand" "=x")
6089 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
6090 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6093 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6094 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
6096 [(set_attr "type" "sseishft")
6097 (set_attr "prefix" "vex")
6098 (set_attr "length_immediate" "1")
6099 (set_attr "mode" "TI")])
;; AVX three-operand per-element left shift (vpsllw, vpslld or vpsllq)
;; for the SSEMODE248 modes.
6101 (define_insn "*avx_ashl<mode>3"
6102 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6104 (match_operand:SSEMODE248 1 "register_operand" "x")
6105 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6107 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6108 [(set_attr "type" "sseishft")
6109 (set_attr "prefix" "vex")
6110 (set (attr "length_immediate")
6111 (if_then_else (match_operand 2 "const_int_operand" "")
6113 (const_string "0")))
6114 (set_attr "mode" "TI")])
;; SSE2 whole-register left shift (pslldq); source tied to the
;; destination.  The constant in operand 2 is divided by 8 in the output
;; code before being printed as the immediate.
6116 (define_insn "sse2_ashlv1ti3"
6117 [(set (match_operand:V1TI 0 "register_operand" "=x")
6118 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
6119 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6122 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6123 return "pslldq\t{%2, %0|%0, %2}";
6125 [(set_attr "type" "sseishft")
6126 (set_attr "prefix_data16" "1")
6127 (set_attr "length_immediate" "1")
6128 (set_attr "mode" "TI")])
;; Two-operand per-element left shift (psllw, pslld or psllq); the
;; shifted vector is tied to the destination.
6130 (define_insn "ashl<mode>3"
6131 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6133 (match_operand:SSEMODE248 1 "register_operand" "0")
6134 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6136 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6137 [(set_attr "type" "sseishft")
6138 (set_attr "prefix_data16" "1")
6139 (set (attr "length_immediate")
6140 (if_then_else (match_operand 2 "const_int_operand" "")
6142 (const_string "0")))
6143 (set_attr "mode" "TI")])
;; Whole-vector left shift for any 16-byte integer mode.  Operands 0 and
;; 1 are reinterpreted as V1TI so the V1TI shift patterns can match; the
;; count must be a constant multiple of 8.
6145 (define_expand "vec_shl_<mode>"
6146 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6148 (match_operand:SSEMODEI 1 "register_operand" "")
6149 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6152 operands[0] = gen_lowpart (V1TImode, operands[0]);
6153 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-vector right shift for any 16-byte integer mode.  Operands 0
;; and 1 are reinterpreted as V1TI so the V1TI shift patterns can match;
;; the count must be a constant multiple of 8.
6156 (define_expand "vec_shr_<mode>"
6157 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6159 (match_operand:SSEMODEI 1 "register_operand" "")
6160 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6163 operands[0] = gen_lowpart (V1TImode, operands[0]);
6164 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX three-operand integer max and min for byte, word and doubleword
;; vectors.  The prefix_extra attribute is selected by testing whether
;; operand 0 has mode V16QI.
6167 (define_insn "*avx_<code><mode>3"
6168 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6170 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6171 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6172 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6173 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6174 [(set_attr "type" "sseiadd")
6175 (set (attr "prefix_extra")
6176 (if_then_else (match_operand:V16QI 0 "" "")
6178 (const_string "1")))
6179 (set_attr "prefix" "vex")
6180 (set_attr "mode" "TI")])
;; Named expander for the V16QI max and min codes handled directly by
;; the p<maxmin_int>b insn below.
6182 (define_expand "<code>v16qi3"
6183 [(set (match_operand:V16QI 0 "register_operand" "")
6185 (match_operand:V16QI 1 "nonimmediate_operand" "")
6186 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6188 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 two-operand byte max and min; operand 1 is tied to the
;; destination.
6190 (define_insn "*<code>v16qi3"
6191 [(set (match_operand:V16QI 0 "register_operand" "=x")
6193 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6194 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6195 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6196 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6197 [(set_attr "type" "sseiadd")
6198 (set_attr "prefix_data16" "1")
6199 (set_attr "mode" "TI")])
;; Second AVX three-operand integer max and min pattern over the same
;; modes.  Here prefix_extra is selected by testing whether operand 0
;; has mode V8HI.
6201 (define_insn "*avx_<code><mode>3"
6202 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6204 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6205 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6206 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6207 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6208 [(set_attr "type" "sseiadd")
6209 (set (attr "prefix_extra")
6210 (if_then_else (match_operand:V8HI 0 "" "")
6212 (const_string "1")))
6213 (set_attr "prefix" "vex")
6214 (set_attr "mode" "TI")])
;; Named expander for the V8HI max and min codes handled directly by
;; the p<maxmin_int>w insn below.
6216 (define_expand "<code>v8hi3"
6217 [(set (match_operand:V8HI 0 "register_operand" "")
6219 (match_operand:V8HI 1 "nonimmediate_operand" "")
6220 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6222 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 two-operand word max and min; operand 1 is tied to the
;; destination.
6224 (define_insn "*<code>v8hi3"
6225 [(set (match_operand:V8HI 0 "register_operand" "=x")
6227 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6228 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6229 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6230 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6231 [(set_attr "type" "sseiadd")
6232 (set_attr "prefix_data16" "1")
6233 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  One path only fixes up the operands for a
;; direct umax pattern.  The other path synthesizes the result as
;; (op1 -us op2) + op2, a saturating unsigned subtract followed by a
;; plain add.  A fresh temporary holds the difference when the
;; destination is the same rtx as operand 2, so operand 2 is still
;; intact for the add.
6235 (define_expand "umaxv8hi3"
6236 [(set (match_operand:V8HI 0 "register_operand" "")
6237 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6238 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6242 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6245 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6246 if (rtx_equal_p (op3, op2))
6247 op3 = gen_reg_rtx (V8HImode);
6248 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6249 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for byte and doubleword vectors.  One path only fixes
;; up the operands for a direct smax pattern.  The other path builds a
;; six-entry operand array for ix86_expand_int_vcond: destination,
;; operand 1, operand 2, the comparison op1 > op2, and the two compared
;; operands (presumably value-if-true then value-if-false, so operand 1
;; is chosen where it is greater).
6254 (define_expand "smax<mode>3"
6255 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6256 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6257 (match_operand:SSEMODE14 2 "register_operand" "")))]
6261 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6267 xops[0] = operands[0];
6268 xops[1] = operands[1];
6269 xops[2] = operands[2];
6270 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6271 xops[4] = operands[1];
6272 xops[5] = operands[2];
6273 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand max and min for the byte and doubleword vector
;; modes (SSEMODE14); operand 1 is tied to the destination.
6279 (define_insn "*sse4_1_<code><mode>3"
6280 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6282 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6283 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6284 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6285 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6286 [(set_attr "type" "sseiadd")
6287 (set_attr "prefix_extra" "1")
6288 (set_attr "mode" "TI")])
;; Signed V2DI maximum, always synthesized through
;; ix86_expand_int_vcond with the comparison op1 > op2 and operands 1
;; and 2 as the two candidate values.
6290 (define_expand "smaxv2di3"
6291 [(set (match_operand:V2DI 0 "register_operand" "")
6292 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6293 (match_operand:V2DI 2 "register_operand" "")))]
6299 xops[0] = operands[0];
6300 xops[1] = operands[1];
6301 xops[2] = operands[2];
6302 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6303 xops[4] = operands[1];
6304 xops[5] = operands[2];
6305 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum.  One path only fixes up the operands for a
;; direct umax pattern.  The other path goes through
;; ix86_expand_int_vcond using the unsigned comparison op1 >u op2.
6310 (define_expand "umaxv4si3"
6311 [(set (match_operand:V4SI 0 "register_operand" "")
6312 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6313 (match_operand:V4SI 2 "register_operand" "")))]
6317 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6323 xops[0] = operands[0];
6324 xops[1] = operands[1];
6325 xops[2] = operands[2];
6326 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6327 xops[4] = operands[1];
6328 xops[5] = operands[2];
6329 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand packed integer max/min instruction for the SSEMODE24
;; modes (V8HI and V4SI).  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory ("xm").  The
;; mnemonic is assembled as "p" + <maxmin_int> + <ssevecsize>.
;; NOTE(review): the line holding the outer rtx code is missing from this
;; listing, so the code iterator behind <code>/<CODE> is not visible here.
6335 (define_insn "*sse4_1_<code><mode>3"
6336 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6338 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6339 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6340 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6341 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6342 [(set_attr "type" "sseiadd")
6343 (set_attr "prefix_extra" "1")
6344 (set_attr "mode" "TI")])
;; Expander for the unsigned V2DI maximum.  The visible code describes
;; dest = (op1 GTU op2) ? op1 : op2 in xops (vcond operand order) and
;; expands it through ix86_expand_int_vcond.
;; NOTE(review): the insn condition and the lines following the
;; ix86_expand_int_vcond call are not visible in this listing.
6346 (define_expand "umaxv2di3"
6347 [(set (match_operand:V2DI 0 "register_operand" "")
6348 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6349 (match_operand:V2DI 2 "register_operand" "")))]
6355 xops[0] = operands[0];
6356 xops[1] = operands[1];
6357 xops[2] = operands[2];
6358 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6359 xops[4] = operands[1];
6360 xops[5] = operands[2];
6361 ok = ix86_expand_int_vcond (xops);
;; Expander for the signed vector minimum over SSEMODE14 (V16QI and V4SI).
;; Two paths are visible: a call to ix86_fixup_binary_operands_no_copy for
;; SMIN, and a fallback through ix86_expand_int_vcond.  The fallback keeps
;; the same GT comparison as the max expanders but swaps the selected
;; values (xops[1] = op2, xops[2] = op1), i.e.
;; dest = (op1 GT op2) ? op2 : op1.
;; NOTE(review): the insn condition and the test choosing between the two
;; paths are on lines missing from this listing.
6366 (define_expand "smin<mode>3"
6367 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6368 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6369 (match_operand:SSEMODE14 2 "register_operand" "")))]
6373 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6379 xops[0] = operands[0];
6380 xops[1] = operands[2];
6381 xops[2] = operands[1];
6382 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6383 xops[4] = operands[1];
6384 xops[5] = operands[2];
6385 ok = ix86_expand_int_vcond (xops);
;; Expander for the signed V2DI minimum.  The visible code describes
;; dest = (op1 GT op2) ? op2 : op1 in xops (vcond operand order; note the
;; swapped xops[1]/xops[2]) and expands it through ix86_expand_int_vcond.
;; NOTE(review): the insn condition and the lines following the
;; ix86_expand_int_vcond call are not visible in this listing.
6391 (define_expand "sminv2di3"
6392 [(set (match_operand:V2DI 0 "register_operand" "")
6393 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6394 (match_operand:V2DI 2 "register_operand" "")))]
6400 xops[0] = operands[0];
6401 xops[1] = operands[2];
6402 xops[2] = operands[1];
6403 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6404 xops[4] = operands[1];
6405 xops[5] = operands[2];
6406 ok = ix86_expand_int_vcond (xops);
;; Expander for the unsigned vector minimum over SSEMODE24 (V8HI and V4SI).
;; Two paths are visible: a call to ix86_fixup_binary_operands_no_copy for
;; UMIN, and a fallback that builds dest = (op1 GTU op2) ? op2 : op1 in
;; xops (vcond operand order) and expands it with ix86_expand_int_vcond.
;; NOTE(review): the insn condition and the test choosing between the two
;; paths are on lines missing from this listing.
6411 (define_expand "umin<mode>3"
6412 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6413 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6414 (match_operand:SSEMODE24 2 "register_operand" "")))]
6418 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6424 xops[0] = operands[0];
6425 xops[1] = operands[2];
6426 xops[2] = operands[1];
6427 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6428 xops[4] = operands[1];
6429 xops[5] = operands[2];
6430 ok = ix86_expand_int_vcond (xops);
;; Expander for the unsigned V2DI minimum.  The visible code describes
;; dest = (op1 GTU op2) ? op2 : op1 in xops (vcond operand order) and
;; expands it through ix86_expand_int_vcond.
;; NOTE(review): the insn condition and the lines following the
;; ix86_expand_int_vcond call are not visible in this listing.
6436 (define_expand "uminv2di3"
6437 [(set (match_operand:V2DI 0 "register_operand" "")
6438 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6439 (match_operand:V2DI 2 "register_operand" "")))]
6445 xops[0] = operands[0];
6446 xops[1] = operands[2];
6447 xops[2] = operands[1];
6448 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6449 xops[4] = operands[1];
6450 xops[5] = operands[2];
6451 ok = ix86_expand_int_vcond (xops);
6456 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6458 ;; Parallel integral comparisons
6460 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for the SSE2 packed integer equality compare over the
;; SSEMODE124 iterator.  Enabled for SSE2 when XOP is not in use; its only
;; preparation step passes the operands through
;; ix86_fixup_binary_operands_no_copy with code EQ.
;; NOTE(review): the line holding the outer rtx code is missing from this
;; listing; the EQ passed to the helper suggests an eq rtx.
6462 (define_expand "sse2_eq<mode>3"
6463 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6465 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6466 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6467 "TARGET_SSE2 && !TARGET_XOP "
6468 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX three-operand packed integer equality compare (vpcmpeq + element
;; size suffix) over the SSEMODE1248 iterator.  Operand 1 is marked
;; commutative ("%x"); operand 2 may be a register or memory.  The
;; "prefix_extra" attribute is computed: one value when operand 0 is V2DI
;; (that value is on a line missing from this listing), otherwise "*".
6470 (define_insn "*avx_eq<mode>3"
6471 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6473 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6474 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6475 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6476 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6477 [(set_attr "type" "ssecmp")
6478 (set (attr "prefix_extra")
6479 (if_then_else (match_operand:V2DI 0 "" "")
6481 (const_string "*")))
6482 (set_attr "prefix" "vex")
6483 (set_attr "mode" "TI")])
;; SSE2 two-operand packed integer equality compare (pcmpeq + element size
;; suffix) over SSEMODE124.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
;; Enabled for SSE2 when XOP is not in use and ix86_binary_operator_ok
;; accepts the operands.
6485 (define_insn "*sse2_eq<mode>3"
6486 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6488 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6489 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6490 "TARGET_SSE2 && !TARGET_XOP
6491 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6492 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6493 [(set_attr "type" "ssecmp")
6494 (set_attr "prefix_data16" "1")
6495 (set_attr "mode" "TI")])
;; Named expander for the V2DI equality compare.  Its only preparation step
;; passes the operands through ix86_fixup_binary_operands_no_copy with code
;; EQ in V2DImode.
;; NOTE(review): the outer rtx code and the insn condition are on lines
;; missing from this listing; the name suggests an SSE4.1 condition.
6497 (define_expand "sse4_1_eqv2di3"
6498 [(set (match_operand:V2DI 0 "register_operand" "")
6500 (match_operand:V2DI 1 "nonimmediate_operand" "")
6501 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6503 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand 64-bit element equality compare (pcmpeqq).  Operand 1
;; is tied to the destination and marked commutative ("%0"); operand 2 may
;; be a register or memory.  Requires TARGET_SSE4_1 and operands accepted
;; by ix86_binary_operator_ok.
6505 (define_insn "*sse4_1_eqv2di3"
6506 [(set (match_operand:V2DI 0 "register_operand" "=x")
6508 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6509 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6510 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6511 "pcmpeqq\t{%2, %0|%0, %2}"
6512 [(set_attr "type" "ssecmp")
6513 (set_attr "prefix_extra" "1")
6514 (set_attr "mode" "TI")])
;; AVX three-operand packed integer greater-than compare (vpcmpgt + element
;; size suffix) over SSEMODE1248.  Unlike the equality patterns, operand 1
;; must be a register and carries no commutative marker.  "prefix_extra" is
;; computed from whether operand 0 is V2DI (the V2DI value is on a line
;; missing from this listing), otherwise "*".
;; NOTE(review): the outer rtx code and the insn condition are not visible
;; in this listing.
6516 (define_insn "*avx_gt<mode>3"
6517 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6519 (match_operand:SSEMODE1248 1 "register_operand" "x")
6520 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6522 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6523 [(set_attr "type" "ssecmp")
6524 (set (attr "prefix_extra")
6525 (if_then_else (match_operand:V2DI 0 "" "")
6527 (const_string "*")))
6528 (set_attr "prefix" "vex")
6529 (set_attr "mode" "TI")])
;; SSE2 two-operand packed integer greater-than compare (pcmpgt + element
;; size suffix) over SSEMODE124.  Operand 1 must be a register tied to the
;; destination ("0", not commutative); operand 2 may be a register or
;; memory.  Enabled for SSE2 when XOP is not in use.
6531 (define_insn "sse2_gt<mode>3"
6532 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6534 (match_operand:SSEMODE124 1 "register_operand" "0")
6535 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6536 "TARGET_SSE2 && !TARGET_XOP"
6537 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6538 [(set_attr "type" "ssecmp")
6539 (set_attr "prefix_data16" "1")
6540 (set_attr "mode" "TI")])
;; Two-operand 64-bit element greater-than compare (pcmpgtq).  Operand 1 is
;; a register tied to the destination; operand 2 may be a register or
;; memory.
;; NOTE(review): the outer rtx code and the insn condition are on lines
;; missing from this listing; the name suggests an SSE4.2 condition.
6542 (define_insn "sse4_2_gtv2di3"
6543 [(set (match_operand:V2DI 0 "register_operand" "=x")
6545 (match_operand:V2DI 1 "register_operand" "0")
6546 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6548 "pcmpgtq\t{%2, %0|%0, %2}"
6549 [(set_attr "type" "ssecmp")
6550 (set_attr "prefix_extra" "1")
6551 (set_attr "mode" "TI")])
;; Vector conditional-select expander over SSEMODE124C8:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The whole operand array is handed to ix86_expand_int_vcond, whose result
;; is stored in "ok".
;; NOTE(review): the insn condition and the statements after the call are
;; on lines missing from this listing.
6553 (define_expand "vcond<mode>"
6554 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6555 (if_then_else:SSEMODE124C8
6556 (match_operator 3 ""
6557 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6558 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6559 (match_operand:SSEMODE124C8 1 "general_operand" "")
6560 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6563 bool ok = ix86_expand_int_vcond (operands);
;; Counterpart of vcond<mode> under the "vcondu" name (presumably the
;; unsigned-comparison variant), with the same operand layout:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; It uses the same helper, ix86_expand_int_vcond, on the operand array.
;; NOTE(review): the insn condition and the statements after the call are
;; on lines missing from this listing.
6568 (define_expand "vcondu<mode>"
6569 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6570 (if_then_else:SSEMODE124C8
6571 (match_operator 3 ""
6572 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6573 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6574 (match_operand:SSEMODE124C8 1 "general_operand" "")
6575 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6578 bool ok = ix86_expand_int_vcond (operands);
6583 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6585 ;; Parallel bitwise logical operations
6587 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's-complement expander for the 16-byte integer vector modes
;; (SSEMODEI: V16QI, V8HI, V4SI, V2DI), implemented as an XOR.  The
;; preparation code builds a CONST_VECTOR whose every element is
;; constm1_rtx (all bits set), forces it into a register and stores it in
;; operands[2].
;; NOTE(review): the second XOR operand and the insn condition are on lines
;; missing from this listing; presumably the XOR refers to operands[2].
6589 (define_expand "one_cmpl<mode>2"
6590 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6591 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6595 int i, n = GET_MODE_NUNITS (<MODE>mode);
6596 rtvec v = rtvec_alloc (n);
6598 for (i = 0; i < n; ++i)
6599 RTVEC_ELT (v, i) = constm1_rtx;
6601 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not for the 32-byte integer vector modes (AVX256MODEI: V32QI,
;; V16HI, V8SI, V4DI).  Operand 1 appears under NOT and is combined with
;; operand 2.  The pattern emits the packed-single form "vandnps" and sets
;; the insn "mode" attribute from <avxvecpsmode>.
;; NOTE(review): the outer rtx code and the insn condition are on lines
;; missing from this listing.
6604 (define_insn "*avx_andnot<mode>3"
6605 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6607 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6608 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6610 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6611 [(set_attr "type" "sselog")
6612 (set_attr "prefix" "vex")
6613 (set_attr "mode" "<avxvecpsmode>")])
;; And-not on 16-byte integer vectors for targets that have SSE but not
;; SSE2 (see the condition).  It emits the packed-single instruction
;; "andnps" and records the insn mode as V4SF.  Operand 1 (under NOT) is
;; tied to the destination.
6615 (define_insn "*sse_andnot<mode>3"
6616 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6618 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6619 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6620 "(TARGET_SSE && !TARGET_SSE2)"
6621 "andnps\t{%2, %0|%0, %2}"
6622 [(set_attr "type" "sselog")
6623 (set_attr "mode" "V4SF")])
;; AVX three-operand and-not (vpandn) for the 16-byte integer vector modes
;; (SSEMODEI).  Operand 1 appears under NOT; operand 2 may be a register or
;; memory.
;; NOTE(review): the outer rtx code and the insn condition are on lines
;; missing from this listing.
6625 (define_insn "*avx_andnot<mode>3"
6626 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6628 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6629 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6631 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6632 [(set_attr "type" "sselog")
6633 (set_attr "prefix" "vex")
6634 (set_attr "mode" "TI")])
;; Two-operand and-not (pandn) for the 16-byte integer vector modes
;; (SSEMODEI).  Operand 1 (under NOT) is tied to the destination; operand 2
;; may be a register or memory.
;; NOTE(review): the outer rtx code and the insn condition are on lines
;; missing from this listing; the name suggests an SSE2 condition.
6636 (define_insn "sse2_andnot<mode>3"
6637 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6639 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6640 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6642 "pandn\t{%2, %0|%0, %2}"
6643 [(set_attr "type" "sselog")
6644 (set_attr "prefix_data16" "1")
6645 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers, emitted with the integer
;; instruction "pandn".  Operand 1 (under NOT) is tied to the destination.
;; NOTE(review): the outer rtx code and the insn condition are on lines
;; missing from this listing.
6647 (define_insn "*andnottf3"
6648 [(set (match_operand:TF 0 "register_operand" "=x")
6650 (not:TF (match_operand:TF 1 "register_operand" "0"))
6651 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6653 "pandn\t{%2, %0|%0, %2}"
6654 [(set_attr "type" "sselog")
6655 (set_attr "prefix_data16" "1")
6656 (set_attr "mode" "TI")])
;; Generic expander, instantiated per rtx code and per 16-byte integer
;; vector mode (SSEMODEI).  Its only preparation step passes the operands
;; through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the outer rtx code (presumably the any_logic iterator used
;; by the matching insns) and the insn condition are on lines missing from
;; this listing.
6658 (define_expand "<code><mode>3"
6659 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6661 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6662 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6664 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX bitwise logical operation (any_logic code iterator) on the 32-byte
;; integer vector modes (AVX256MODEI).  It emits the packed-single form
;; "v<logic>ps" and takes the insn "mode" attribute from <avxvecpsmode>.
;; Operand 1 is marked commutative.
;; NOTE(review): the first line of the insn condition is missing from this
;; listing; only the ix86_binary_operator_ok clause is visible.
6666 (define_insn "*avx_<code><mode>3"
6667 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6668 (any_logic:AVX256MODEI
6669 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6670 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6672 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6673 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6674 [(set_attr "type" "sselog")
6675 (set_attr "prefix" "vex")
6676 (set_attr "mode" "<avxvecpsmode>")])
;; Bitwise logical operation on 16-byte integer vectors (SSEMODEI) for
;; targets with SSE but without SSE2 (see the condition).  It emits the
;; packed-single form "<logic>ps" and records the insn mode as V4SF.
;; Operand 1 is tied to the destination and marked commutative.
;; NOTE(review): the line holding the outer rtx code is missing from this
;; listing.
6678 (define_insn "*sse_<code><mode>3"
6679 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6681 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6682 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6683 "(TARGET_SSE && !TARGET_SSE2)
6684 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6685 "<logic>ps\t{%2, %0|%0, %2}"
6686 [(set_attr "type" "sselog")
6687 (set_attr "mode" "V4SF")])
;; AVX three-operand bitwise logical operation ("vp<logic>") on the 16-byte
;; integer vector modes (SSEMODEI).  Operand 1 is marked commutative;
;; operand 2 may be a register or memory.
;; NOTE(review): the outer rtx code and the first line of the insn
;; condition are missing from this listing.
6689 (define_insn "*avx_<code><mode>3"
6690 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6692 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6693 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6695 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6696 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6697 [(set_attr "type" "sselog")
6698 (set_attr "prefix" "vex")
6699 (set_attr "mode" "TI")])
;; SSE2 two-operand bitwise logical operation ("p<logic>") on the 16-byte
;; integer vector modes (SSEMODEI).  Operand 1 is tied to the destination
;; and marked commutative; operand 2 may be a register or memory.
;; NOTE(review): the line holding the outer rtx code is missing from this
;; listing.
6701 (define_insn "*sse2_<code><mode>3"
6702 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6704 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6705 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6706 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6707 "p<logic>\t{%2, %0|%0, %2}"
6708 [(set_attr "type" "sselog")
6709 (set_attr "prefix_data16" "1")
6710 (set_attr "mode" "TI")])
;; Expander for a binary operation on TFmode values, instantiated per rtx
;; code.  Its only preparation step passes the operands through
;; ix86_fixup_binary_operands_no_copy in TFmode.
;; NOTE(review): the outer rtx code and the insn condition are on lines
;; missing from this listing.
6712 (define_expand "<code>tf3"
6713 [(set (match_operand:TF 0 "register_operand" "")
6715 (match_operand:TF 1 "nonimmediate_operand" "")
6716 (match_operand:TF 2 "nonimmediate_operand" "")))]
6718 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 bitwise logical operation on TFmode values, emitted with the
;; integer instruction "p<logic>".  Operand 1 is tied to the destination
;; and marked commutative; operand 2 may be a register or memory.
;; NOTE(review): the line holding the outer rtx code is missing from this
;; listing.
6720 (define_insn "*<code>tf3"
6721 [(set (match_operand:TF 0 "register_operand" "=x")
6723 (match_operand:TF 1 "nonimmediate_operand" "%0")
6724 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6725 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6726 "p<logic>\t{%2, %0|%0, %2}"
6727 [(set_attr "type" "sselog")
6728 (set_attr "prefix_data16" "1")
6729 (set_attr "mode" "TI")])
6731 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6733 ;; Parallel integral element swizzling
6735 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packs two V8HI inputs into one V16QI result.  Both inputs are
;; reinterpreted as V16QI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered bytes, i.e. the low
;; byte of each halfword -- confirm against the helper.  The insn condition
;; is on a line missing from this listing.
6737 (define_expand "vec_pack_trunc_v8hi"
6738 [(match_operand:V16QI 0 "register_operand" "")
6739 (match_operand:V8HI 1 "register_operand" "")
6740 (match_operand:V8HI 2 "register_operand" "")]
6743 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6744 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6745 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Packs two V4SI inputs into one V8HI result.  Both inputs are
;; reinterpreted as V8HI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered halfwords, i.e. the
;; low half of each 32-bit element -- confirm against the helper.  The insn
;; condition is on a line missing from this listing.
6749 (define_expand "vec_pack_trunc_v4si"
6750 [(match_operand:V8HI 0 "register_operand" "")
6751 (match_operand:V4SI 1 "register_operand" "")
6752 (match_operand:V4SI 2 "register_operand" "")]
6755 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6756 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6757 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Packs two V2DI inputs into one V4SI result.  Both inputs are
;; reinterpreted as V4SI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered 32-bit elements,
;; i.e. the low half of each 64-bit element -- confirm against the helper.
;; The insn condition is on a line missing from this listing.
6761 (define_expand "vec_pack_trunc_v2di"
6762 [(match_operand:V4SI 0 "register_operand" "")
6763 (match_operand:V2DI 1 "register_operand" "")
6764 (match_operand:V2DI 2 "register_operand" "")]
6767 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6768 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6769 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; AVX three-operand form of the halfword-to-byte pack with signed
;; saturation (vpacksswb): two V8HI sources produce one V16QI result.
;; NOTE(review): the rtx codes wrapping the operands and the insn condition
;; are on lines missing from this listing.
6773 (define_insn "*avx_packsswb"
6774 [(set (match_operand:V16QI 0 "register_operand" "=x")
6777 (match_operand:V8HI 1 "register_operand" "x"))
6779 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6781 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6782 [(set_attr "type" "sselog")
6783 (set_attr "prefix" "vex")
6784 (set_attr "mode" "TI")])
;; Two-operand halfword-to-byte pack with signed saturation (packsswb): two
;; V8HI sources produce one V16QI result, with operand 1 tied to the
;; destination.
;; NOTE(review): the rtx codes wrapping the operands and the insn condition
;; are on lines missing from this listing.
6786 (define_insn "sse2_packsswb"
6787 [(set (match_operand:V16QI 0 "register_operand" "=x")
6790 (match_operand:V8HI 1 "register_operand" "0"))
6792 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6794 "packsswb\t{%2, %0|%0, %2}"
6795 [(set_attr "type" "sselog")
6796 (set_attr "prefix_data16" "1")
6797 (set_attr "mode" "TI")])
;; AVX three-operand form of the doubleword-to-halfword pack with signed
;; saturation (vpackssdw): two V4SI sources produce one V8HI result.
;; NOTE(review): the rtx codes wrapping the operands and the insn condition
;; are on lines missing from this listing.
6799 (define_insn "*avx_packssdw"
6800 [(set (match_operand:V8HI 0 "register_operand" "=x")
6803 (match_operand:V4SI 1 "register_operand" "x"))
6805 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6807 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6808 [(set_attr "type" "sselog")
6809 (set_attr "prefix" "vex")
6810 (set_attr "mode" "TI")])
;; Two-operand doubleword-to-halfword pack with signed saturation
;; (packssdw): two V4SI sources produce one V8HI result, with operand 1
;; tied to the destination.
;; NOTE(review): the rtx codes wrapping the operands and the insn condition
;; are on lines missing from this listing.
6812 (define_insn "sse2_packssdw"
6813 [(set (match_operand:V8HI 0 "register_operand" "=x")
6816 (match_operand:V4SI 1 "register_operand" "0"))
6818 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6820 "packssdw\t{%2, %0|%0, %2}"
6821 [(set_attr "type" "sselog")
6822 (set_attr "prefix_data16" "1")
6823 (set_attr "mode" "TI")])
;; AVX three-operand form of the halfword-to-byte pack with unsigned
;; saturation (vpackuswb): two V8HI sources produce one V16QI result.
;; NOTE(review): the rtx codes wrapping the operands and the insn condition
;; are on lines missing from this listing.
6825 (define_insn "*avx_packuswb"
6826 [(set (match_operand:V16QI 0 "register_operand" "=x")
6829 (match_operand:V8HI 1 "register_operand" "x"))
6831 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6833 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6834 [(set_attr "type" "sselog")
6835 (set_attr "prefix" "vex")
6836 (set_attr "mode" "TI")])
;; Two-operand halfword-to-byte pack with unsigned saturation (packuswb):
;; two V8HI sources produce one V16QI result, with operand 1 tied to the
;; destination.
;; NOTE(review): the rtx codes wrapping the operands and the insn condition
;; are on lines missing from this listing.
6838 (define_insn "sse2_packuswb"
6839 [(set (match_operand:V16QI 0 "register_operand" "=x")
6842 (match_operand:V8HI 1 "register_operand" "0"))
6844 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6846 "packuswb\t{%2, %0|%0, %2}"
6847 [(set_attr "type" "sselog")
6848 (set_attr "prefix_data16" "1")
6849 (set_attr "mode" "TI")])
;; AVX three-operand byte interleave of the high halves (vpunpckhbw).  The
;; selection list alternates indices 8..15 with 24..31.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the 32-byte concatenation of
;; operands 1 and 2, so 8..15 are the high bytes of operand 1 and 24..31
;; the high bytes of operand 2.  The insn condition is also not visible.
6851 (define_insn "*avx_interleave_highv16qi"
6852 [(set (match_operand:V16QI 0 "register_operand" "=x")
6855 (match_operand:V16QI 1 "register_operand" "x")
6856 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6857 (parallel [(const_int 8) (const_int 24)
6858 (const_int 9) (const_int 25)
6859 (const_int 10) (const_int 26)
6860 (const_int 11) (const_int 27)
6861 (const_int 12) (const_int 28)
6862 (const_int 13) (const_int 29)
6863 (const_int 14) (const_int 30)
6864 (const_int 15) (const_int 31)])))]
6866 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6867 [(set_attr "type" "sselog")
6868 (set_attr "prefix" "vex")
6869 (set_attr "mode" "TI")])
;; Two-operand byte interleave of the high halves (punpckhbw), with operand
;; 1 tied to the destination.  The selection list alternates indices 8..15
;; with 24..31.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2.  The insn condition is also not visible.
6871 (define_insn "vec_interleave_highv16qi"
6872 [(set (match_operand:V16QI 0 "register_operand" "=x")
6875 (match_operand:V16QI 1 "register_operand" "0")
6876 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6877 (parallel [(const_int 8) (const_int 24)
6878 (const_int 9) (const_int 25)
6879 (const_int 10) (const_int 26)
6880 (const_int 11) (const_int 27)
6881 (const_int 12) (const_int 28)
6882 (const_int 13) (const_int 29)
6883 (const_int 14) (const_int 30)
6884 (const_int 15) (const_int 31)])))]
6886 "punpckhbw\t{%2, %0|%0, %2}"
6887 [(set_attr "type" "sselog")
6888 (set_attr "prefix_data16" "1")
6889 (set_attr "mode" "TI")])
;; AVX three-operand byte interleave of the low halves (vpunpcklbw).  The
;; selection list alternates indices 0..7 with 16..23.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2, so 0..7 are the low bytes of operand 1 and 16..23 the low bytes
;; of operand 2.  The insn condition is also not visible.
6891 (define_insn "*avx_interleave_lowv16qi"
6892 [(set (match_operand:V16QI 0 "register_operand" "=x")
6895 (match_operand:V16QI 1 "register_operand" "x")
6896 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6897 (parallel [(const_int 0) (const_int 16)
6898 (const_int 1) (const_int 17)
6899 (const_int 2) (const_int 18)
6900 (const_int 3) (const_int 19)
6901 (const_int 4) (const_int 20)
6902 (const_int 5) (const_int 21)
6903 (const_int 6) (const_int 22)
6904 (const_int 7) (const_int 23)])))]
6906 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6907 [(set_attr "type" "sselog")
6908 (set_attr "prefix" "vex")
6909 (set_attr "mode" "TI")])
;; Two-operand byte interleave of the low halves (punpcklbw), with operand
;; 1 tied to the destination.  The selection list alternates indices 0..7
;; with 16..23.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2.  The insn condition is also not visible.
6911 (define_insn "vec_interleave_lowv16qi"
6912 [(set (match_operand:V16QI 0 "register_operand" "=x")
6915 (match_operand:V16QI 1 "register_operand" "0")
6916 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6917 (parallel [(const_int 0) (const_int 16)
6918 (const_int 1) (const_int 17)
6919 (const_int 2) (const_int 18)
6920 (const_int 3) (const_int 19)
6921 (const_int 4) (const_int 20)
6922 (const_int 5) (const_int 21)
6923 (const_int 6) (const_int 22)
6924 (const_int 7) (const_int 23)])))]
6926 "punpcklbw\t{%2, %0|%0, %2}"
6927 [(set_attr "type" "sselog")
6928 (set_attr "prefix_data16" "1")
6929 (set_attr "mode" "TI")])
;; AVX three-operand halfword interleave of the high halves (vpunpckhwd).
;; The selection list alternates indices 4..7 with 12..15.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2.  The insn condition is also not visible.
6931 (define_insn "*avx_interleave_highv8hi"
6932 [(set (match_operand:V8HI 0 "register_operand" "=x")
6935 (match_operand:V8HI 1 "register_operand" "x")
6936 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6937 (parallel [(const_int 4) (const_int 12)
6938 (const_int 5) (const_int 13)
6939 (const_int 6) (const_int 14)
6940 (const_int 7) (const_int 15)])))]
6942 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6943 [(set_attr "type" "sselog")
6944 (set_attr "prefix" "vex")
6945 (set_attr "mode" "TI")])
;; Two-operand halfword interleave of the high halves (punpckhwd), with
;; operand 1 tied to the destination.  The selection list alternates
;; indices 4..7 with 12..15.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2.  The insn condition is also not visible.
6947 (define_insn "vec_interleave_highv8hi"
6948 [(set (match_operand:V8HI 0 "register_operand" "=x")
6951 (match_operand:V8HI 1 "register_operand" "0")
6952 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6953 (parallel [(const_int 4) (const_int 12)
6954 (const_int 5) (const_int 13)
6955 (const_int 6) (const_int 14)
6956 (const_int 7) (const_int 15)])))]
6958 "punpckhwd\t{%2, %0|%0, %2}"
6959 [(set_attr "type" "sselog")
6960 (set_attr "prefix_data16" "1")
6961 (set_attr "mode" "TI")])
;; AVX three-operand halfword interleave of the low halves (vpunpcklwd).
;; The selection list alternates indices 0..3 with 8..11.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2.  The insn condition is also not visible.
6963 (define_insn "*avx_interleave_lowv8hi"
6964 [(set (match_operand:V8HI 0 "register_operand" "=x")
6967 (match_operand:V8HI 1 "register_operand" "x")
6968 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6969 (parallel [(const_int 0) (const_int 8)
6970 (const_int 1) (const_int 9)
6971 (const_int 2) (const_int 10)
6972 (const_int 3) (const_int 11)])))]
6974 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6975 [(set_attr "type" "sselog")
6976 (set_attr "prefix" "vex")
6977 (set_attr "mode" "TI")])
;; Two-operand halfword interleave of the low halves (punpcklwd), with
;; operand 1 tied to the destination.  The selection list alternates
;; indices 0..3 with 8..11.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2.  The insn condition is also not visible.
6979 (define_insn "vec_interleave_lowv8hi"
6980 [(set (match_operand:V8HI 0 "register_operand" "=x")
6983 (match_operand:V8HI 1 "register_operand" "0")
6984 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6985 (parallel [(const_int 0) (const_int 8)
6986 (const_int 1) (const_int 9)
6987 (const_int 2) (const_int 10)
6988 (const_int 3) (const_int 11)])))]
6990 "punpcklwd\t{%2, %0|%0, %2}"
6991 [(set_attr "type" "sselog")
6992 (set_attr "prefix_data16" "1")
6993 (set_attr "mode" "TI")])
;; AVX three-operand doubleword interleave of the high halves (vpunpckhdq).
;; The selection list is 2, 6, 3, 7.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2.  The insn condition is also not visible.
6995 (define_insn "*avx_interleave_highv4si"
6996 [(set (match_operand:V4SI 0 "register_operand" "=x")
6999 (match_operand:V4SI 1 "register_operand" "x")
7000 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7001 (parallel [(const_int 2) (const_int 6)
7002 (const_int 3) (const_int 7)])))]
7004 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
7005 [(set_attr "type" "sselog")
7006 (set_attr "prefix" "vex")
7007 (set_attr "mode" "TI")])
;; Two-operand doubleword interleave of the high halves (punpckhdq), with
;; operand 1 tied to the destination.  The selection list is 2, 6, 3, 7.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2.  The insn condition is also not visible.
7009 (define_insn "vec_interleave_highv4si"
7010 [(set (match_operand:V4SI 0 "register_operand" "=x")
7013 (match_operand:V4SI 1 "register_operand" "0")
7014 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7015 (parallel [(const_int 2) (const_int 6)
7016 (const_int 3) (const_int 7)])))]
7018 "punpckhdq\t{%2, %0|%0, %2}"
7019 [(set_attr "type" "sselog")
7020 (set_attr "prefix_data16" "1")
7021 (set_attr "mode" "TI")])
;; AVX three-operand doubleword interleave of the low halves (vpunpckldq).
;; The selection list is 0, 4, 1, 5.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2.  The insn condition is also not visible.
7023 (define_insn "*avx_interleave_lowv4si"
7024 [(set (match_operand:V4SI 0 "register_operand" "=x")
7027 (match_operand:V4SI 1 "register_operand" "x")
7028 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7029 (parallel [(const_int 0) (const_int 4)
7030 (const_int 1) (const_int 5)])))]
7032 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
7033 [(set_attr "type" "sselog")
7034 (set_attr "prefix" "vex")
7035 (set_attr "mode" "TI")])
;; Two-operand doubleword interleave of the low halves (punpckldq), with
;; operand 1 tied to the destination.  The selection list is 0, 4, 1, 5.
;; NOTE(review): the vec_select/vec_concat lines are missing from this
;; listing; presumably the indices address the concatenation of operands 1
;; and 2.  The insn condition is also not visible.
7037 (define_insn "vec_interleave_lowv4si"
7038 [(set (match_operand:V4SI 0 "register_operand" "=x")
7041 (match_operand:V4SI 1 "register_operand" "0")
7042 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7043 (parallel [(const_int 0) (const_int 4)
7044 (const_int 1) (const_int 5)])))]
7046 "punpckldq\t{%2, %0|%0, %2}"
7047 [(set_attr "type" "sselog")
7048 (set_attr "prefix_data16" "1")
7049 (set_attr "mode" "TI")])
;; AVX element insert (vpinsr + element size suffix) over SSEMODE124.  The
;; scalar in operand 2 is broadcast and merged into vector operand 1.
;; Operand 3 is a one-hot mask (a power of two); the output code converts
;; it to the element index with exact_log2 before printing.  A memory
;; source is printed as %2 and a register source as %k2.
;; "prefix_extra" depends on whether operand 0 is V8HI: the V8HI value is
;; on a line missing from this listing, every other mode gets "1".
;; NOTE(review): the insn condition is also not visible here.
7051 (define_insn "*avx_pinsr<ssevecsize>"
7052 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
7053 (vec_merge:SSEMODE124
7054 (vec_duplicate:SSEMODE124
7055 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
7056 (match_operand:SSEMODE124 1 "register_operand" "x")
7057 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
7060 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7061 if (MEM_P (operands[2]))
7062 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7064 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7066 [(set_attr "type" "sselog")
7067 (set (attr "prefix_extra")
7068 (if_then_else (match_operand:V8HI 0 "" "")
7070 (const_string "1")))
7071 (set_attr "length_immediate" "1")
7072 (set_attr "prefix" "vex")
7073 (set_attr "mode" "TI")])
;; Byte insert into a V16QI vector (pinsrb), with operand 1 tied to the
;; destination.  Operand 3 is a one-hot mask in the range 1..32768; the
;; output code converts it to the byte index with exact_log2.  A memory
;; source is printed as %2 and a register source as %k2.
;; NOTE(review): the vec_merge line and the insn condition are missing from
;; this listing; the name suggests an SSE4.1 condition.
7075 (define_insn "*sse4_1_pinsrb"
7076 [(set (match_operand:V16QI 0 "register_operand" "=x")
7078 (vec_duplicate:V16QI
7079 (match_operand:QI 2 "nonimmediate_operand" "rm"))
7080 (match_operand:V16QI 1 "register_operand" "0")
7081 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
7084 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7085 if (MEM_P (operands[2]))
7086 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
7088 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
7090 [(set_attr "type" "sselog")
7091 (set_attr "prefix_extra" "1")
7092 (set_attr "length_immediate" "1")
7093 (set_attr "mode" "TI")])
;; Halfword insert into a V8HI vector (pinsrw), with operand 1 tied to the
;; destination.  Operand 3 is a one-hot mask in the range 1..128; the
;; output code converts it to the element index with exact_log2.  A memory
;; source is printed as %2 and a register source as %k2.
;; NOTE(review): the vec_merge/vec_duplicate lines and the insn condition
;; are missing from this listing; the name suggests an SSE2 condition.
7095 (define_insn "*sse2_pinsrw"
7096 [(set (match_operand:V8HI 0 "register_operand" "=x")
7099 (match_operand:HI 2 "nonimmediate_operand" "rm"))
7100 (match_operand:V8HI 1 "register_operand" "0")
7101 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
7104 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7105 if (MEM_P (operands[2]))
7106 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
7108 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
7110 [(set_attr "type" "sselog")
7111 (set_attr "prefix_data16" "1")
7112 (set_attr "length_immediate" "1")
7113 (set_attr "mode" "TI")])
7115 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert into a V4SI vector (pinsrd), with operand 1 tied to
;; the destination.  Operand 3 is a one-hot mask in the range 1..8; the
;; output code converts it to the element index with exact_log2.
;; NOTE(review): the vec_merge/vec_duplicate lines and the insn condition
;; are missing from this listing; the name suggests an SSE4.1 condition.
7116 (define_insn "*sse4_1_pinsrd"
7117 [(set (match_operand:V4SI 0 "register_operand" "=x")
7120 (match_operand:SI 2 "nonimmediate_operand" "rm"))
7121 (match_operand:V4SI 1 "register_operand" "0")
7122 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
7125 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7126 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
7128 [(set_attr "type" "sselog")
7129 (set_attr "prefix_extra" "1")
7130 (set_attr "length_immediate" "1")
7131 (set_attr "mode" "TI")])
;; AVX three-operand quadword insert into a V2DI vector (vpinsrq); only
;; available with TARGET_AVX on 64-bit targets.  Operand 3 is a one-hot
;; mask in the range 1..2; the output code converts it to the element
;; index with exact_log2.
;; NOTE(review): the vec_merge/vec_duplicate lines are missing from this
;; listing.
7133 (define_insn "*avx_pinsrq"
7134 [(set (match_operand:V2DI 0 "register_operand" "=x")
7137 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7138 (match_operand:V2DI 1 "register_operand" "x")
7139 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7140 "TARGET_AVX && TARGET_64BIT"
7142 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7143 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7145 [(set_attr "type" "sselog")
7146 (set_attr "prefix_extra" "1")
7147 (set_attr "length_immediate" "1")
7148 (set_attr "prefix" "vex")
7149 (set_attr "mode" "TI")])
;; SSE4.1 two-operand quadword insert into a V2DI vector (pinsrq); only
;; available on 64-bit targets, and the pattern sets "prefix_rex".  Operand
;; 1 is tied to the destination.  Operand 3 is a one-hot mask in the range
;; 1..2; the output code converts it to the element index with exact_log2.
;; NOTE(review): the vec_merge/vec_duplicate lines are missing from this
;; listing.
7151 (define_insn "*sse4_1_pinsrq"
7152 [(set (match_operand:V2DI 0 "register_operand" "=x")
7155 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7156 (match_operand:V2DI 1 "register_operand" "0")
7157 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7158 "TARGET_SSE4_1 && TARGET_64BIT"
7160 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7161 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7163 [(set_attr "type" "sselog")
7164 (set_attr "prefix_rex" "1")
7165 (set_attr "prefix_extra" "1")
7166 (set_attr "length_immediate" "1")
7167 (set_attr "mode" "TI")])
;; Byte extract from a V16QI vector into a general register of an SWI48
;; mode (pextrb).  Operand 2 is the element index (0..15).  The destination
;; is printed with the %k modifier, and the leading %v in the template
;; pairs with the "maybe_vex" prefix attribute.
;; NOTE(review): the rtx codes wrapping the selection (presumably an
;; extension of a vec_select) and the insn condition are on lines missing
;; from this listing.
7169 (define_insn "*sse4_1_pextrb_<mode>"
7170 [(set (match_operand:SWI48 0 "register_operand" "=r")
7173 (match_operand:V16QI 1 "register_operand" "x")
7174 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7176 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7177 [(set_attr "type" "sselog")
7178 (set_attr "prefix_extra" "1")
7179 (set_attr "length_immediate" "1")
7180 (set_attr "prefix" "maybe_vex")
7181 (set_attr "mode" "TI")])
;; Byte extract from a V16QI vector stored directly to a QImode memory
;; location (pextrb with a memory destination).  Operand 2 is the element
;; index (0..15).
;; NOTE(review): the vec_select line and the insn condition are missing
;; from this listing.
7183 (define_insn "*sse4_1_pextrb_memory"
7184 [(set (match_operand:QI 0 "memory_operand" "=m")
7186 (match_operand:V16QI 1 "register_operand" "x")
7187 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7189 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7190 [(set_attr "type" "sselog")
7191 (set_attr "prefix_extra" "1")
7192 (set_attr "length_immediate" "1")
7193 (set_attr "prefix" "maybe_vex")
7194 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI vector into a general register of an SWI48
;; mode (pextrw).  Operand 2 is the element index (0..7); the destination
;; is printed with the %k modifier.
;; NOTE(review): the rtx codes wrapping the selection (presumably an
;; extension of a vec_select) and the insn condition are on lines missing
;; from this listing.
7196 (define_insn "*sse2_pextrw_<mode>"
7197 [(set (match_operand:SWI48 0 "register_operand" "=r")
7200 (match_operand:V8HI 1 "register_operand" "x")
7201 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7203 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7204 [(set_attr "type" "sselog")
7205 (set_attr "prefix_data16" "1")
7206 (set_attr "length_immediate" "1")
7207 (set_attr "prefix" "maybe_vex")
7208 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI vector stored directly to an HImode memory
;; location (pextrw with a memory destination).  Operand 2 is the element
;; index (0..7).
;; NOTE(review): the vec_select line and the insn condition are missing
;; from this listing.
7210 (define_insn "*sse4_1_pextrw_memory"
7211 [(set (match_operand:HI 0 "memory_operand" "=m")
7213 (match_operand:V8HI 1 "register_operand" "x")
7214 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7216 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7217 [(set_attr "type" "sselog")
7218 (set_attr "prefix_extra" "1")
7219 (set_attr "length_immediate" "1")
7220 (set_attr "prefix" "maybe_vex")
7221 (set_attr "mode" "TI")])
;; Doubleword extract from a V4SI vector into an SImode register or memory
;; location (pextrd).  Operand 2 is the element index (0..3).
;; NOTE(review): the vec_select line and the insn condition are missing
;; from this listing.
7223 (define_insn "*sse4_1_pextrd"
7224 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7226 (match_operand:V4SI 1 "register_operand" "x")
7227 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7229 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7230 [(set_attr "type" "sselog")
7231 (set_attr "prefix_extra" "1")
7232 (set_attr "length_immediate" "1")
7233 (set_attr "prefix" "maybe_vex")
7234 (set_attr "mode" "TI")])
;; 64-bit-only variant of the pextrd pattern with a DImode register
;; destination.  The instruction is printed on the 32-bit view of the
;; destination (%k0).  Operand 2 is the element index (0..3).
;; NOTE(review): the rtx codes wrapping the selection are on lines missing
;; from this listing; the "_zext" name suggests a zero_extend of a
;; vec_select.
7236 (define_insn "*sse4_1_pextrd_zext"
7237 [(set (match_operand:DI 0 "register_operand" "=r")
7240 (match_operand:V4SI 1 "register_operand" "x")
7241 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7242 "TARGET_64BIT && TARGET_SSE4_1"
7243 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7244 [(set_attr "type" "sselog")
7245 (set_attr "prefix_extra" "1")
7246 (set_attr "length_immediate" "1")
7247 (set_attr "prefix" "maybe_vex")
7248 (set_attr "mode" "TI")])
7250 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Quadword extract from a V2DI vector into a DImode register or memory
;; location (pextrq).  Requires SSE4.1 on a 64-bit target, and the pattern
;; sets "prefix_rex".  Operand 2 is the element index (0..1).
;; NOTE(review): the vec_select line is missing from this listing.
7251 (define_insn "*sse4_1_pextrq"
7252 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7254 (match_operand:V2DI 1 "register_operand" "x")
7255 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7256 "TARGET_SSE4_1 && TARGET_64BIT"
7257 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7258 [(set_attr "type" "sselog")
7259 (set_attr "prefix_rex" "1")
7260 (set_attr "prefix_extra" "1")
7261 (set_attr "length_immediate" "1")
7262 (set_attr "prefix" "maybe_vex")
7263 (set_attr "mode" "TI")])
;; Expander that takes the 8-bit shuffle immediate in operand 2 and splits
;; it into four 2-bit element selectors (bits 1:0, 3:2, 5:4 and 7:6), which
;; are passed as separate constants to the sse2_pshufd_1 pattern.
;; NOTE(review): the insn condition and the trailing statements are on
;; lines missing from this listing.
7265 (define_expand "sse2_pshufd"
7266 [(match_operand:V4SI 0 "register_operand" "")
7267 (match_operand:V4SI 1 "nonimmediate_operand" "")
7268 (match_operand:SI 2 "const_int_operand" "")]
7271 int mask = INTVAL (operands[2]);
7272 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7273 GEN_INT ((mask >> 0) & 3),
7274 GEN_INT ((mask >> 2) & 3),
7275 GEN_INT ((mask >> 4) & 3),
7276 GEN_INT ((mask >> 6) & 3)));
;; Doubleword shuffle (pshufd) with the four element selectors in operands
;; 2..5, each in the range 0..3.  The output code packs them back into a
;; single 8-bit immediate (two bits per selector, operand 2 in the low
;; bits), stores it in operands[2] and prints the instruction.
;; NOTE(review): the vec_select line, the insn condition and the
;; declaration/initialisation of "mask" are on lines missing from this
;; listing.
7280 (define_insn "sse2_pshufd_1"
7281 [(set (match_operand:V4SI 0 "register_operand" "=x")
7283 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7284 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7285 (match_operand 3 "const_0_to_3_operand" "")
7286 (match_operand 4 "const_0_to_3_operand" "")
7287 (match_operand 5 "const_0_to_3_operand" "")])))]
7291 mask |= INTVAL (operands[2]) << 0;
7292 mask |= INTVAL (operands[3]) << 2;
7293 mask |= INTVAL (operands[4]) << 4;
7294 mask |= INTVAL (operands[5]) << 6;
7295 operands[2] = GEN_INT (mask);
7297 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7299 [(set_attr "type" "sselog1")
7300 (set_attr "prefix_data16" "1")
7301 (set_attr "prefix" "maybe_vex")
7302 (set_attr "length_immediate" "1")
7303 (set_attr "mode" "TI")])
;; Expander that takes the 8-bit shuffle immediate in operand 2 and splits
;; it into four 2-bit selectors (bits 1:0, 3:2, 5:4 and 7:6), which are
;; passed as separate constants to the sse2_pshuflw_1 pattern.
;; NOTE(review): the insn condition and the trailing statements are on
;; lines missing from this listing.
7305 (define_expand "sse2_pshuflw"
7306 [(match_operand:V8HI 0 "register_operand" "")
7307 (match_operand:V8HI 1 "nonimmediate_operand" "")
7308 (match_operand:SI 2 "const_int_operand" "")]
7311 int mask = INTVAL (operands[2]);
7312 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7313 GEN_INT ((mask >> 0) & 3),
7314 GEN_INT ((mask >> 2) & 3),
7315 GEN_INT ((mask >> 4) & 3),
7316 GEN_INT ((mask >> 6) & 3)));
;; Low-halfword shuffle (pshuflw) with four selectors in operands 2..5,
;; each in the range 0..3.  The output code packs them back into a single
;; 8-bit immediate (two bits per selector, operand 2 in the low bits),
;; stores it in operands[2] and prints the instruction.  The attributes
;; request a rep prefix and no data16 prefix.
;; NOTE(review): the rest of the selection list (presumably fixed indices
;; for the upper four halfwords), the vec_select line, the insn condition
;; and the declaration of "mask" are on lines missing from this listing.
7320 (define_insn "sse2_pshuflw_1"
7321 [(set (match_operand:V8HI 0 "register_operand" "=x")
7323 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7324 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7325 (match_operand 3 "const_0_to_3_operand" "")
7326 (match_operand 4 "const_0_to_3_operand" "")
7327 (match_operand 5 "const_0_to_3_operand" "")
7335 mask |= INTVAL (operands[2]) << 0;
7336 mask |= INTVAL (operands[3]) << 2;
7337 mask |= INTVAL (operands[4]) << 4;
7338 mask |= INTVAL (operands[5]) << 6;
7339 operands[2] = GEN_INT (mask);
7341 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7343 [(set_attr "type" "sselog")
7344 (set_attr "prefix_data16" "0")
7345 (set_attr "prefix_rep" "1")
7346 (set_attr "prefix" "maybe_vex")
7347 (set_attr "length_immediate" "1")
7348 (set_attr "mode" "TI")])
;; Expander that takes the 8-bit shuffle immediate in operand 2 and splits
;; it into four 2-bit selectors.  Each is biased by +4, so the values
;; passed to sse2_pshufhw_1 are element indices in the range 4..7.
;; NOTE(review): the insn condition and the trailing statements are on
;; lines missing from this listing.
7350 (define_expand "sse2_pshufhw"
7351 [(match_operand:V8HI 0 "register_operand" "")
7352 (match_operand:V8HI 1 "nonimmediate_operand" "")
7353 (match_operand:SI 2 "const_int_operand" "")]
7356 int mask = INTVAL (operands[2]);
7357 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7358 GEN_INT (((mask >> 0) & 3) + 4),
7359 GEN_INT (((mask >> 2) & 3) + 4),
7360 GEN_INT (((mask >> 4) & 3) + 4),
7361 GEN_INT (((mask >> 6) & 3) + 4)));
;; High-halfword shuffle (pshufhw).  The selection list starts with fixed
;; index 0 and ends with four selectors in operands 2..5, each in the range
;; 4..7.  The output code removes the +4 bias from each selector, packs the
;; results into a single 8-bit immediate (two bits each, operand 2 in the
;; low bits), stores it in operands[2] and prints the instruction.  The
;; attributes request a rep prefix and no data16 prefix.
;; NOTE(review): the remaining fixed indices (presumably 1..3), the
;; vec_select line, the insn condition and the declaration of "mask" are on
;; lines missing from this listing.
7365 (define_insn "sse2_pshufhw_1"
7366 [(set (match_operand:V8HI 0 "register_operand" "=x")
7368 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7369 (parallel [(const_int 0)
7373 (match_operand 2 "const_4_to_7_operand" "")
7374 (match_operand 3 "const_4_to_7_operand" "")
7375 (match_operand 4 "const_4_to_7_operand" "")
7376 (match_operand 5 "const_4_to_7_operand" "")])))]
7380 mask |= (INTVAL (operands[2]) - 4) << 0;
7381 mask |= (INTVAL (operands[3]) - 4) << 2;
7382 mask |= (INTVAL (operands[4]) - 4) << 4;
7383 mask |= (INTVAL (operands[5]) - 4) << 6;
7384 operands[2] = GEN_INT (mask);
7386 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7388 [(set_attr "type" "sselog")
7389 (set_attr "prefix_rep" "1")
7390 (set_attr "prefix_data16" "0")
7391 (set_attr "prefix" "maybe_vex")
7392 (set_attr "length_immediate" "1")
7393 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1, register or memory)
;; into a V4SI register.  The preparation statement sets operands[2] to
;; the all-zero V4SI constant (CONST0_RTX (V4SImode)).
7395 (define_expand "sse2_loadd"
7396 [(set (match_operand:V4SI 0 "register_operand" "")
7399 (match_operand:SI 1 "nonimmediate_operand" ""))
7403 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX-encoded insertion of an SImode value (operand 2) into a V4SI
;; destination.  Three alternatives:
;;   0: source in memory,             operand 1 constrained to "C" -> vmovd
;;   1: source in a general register, operand 1 constrained to "C" -> vmovd
;;   2: source and operand 1 both in SSE registers -> three-operand vmovss
7405 (define_insn "*avx_loadld"
7406 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7409 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7410 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7414 vmovd\t{%2, %0|%0, %2}
7415 vmovd\t{%2, %0|%0, %2}
7416 vmovss\t{%2, %1, %0|%0, %1, %2}"
7417 [(set_attr "type" "ssemov")
7418 (set_attr "prefix" "vex")
7419 (set_attr "mode" "TI,TI,V4SF")])
;; Non-VEX insertion of an SImode value (operand 2) into a V4SI
;; destination.  Four alternatives:
;;   0: memory source,           operand 1 "C" -> movd
;;   1: general-register source, operand 1 "C" -> movd
;;   2: memory source,           operand 1 "C" -> movss
;;   3: SSE-register source, operand 1 tied to the destination ("0") -> movss
7421 (define_insn "sse2_loadld"
7422 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7425 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7426 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7430 movd\t{%2, %0|%0, %2}
7431 movd\t{%2, %0|%0, %2}
7432 movss\t{%2, %0|%0, %2}
7433 movss\t{%2, %0|%0, %2}"
7434 [(set_attr "type" "ssemov")
7435 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of a V4SI register (operand 1) into an SImode
;; destination (memory, SSE register or general register).
;; The split applies after reload when inter-unit moves are enabled, or
;; the destination is memory, or the destination is not a general
;; register.  It rewrites the insn as a plain SImode set, re-expressing
;; operand 1 as an SImode register with the same register number.
7437 (define_insn_and_split "sse2_stored"
7438 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7440 (match_operand:V4SI 1 "register_operand" "x,Yi")
7441 (parallel [(const_int 0)])))]
7444 "&& reload_completed
7445 && (TARGET_INTER_UNIT_MOVES
7446 || MEM_P (operands [0])
7447 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7448 [(set (match_dup 0) (match_dup 1))]
7449 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element i (operand 2, a constant 0..3) of a V4SI value that
;; lives in offsettable memory ("o") into a general register.  The split
;; code emits an SImode move from the operand-1 address adjusted by i*4
;; bytes.
7451 (define_insn_and_split "*vec_ext_v4si_mem"
7452 [(set (match_operand:SI 0 "register_operand" "=r")
7454 (match_operand:V4SI 1 "memory_operand" "o")
7455 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7461 int i = INTVAL (operands[2]);
7463 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: store element 0 of a V2DI register (operand 1) into a
;; DImode register-or-memory destination (operand 0).
7467 (define_expand "sse_storeq"
7468 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7470 (match_operand:V2DI 1 "register_operand" "")
7471 (parallel [(const_int 0)])))]
;; 64-bit-only form of the element-0 DImode store from a V2DI value.
;; Enabled when TARGET_64BIT holds and operands 0 and 1 are not both
;; memory.  Three alternatives; the last one (offsettable-memory source
;; to general register) is an integer move ("imov", %vmov{q}); the first
;; two leave type, prefix and mode at their defaults ("*").
7474 (define_insn "*sse2_storeq_rex64"
7475 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7477 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7478 (parallel [(const_int 0)])))]
7479 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7483 %vmov{q}\t{%1, %0|%0, %1}"
7484 [(set_attr "type" "*,*,imov")
7485 (set_attr "prefix" "*,*,maybe_vex")
7486 (set_attr "mode" "*,*,DI")])
;; Element-0 DImode store from a V2DI SSE register to memory or to an
;; SSE register ("=mx").
7488 (define_insn "*sse2_storeq"
7489 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7491 (match_operand:V2DI 1 "register_operand" "x")
7492 (parallel [(const_int 0)])))]
;; The pattern below matches the same element-0 DImode store and, when
;; inter-unit moves are enabled or the destination is memory or not a
;; general register, replaces it with a plain DImode set whose source
;; is operand 1 re-expressed as a DImode register with the same
;; register number.
7497 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7499 (match_operand:V2DI 1 "register_operand" "")
7500 (parallel [(const_int 0)])))]
7503 && (TARGET_INTER_UNIT_MOVES
7504 || MEM_P (operands [0])
7505 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7506 [(set (match_dup 0) (match_dup 1))]
7507 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; VEX-encoded extraction of element 1 of a V2DI value into a DImode
;; destination; operands 0 and 1 must not both be memory.  Alternatives:
;;   0: SSE register -> memory                 vmovhps
;;   1: SSE register -> SSE register           vpsrldq by 8 bytes
;;   2: offsettable memory -> SSE register     vmovq from %H1
;;   3: offsettable memory -> general register vmov{q} from %H1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the
;; memory operand - confirm against the operand-printing code.
7509 (define_insn "*vec_extractv2di_1_rex64_avx"
7510 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7512 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7513 (parallel [(const_int 1)])))]
7516 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7518 vmovhps\t{%1, %0|%0, %1}
7519 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7520 vmovq\t{%H1, %0|%0, %H1}
7521 vmov{q}\t{%H1, %0|%0, %H1}"
7522 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7523 (set_attr "length_immediate" "*,1,*,*")
7524 (set_attr "memory" "*,none,*,*")
7525 (set_attr "prefix" "vex")
7526 (set_attr "mode" "V2SF,TI,TI,DI")])
;; 64-bit, non-VEX extraction of element 1 of a V2DI value into a DImode
;; destination; operands 0 and 1 must not both be memory.  Alternatives:
;;   0: SSE register -> memory                 movhps
;;   1: in place (operand 1 tied to operand 0) psrldq by 8 bytes
;;   2: offsettable memory -> SSE register     movq from %H1
;;   3: offsettable memory -> general register mov{q} from %H1
7528 (define_insn "*vec_extractv2di_1_rex64"
7529 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7531 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7532 (parallel [(const_int 1)])))]
7533 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7535 movhps\t{%1, %0|%0, %1}
7536 psrldq\t{$8, %0|%0, 8}
7537 movq\t{%H1, %0|%0, %H1}
7538 mov{q}\t{%H1, %0|%0, %H1}"
7539 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7540 (set_attr "length_immediate" "*,1,*,*")
7541 (set_attr "memory" "*,none,*,*")
7542 (set_attr "mode" "V2SF,TI,TI,DI")])
;; VEX-encoded extraction of element 1 of a V2DI value, without a
;; general-register alternative; operands 0 and 1 must not both be
;; memory.  Alternatives:
;;   0: SSE register -> memory              vmovhps
;;   1: SSE register -> SSE register        vpsrldq by 8 bytes
;;   2: offsettable memory -> SSE register  vmovq from %H1
7544 (define_insn "*vec_extractv2di_1_avx"
7545 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7547 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7548 (parallel [(const_int 1)])))]
7551 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7553 vmovhps\t{%1, %0|%0, %1}
7554 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7555 vmovq\t{%H1, %0|%0, %H1}"
7556 [(set_attr "type" "ssemov,sseishft1,ssemov")
7557 (set_attr "length_immediate" "*,1,*")
7558 (set_attr "memory" "*,none,*")
7559 (set_attr "prefix" "vex")
7560 (set_attr "mode" "V2SF,TI,TI")])
;; Non-VEX extraction of element 1 of a V2DI value, requiring
;; TARGET_SSE2; operands 0 and 1 must not both be memory.  Alternatives:
;;   0: SSE register -> memory                 movhps
;;   1: in place (operand 1 tied to operand 0) psrldq by 8 bytes
;;   2: offsettable memory -> SSE register     movq from %H1
7562 (define_insn "*vec_extractv2di_1_sse2"
7563 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7565 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7566 (parallel [(const_int 1)])))]
7568 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7570 movhps\t{%1, %0|%0, %1}
7571 psrldq\t{$8, %0|%0, 8}
7572 movq\t{%H1, %0|%0, %H1}"
7573 [(set_attr "type" "ssemov,sseishft1,ssemov")
7574 (set_attr "length_immediate" "*,1,*")
7575 (set_attr "memory" "*,none,*")
7576 (set_attr "mode" "V2SF,TI,TI")])
;; SSE-without-SSE2 extraction of element 1 of a V2DI value (condition:
;; !TARGET_SSE2 && TARGET_SSE, operands not both memory).  Alternatives:
;;   0: SSE register -> memory              movhps
;;   1: SSE register -> SSE register        movhlps
;;   2: offsettable memory -> SSE register  movlps from %H1
7578 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7579 (define_insn "*vec_extractv2di_1_sse"
7580 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7582 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7583 (parallel [(const_int 1)])))]
7584 "!TARGET_SSE2 && TARGET_SSE
7585 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7587 movhps\t{%1, %0|%0, %1}
7588 movhlps\t{%1, %0|%0, %1}
7589 movlps\t{%H1, %0|%0, %H1}"
7590 [(set_attr "type" "ssemov")
7591 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX-encoded V4SI pattern built from one SImode operand.  Alternatives:
;;   0: SSE-register source -> vpshufd with immediate 0
;;   1: memory source       -> vbroadcastss
;; NOTE(review): operand 1 uses the predicate "register_operand" while
;; alternative 1 has constraint "m", so the pattern itself never matches
;; a memory operand directly; confirm whether "nonimmediate_operand"
;; was intended.
7593 (define_insn "*vec_dupv4si_avx"
7594 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7596 (match_operand:SI 1 "register_operand" "x,m")))]
7599 vpshufd\t{$0, %1, %0|%0, %1, 0}
7600 vbroadcastss\t{%1, %0|%0, %1}"
7601 [(set_attr "type" "sselog1,ssemov")
7602 (set_attr "length_immediate" "1,0")
7603 (set_attr "prefix_extra" "0,1")
7604 (set_attr "prefix" "vex")
7605 (set_attr "mode" "TI,V4SF")])
;; V4SI pattern built from one SImode register operand.  Alternatives:
;;   0: "Y2" registers                         -> %vpshufd with immediate 0
;;   1: operand 1 tied to the destination ("0") -> shufps with immediate 0
7607 (define_insn "*vec_dupv4si"
7608 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7610 (match_operand:SI 1 "register_operand" " Y2,0")))]
7613 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7614 shufps\t{$0, %0, %0|%0, %0, 0}"
7615 [(set_attr "type" "sselog1")
7616 (set_attr "length_immediate" "1")
7617 (set_attr "mode" "TI,V4SF")])
;; VEX-encoded V2DI pattern built from one DImode operand.  Alternatives:
;;   0: SSE-register source -> vpunpcklqdq with the source used twice
;;   1: memory source       -> vmovddup
7619 (define_insn "*vec_dupv2di_avx"
7620 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7622 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7625 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7626 vmovddup\t{%1, %0|%0, %1}"
7627 [(set_attr "type" "sselog1")
7628 (set_attr "prefix" "vex")
7629 (set_attr "mode" "TI,DF")])
;; V2DI pattern built from one DImode operand, two alternatives: operand
;; 1 tied to the destination ("0"), or a memory source.  The visible
;; template for the memory alternative is movddup.
7631 (define_insn "*vec_dupv2di_sse3"
7632 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7634 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7638 movddup\t{%1, %0|%0, %1}"
7639 [(set_attr "type" "sselog1")
7640 (set_attr "mode" "TI,DF")])
;; V2DI pattern built from one DImode register operand that is tied to
;; the destination ("0") in both alternatives.  The alternatives differ
;; in destination class ("Y2" vs "x"), type (sselog1 vs ssemov) and
;; mode (TI vs V4SF).
7642 (define_insn "*vec_dupv2di"
7643 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7645 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7650 [(set_attr "type" "sselog1,ssemov")
7651 (set_attr "mode" "TI,V4SF")])
;; V2SI pattern built from two SImode operands, AVX variant.
;; Alternatives:
;;   0: vpinsrd with immediate 1 (operand 2 from register or memory)
;;   1: vpunpckldq of two SSE registers
;;   2: vmovd of operand 1 when operand 2 is "C"
;;   3: MMX punpckldq (operand 1 tied to the destination)
;;   4: MMX movd of operand 1 when operand 2 is "C"
;; The two MMX alternatives (3 and 4) get prefix "orig"; the others "vex".
7653 (define_insn "*vec_concatv2si_avx"
7654 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7656 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7657 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7660 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7661 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7662 vmovd\t{%1, %0|%0, %1}
7663 punpckldq\t{%2, %0|%0, %2}
7664 movd\t{%1, %0|%0, %1}"
7665 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7666 (set_attr "prefix_extra" "1,*,*,*,*")
7667 (set_attr "length_immediate" "1,*,*,*,*")
7668 (set (attr "prefix")
7669 (if_then_else (eq_attr "alternative" "3,4")
7670 (const_string "orig")
7671 (const_string "vex")))
7672 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; V2SI pattern built from two SImode operands, non-VEX variant with a
;; pinsrd alternative.  Alternatives:
;;   0: pinsrd with immediate 1 (operand 1 tied to the destination)
;;   1: punpckldq of SSE registers (operand 1 tied to the destination)
;;   2: movd of operand 1 when operand 2 is "C"
;;   3: MMX punpckldq (operand 1 tied to the destination)
;;   4: MMX movd of operand 1 when operand 2 is "C"
7674 (define_insn "*vec_concatv2si_sse4_1"
7675 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7677 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7678 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7681 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7682 punpckldq\t{%2, %0|%0, %2}
7683 movd\t{%1, %0|%0, %1}
7684 punpckldq\t{%2, %0|%0, %2}
7685 movd\t{%1, %0|%0, %1}"
7686 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7687 (set_attr "prefix_extra" "1,*,*,*,*")
7688 (set_attr "length_immediate" "1,*,*,*,*")
7689 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; V2SI pattern built from two SImode operands; operand 2 is restricted
;; to a register or zero (reg_or_0_operand).  Alternatives:
;;   0: punpckldq of SSE registers (operand 1 tied to the destination)
;;   1: movd of operand 1 when operand 2 is "C"
;;   2: MMX punpckldq (operand 1 tied to the destination)
;;   3: MMX movd of operand 1 when operand 2 is "C"
7691 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7692 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7693 ;; alternatives pretty much forces the MMX alternative to be chosen.
7694 (define_insn "*vec_concatv2si_sse2"
7695 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7697 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7698 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7701 punpckldq\t{%2, %0|%0, %2}
7702 movd\t{%1, %0|%0, %1}
7703 punpckldq\t{%2, %0|%0, %2}
7704 movd\t{%1, %0|%0, %1}"
7705 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7706 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI pattern built from two SImode operands, using single-precision
;; SSE instructions for the SSE-register alternatives.  Alternatives:
;;   0: unpcklps (operand 1 tied to the destination)
;;   1: movss from memory when operand 2 is "C"
;;   2: MMX punpckldq (operand 1 tied to the destination)
;;   3: MMX movd of operand 1 when operand 2 is "C"
7708 (define_insn "*vec_concatv2si_sse"
7709 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7711 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7712 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7715 unpcklps\t{%2, %0|%0, %2}
7716 movss\t{%1, %0|%0, %1}
7717 punpckldq\t{%2, %0|%0, %2}
7718 movd\t{%1, %0|%0, %1}"
7719 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7720 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; VEX-encoded V4SI pattern built from two V2SI operands.  Alternatives:
;;   0: both operands in SSE registers -> vpunpcklqdq
;;   1: operand 2 in memory            -> vmovhps
7722 (define_insn "*vec_concatv4si_1_avx"
7723 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7725 (match_operand:V2SI 1 "register_operand" " x,x")
7726 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7729 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7730 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7731 [(set_attr "type" "sselog,ssemov")
7732 (set_attr "prefix" "vex")
7733 (set_attr "mode" "TI,V2SF")])
;; Non-VEX V4SI pattern built from two V2SI operands; operand 1 is tied
;; to the destination in every alternative.  Alternatives:
;;   0: operand 2 in a "Y2" register -> punpcklqdq
;;   1: operand 2 in an SSE register -> movlhps
;;   2: operand 2 in memory          -> movhps
7735 (define_insn "*vec_concatv4si_1"
7736 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7738 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7739 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7742 punpcklqdq\t{%2, %0|%0, %2}
7743 movlhps\t{%2, %0|%0, %2}
7744 movhps\t{%2, %0|%0, %2}"
7745 [(set_attr "type" "sselog,ssemov,ssemov")
7746 (set_attr "mode" "TI,V4SF,V2SF")])
;; V2DI pattern built from two DImode operands; enabled for
;; !TARGET_64BIT && TARGET_AVX.  Alternatives:
;;   0: operand 1 in memory, operand 2 "C"       -> vmovq
;;   1: operand 1 in an MMX register ("*y"), "C" -> movq2dq (prefix "orig")
;;   2: both operands in SSE registers           -> vpunpcklqdq
;;   3: operand 2 in memory                      -> vmovhps
7748 (define_insn "*vec_concatv2di_avx"
7749 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7751 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7752 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7753 "!TARGET_64BIT && TARGET_AVX"
7755 vmovq\t{%1, %0|%0, %1}
7756 movq2dq\t{%1, %0|%0, %1}
7757 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7758 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7759 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7760 (set (attr "prefix")
7761 (if_then_else (eq_attr "alternative" "1")
7762 (const_string "orig")
7763 (const_string "vex")))
7764 (set_attr "mode" "TI,TI,TI,V2SF")])
;; V2DI pattern built from two DImode operands; enabled for
;; !TARGET_64BIT && TARGET_SSE.  Alternatives:
;;   0: operand 1 in memory or a "Y2" register, operand 2 "C" -> movq
;;   1: operand 1 in an MMX register ("*y"), operand 2 "C"    -> movq2dq
;;   2: operand 1 tied to destination, operand 2 in "Y2"      -> punpcklqdq
;;   3: operand 1 tied to destination, operand 2 in "x"       -> movlhps
;;   4: operand 1 tied to destination, operand 2 in memory    -> movhps
7766 (define_insn "vec_concatv2di"
7767 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7769 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7770 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7771 "!TARGET_64BIT && TARGET_SSE"
7773 movq\t{%1, %0|%0, %1}
7774 movq2dq\t{%1, %0|%0, %1}
7775 punpcklqdq\t{%2, %0|%0, %2}
7776 movlhps\t{%2, %0|%0, %2}
7777 movhps\t{%2, %0|%0, %2}"
7778 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7779 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; V2DI pattern built from two DImode operands; enabled for
;; TARGET_64BIT && TARGET_AVX.  Alternatives:
;;   0: vpinsrq with immediate 1 (operand 2 from general register/memory)
;;   1: vmovq from memory, operand 2 "C"
;;   2: vmovq from a general register, operand 2 "C"
;;   3: movq2dq from an MMX register ("*y"), operand 2 "C" (prefix "orig")
;;   4: vpunpcklqdq of two SSE registers
;;   5: vmovhps with operand 2 in memory
7781 (define_insn "*vec_concatv2di_rex64_avx"
7782 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7784 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7785 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7786 "TARGET_64BIT && TARGET_AVX"
7788 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7789 vmovq\t{%1, %0|%0, %1}
7790 vmovq\t{%1, %0|%0, %1}
7791 movq2dq\t{%1, %0|%0, %1}
7792 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7793 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7794 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7795 (set_attr "prefix_extra" "1,*,*,*,*,*")
7796 (set_attr "length_immediate" "1,*,*,*,*,*")
7797 (set (attr "prefix")
7798 (if_then_else (eq_attr "alternative" "3")
7799 (const_string "orig")
7800 (const_string "vex")))
7801 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; V2DI pattern built from two DImode operands; enabled for
;; TARGET_64BIT && TARGET_SSE4_1.  Seven alternatives, in template order:
;; pinsrq (immediate 1), movq, movq, movq2dq, punpcklqdq, movlhps, movhps.
;; Operand 1 is tied to the destination for pinsrq and for the last
;; three alternatives.  prefix_rex is forced to 1 for alternatives 0 and 2.
7803 (define_insn "*vec_concatv2di_rex64_sse4_1"
7804 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7806 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7807 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7808 "TARGET_64BIT && TARGET_SSE4_1"
7810 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7811 movq\t{%1, %0|%0, %1}
7812 movq\t{%1, %0|%0, %1}
7813 movq2dq\t{%1, %0|%0, %1}
7814 punpcklqdq\t{%2, %0|%0, %2}
7815 movlhps\t{%2, %0|%0, %2}
7816 movhps\t{%2, %0|%0, %2}"
7817 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7818 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7819 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7820 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7821 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; V2DI pattern built from two DImode operands; enabled for
;; TARGET_64BIT && TARGET_SSE.  Six alternatives, in template order:
;; movq (memory or "Y2" source), movq (general-register source),
;; movq2dq (MMX source), punpcklqdq, movlhps, movhps.  Operand 1 is tied
;; to the destination in the last three.  prefix_rex is forced to 1 for
;; the general-register alternative (1).
7823 (define_insn "*vec_concatv2di_rex64_sse"
7824 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7826 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7827 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7828 "TARGET_64BIT && TARGET_SSE"
7830 movq\t{%1, %0|%0, %1}
7831 movq\t{%1, %0|%0, %1}
7832 movq2dq\t{%1, %0|%0, %1}
7833 punpcklqdq\t{%2, %0|%0, %2}
7834 movlhps\t{%2, %0|%0, %2}
7835 movhps\t{%2, %0|%0, %2}"
7836 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7837 (set_attr "prefix_rex" "*,1,*,*,*,*")
7838 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (unsigned, high half) - confirm against the helper definitions.
7840 (define_expand "vec_unpacku_hi_v16qi"
7841 [(match_operand:V8HI 0 "register_operand" "")
7842 (match_operand:V16QI 1 "register_operand" "")]
7846 ix86_expand_sse4_unpack (operands, true, true);
7848 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (signed, high half) - confirm against the helper definitions.
7852 (define_expand "vec_unpacks_hi_v16qi"
7853 [(match_operand:V8HI 0 "register_operand" "")
7854 (match_operand:V16QI 1 "register_operand" "")]
7858 ix86_expand_sse4_unpack (operands, false, true);
7860 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (unsigned, low half) - confirm against the helper definitions.
7864 (define_expand "vec_unpacku_lo_v16qi"
7865 [(match_operand:V8HI 0 "register_operand" "")
7866 (match_operand:V16QI 1 "register_operand" "")]
7870 ix86_expand_sse4_unpack (operands, true, false);
7872 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (signed, low half) - confirm against the helper definitions.
7876 (define_expand "vec_unpacks_lo_v16qi"
7877 [(match_operand:V8HI 0 "register_operand" "")
7878 (match_operand:V16QI 1 "register_operand" "")]
7882 ix86_expand_sse4_unpack (operands, false, false);
7884 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (unsigned, high half) - confirm against the helper definitions.
7888 (define_expand "vec_unpacku_hi_v8hi"
7889 [(match_operand:V4SI 0 "register_operand" "")
7890 (match_operand:V8HI 1 "register_operand" "")]
7894 ix86_expand_sse4_unpack (operands, true, true);
7896 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (signed, high half) - confirm against the helper definitions.
7900 (define_expand "vec_unpacks_hi_v8hi"
7901 [(match_operand:V4SI 0 "register_operand" "")
7902 (match_operand:V8HI 1 "register_operand" "")]
7906 ix86_expand_sse4_unpack (operands, false, true);
7908 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (unsigned, low half) - confirm against the helper definitions.
7912 (define_expand "vec_unpacku_lo_v8hi"
7913 [(match_operand:V4SI 0 "register_operand" "")
7914 (match_operand:V8HI 1 "register_operand" "")]
7918 ix86_expand_sse4_unpack (operands, true, false);
7920 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (signed, low half) - confirm against the helper definitions.
7924 (define_expand "vec_unpacks_lo_v8hi"
7925 [(match_operand:V4SI 0 "register_operand" "")
7926 (match_operand:V8HI 1 "register_operand" "")]
7930 ix86_expand_sse4_unpack (operands, false, false);
7932 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (unsigned, high half) - confirm against the helper definitions.
7936 (define_expand "vec_unpacku_hi_v4si"
7937 [(match_operand:V2DI 0 "register_operand" "")
7938 (match_operand:V4SI 1 "register_operand" "")]
7942 ix86_expand_sse4_unpack (operands, true, true);
7944 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (signed, high half) - confirm against the helper definitions.
7948 (define_expand "vec_unpacks_hi_v4si"
7949 [(match_operand:V2DI 0 "register_operand" "")
7950 (match_operand:V4SI 1 "register_operand" "")]
7954 ix86_expand_sse4_unpack (operands, false, true);
7956 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (unsigned, low half) - confirm against the helper definitions.
7960 (define_expand "vec_unpacku_lo_v4si"
7961 [(match_operand:V2DI 0 "register_operand" "")
7962 (match_operand:V4SI 1 "register_operand" "")]
7966 ix86_expand_sse4_unpack (operands, true, false);
7968 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; (signed, low half) - confirm against the helper definitions.
7972 (define_expand "vec_unpacks_lo_v4si"
7973 [(match_operand:V2DI 0 "register_operand" "")
7974 (match_operand:V4SI 1 "register_operand" "")]
7978 ix86_expand_sse4_unpack (operands, false, false);
7980 ix86_expand_sse_unpack (operands, false, false);
7984 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7988 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI uavg pattern.  The RTL combines operands 1 and
;; 2 with a constant vector of sixteen 1s; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands)
;; to adjust the operands before the insn is generated.
7990 (define_expand "sse2_uavgv16qi3"
7991 [(set (match_operand:V16QI 0 "register_operand" "")
7997 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7999 (match_operand:V16QI 2 "nonimmediate_operand" "")))
8000 (const_vector:V16QI [(const_int 1) (const_int 1)
8001 (const_int 1) (const_int 1)
8002 (const_int 1) (const_int 1)
8003 (const_int 1) (const_int 1)
8004 (const_int 1) (const_int 1)
8005 (const_int 1) (const_int 1)
8006 (const_int 1) (const_int 1)
8007 (const_int 1) (const_int 1)]))
8010 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI uavg pattern: three-operand vpavgb.  Operand 1
;; is marked commutative ("%x"); operand 2 may be a register or memory.
;; Enabled when TARGET_AVX holds and ix86_binary_operator_ok accepts the
;; operands.
8012 (define_insn "*avx_uavgv16qi3"
8013 [(set (match_operand:V16QI 0 "register_operand" "=x")
8019 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
8021 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
8022 (const_vector:V16QI [(const_int 1) (const_int 1)
8023 (const_int 1) (const_int 1)
8024 (const_int 1) (const_int 1)
8025 (const_int 1) (const_int 1)
8026 (const_int 1) (const_int 1)
8027 (const_int 1) (const_int 1)
8028 (const_int 1) (const_int 1)
8029 (const_int 1) (const_int 1)]))
8031 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8032 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
8033 [(set_attr "type" "sseiadd")
8034 (set_attr "prefix" "vex")
8035 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI uavg pattern: two-operand pavgb.  Operand 1 is
;; commutative and tied to the destination ("%0"); operand 2 may be a
;; register or memory.  Enabled when TARGET_SSE2 holds and
;; ix86_binary_operator_ok accepts the operands.
8037 (define_insn "*sse2_uavgv16qi3"
8038 [(set (match_operand:V16QI 0 "register_operand" "=x")
8044 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
8046 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
8047 (const_vector:V16QI [(const_int 1) (const_int 1)
8048 (const_int 1) (const_int 1)
8049 (const_int 1) (const_int 1)
8050 (const_int 1) (const_int 1)
8051 (const_int 1) (const_int 1)
8052 (const_int 1) (const_int 1)
8053 (const_int 1) (const_int 1)
8054 (const_int 1) (const_int 1)]))
8056 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8057 "pavgb\t{%2, %0|%0, %2}"
8058 [(set_attr "type" "sseiadd")
8059 (set_attr "prefix_data16" "1")
8060 (set_attr "mode" "TI")])
;; Expander for the V8HI uavg pattern.  The RTL combines operands 1 and
;; 2 with a constant vector of eight 1s; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands) to
;; adjust the operands before the insn is generated.
8062 (define_expand "sse2_uavgv8hi3"
8063 [(set (match_operand:V8HI 0 "register_operand" "")
8069 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8071 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8072 (const_vector:V8HI [(const_int 1) (const_int 1)
8073 (const_int 1) (const_int 1)
8074 (const_int 1) (const_int 1)
8075 (const_int 1) (const_int 1)]))
8078 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI uavg pattern: three-operand vpavgw.  Operand 1
;; is marked commutative ("%x"); operand 2 may be a register or memory.
;; Enabled when TARGET_AVX holds and ix86_binary_operator_ok accepts the
;; operands.
8080 (define_insn "*avx_uavgv8hi3"
8081 [(set (match_operand:V8HI 0 "register_operand" "=x")
8087 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8089 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8090 (const_vector:V8HI [(const_int 1) (const_int 1)
8091 (const_int 1) (const_int 1)
8092 (const_int 1) (const_int 1)
8093 (const_int 1) (const_int 1)]))
8095 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8096 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
8097 [(set_attr "type" "sseiadd")
8098 (set_attr "prefix" "vex")
8099 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI uavg pattern: two-operand pavgw.  Operand 1 is
;; commutative and tied to the destination ("%0"); operand 2 may be a
;; register or memory.  Enabled when TARGET_SSE2 holds and
;; ix86_binary_operator_ok accepts the operands.
8101 (define_insn "*sse2_uavgv8hi3"
8102 [(set (match_operand:V8HI 0 "register_operand" "=x")
8108 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8110 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8111 (const_vector:V8HI [(const_int 1) (const_int 1)
8112 (const_int 1) (const_int 1)
8113 (const_int 1) (const_int 1)
8114 (const_int 1) (const_int 1)]))
8116 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8117 "pavgw\t{%2, %0|%0, %2}"
8118 [(set_attr "type" "sseiadd")
8119 (set_attr "prefix_data16" "1")
8120 (set_attr "mode" "TI")])
;; AVX vpsadbw, modelled as an unspec of two V16QI operands producing a
;; V2DI result (three-operand form; operand 2 may be in memory).
8122 ;; The correct representation for this is absolutely enormous, and
8123 ;; surely not generally useful.
8124 (define_insn "*avx_psadbw"
8125 [(set (match_operand:V2DI 0 "register_operand" "=x")
8126 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
8127 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8130 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8131 [(set_attr "type" "sseiadd")
8132 (set_attr "prefix" "vex")
8133 (set_attr "mode" "TI")])
;; SSE2 psadbw, modelled as an unspec of two V16QI operands producing a
;; V2DI result.  Operand 1 is tied to the destination ("0"); operand 2
;; may be a register or memory.
8135 (define_insn "sse2_psadbw"
8136 [(set (match_operand:V2DI 0 "register_operand" "=x")
8137 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8138 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8141 "psadbw\t{%2, %0|%0, %2}"
8142 [(set_attr "type" "sseiadd")
8143 (set_attr "atom_unit" "simul")
8144 (set_attr "prefix_data16" "1")
8145 (set_attr "mode" "TI")])
;; vmovmsk<ssemodesuffix> for the modes in AVX256MODEF2P: takes one
;; vector register operand and writes an SImode general register.
;; Enabled when AVX256_VEC_FLOAT_MODE_P (<MODE>mode) holds.
8147 (define_insn "avx_movmsk<ssemodesuffix>256"
8148 [(set (match_operand:SI 0 "register_operand" "=r")
8150 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8152 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8153 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8154 [(set_attr "type" "ssecvt")
8155 (set_attr "prefix" "vex")
8156 (set_attr "mode" "<MODE>")])
;; %vmovmsk<ssemodesuffix> for the modes in SSEMODEF2P: takes one vector
;; register operand and writes an SImode general register.  Enabled when
;; SSE_VEC_FLOAT_MODE_P (<MODE>mode) holds.
8158 (define_insn "<sse>_movmsk<ssemodesuffix>"
8159 [(set (match_operand:SI 0 "register_operand" "=r")
8161 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8163 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8164 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8165 [(set_attr "type" "ssemov")
8166 (set_attr "prefix" "maybe_vex")
8167 (set_attr "mode" "<MODE>")])
;; %vpmovmskb: unspec of one V16QI SSE register producing an SImode
;; value in a general register.
8169 (define_insn "sse2_pmovmskb"
8170 [(set (match_operand:SI 0 "register_operand" "=r")
8171 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8174 "%vpmovmskb\t{%1, %0|%0, %1}"
8175 [(set_attr "type" "ssemov")
8176 (set_attr "prefix_data16" "1")
8177 (set_attr "prefix" "maybe_vex")
8178 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination (operand 0) set
;; from an unspec whose visible inputs are two V16QI register operands.
8180 (define_expand "sse2_maskmovdqu"
8181 [(set (match_operand:V16QI 0 "memory_operand" "")
8182 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8183 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit form of maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The store
;; address is SImode operand 0, constrained to register class "D"; the
;; unspec reads operands 1 and 2 and the previous contents of the
;; destination memory.  The address register does not appear in the
;; assembler template, so length_vex is fixed at 3.
8188 (define_insn "*sse2_maskmovdqu"
8189 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8190 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8191 (match_operand:V16QI 2 "register_operand" "x")
8192 (mem:V16QI (match_dup 0))]
8194 "TARGET_SSE2 && !TARGET_64BIT"
8195 ;; @@@ check ordering of operands in intel/nonintel syntax
8196 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8197 [(set_attr "type" "ssemov")
8198 (set_attr "prefix_data16" "1")
8199 ;; The implicit %rdi operand confuses default length_vex computation.
8200 (set_attr "length_vex" "3")
8201 (set_attr "prefix" "maybe_vex")
8202 (set_attr "mode" "TI")])
;; 64-bit form of maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  The store
;; address is DImode operand 0, constrained to register class "D"; the
;; unspec reads operands 1 and 2 and the previous contents of the
;; destination memory.  length_vex is computed explicitly: 3 + 1 when
;; operand 2's register number is at or above FIRST_REX_SSE_REG,
;; otherwise 2 + 1.
8204 (define_insn "*sse2_maskmovdqu_rex64"
8205 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8206 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8207 (match_operand:V16QI 2 "register_operand" "x")
8208 (mem:V16QI (match_dup 0))]
8210 "TARGET_SSE2 && TARGET_64BIT"
8211 ;; @@@ check ordering of operands in intel/nonintel syntax
8212 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8213 [(set_attr "type" "ssemov")
8214 (set_attr "prefix_data16" "1")
8215 ;; The implicit %rdi operand confuses default length_vex computation.
8216 (set (attr "length_vex")
8217 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8218 (set_attr "prefix" "maybe_vex")
8219 (set_attr "mode" "TI")])
;; ldmxcsr: an unspec_volatile taking one SImode memory operand.  The
;; insn is marked as a memory load and tagged "mxcsr" for Atom
;; scheduling.
8221 (define_insn "sse_ldmxcsr"
8222 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8226 [(set_attr "type" "sse")
8227 (set_attr "atom_sse_attr" "mxcsr")
8228 (set_attr "prefix" "maybe_vex")
8229 (set_attr "memory" "load")])
;; stmxcsr: sets an SImode memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR).  The insn is marked as a memory store and tagged
;; "mxcsr" for Atom scheduling.
8231 (define_insn "sse_stmxcsr"
8232 [(set (match_operand:SI 0 "memory_operand" "=m")
8233 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8236 [(set_attr "type" "sse")
8237 (set_attr "atom_sse_attr" "mxcsr")
8238 (set_attr "prefix" "maybe_vex")
8239 (set_attr "memory" "store")])
;; Expander for sfence (TARGET_SSE || TARGET_3DNOW_A).  The preparation
;; code creates operand 0 as a BLKmode MEM with a scratch address and
;; marks it volatile; the pattern sets that MEM from an UNSPEC_SFENCE
;; unspec of itself.
8241 (define_expand "sse_sfence"
8243 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8244 "TARGET_SSE || TARGET_3DNOW_A"
8246 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8247 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the sfence pattern: a BLKmode operand set from an
;; UNSPEC_SFENCE unspec of itself.  It has no address bytes
;; (length_address 0), an "unknown" memory effect, and is tagged "fence"
;; for Atom scheduling.
8250 (define_insn "*sse_sfence"
8251 [(set (match_operand:BLK 0 "" "")
8252 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8253 "TARGET_SSE || TARGET_3DNOW_A"
8255 [(set_attr "type" "sse")
8256 (set_attr "length_address" "0")
8257 (set_attr "atom_sse_attr" "fence")
8258 (set_attr "memory" "unknown")])
;; clflush: an unspec_volatile taking one address operand ("p").  It has
;; an "unknown" memory effect and is tagged "fence" for Atom scheduling.
8260 (define_insn "sse2_clflush"
8261 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8265 [(set_attr "type" "sse")
8266 (set_attr "atom_sse_attr" "fence")
8267 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code creates operand 0 as a
;; BLKmode MEM with a scratch address and marks it volatile; the pattern
;; sets that MEM from an UNSPEC_MFENCE unspec of itself.
8269 (define_expand "sse2_mfence"
8271 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8274 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8275 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the mfence pattern; enabled for
;; TARGET_64BIT || TARGET_SSE2.  It has no address bytes
;; (length_address 0), an "unknown" memory effect, and is tagged "fence"
;; for Atom scheduling.
8278 (define_insn "*sse2_mfence"
8279 [(set (match_operand:BLK 0 "" "")
8280 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8281 "TARGET_64BIT || TARGET_SSE2"
8283 [(set_attr "type" "sse")
8284 (set_attr "length_address" "0")
8285 (set_attr "atom_sse_attr" "fence")
8286 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code creates operand 0 as a
;; BLKmode MEM with a scratch address and marks it volatile; the pattern
;; sets that MEM from an UNSPEC_LFENCE unspec of itself.
8288 (define_expand "sse2_lfence"
8290 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8293 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8294 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the lfence pattern.  It has no address bytes
;; (length_address 0) and an "unknown" memory effect.  Unlike the sfence
;; and mfence insns, its Atom scheduling tag is "lfence" rather than
;; "fence".
8297 (define_insn "*sse2_lfence"
8298 [(set (match_operand:BLK 0 "" "")
8299 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8302 [(set_attr "type" "sse")
8303 (set_attr "length_address" "0")
8304 (set_attr "atom_sse_attr" "lfence")
8305 (set_attr "memory" "unknown")])
;; mwait: an unspec_volatile taking two SImode register operands,
;; constrained to "a" and "c" respectively.  The insn length is fixed at
;; 3 bytes.
8307 (define_insn "sse3_mwait"
8308 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8309 (match_operand:SI 1 "register_operand" "c")]
8312 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8313 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8314 ;; we only need to set up 32bit registers.
8316 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): an unspec_volatile
;; taking three SImode register operands, constrained to "a", "c" and
;; "d".  All three operands are printed in the assembler template; the
;; insn length is fixed at 3 bytes.
8318 (define_insn "sse3_monitor"
8319 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8320 (match_operand:SI 1 "register_operand" "c")
8321 (match_operand:SI 2 "register_operand" "d")]
8323 "TARGET_SSE3 && !TARGET_64BIT"
8324 "monitor\t%0, %1, %2"
8325 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT).  Operand 0 is DImode in
;; register class "a"; operands 1 and 2 remain SImode in "c" and "d".
;; The insn length is fixed at 3 bytes.
8327 (define_insn "sse3_monitor64"
8328 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8329 (match_operand:SI 1 "register_operand" "c")
8330 (match_operand:SI 2 "register_operand" "d")]
8332 "TARGET_SSE3 && TARGET_64BIT"
8333 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8334 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8335 ;; zero extended to 64bit, we only need to set up 32bit registers.
8337 [(set_attr "length" "3")])
8339 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8341 ;; SSSE3 instructions
8343 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vphaddw on V8HI.  The RTL spells the operation out element by
;; element: adjacent HImode elements (0/1, 2/3, 4/5, 6/7) are selected
;; first from operand 1 and then from operand 2.  Three-operand form;
;; operand 2 may be a register or memory.
8345 (define_insn "*avx_phaddwv8hi3"
8346 [(set (match_operand:V8HI 0 "register_operand" "=x")
8352 (match_operand:V8HI 1 "register_operand" "x")
8353 (parallel [(const_int 0)]))
8354 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8356 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8357 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8360 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8361 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8363 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8364 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8369 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8370 (parallel [(const_int 0)]))
8371 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8373 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8374 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8377 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8378 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8380 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8381 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8383 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8384 [(set_attr "type" "sseiadd")
8385 (set_attr "prefix_extra" "1")
8386 (set_attr "prefix" "vex")
8387 (set_attr "mode" "TI")])
;; SSSE3 phaddw on V8HI.  The RTL spells the operation out element by
;; element: adjacent HImode elements (0/1, 2/3, 4/5, 6/7) are selected
;; first from operand 1 and then from operand 2.  Two-operand form:
;; operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.
8389 (define_insn "ssse3_phaddwv8hi3"
8390 [(set (match_operand:V8HI 0 "register_operand" "=x")
8396 (match_operand:V8HI 1 "register_operand" "0")
8397 (parallel [(const_int 0)]))
8398 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8400 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8401 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8404 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8405 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8407 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8408 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8413 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8414 (parallel [(const_int 0)]))
8415 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8417 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8418 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8421 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8422 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8424 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8425 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8427 "phaddw\t{%2, %0|%0, %2}"
8428 [(set_attr "type" "sseiadd")
8429 (set_attr "atom_unit" "complex")
8430 (set_attr "prefix_data16" "1")
8431 (set_attr "prefix_extra" "1")
8432 (set_attr "mode" "TI")])
;; MMX-register phaddw on V4HI ("y" constraints).  Adjacent HImode
;; elements (0/1, 2/3) are selected first from operand 1 and then from
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be an
;; MMX register or memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8434 (define_insn "ssse3_phaddwv4hi3"
8435 [(set (match_operand:V4HI 0 "register_operand" "=y")
8440 (match_operand:V4HI 1 "register_operand" "0")
8441 (parallel [(const_int 0)]))
8442 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8444 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8445 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8449 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8450 (parallel [(const_int 0)]))
8451 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8453 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8454 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8456 "phaddw\t{%2, %0|%0, %2}"
8457 [(set_attr "type" "sseiadd")
8458 (set_attr "atom_unit" "complex")
8459 (set_attr "prefix_extra" "1")
8460 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8461 (set_attr "mode" "DI")])
;; AVX vphaddd on V4SI.  Adjacent SImode elements (0/1, 2/3) are
;; selected first from operand 1 and then from operand 2.  Three-operand
;; form; operand 2 may be a register or memory.
8463 (define_insn "*avx_phadddv4si3"
8464 [(set (match_operand:V4SI 0 "register_operand" "=x")
8469 (match_operand:V4SI 1 "register_operand" "x")
8470 (parallel [(const_int 0)]))
8471 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8473 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8474 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8478 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8479 (parallel [(const_int 0)]))
8480 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8482 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8483 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8485 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8486 [(set_attr "type" "sseiadd")
8487 (set_attr "prefix_extra" "1")
8488 (set_attr "prefix" "vex")
8489 (set_attr "mode" "TI")])
;; ssse3_phadddv4si3: V4SI variant emitted as the two-operand phaddd.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm").  The RTL pairs elements (0,1) and (2,3) of each input.
8491 (define_insn "ssse3_phadddv4si3"
8492 [(set (match_operand:V4SI 0 "register_operand" "=x")
8497 (match_operand:V4SI 1 "register_operand" "0")
8498 (parallel [(const_int 0)]))
8499 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8501 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8502 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8506 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8507 (parallel [(const_int 0)]))
8508 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8510 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8511 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8513 "phaddd\t{%2, %0|%0, %2}"
8514 [(set_attr "type" "sseiadd")
8515 (set_attr "atom_unit" "complex")
8516 (set_attr "prefix_data16" "1")
8517 (set_attr "prefix_extra" "1")
8518 (set_attr "mode" "TI")])
;; ssse3_phadddv2si3: V2SI variant using the "y" register constraint.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("ym").  Elements 0 and 1 of each input are paired, and the
;; template emits the two-operand phaddd.
8520 (define_insn "ssse3_phadddv2si3"
8521 [(set (match_operand:V2SI 0 "register_operand" "=y")
8525 (match_operand:V2SI 1 "register_operand" "0")
8526 (parallel [(const_int 0)]))
8527 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8530 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8531 (parallel [(const_int 0)]))
8532 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8534 "phaddd\t{%2, %0|%0, %2}"
8535 [(set_attr "type" "sseiadd")
8536 (set_attr "atom_unit" "complex")
8537 (set_attr "prefix_extra" "1")
8538 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8539 (set_attr "mode" "DI")])
;; *avx_phaddswv8hi3: V8HI variant emitted as the three-operand vphaddsw
;; with the "vex" prefix attribute.  Operand 1 is an "x" register that is
;; not tied to the destination; operand 2 may be a register or memory.
;; The RTL pairs elements (0,1), (2,3), (4,5) and (6,7) of each input.
8541 (define_insn "*avx_phaddswv8hi3"
8542 [(set (match_operand:V8HI 0 "register_operand" "=x")
8548 (match_operand:V8HI 1 "register_operand" "x")
8549 (parallel [(const_int 0)]))
8550 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8552 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8553 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8556 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8557 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8559 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8560 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8565 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8566 (parallel [(const_int 0)]))
8567 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8569 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8570 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8573 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8574 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8576 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8577 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8579 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8580 [(set_attr "type" "sseiadd")
8581 (set_attr "prefix_extra" "1")
8582 (set_attr "prefix" "vex")
8583 (set_attr "mode" "TI")])
;; ssse3_phaddswv8hi3: V8HI variant emitted as the two-operand phaddsw.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm").  The RTL pairs elements (0,1), (2,3), (4,5) and (6,7)
;; of each input.
8585 (define_insn "ssse3_phaddswv8hi3"
8586 [(set (match_operand:V8HI 0 "register_operand" "=x")
8592 (match_operand:V8HI 1 "register_operand" "0")
8593 (parallel [(const_int 0)]))
8594 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8596 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8597 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8600 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8601 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8603 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8604 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8609 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8610 (parallel [(const_int 0)]))
8611 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8613 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8614 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8617 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8618 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8620 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8621 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8623 "phaddsw\t{%2, %0|%0, %2}"
8624 [(set_attr "type" "sseiadd")
8625 (set_attr "atom_unit" "complex")
8626 (set_attr "prefix_data16" "1")
8627 (set_attr "prefix_extra" "1")
8628 (set_attr "mode" "TI")])
;; ssse3_phaddswv4hi3: V4HI variant using the "y" register constraint.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("ym").  The RTL pairs elements (0,1) and (2,3) of each input,
;; and the template emits the two-operand phaddsw.
8630 (define_insn "ssse3_phaddswv4hi3"
8631 [(set (match_operand:V4HI 0 "register_operand" "=y")
8636 (match_operand:V4HI 1 "register_operand" "0")
8637 (parallel [(const_int 0)]))
8638 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8640 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8641 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8645 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8646 (parallel [(const_int 0)]))
8647 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8649 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8650 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8652 "phaddsw\t{%2, %0|%0, %2}"
8653 [(set_attr "type" "sseiadd")
8654 (set_attr "atom_unit" "complex")
8655 (set_attr "prefix_extra" "1")
8656 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8657 (set_attr "mode" "DI")])
;; *avx_phsubwv8hi3: V8HI variant emitted as the three-operand vphsubw with
;; the "vex" prefix attribute.  Operand 1 is an "x" register that is not
;; tied to the destination; operand 2 may be a register or memory ("xm").
;; The RTL pairs elements (0,1), (2,3), (4,5) and (6,7) of each input.
8659 (define_insn "*avx_phsubwv8hi3"
8660 [(set (match_operand:V8HI 0 "register_operand" "=x")
8666 (match_operand:V8HI 1 "register_operand" "x")
8667 (parallel [(const_int 0)]))
8668 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8670 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8671 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8674 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8675 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8677 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8678 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8683 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8684 (parallel [(const_int 0)]))
8685 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8687 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8688 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8691 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8692 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8694 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8695 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8697 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8698 [(set_attr "type" "sseiadd")
8699 (set_attr "prefix_extra" "1")
8700 (set_attr "prefix" "vex")
8701 (set_attr "mode" "TI")])
;; ssse3_phsubwv8hi3: V8HI variant emitted as the two-operand phsubw.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm").  The RTL pairs elements (0,1), (2,3), (4,5) and (6,7)
;; of each input.
8703 (define_insn "ssse3_phsubwv8hi3"
8704 [(set (match_operand:V8HI 0 "register_operand" "=x")
8710 (match_operand:V8HI 1 "register_operand" "0")
8711 (parallel [(const_int 0)]))
8712 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8714 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8715 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8718 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8719 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8721 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8722 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8727 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8728 (parallel [(const_int 0)]))
8729 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8731 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8732 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8735 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8736 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8738 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8739 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8741 "phsubw\t{%2, %0|%0, %2}"
8742 [(set_attr "type" "sseiadd")
8743 (set_attr "atom_unit" "complex")
8744 (set_attr "prefix_data16" "1")
8745 (set_attr "prefix_extra" "1")
8746 (set_attr "mode" "TI")])
;; ssse3_phsubwv4hi3: V4HI variant using the "y" register constraint.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("ym").  The RTL pairs elements (0,1) and (2,3) of each input,
;; and the template emits the two-operand phsubw.
8748 (define_insn "ssse3_phsubwv4hi3"
8749 [(set (match_operand:V4HI 0 "register_operand" "=y")
8754 (match_operand:V4HI 1 "register_operand" "0")
8755 (parallel [(const_int 0)]))
8756 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8758 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8759 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8763 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8764 (parallel [(const_int 0)]))
8765 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8767 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8768 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8770 "phsubw\t{%2, %0|%0, %2}"
8771 [(set_attr "type" "sseiadd")
8772 (set_attr "atom_unit" "complex")
8773 (set_attr "prefix_extra" "1")
8774 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8775 (set_attr "mode" "DI")])
;; *avx_phsubdv4si3: V4SI variant emitted as the three-operand vphsubd with
;; the "vex" prefix attribute.  Operand 1 is an "x" register that is not
;; tied to the destination; operand 2 may be a register or memory ("xm").
;; The RTL pairs elements (0,1) and (2,3) of each input.
8777 (define_insn "*avx_phsubdv4si3"
8778 [(set (match_operand:V4SI 0 "register_operand" "=x")
8783 (match_operand:V4SI 1 "register_operand" "x")
8784 (parallel [(const_int 0)]))
8785 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8787 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8788 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8792 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8793 (parallel [(const_int 0)]))
8794 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8796 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8797 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8799 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8800 [(set_attr "type" "sseiadd")
8801 (set_attr "prefix_extra" "1")
8802 (set_attr "prefix" "vex")
8803 (set_attr "mode" "TI")])
;; ssse3_phsubdv4si3: V4SI variant emitted as the two-operand phsubd.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm").  The RTL pairs elements (0,1) and (2,3) of each input.
8805 (define_insn "ssse3_phsubdv4si3"
8806 [(set (match_operand:V4SI 0 "register_operand" "=x")
8811 (match_operand:V4SI 1 "register_operand" "0")
8812 (parallel [(const_int 0)]))
8813 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8815 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8816 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8820 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8821 (parallel [(const_int 0)]))
8822 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8824 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8825 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8827 "phsubd\t{%2, %0|%0, %2}"
8828 [(set_attr "type" "sseiadd")
8829 (set_attr "atom_unit" "complex")
8830 (set_attr "prefix_data16" "1")
8831 (set_attr "prefix_extra" "1")
8832 (set_attr "mode" "TI")])
;; ssse3_phsubdv2si3: V2SI variant using the "y" register constraint.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("ym").  Elements 0 and 1 of each input are paired, and the
;; template emits the two-operand phsubd.
8834 (define_insn "ssse3_phsubdv2si3"
8835 [(set (match_operand:V2SI 0 "register_operand" "=y")
8839 (match_operand:V2SI 1 "register_operand" "0")
8840 (parallel [(const_int 0)]))
8841 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8844 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8845 (parallel [(const_int 0)]))
8846 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8848 "phsubd\t{%2, %0|%0, %2}"
8849 [(set_attr "type" "sseiadd")
8850 (set_attr "atom_unit" "complex")
8851 (set_attr "prefix_extra" "1")
8852 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8853 (set_attr "mode" "DI")])
;; *avx_phsubswv8hi3: V8HI variant emitted as the three-operand vphsubsw
;; with the "vex" prefix attribute.  Operand 1 is an "x" register that is
;; not tied to the destination; operand 2 may be a register or memory.
;; The RTL pairs elements (0,1), (2,3), (4,5) and (6,7) of each input.
8855 (define_insn "*avx_phsubswv8hi3"
8856 [(set (match_operand:V8HI 0 "register_operand" "=x")
8862 (match_operand:V8HI 1 "register_operand" "x")
8863 (parallel [(const_int 0)]))
8864 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8866 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8867 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8870 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8871 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8873 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8874 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8879 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8880 (parallel [(const_int 0)]))
8881 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8883 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8884 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8887 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8888 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8890 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8891 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8893 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8894 [(set_attr "type" "sseiadd")
8895 (set_attr "prefix_extra" "1")
8896 (set_attr "prefix" "vex")
8897 (set_attr "mode" "TI")])
;; ssse3_phsubswv8hi3: V8HI variant emitted as the two-operand phsubsw.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm").  The RTL pairs elements (0,1), (2,3), (4,5) and (6,7)
;; of each input.
8899 (define_insn "ssse3_phsubswv8hi3"
8900 [(set (match_operand:V8HI 0 "register_operand" "=x")
8906 (match_operand:V8HI 1 "register_operand" "0")
8907 (parallel [(const_int 0)]))
8908 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8910 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8911 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8914 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8915 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8917 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8918 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8923 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8924 (parallel [(const_int 0)]))
8925 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8927 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8928 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8931 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8932 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8934 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8935 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8937 "phsubsw\t{%2, %0|%0, %2}"
8938 [(set_attr "type" "sseiadd")
8939 (set_attr "atom_unit" "complex")
8940 (set_attr "prefix_data16" "1")
8941 (set_attr "prefix_extra" "1")
8942 (set_attr "mode" "TI")])
;; ssse3_phsubswv4hi3: V4HI variant using the "y" register constraint.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("ym").  The RTL pairs elements (0,1) and (2,3) of each input,
;; and the template emits the two-operand phsubsw.
8944 (define_insn "ssse3_phsubswv4hi3"
8945 [(set (match_operand:V4HI 0 "register_operand" "=y")
8950 (match_operand:V4HI 1 "register_operand" "0")
8951 (parallel [(const_int 0)]))
8952 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8954 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8955 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8959 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8960 (parallel [(const_int 0)]))
8961 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8963 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8964 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8966 "phsubsw\t{%2, %0|%0, %2}"
8967 [(set_attr "type" "sseiadd")
8968 (set_attr "atom_unit" "complex")
8969 (set_attr "prefix_extra" "1")
8970 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8971 (set_attr "mode" "DI")])
;; *avx_pmaddubsw128: produces a V8HI result from two V16QI inputs and is
;; emitted as the three-operand vpmaddubsw with the "vex" prefix attribute.
;; Each input is read through two vec_selects, one whose index list starts
;; at element 0 and one whose list starts at element 1.  Operand 1 is not
;; tied to the destination; operand 2 may be a register or memory.
8973 (define_insn "*avx_pmaddubsw128"
8974 [(set (match_operand:V8HI 0 "register_operand" "=x")
8979 (match_operand:V16QI 1 "register_operand" "x")
8980 (parallel [(const_int 0)
8990 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8991 (parallel [(const_int 0)
9001 (vec_select:V16QI (match_dup 1)
9002 (parallel [(const_int 1)
9011 (vec_select:V16QI (match_dup 2)
9012 (parallel [(const_int 1)
9019 (const_int 15)]))))))]
9021 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9022 [(set_attr "type" "sseiadd")
9023 (set_attr "prefix_extra" "1")
9024 (set_attr "prefix" "vex")
9025 (set_attr "mode" "TI")])
;; ssse3_pmaddubsw128: produces a V8HI result from two V16QI inputs and is
;; emitted as the two-operand pmaddubsw.  Each input is read through two
;; vec_selects, one whose index list starts at element 0 and one whose list
;; starts at element 1.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory ("xm").
9027 (define_insn "ssse3_pmaddubsw128"
9028 [(set (match_operand:V8HI 0 "register_operand" "=x")
9033 (match_operand:V16QI 1 "register_operand" "0")
9034 (parallel [(const_int 0)
9044 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9045 (parallel [(const_int 0)
9055 (vec_select:V16QI (match_dup 1)
9056 (parallel [(const_int 1)
9065 (vec_select:V16QI (match_dup 2)
9066 (parallel [(const_int 1)
9073 (const_int 15)]))))))]
9075 "pmaddubsw\t{%2, %0|%0, %2}"
9076 [(set_attr "type" "sseiadd")
9077 (set_attr "atom_unit" "simul")
9078 (set_attr "prefix_data16" "1")
9079 (set_attr "prefix_extra" "1")
9080 (set_attr "mode" "TI")])
;; ssse3_pmaddubsw: produces a V4HI result from two V8QI inputs, using the
;; "y" register constraint, and is emitted as the two-operand pmaddubsw.
;; Each input is read through two vec_selects, one whose index list starts
;; at element 0 and one whose list starts at element 1.  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory ("ym").
9082 (define_insn "ssse3_pmaddubsw"
9083 [(set (match_operand:V4HI 0 "register_operand" "=y")
9088 (match_operand:V8QI 1 "register_operand" "0")
9089 (parallel [(const_int 0)
9095 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9096 (parallel [(const_int 0)
9102 (vec_select:V8QI (match_dup 1)
9103 (parallel [(const_int 1)
9108 (vec_select:V8QI (match_dup 2)
9109 (parallel [(const_int 1)
9112 (const_int 7)]))))))]
9114 "pmaddubsw\t{%2, %0|%0, %2}"
9115 [(set_attr "type" "sseiadd")
9116 (set_attr "atom_unit" "simul")
9117 (set_attr "prefix_extra" "1")
9118 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9119 (set_attr "mode" "DI")])
;; ssse3_pmulhrswv8hi3 (expander): V8HI pattern whose two inputs are both
;; nonimmediate_operand and whose RTL includes an all-ones V8HI constant
;; vector.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, ...) before the insn
;; is generated.
9121 (define_expand "ssse3_pmulhrswv8hi3"
9122 [(set (match_operand:V8HI 0 "register_operand" "")
9129 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9131 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9133 (const_vector:V8HI [(const_int 1) (const_int 1)
9134 (const_int 1) (const_int 1)
9135 (const_int 1) (const_int 1)
9136 (const_int 1) (const_int 1)]))
9139 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *avx_pmulhrswv8hi3: V8HI insn emitted as the three-operand vpmulhrsw.
;; Enabled when TARGET_AVX holds and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is marked commutative with operand 2 ("%x");
;; operand 2 may be a register or memory ("xm").
9141 (define_insn "*avx_pmulhrswv8hi3"
9142 [(set (match_operand:V8HI 0 "register_operand" "=x")
9149 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9151 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9153 (const_vector:V8HI [(const_int 1) (const_int 1)
9154 (const_int 1) (const_int 1)
9155 (const_int 1) (const_int 1)
9156 (const_int 1) (const_int 1)]))
9158 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9159 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9160 [(set_attr "type" "sseimul")
9161 (set_attr "prefix_extra" "1")
9162 (set_attr "prefix" "vex")
9163 (set_attr "mode" "TI")])
;; *ssse3_pmulhrswv8hi3: V8HI insn emitted as the two-operand pmulhrsw.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is tied to the destination and marked commutative
;; with operand 2 ("%0"); operand 2 may be a register or memory ("xm").
9165 (define_insn "*ssse3_pmulhrswv8hi3"
9166 [(set (match_operand:V8HI 0 "register_operand" "=x")
9173 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9175 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9177 (const_vector:V8HI [(const_int 1) (const_int 1)
9178 (const_int 1) (const_int 1)
9179 (const_int 1) (const_int 1)
9180 (const_int 1) (const_int 1)]))
9182 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9183 "pmulhrsw\t{%2, %0|%0, %2}"
9184 [(set_attr "type" "sseimul")
9185 (set_attr "prefix_data16" "1")
9186 (set_attr "prefix_extra" "1")
9187 (set_attr "mode" "TI")])
;; ssse3_pmulhrswv4hi3 (expander): V4HI pattern whose two inputs are both
;; nonimmediate_operand and whose RTL includes an all-ones V4HI constant
;; vector.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, ...) before the insn
;; is generated.
9189 (define_expand "ssse3_pmulhrswv4hi3"
9190 [(set (match_operand:V4HI 0 "register_operand" "")
9197 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9199 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9201 (const_vector:V4HI [(const_int 1) (const_int 1)
9202 (const_int 1) (const_int 1)]))
9205 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; *ssse3_pmulhrswv4hi3: V4HI insn using the "y" register constraint,
;; emitted as the two-operand pmulhrsw.  Enabled when TARGET_SSSE3 holds
;; and ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to
;; the destination and marked commutative with operand 2 ("%0").
9207 (define_insn "*ssse3_pmulhrswv4hi3"
9208 [(set (match_operand:V4HI 0 "register_operand" "=y")
9215 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9217 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9219 (const_vector:V4HI [(const_int 1) (const_int 1)
9220 (const_int 1) (const_int 1)]))
9222 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9223 "pmulhrsw\t{%2, %0|%0, %2}"
9224 [(set_attr "type" "sseimul")
9225 (set_attr "prefix_extra" "1")
9226 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9227 (set_attr "mode" "DI")])
;; *avx_pshufbv16qi3: V16QI insn modelled as an unspec of two inputs and
;; emitted as the three-operand vpshufb with the "vex" prefix attribute.
;; Operand 1 is an "x" register not tied to the destination; operand 2 may
;; be a register or memory ("xm").
9229 (define_insn "*avx_pshufbv16qi3"
9230 [(set (match_operand:V16QI 0 "register_operand" "=x")
9231 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9232 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9235 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9236 [(set_attr "type" "sselog1")
9237 (set_attr "prefix_extra" "1")
9238 (set_attr "prefix" "vex")
9239 (set_attr "mode" "TI")])
;; ssse3_pshufbv16qi3: V16QI insn modelled as an unspec of two inputs and
;; emitted as the two-operand pshufb.  Operand 1 is tied to the destination
;; ("0"); operand 2 may be a register or memory ("xm").
9241 (define_insn "ssse3_pshufbv16qi3"
9242 [(set (match_operand:V16QI 0 "register_operand" "=x")
9243 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9244 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9247 "pshufb\t{%2, %0|%0, %2}";
9248 [(set_attr "type" "sselog1")
9249 (set_attr "prefix_data16" "1")
9250 (set_attr "prefix_extra" "1")
9251 (set_attr "mode" "TI")])
;; ssse3_pshufbv8qi3: V8QI insn using the "y" register constraint, modelled
;; as an unspec of two inputs and emitted as the two-operand pshufb.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("ym").
9253 (define_insn "ssse3_pshufbv8qi3"
9254 [(set (match_operand:V8QI 0 "register_operand" "=y")
9255 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9256 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9259 "pshufb\t{%2, %0|%0, %2}";
9260 [(set_attr "type" "sselog1")
9261 (set_attr "prefix_extra" "1")
9262 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9263 (set_attr "mode" "DI")])
;; *avx_psign<mode>3: instantiated for each mode of the SSEMODE124
;; iterator; emitted as the three-operand vpsign with the mnemonic suffix
;; taken from <ssevecsize>.  Operand 1 is an "x" register not tied to the
;; destination; operand 2 may be a register or memory ("xm").
9265 (define_insn "*avx_psign<mode>3"
9266 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9268 [(match_operand:SSEMODE124 1 "register_operand" "x")
9269 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9272 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9273 [(set_attr "type" "sselog1")
9274 (set_attr "prefix_extra" "1")
9275 (set_attr "prefix" "vex")
9276 (set_attr "mode" "TI")])
;; ssse3_psign<mode>3 (SSEMODE124 variant): instantiated for each mode of
;; the SSEMODE124 iterator; emitted as the two-operand psign with the
;; mnemonic suffix taken from <ssevecsize>.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory ("xm").
9278 (define_insn "ssse3_psign<mode>3"
9279 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9281 [(match_operand:SSEMODE124 1 "register_operand" "0")
9282 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9285 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9286 [(set_attr "type" "sselog1")
9287 (set_attr "prefix_data16" "1")
9288 (set_attr "prefix_extra" "1")
9289 (set_attr "mode" "TI")])
;; ssse3_psign<mode>3 (MMXMODEI variant): instantiated for each mode of the
;; MMXMODEI iterator with the "y" register constraint; emitted as the
;; two-operand psign with the mnemonic suffix taken from <mmxvecsize>.
;; Operand 1 is tied to the destination ("0").
9291 (define_insn "ssse3_psign<mode>3"
9292 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9294 [(match_operand:MMXMODEI 1 "register_operand" "0")
9295 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9298 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9299 [(set_attr "type" "sselog1")
9300 (set_attr "prefix_extra" "1")
9301 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9302 (set_attr "mode" "DI")])
;; *avx_palignrti: TI-mode unspec of two inputs plus an immediate, emitted
;; as the four-operand vpalignr with the "vex" prefix attribute.  Operand 3
;; must satisfy const_0_to_255_mul_8_operand; the output code divides it by
;; 8 before printing (presumably converting a bit count to the byte count
;; the instruction encodes -- confirm against the predicate definition).
9304 (define_insn "*avx_palignrti"
9305 [(set (match_operand:TI 0 "register_operand" "=x")
9306 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9307 (match_operand:TI 2 "nonimmediate_operand" "xm")
9308 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9312 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9313 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9315 [(set_attr "type" "sseishft")
9316 (set_attr "prefix_extra" "1")
9317 (set_attr "length_immediate" "1")
9318 (set_attr "prefix" "vex")
9319 (set_attr "mode" "TI")])
;; ssse3_palignrti: TI-mode unspec of two inputs plus an immediate, emitted
;; as the three-operand palignr with operand 1 tied to the destination.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing (presumably converting a bit count to
;; the byte count the instruction encodes -- confirm against the predicate).
9321 (define_insn "ssse3_palignrti"
9322 [(set (match_operand:TI 0 "register_operand" "=x")
9323 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9324 (match_operand:TI 2 "nonimmediate_operand" "xm")
9325 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9329 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9330 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9332 [(set_attr "type" "sseishft")
9333 (set_attr "atom_unit" "sishuf")
9334 (set_attr "prefix_data16" "1")
9335 (set_attr "prefix_extra" "1")
9336 (set_attr "length_immediate" "1")
9337 (set_attr "mode" "TI")])
;; ssse3_palignrdi: DI-mode variant using the "y" register constraint,
;; emitted as the three-operand palignr with operand 1 tied to the
;; destination.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing (presumably converting a bit
;; count to a byte count -- confirm against the predicate definition).
9339 (define_insn "ssse3_palignrdi"
9340 [(set (match_operand:DI 0 "register_operand" "=y")
9341 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9342 (match_operand:DI 2 "nonimmediate_operand" "ym")
9343 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9347 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9348 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9350 [(set_attr "type" "sseishft")
9351 (set_attr "atom_unit" "sishuf")
9352 (set_attr "prefix_extra" "1")
9353 (set_attr "length_immediate" "1")
9354 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9355 (set_attr "mode" "DI")])
;; abs<mode>2 (SSEMODE124 variant): element-wise abs of operand 1, which
;; may be a register or memory ("xm"), for each mode of the SSEMODE124
;; iterator.  The template is %vpabs<ssevecsize> and the prefix attribute
;; is "maybe_vex", so a single pattern serves both encodings.
9357 (define_insn "abs<mode>2"
9358 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9359 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9361 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9362 [(set_attr "type" "sselog1")
9363 (set_attr "prefix_data16" "1")
9364 (set_attr "prefix_extra" "1")
9365 (set_attr "prefix" "maybe_vex")
9366 (set_attr "mode" "TI")])
;; abs<mode>2 (MMXMODEI variant): element-wise abs of operand 1, which may
;; be a register or memory ("ym"), for each mode of the MMXMODEI iterator.
;; Emitted as pabs with the mnemonic suffix taken from <mmxvecsize>.
9368 (define_insn "abs<mode>2"
9369 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9370 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9372 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9373 [(set_attr "type" "sselog1")
9374 (set_attr "prefix_rep" "0")
9375 (set_attr "prefix_extra" "1")
9376 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9377 (set_attr "mode" "DI")])
9379 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9381 ;; AMD SSE4A instructions
9383 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4a_movnt<mode>: stores a MODEF register operand ("x") to a memory
;; destination, modelled as an unspec.  Emitted as movnts with the suffix
;; taken from <ssemodefsuffix>.
9385 (define_insn "sse4a_movnt<mode>"
9386 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9388 [(match_operand:MODEF 1 "register_operand" "x")]
9391 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9392 [(set_attr "type" "ssemov")
9393 (set_attr "mode" "<MODE>")])
;; sse4a_vmmovnt<mode>: stores element 0 of an SSEMODEF2P vector register
;; to a memory destination of mode <ssescalarmode>, modelled as an unspec.
;; Emitted as movnt with the suffix taken from <ssescalarmodesuffix>.
9395 (define_insn "sse4a_vmmovnt<mode>"
9396 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9397 (unspec:<ssescalarmode>
9398 [(vec_select:<ssescalarmode>
9399 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9400 (parallel [(const_int 0)]))]
9403 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9404 [(set_attr "type" "ssemov")
9405 (set_attr "mode" "<ssescalarmode>")])
;; sse4a_extrqi: immediate form of extrq on V2DI.  Operand 1 is tied to the
;; destination ("0"); operands 2 and 3 are const_int_operand values printed
;; directly in the template, and length_immediate is set to 2.
9407 (define_insn "sse4a_extrqi"
9408 [(set (match_operand:V2DI 0 "register_operand" "=x")
9409 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9410 (match_operand 2 "const_int_operand" "")
9411 (match_operand 3 "const_int_operand" "")]
9414 "extrq\t{%3, %2, %0|%0, %2, %3}"
9415 [(set_attr "type" "sse")
9416 (set_attr "prefix_data16" "1")
9417 (set_attr "length_immediate" "2")
9418 (set_attr "mode" "TI")])
;; sse4a_extrq: register form of extrq on V2DI.  Operand 1 is tied to the
;; destination ("0"); operand 2 is a V16QI register ("x").
9420 (define_insn "sse4a_extrq"
9421 [(set (match_operand:V2DI 0 "register_operand" "=x")
9422 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9423 (match_operand:V16QI 2 "register_operand" "x")]
9426 "extrq\t{%2, %0|%0, %2}"
9427 [(set_attr "type" "sse")
9428 (set_attr "prefix_data16" "1")
9429 (set_attr "mode" "TI")])
;; sse4a_insertqi: immediate form of insertq on V2DI.  Operand 1 is tied to
;; the destination ("0"), operand 2 is a V2DI register, and operands 3 and
;; 4 are const_int_operand values printed directly in the template.
;; length_immediate is 2; prefix_data16 is forced to 0 and prefix_rep to 1.
9431 (define_insn "sse4a_insertqi"
9432 [(set (match_operand:V2DI 0 "register_operand" "=x")
9433 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9434 (match_operand:V2DI 2 "register_operand" "x")
9435 (match_operand 3 "const_int_operand" "")
9436 (match_operand 4 "const_int_operand" "")]
9439 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9440 [(set_attr "type" "sseins")
9441 (set_attr "prefix_data16" "0")
9442 (set_attr "prefix_rep" "1")
9443 (set_attr "length_immediate" "2")
9444 (set_attr "mode" "TI")])
;; sse4a_insertq: register form of insertq on V2DI.  Operand 1 is tied to
;; the destination ("0"); operand 2 is a V2DI register ("x").
;; prefix_data16 is forced to 0 and prefix_rep to 1.
9446 (define_insn "sse4a_insertq"
9447 [(set (match_operand:V2DI 0 "register_operand" "=x")
9448 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9449 (match_operand:V2DI 2 "register_operand" "x")]
9452 "insertq\t{%2, %0|%0, %2}"
9453 [(set_attr "type" "sseins")
9454 (set_attr "prefix_data16" "0")
9455 (set_attr "prefix_rep" "1")
9456 (set_attr "mode" "TI")])
9458 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9460 ;; Intel SSE4.1 instructions
9462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; avx_blend<ssemodesuffix><avxmodesuffix>: vec_merge of operand 2 and
;; operand 1 under the immediate mask in operand 3, for each mode of the
;; AVXMODEF2P iterator.  The mask range is given by the
;; const_0_to_<blendbits>_operand predicate.  Emitted as the four-operand
;; vblend with the suffix taken from <ssemodesuffix>.
9464 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9465 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9466 (vec_merge:AVXMODEF2P
9467 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9468 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9469 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9471 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9472 [(set_attr "type" "ssemov")
9473 (set_attr "prefix_extra" "1")
9474 (set_attr "length_immediate" "1")
9475 (set_attr "prefix" "vex")
9476 (set_attr "mode" "<avxvecmode>")])
;; avx_blendv<ssemodesuffix><avxmodesuffix>: three-input pattern over the
;; AVXMODEF2P iterator in which operand 3 is a register rather than an
;; immediate.  Emitted as the four-operand vblendv with the suffix taken
;; from <ssemodesuffix>; length_immediate is nevertheless set to 1.
9478 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9479 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9481 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9482 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9483 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9486 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9487 [(set_attr "type" "ssemov")
9488 (set_attr "prefix_extra" "1")
9489 (set_attr "length_immediate" "1")
9490 (set_attr "prefix" "vex")
9491 (set_attr "mode" "<avxvecmode>")])
;; sse4_1_blend<ssemodesuffix>: vec_merge of operand 2 and operand 1 under
;; the immediate mask in operand 3, for each mode of the SSEMODEF2P
;; iterator.  Operand 1 is tied to the destination ("0").  Emitted as the
;; three-operand blend with the suffix taken from <ssemodesuffix>.
9493 (define_insn "sse4_1_blend<ssemodesuffix>"
9494 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9495 (vec_merge:SSEMODEF2P
9496 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9497 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9498 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9500 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9501 [(set_attr "type" "ssemov")
9502 (set_attr "prefix_data16" "1")
9503 (set_attr "prefix_extra" "1")
9504 (set_attr "length_immediate" "1")
9505 (set_attr "mode" "<MODE>")])
;; sse4_1_blendv<ssemodesuffix>: three-input pattern over the SSEMODEF2P
;; iterator.  Operands 0-2 use the *_not_xmm0 predicates, while operand 3
;; uses the "Yz" constraint (presumably pinning it to xmm0 -- confirm
;; against the constraint definitions).  Operand 1 is tied to the
;; destination ("0"); the template prints operand 3 explicitly.
9507 (define_insn "sse4_1_blendv<ssemodesuffix>"
9508 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9510 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9511 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9512 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9515 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9516 [(set_attr "type" "ssemov")
9517 (set_attr "prefix_data16" "1")
9518 (set_attr "prefix_extra" "1")
9519 (set_attr "mode" "<MODE>")])
;; avx_dp<ssemodesuffix><avxmodesuffix>: two vector inputs plus an
;; immediate in 0..255 (const_0_to_255_operand), for each mode of the
;; AVXMODEF2P iterator.  Operand 1 is marked commutative with operand 2
;; ("%x").  Emitted as the four-operand vdp with the suffix taken from
;; <ssemodesuffix>.
9521 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9522 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9524 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9525 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9526 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9529 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9530 [(set_attr "type" "ssemul")
9531 (set_attr "prefix" "vex")
9532 (set_attr "prefix_extra" "1")
9533 (set_attr "length_immediate" "1")
9534 (set_attr "mode" "<avxvecmode>")])
;; sse4_1_dp<ssemodesuffix>: two vector inputs plus an immediate in 0..255
;; (const_0_to_255_operand), for each mode of the SSEMODEF2P iterator.
;; Operand 1 is tied to the destination and marked commutative with
;; operand 2 ("%0").  Emitted as the three-operand dp with the suffix taken
;; from <ssemodesuffix>.
9536 (define_insn "sse4_1_dp<ssemodesuffix>"
9537 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9539 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9540 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9541 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9544 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9545 [(set_attr "type" "ssemul")
9546 (set_attr "prefix_data16" "1")
9547 (set_attr "prefix_extra" "1")
9548 (set_attr "length_immediate" "1")
9549 (set_attr "mode" "<MODE>")])
;; sse4_1_movntdqa: loads a V2DI value from a memory operand into an "x"
;; register, modelled as an unspec.  The template is %vmovntdqa and the
;; prefix attribute is "maybe_vex", so a single pattern serves both
;; encodings.
9551 (define_insn "sse4_1_movntdqa"
9552 [(set (match_operand:V2DI 0 "register_operand" "=x")
9553 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9556 "%vmovntdqa\t{%1, %0|%0, %1}"
9557 [(set_attr "type" "ssemov")
9558 (set_attr "prefix_extra" "1")
9559 (set_attr "prefix" "maybe_vex")
9560 (set_attr "mode" "TI")])
;; *avx_mpsadbw: V16QI unspec of two vector inputs plus an immediate in
;; 0..255 (const_0_to_255_operand), emitted as the four-operand vmpsadbw
;; with the "vex" prefix attribute.  Operand 1 is not tied to the
;; destination; operand 2 may be a register or memory ("xm").
9562 (define_insn "*avx_mpsadbw"
9563 [(set (match_operand:V16QI 0 "register_operand" "=x")
9564 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9565 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9566 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9569 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9570 [(set_attr "type" "sselog1")
9571 (set_attr "prefix" "vex")
9572 (set_attr "prefix_extra" "1")
9573 (set_attr "length_immediate" "1")
9574 (set_attr "mode" "TI")])
;; sse4_1_mpsadbw: V16QI unspec of two vector inputs plus an immediate in
;; 0..255 (const_0_to_255_operand), emitted as the three-operand mpsadbw.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm").
9576 (define_insn "sse4_1_mpsadbw"
9577 [(set (match_operand:V16QI 0 "register_operand" "=x")
9578 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9579 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9580 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9583 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9584 [(set_attr "type" "sselog1")
9585 (set_attr "prefix_extra" "1")
9586 (set_attr "length_immediate" "1")
9587 (set_attr "mode" "TI")])
;; *avx_packusdw: produces a V8HI result from two V4SI inputs and is
;; emitted as the three-operand vpackusdw with the "vex" prefix attribute.
;; Operand 1 is not tied to the destination; operand 2 may be a register or
;; memory ("xm").
9589 (define_insn "*avx_packusdw"
9590 [(set (match_operand:V8HI 0 "register_operand" "=x")
9593 (match_operand:V4SI 1 "register_operand" "x"))
9595 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9597 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9598 [(set_attr "type" "sselog")
9599 (set_attr "prefix_extra" "1")
9600 (set_attr "prefix" "vex")
9601 (set_attr "mode" "TI")])
;; Legacy form of packusdw: V4SI operand 1 is tied to the V8HI destination
;; ("0"); operand 2 may be a register or memory.
9603 (define_insn "sse4_1_packusdw"
9604 [(set (match_operand:V8HI 0 "register_operand" "=x")
9607 (match_operand:V4SI 1 "register_operand" "0"))
9609 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9611 "packusdw\t{%2, %0|%0, %2}"
9612 [(set_attr "type" "sselog")
9613 (set_attr "prefix_extra" "1")
9614 (set_attr "mode" "TI")])
;; VEX form of pblendvb: all four operands are explicit and operand 3 may be
;; any SSE register ("x").
;; NOTE(review): length_immediate is "1" although no immediate operand is
;; matched -- presumably the fourth register is encoded in an immediate
;; byte; confirm against the instruction encoding.
9616 (define_insn "*avx_pblendvb"
9617 [(set (match_operand:V16QI 0 "register_operand" "=x")
9618 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9619 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9620 (match_operand:V16QI 3 "register_operand" "x")]
9623 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9624 [(set_attr "type" "ssemov")
9625 (set_attr "prefix_extra" "1")
9626 (set_attr "length_immediate" "1")
9627 (set_attr "prefix" "vex")
9628 (set_attr "mode" "TI")])
;; Legacy form of pblendvb: operand 3 is pinned by constraint "Yz", while
;; operands 0-2 use the *_not_xmm0 predicates.  Operand 1 is tied to the
;; destination ("0").
;; NOTE(review): "Yz" is presumably the xmm0-only register constraint, which
;; would explain the not_xmm0 predicates -- confirm in constraints.md.
9630 (define_insn "sse4_1_pblendvb"
9631 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9632 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9633 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9634 (match_operand:V16QI 3 "register_operand" "Yz")]
9637 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9638 [(set_attr "type" "ssemov")
9639 (set_attr "prefix_extra" "1")
9640 (set_attr "mode" "TI")])
;; VEX form of pblendw.  Note the operand numbering: operand 2 (register or
;; memory) comes first in the RTL and operand 1 second, while the template
;; prints %1 before %2.  Operand 3 is an immediate accepted by
;; const_0_to_255_operand.
9642 (define_insn "*avx_pblendw"
9643 [(set (match_operand:V8HI 0 "register_operand" "=x")
9645 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9646 (match_operand:V8HI 1 "register_operand" "x")
9647 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9649 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9650 [(set_attr "type" "ssemov")
9651 (set_attr "prefix" "vex")
9652 (set_attr "prefix_extra" "1")
9653 (set_attr "length_immediate" "1")
9654 (set_attr "mode" "TI")])
;; Legacy form of pblendw: operand 2 (register or memory) comes first in the
;; RTL, operand 1 is tied to the destination ("0"), and operand 3 is an
;; immediate accepted by const_0_to_255_operand.
9656 (define_insn "sse4_1_pblendw"
9657 [(set (match_operand:V8HI 0 "register_operand" "=x")
9659 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9660 (match_operand:V8HI 1 "register_operand" "0")
9661 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9663 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9664 [(set_attr "type" "ssemov")
9665 (set_attr "prefix_extra" "1")
9666 (set_attr "length_immediate" "1")
9667 (set_attr "mode" "TI")])
;; phminposuw, modelled as UNSPEC_PHMINPOSUW over a single V8HI source that
;; may be a register or memory.  Emitted with the "%v" prefix and prefix
;; attribute "maybe_vex".
9669 (define_insn "sse4_1_phminposuw"
9670 [(set (match_operand:V8HI 0 "register_operand" "=x")
9671 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9672 UNSPEC_PHMINPOSUW))]
9674 "%vphminposuw\t{%1, %0|%0, %1}"
9675 [(set_attr "type" "sselog1")
9676 (set_attr "prefix_extra" "1")
9677 (set_attr "prefix" "maybe_vex")
9678 (set_attr "mode" "TI")])
;; Byte-to-word extension: V16QI source (register or memory), V8HI result.
;; <extsuffix> maps sign_extend to "sx" and zero_extend to "zx", giving
;; pmovsxbw / pmovzxbw; the selection vector starts at element 0.
9680 (define_insn "sse4_1_<code>v8qiv8hi2"
9681 [(set (match_operand:V8HI 0 "register_operand" "=x")
9684 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9685 (parallel [(const_int 0)
9694 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9695 [(set_attr "type" "ssemov")
9696 (set_attr "prefix_extra" "1")
9697 (set_attr "prefix" "maybe_vex")
9698 (set_attr "mode" "TI")])
;; Byte-to-doubleword extension: V16QI source (register or memory), V4SI
;; result.  <extsuffix> ("sx"/"zx") selects pmovsxbd or pmovzxbd; the
;; selection vector starts at element 0.
9700 (define_insn "sse4_1_<code>v4qiv4si2"
9701 [(set (match_operand:V4SI 0 "register_operand" "=x")
9704 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9705 (parallel [(const_int 0)
9710 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9711 [(set_attr "type" "ssemov")
9712 (set_attr "prefix_extra" "1")
9713 (set_attr "prefix" "maybe_vex")
9714 (set_attr "mode" "TI")])
;; Word-to-doubleword extension: V8HI source (register or memory), V4SI
;; result.  <extsuffix> ("sx"/"zx") selects pmovsxwd or pmovzxwd; the
;; selection vector starts at element 0.
9716 (define_insn "sse4_1_<code>v4hiv4si2"
9717 [(set (match_operand:V4SI 0 "register_operand" "=x")
9720 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9721 (parallel [(const_int 0)
9726 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9727 [(set_attr "type" "ssemov")
9728 (set_attr "prefix_extra" "1")
9729 (set_attr "prefix" "maybe_vex")
9730 (set_attr "mode" "TI")])
;; Byte-to-quadword extension: V16QI source (register or memory), V2DI
;; result.  <extsuffix> ("sx"/"zx") selects pmovsxbq or pmovzxbq; the
;; selection vector starts at element 0.
9732 (define_insn "sse4_1_<code>v2qiv2di2"
9733 [(set (match_operand:V2DI 0 "register_operand" "=x")
9736 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9737 (parallel [(const_int 0)
9740 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9741 [(set_attr "type" "ssemov")
9742 (set_attr "prefix_extra" "1")
9743 (set_attr "prefix" "maybe_vex")
9744 (set_attr "mode" "TI")])
;; Word-to-quadword extension: V8HI source (register or memory), V2DI
;; result.  <extsuffix> ("sx"/"zx") selects pmovsxwq or pmovzxwq; the
;; selection vector starts at element 0.
9746 (define_insn "sse4_1_<code>v2hiv2di2"
9747 [(set (match_operand:V2DI 0 "register_operand" "=x")
9750 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9751 (parallel [(const_int 0)
9754 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9755 [(set_attr "type" "ssemov")
9756 (set_attr "prefix_extra" "1")
9757 (set_attr "prefix" "maybe_vex")
9758 (set_attr "mode" "TI")])
;; Doubleword-to-quadword extension: V4SI source (register or memory), V2DI
;; result.  <extsuffix> ("sx"/"zx") selects pmovsxdq or pmovzxdq; the
;; selection vector starts at element 0.
9760 (define_insn "sse4_1_<code>v2siv2di2"
9761 [(set (match_operand:V2DI 0 "register_operand" "=x")
9764 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9765 (parallel [(const_int 0)
9768 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9769 [(set_attr "type" "ssemov")
9770 (set_attr "prefix_extra" "1")
9771 (set_attr "prefix" "maybe_vex")
9772 (set_attr "mode" "TI")])
9774 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9775 ;; setting FLAGS_REG. But they are not really compare instructions.
;; Sets FLAGS_REG (CC mode) from an unspec of operand 0 (register) and
;; operand 1 (register or memory).  Neither operand is written: both lack an
;; output ("=") constraint.
9776 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9777 [(set (reg:CC FLAGS_REG)
9778 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9779 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9782 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9783 [(set_attr "type" "ssecomi")
9784 (set_attr "prefix_extra" "1")
9785 (set_attr "prefix" "vex")
9786 (set_attr "mode" "<MODE>")])
9788 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9789 ;; But it is not really a compare instruction.
;; 256-bit vptest on V4DI operands (mode attribute "OI"): FLAGS_REG is set
;; from an unspec of a register and a register-or-memory operand.
9790 (define_insn "avx_ptest256"
9791 [(set (reg:CC FLAGS_REG)
9792 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9793 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9796 "vptest\t{%1, %0|%0, %1}"
9797 [(set_attr "type" "ssecomi")
9798 (set_attr "prefix_extra" "1")
9799 (set_attr "prefix" "vex")
9800 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI operands: FLAGS_REG is set from an unspec of a
;; register and a register-or-memory operand.  The "%v" template prefix is
;; paired with prefix attribute "maybe_vex".
9802 (define_insn "sse4_1_ptest"
9803 [(set (reg:CC FLAGS_REG)
9804 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9805 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9808 "%vptest\t{%1, %0|%0, %1}"
9809 [(set_attr "type" "ssecomi")
9810 (set_attr "prefix_extra" "1")
9811 (set_attr "prefix" "maybe_vex")
9812 (set_attr "mode" "TI")])
;; vround<ssemodesuffix> on the 256-bit float modes: operand 1 may be a
;; register or memory, and operand 2 is an immediate accepted by
;; const_0_to_15_operand.
9814 (define_insn "avx_round<ssemodesuffix>256"
9815 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9816 (unspec:AVX256MODEF2P
9817 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9818 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9821 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9822 [(set_attr "type" "ssecvt")
9823 (set_attr "prefix_extra" "1")
9824 (set_attr "length_immediate" "1")
9825 (set_attr "prefix" "vex")
9826 (set_attr "mode" "<MODE>")])
;; Packed round on the 128-bit float modes: operand 1 may be a register or
;; memory, and operand 2 is an immediate accepted by const_0_to_15_operand.
;; The "%v" template prefix is paired with prefix attribute "maybe_vex".
9828 (define_insn "sse4_1_round<ssemodesuffix>"
9829 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9831 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9832 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9835 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9836 [(set_attr "type" "ssecvt")
9837 (set_attr "prefix_data16" "1")
9838 (set_attr "prefix_extra" "1")
9839 (set_attr "length_immediate" "1")
9840 (set_attr "prefix" "maybe_vex")
9841 (set_attr "mode" "<MODE>")])
;; VEX scalar round: a vec_merge whose first arm is built from operand 2 and
;; the immediate operand 3 (const_0_to_15_operand) and whose second arm is
;; operand 1.  All operands are registers; the template prints all four.
9843 (define_insn "*avx_round<ssescalarmodesuffix>"
9844 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9845 (vec_merge:SSEMODEF2P
9847 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9848 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9850 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9853 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9854 [(set_attr "type" "ssecvt")
9855 (set_attr "prefix_extra" "1")
9856 (set_attr "length_immediate" "1")
9857 (set_attr "prefix" "vex")
9858 (set_attr "mode" "<MODE>")])
;; Legacy scalar round: a vec_merge whose first arm is built from operand 2
;; and the immediate operand 3 (const_0_to_15_operand) and whose second arm,
;; operand 1, is tied to the destination ("0").  Only %0, %2 and %3 are
;; printed.
9860 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9861 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9862 (vec_merge:SSEMODEF2P
9864 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9865 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9867 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9870 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9871 [(set_attr "type" "ssecvt")
9872 (set_attr "prefix_data16" "1")
9873 (set_attr "prefix_extra" "1")
9874 (set_attr "length_immediate" "1")
9875 (set_attr "mode" "<MODE>")])
9877 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9879 ;; Intel SSE4.2 string/text processing instructions
9881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in operand 0
;; (constraint "c"), a V16QI value in operand 1 (constraint "Yz") and
;; FLAGS_REG.  The splitter runs while pseudos can still be created and
;; checks, via REG_UNUSED notes on curr_insn, which of the three results
;; are live (locals ecx, xmm0, flags).  It can emit the pcmpestri form
;; (result in operand 0), the pcmpestrm form (result in operand 1) and,
;; when the flags are live but neither register result is, the flags-only
;; sse4_2_pcmpestr_cconly form.
9883 (define_insn_and_split "sse4_2_pcmpestr"
9884 [(set (match_operand:SI 0 "register_operand" "=c,c")
9886 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9887 (match_operand:SI 3 "register_operand" "a,a")
9888 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9889 (match_operand:SI 5 "register_operand" "d,d")
9890 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9892 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9900 (set (reg:CC FLAGS_REG)
9909 && can_create_pseudo_p ()"
9914 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9915 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9916 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9919 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9920 operands[3], operands[4],
9921 operands[5], operands[6]));
9923 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9924 operands[3], operands[4],
9925 operands[5], operands[6]));
9926 if (flags && !(ecx || xmm0))
9927 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9928 operands[2], operands[3],
9929 operands[4], operands[5],
9933 [(set_attr "type" "sselog")
9934 (set_attr "prefix_data16" "1")
9935 (set_attr "prefix_extra" "1")
9936 (set_attr "length_immediate" "1")
9937 (set_attr "memory" "none,load")
9938 (set_attr "mode" "TI")])
;; pcmpestri: SI result in operand 0 (constraint "c") plus FLAGS_REG.  The
;; SI operands 2 and 4 are pinned to constraints "a" and "d" and do not
;; appear in the template.  Two alternatives: operand 3 in a register or in
;; memory (memory attribute "none,load").
9940 (define_insn "sse4_2_pcmpestri"
9941 [(set (match_operand:SI 0 "register_operand" "=c,c")
9943 [(match_operand:V16QI 1 "register_operand" "x,x")
9944 (match_operand:SI 2 "register_operand" "a,a")
9945 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9946 (match_operand:SI 4 "register_operand" "d,d")
9947 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9949 (set (reg:CC FLAGS_REG)
9958 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9959 [(set_attr "type" "sselog")
9960 (set_attr "prefix_data16" "1")
9961 (set_attr "prefix_extra" "1")
9962 (set_attr "prefix" "maybe_vex")
9963 (set_attr "length_immediate" "1")
9964 (set_attr "memory" "none,load")
9965 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in operand 0 (constraint "Yz") plus FLAGS_REG.
;; The SI operands 2 and 4 are pinned to constraints "a" and "d" and do not
;; appear in the template.  Two alternatives: operand 3 in a register or in
;; memory (memory attribute "none,load").
9967 (define_insn "sse4_2_pcmpestrm"
9968 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9970 [(match_operand:V16QI 1 "register_operand" "x,x")
9971 (match_operand:SI 2 "register_operand" "a,a")
9972 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9973 (match_operand:SI 4 "register_operand" "d,d")
9974 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9976 (set (reg:CC FLAGS_REG)
9985 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9986 [(set_attr "type" "sselog")
9987 (set_attr "prefix_data16" "1")
9988 (set_attr "prefix_extra" "1")
9989 (set_attr "length_immediate" "1")
9990 (set_attr "prefix" "maybe_vex")
9991 (set_attr "memory" "none,load")
9992 (set_attr "mode" "TI")])
;; Flags-only variant: sets FLAGS_REG and clobbers two scratches.  In
;; alternatives 0-1 the V16QI scratch gets "Yz" (SI scratch "X") and
;; pcmpestrm is emitted; in alternatives 2-3 the SI scratch gets "c" (V16QI
;; scratch "X") and pcmpestri is emitted.  Within each pair operand 4 is
;; first a register, then memory (memory attribute "none,load,none,load").
9994 (define_insn "sse4_2_pcmpestr_cconly"
9995 [(set (reg:CC FLAGS_REG)
9997 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9998 (match_operand:SI 3 "register_operand" "a,a,a,a")
9999 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10000 (match_operand:SI 5 "register_operand" "d,d,d,d")
10001 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10003 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10004 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10007 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10008 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10009 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10010 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10011 [(set_attr "type" "sselog")
10012 (set_attr "prefix_data16" "1")
10013 (set_attr "prefix_extra" "1")
10014 (set_attr "length_immediate" "1")
10015 (set_attr "memory" "none,load,none,load")
10016 (set_attr "prefix" "maybe_vex")
10017 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value in operand 0
;; (constraint "c"), a V16QI value in operand 1 (constraint "Yz") and
;; FLAGS_REG.  The splitter runs while pseudos can still be created and
;; checks, via REG_UNUSED notes on curr_insn, which of the three results
;; are live (locals ecx, xmm0, flags).  It can emit the pcmpistri form
;; (result in operand 0), the pcmpistrm form (result in operand 1) and,
;; when the flags are live but neither register result is, the flags-only
;; sse4_2_pcmpistr_cconly form.
10019 (define_insn_and_split "sse4_2_pcmpistr"
10020 [(set (match_operand:SI 0 "register_operand" "=c,c")
10022 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10023 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10024 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10026 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10032 (set (reg:CC FLAGS_REG)
10039 && can_create_pseudo_p ()"
10044 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10045 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10046 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10049 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10050 operands[3], operands[4]));
10052 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10053 operands[3], operands[4]));
10054 if (flags && !(ecx || xmm0))
10055 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10056 operands[2], operands[3],
10060 [(set_attr "type" "sselog")
10061 (set_attr "prefix_data16" "1")
10062 (set_attr "prefix_extra" "1")
10063 (set_attr "length_immediate" "1")
10064 (set_attr "memory" "none,load")
10065 (set_attr "mode" "TI")])
;; pcmpistri: SI result in operand 0 (constraint "c") plus FLAGS_REG.  Two
;; alternatives: operand 2 in a register or in memory (memory attribute
;; "none,load"); operand 3 is an immediate (const_0_to_255_operand).
10067 (define_insn "sse4_2_pcmpistri"
10068 [(set (match_operand:SI 0 "register_operand" "=c,c")
10070 [(match_operand:V16QI 1 "register_operand" "x,x")
10071 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10072 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10074 (set (reg:CC FLAGS_REG)
10081 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10082 [(set_attr "type" "sselog")
10083 (set_attr "prefix_data16" "1")
10084 (set_attr "prefix_extra" "1")
10085 (set_attr "length_immediate" "1")
10086 (set_attr "prefix" "maybe_vex")
10087 (set_attr "memory" "none,load")
10088 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in operand 0 (constraint "Yz") plus FLAGS_REG.
;; Two alternatives: operand 2 in a register or in memory (memory attribute
;; "none,load"); operand 3 is an immediate (const_0_to_255_operand).
10090 (define_insn "sse4_2_pcmpistrm"
10091 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10093 [(match_operand:V16QI 1 "register_operand" "x,x")
10094 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10095 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10097 (set (reg:CC FLAGS_REG)
10104 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10105 [(set_attr "type" "sselog")
10106 (set_attr "prefix_data16" "1")
10107 (set_attr "prefix_extra" "1")
10108 (set_attr "length_immediate" "1")
10109 (set_attr "prefix" "maybe_vex")
10110 (set_attr "memory" "none,load")
10111 (set_attr "mode" "TI")])
;; Flags-only variant: sets FLAGS_REG and clobbers two scratches.  In
;; alternatives 0-1 the V16QI scratch gets "Yz" (SI scratch "X") and
;; pcmpistrm is emitted; in alternatives 2-3 the SI scratch gets "c" (V16QI
;; scratch "X") and pcmpistri is emitted.  Within each pair operand 3 is
;; first a register, then memory (memory attribute "none,load,none,load").
10113 (define_insn "sse4_2_pcmpistr_cconly"
10114 [(set (reg:CC FLAGS_REG)
10116 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10117 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10118 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10120 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10121 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10124 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10125 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10126 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10127 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10128 [(set_attr "type" "sselog")
10129 (set_attr "prefix_data16" "1")
10130 (set_attr "prefix_extra" "1")
10131 (set_attr "length_immediate" "1")
10132 (set_attr "memory" "none,load,none,load")
10133 (set_attr "prefix" "maybe_vex")
10134 (set_attr "mode" "TI")])
10136 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10138 ;; XOP instructions
10140 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10142 ;; XOP parallel integer multiply/add instructions.
10143 ;; Note that the XOP multiply/add instructions
10144 ;; a[i] = b[i] * c[i] + d[i];
10145 ;; do not allow the value being added to be a memory operand.
;; vpmacsww on V8HI.  Operands 1 and 2 are commutative ("%x" / "xm") and
;; only operand 2 has a memory alternative; operand 3 is register-only ("x").
;; NOTE(review): operand 3's predicate is nonimmediate_operand even though
;; its constraint admits only a register -- register_operand looks like the
;; tighter fit; confirm before changing.
10146 (define_insn "xop_pmacsww"
10147 [(set (match_operand:V8HI 0 "register_operand" "=x")
10150 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10151 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10152 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10154 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10155 [(set_attr "type" "ssemuladd")
10156 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a mult:V8HI of operands 1 and 2 combined with
;; operand 3.  Operands 1 and 2 are commutative ("%x" / "xm"); operand 3 is
;; register-only ("x").
10158 (define_insn "xop_pmacssww"
10159 [(set (match_operand:V8HI 0 "register_operand" "=x")
10161 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10162 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10163 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10165 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10166 [(set_attr "type" "ssemuladd")
10167 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI.  Operands 1 and 2 are commutative ("%x" / "xm") and
;; only operand 2 has a memory alternative; operand 3 is register-only ("x").
10169 (define_insn "xop_pmacsdd"
10170 [(set (match_operand:V4SI 0 "register_operand" "=x")
10173 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10174 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10175 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10177 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10178 [(set_attr "type" "ssemuladd")
10179 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a mult:V4SI of operands 1 and 2 combined with
;; operand 3.  Operands 1 and 2 are commutative ("%x" / "xm"); operand 3 is
;; register-only ("x").
10181 (define_insn "xop_pmacssdd"
10182 [(set (match_operand:V4SI 0 "register_operand" "=x")
10184 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10185 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10186 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10188 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10189 [(set_attr "type" "ssemuladd")
10190 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI operands 1 and 2 each sit under a selection vector that
;; starts at element 1; operand 3 and the result are V2DI.  Only operand 2
;; has a memory alternative.
10192 (define_insn "xop_pmacssdql"
10193 [(set (match_operand:V2DI 0 "register_operand" "=x")
10198 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10199 (parallel [(const_int 1)
10202 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10203 (parallel [(const_int 1)
10205 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10207 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10208 [(set_attr "type" "ssemuladd")
10209 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI operands 1 and 2 each sit under a selection vector that
;; starts at element 0; operand 3 and the result are V2DI.  Only operand 2
;; has a memory alternative.
10211 (define_insn "xop_pmacssdqh"
10212 [(set (match_operand:V2DI 0 "register_operand" "=x")
10217 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10218 (parallel [(const_int 0)
10222 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10223 (parallel [(const_int 0)
10225 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10227 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10228 [(set_attr "type" "ssemuladd")
10229 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI operands 1 and 2 each sit under a selection vector that
;; starts at element 1; operand 3 and the result are V2DI.  Only operand 2
;; has a memory alternative.
10231 (define_insn "xop_pmacsdql"
10232 [(set (match_operand:V2DI 0 "register_operand" "=x")
10237 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10238 (parallel [(const_int 1)
10242 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10243 (parallel [(const_int 1)
10245 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10247 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10248 [(set_attr "type" "ssemuladd")
10249 (set_attr "mode" "TI")])
10251 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10252 ;; fake it with a multiply/add. In general, we expect the define_split to
10253 ;; occur before register allocation, so we have to handle the corner case where
10254 ;; the target is the same as operands 1/2
;; Multiply of elements 1 and 3 of the V4SI operands into a V2DI result.
;; The destination is early-clobber ("=&x").  The split rewrites the insn
;; with operands[3] set to CONST0_RTX (V2DImode) as an extra operand.
;; NOTE(review): the split condition is "&& reload_completed", i.e. after
;; register allocation, whereas the comment above this pattern expects the
;; split to occur before register allocation -- one of the two is stale.
10255 (define_insn_and_split "xop_mulv2div2di3_low"
10256 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10260 (match_operand:V4SI 1 "register_operand" "%x")
10261 (parallel [(const_int 1)
10265 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10266 (parallel [(const_int 1)
10267 (const_int 3)])))))]
10270 "&& reload_completed"
10271 [(set (match_dup 0)
10279 (parallel [(const_int 1)
10284 (parallel [(const_int 1)
10288 operands[3] = CONST0_RTX (V2DImode);
10290 [(set_attr "type" "ssemul")
10291 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI operands 1 and 2 each sit under a selection vector that
;; starts at element 0; operand 3 and the result are V2DI.  Only operand 2
;; has a memory alternative.
10293 (define_insn "xop_pmacsdqh"
10294 [(set (match_operand:V2DI 0 "register_operand" "=x")
10299 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10300 (parallel [(const_int 0)
10304 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10305 (parallel [(const_int 0)
10307 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10309 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10310 [(set_attr "type" "ssemuladd")
10311 (set_attr "mode" "TI")])
10313 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10314 ;; fake it with a multiply/add. In general, we expect the define_split to
10315 ;; occur before register allocation, so we have to handle the corner case where
10316 ;; the target is the same as either operands[1] or operands[2]
;; Multiply of elements 0 and 2 of the V4SI operands into a V2DI result.
;; The destination is early-clobber ("=&x").  The split rewrites the insn
;; with operands[3] set to CONST0_RTX (V2DImode) as an extra operand.
;; NOTE(review): the split condition is "&& reload_completed", i.e. after
;; register allocation, whereas the comment above this pattern expects the
;; split to occur before register allocation -- one of the two is stale.
10317 (define_insn_and_split "xop_mulv2div2di3_high"
10318 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10322 (match_operand:V4SI 1 "register_operand" "%x")
10323 (parallel [(const_int 0)
10327 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10328 (parallel [(const_int 0)
10329 (const_int 2)])))))]
10332 "&& reload_completed"
10333 [(set (match_dup 0)
10341 (parallel [(const_int 0)
10346 (parallel [(const_int 0)
10350 operands[3] = CONST0_RTX (V2DImode);
10352 [(set_attr "type" "ssemul")
10353 (set_attr "mode" "TI")])
10355 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI operands 1 and 2 each sit under a selection vector that
;; starts at element 1; operand 3 and the result are V4SI.  Only operand 2
;; has a memory alternative.
10356 (define_insn "xop_pmacsswd"
10357 [(set (match_operand:V4SI 0 "register_operand" "=x")
10362 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10363 (parallel [(const_int 1)
10369 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10370 (parallel [(const_int 1)
10374 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10376 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10377 [(set_attr "type" "ssemuladd")
10378 (set_attr "mode" "TI")])
;; vpmacswd: V8HI operands 1 and 2 each sit under a selection vector that
;; starts at element 1; operand 3 and the result are V4SI.  Only operand 2
;; has a memory alternative.
10380 (define_insn "xop_pmacswd"
10381 [(set (match_operand:V4SI 0 "register_operand" "=x")
10386 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10387 (parallel [(const_int 1)
10393 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10394 (parallel [(const_int 1)
10398 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10400 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10401 [(set_attr "type" "ssemuladd")
10402 (set_attr "mode" "TI")])
;; vpmadcsswd: the V8HI operands 1 and 2 are used under two sets of
;; selection vectors, one starting at element 0 and one starting at
;; element 1 (ending at 7); operand 3 and the result are V4SI.  Only
;; operand 2 has a memory alternative.
10404 (define_insn "xop_pmadcsswd"
10405 [(set (match_operand:V4SI 0 "register_operand" "=x")
10411 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10412 (parallel [(const_int 0)
10418 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10419 (parallel [(const_int 0)
10427 (parallel [(const_int 1)
10434 (parallel [(const_int 1)
10437 (const_int 7)])))))
10438 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10440 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10441 [(set_attr "type" "ssemuladd")
10442 (set_attr "mode" "TI")])
;; vpmadcswd: the V8HI operands 1 and 2 are used under two sets of
;; selection vectors, one starting at element 0 and one starting at
;; element 1 (ending at 7); operand 3 and the result are V4SI.  Only
;; operand 2 has a memory alternative.
10444 (define_insn "xop_pmadcswd"
10445 [(set (match_operand:V4SI 0 "register_operand" "=x")
10451 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10452 (parallel [(const_int 0)
10458 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10459 (parallel [(const_int 0)
10467 (parallel [(const_int 1)
10474 (parallel [(const_int 1)
10477 (const_int 7)])))))
10478 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10480 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10481 [(set_attr "type" "ssemuladd")
10482 (set_attr "mode" "TI")])
10484 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in SSEMODE: an if_then_else with operand 3 as the
;; condition and operands 1/2 as the two arms.  The two alternatives allow
;; a memory operand in operand 2 ("xm") or in operand 3 ("m"), never both.
10485 (define_insn "xop_pcmov_<mode>"
10486 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10487 (if_then_else:SSEMODE
10488 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10489 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10490 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10492 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10493 [(set_attr "type" "sse4arg")])
;; vpcmov for every mode in AVX256MODE: an if_then_else with operand 3 as
;; the condition and operands 1/2 as the two arms.  The two alternatives
;; allow a memory operand in operand 2 ("xm") or in operand 3 ("m"), never
;; both.
10495 (define_insn "xop_pcmov_<mode>256"
10496 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10497 (if_then_else:AVX256MODE
10498 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10499 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10500 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10502 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10503 [(set_attr "type" "sse4arg")])
10505 ;; XOP horizontal add/subtract instructions
;; vphaddbw: horizontal add over the V16QI operand 1 (register or memory)
;; giving V8HI.  The two selection vectors start at elements 0 and 1, the
;; latter ending at element 15.
10506 (define_insn "xop_phaddbw"
10507 [(set (match_operand:V8HI 0 "register_operand" "=x")
10511 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10512 (parallel [(const_int 0)
10523 (parallel [(const_int 1)
10530 (const_int 15)])))))]
10532 "vphaddbw\t{%1, %0|%0, %1}"
10533 [(set_attr "type" "sseiadd1")])
;; vphaddbd: horizontal add over the V16QI operand 1 (register or memory)
;; giving V4SI.  Four selection vectors are used, starting at elements 0,
;; 1, 2 and 3.
10535 (define_insn "xop_phaddbd"
10536 [(set (match_operand:V4SI 0 "register_operand" "=x")
10541 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10542 (parallel [(const_int 0)
10549 (parallel [(const_int 1)
10552 (const_int 13)]))))
10557 (parallel [(const_int 2)
10564 (parallel [(const_int 3)
10567 (const_int 15)]))))))]
10569 "vphaddbd\t{%1, %0|%0, %1}"
10570 [(set_attr "type" "sseiadd1")])
;; vphaddbq: horizontal add over the V16QI operand 1 (register or memory)
;; giving V2DI.  Eight selection vectors are used, starting at elements
;; 0, 1, 2, 3, 8, 9, 10 and 11.
10572 (define_insn "xop_phaddbq"
10573 [(set (match_operand:V2DI 0 "register_operand" "=x")
10579 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10580 (parallel [(const_int 0)
10585 (parallel [(const_int 1)
10591 (parallel [(const_int 2)
10596 (parallel [(const_int 3)
10597 (const_int 7)])))))
10603 (parallel [(const_int 8)
10608 (parallel [(const_int 9)
10609 (const_int 13)]))))
10614 (parallel [(const_int 10)
10619 (parallel [(const_int 11)
10620 (const_int 15)])))))))]
10622 "vphaddbq\t{%1, %0|%0, %1}"
10623 [(set_attr "type" "sseiadd1")])
;; vphaddwd: horizontal add over the V8HI operand 1 (register or memory)
;; giving V4SI.  The two selection vectors start at elements 0 and 1, the
;; latter ending at element 7.
10625 (define_insn "xop_phaddwd"
10626 [(set (match_operand:V4SI 0 "register_operand" "=x")
10630 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10631 (parallel [(const_int 0)
10638 (parallel [(const_int 1)
10641 (const_int 7)])))))]
10643 "vphaddwd\t{%1, %0|%0, %1}"
10644 [(set_attr "type" "sseiadd1")])
;; vphaddwq: horizontal add over the V8HI operand 1 (register or memory)
;; giving V2DI.  Four selection vectors are used, starting at elements 0,
;; 1, 2 and 3.
10646 (define_insn "xop_phaddwq"
10647 [(set (match_operand:V2DI 0 "register_operand" "=x")
10652 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10653 (parallel [(const_int 0)
10658 (parallel [(const_int 1)
10664 (parallel [(const_int 2)
10669 (parallel [(const_int 3)
10670 (const_int 7)]))))))]
10672 "vphaddwq\t{%1, %0|%0, %1}"
10673 [(set_attr "type" "sseiadd1")])
;; vphadddq: horizontal add over the V4SI operand 1 (register or memory)
;; giving V2DI.  The two selection vectors start at elements 0 and 1, the
;; latter ending at element 3.
10675 (define_insn "xop_phadddq"
10676 [(set (match_operand:V2DI 0 "register_operand" "=x")
10680 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10681 (parallel [(const_int 0)
10686 (parallel [(const_int 1)
10687 (const_int 3)])))))]
10689 "vphadddq\t{%1, %0|%0, %1}"
10690 [(set_attr "type" "sseiadd1")])
;; vphaddubw: horizontal add over the V16QI operand 1 (register or memory)
;; giving V8HI.  The two selection vectors start at elements 0 and 1, the
;; latter ending at element 15.
10692 (define_insn "xop_phaddubw"
10693 [(set (match_operand:V8HI 0 "register_operand" "=x")
10697 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10698 (parallel [(const_int 0)
10709 (parallel [(const_int 1)
10716 (const_int 15)])))))]
10718 "vphaddubw\t{%1, %0|%0, %1}"
10719 [(set_attr "type" "sseiadd1")])
;; vphaddubd: horizontal add over the V16QI operand 1 (register or memory)
;; giving V4SI.  Four selection vectors are used, starting at elements 0,
;; 1, 2 and 3.
10721 (define_insn "xop_phaddubd"
10722 [(set (match_operand:V4SI 0 "register_operand" "=x")
10727 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10728 (parallel [(const_int 0)
10735 (parallel [(const_int 1)
10738 (const_int 13)]))))
10743 (parallel [(const_int 2)
10750 (parallel [(const_int 3)
10753 (const_int 15)]))))))]
10755 "vphaddubd\t{%1, %0|%0, %1}"
10756 [(set_attr "type" "sseiadd1")])
;; vphaddubq: horizontal add over the V16QI operand 1 (register or memory)
;; giving V2DI.  Eight selection vectors are used, starting at elements
;; 0, 1, 2, 3, 8, 9, 10 and 11.
10758 (define_insn "xop_phaddubq"
10759 [(set (match_operand:V2DI 0 "register_operand" "=x")
10765 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10766 (parallel [(const_int 0)
10771 (parallel [(const_int 1)
10777 (parallel [(const_int 2)
10782 (parallel [(const_int 3)
10783 (const_int 7)])))))
10789 (parallel [(const_int 8)
10794 (parallel [(const_int 9)
10795 (const_int 13)]))))
10800 (parallel [(const_int 10)
10805 (parallel [(const_int 11)
10806 (const_int 15)])))))))]
10808 "vphaddubq\t{%1, %0|%0, %1}"
10809 [(set_attr "type" "sseiadd1")])
;; vphadduwd: horizontal add over the V8HI operand 1 (register or memory)
;; giving V4SI.  The two selection vectors start at elements 0 and 1, the
;; latter ending at element 7.
10811 (define_insn "xop_phadduwd"
10812 [(set (match_operand:V4SI 0 "register_operand" "=x")
10816 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10817 (parallel [(const_int 0)
10824 (parallel [(const_int 1)
10827 (const_int 7)])))))]
10829 "vphadduwd\t{%1, %0|%0, %1}"
10830 [(set_attr "type" "sseiadd1")])
;; vphadduwq: horizontal add over the V8HI operand 1 (register or memory)
;; giving V2DI.  Four selection vectors are used, starting at elements 0,
;; 1, 2 and 3.
10832 (define_insn "xop_phadduwq"
10833 [(set (match_operand:V2DI 0 "register_operand" "=x")
10838 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10839 (parallel [(const_int 0)
10844 (parallel [(const_int 1)
10850 (parallel [(const_int 2)
10855 (parallel [(const_int 3)
10856 (const_int 7)]))))))]
10858 "vphadduwq\t{%1, %0|%0, %1}"
10859 [(set_attr "type" "sseiadd1")])
;; vphaddudq: horizontal add over the V4SI operand 1 (register or memory)
;; giving V2DI.  The two selection vectors start at elements 0 and 1, the
;; latter ending at element 3.
10861 (define_insn "xop_phaddudq"
10862 [(set (match_operand:V2DI 0 "register_operand" "=x")
10866 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10867 (parallel [(const_int 0)
10872 (parallel [(const_int 1)
10873 (const_int 3)])))))]
10875 "vphaddudq\t{%1, %0|%0, %1}"
10876 [(set_attr "type" "sseiadd1")])
;; vphsubbw: horizontal subtract over the V16QI operand 1 (register or
;; memory) giving V8HI.  The two selection vectors start at elements 0 and
;; 1, the latter ending at element 15.
10878 (define_insn "xop_phsubbw"
10879 [(set (match_operand:V8HI 0 "register_operand" "=x")
10883 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10884 (parallel [(const_int 0)
10895 (parallel [(const_int 1)
10902 (const_int 15)])))))]
10904 "vphsubbw\t{%1, %0|%0, %1}"
10905 [(set_attr "type" "sseiadd1")])
;; vphsubwd: horizontal subtract over the V8HI operand 1 (register or
;; memory) giving V4SI.  The two selection vectors start at elements 0 and
;; 1, the latter ending at element 7.
10907 (define_insn "xop_phsubwd"
10908 [(set (match_operand:V4SI 0 "register_operand" "=x")
10912 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10913 (parallel [(const_int 0)
10920 (parallel [(const_int 1)
10923 (const_int 7)])))))]
10925 "vphsubwd\t{%1, %0|%0, %1}"
10926 [(set_attr "type" "sseiadd1")])
;; vphsubdq: horizontal subtract over the V4SI operand 1 (register or
;; memory) giving V2DI.  The two selection vectors start at elements 0 and
;; 1, the latter ending at element 3.
10928 (define_insn "xop_phsubdq"
10929 [(set (match_operand:V2DI 0 "register_operand" "=x")
10933 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10934 (parallel [(const_int 0)
10939 (parallel [(const_int 1)
10940 (const_int 3)])))))]
10942 "vphsubdq\t{%1, %0|%0, %1}"
10943 [(set_attr "type" "sseiadd1")])
10945 ;; XOP permute instructions
;; vpperm as UNSPEC_XOP_PERMUTE over three V16QI inputs.  Enabled for
;; TARGET_XOP; the condition rejects operands 2 and 3 both being memory,
;; matching the two alternatives ("x,m" for operand 2, "xm,x" for
;; operand 3).
10946 (define_insn "xop_pperm"
10947 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10949 [(match_operand:V16QI 1 "register_operand" "x,x")
10950 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10951 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10952 UNSPEC_XOP_PERMUTE))]
10953 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10954 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10955 [(set_attr "type" "sse4arg")
10956 (set_attr "mode" "TI")])
10958 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs into one V4SI result using vpperm.  The V16QI
;; operand 3 is attached with (use ...) and printed as the last source.
;; Enabled for TARGET_XOP; operands 2 and 3 may not both be memory.
10959 (define_insn "xop_pperm_pack_v2di_v4si"
10960 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10963 (match_operand:V2DI 1 "register_operand" "x,x"))
10965 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10966 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10967 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10968 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10969 [(set_attr "type" "sse4arg")
10970 (set_attr "mode" "TI")])
;; Pack two V4SI inputs into one V8HI result using vpperm.  The V16QI
;; operand 3 is attached with (use ...) and printed as the last source.
;; Enabled for TARGET_XOP; operands 2 and 3 may not both be memory.
10972 (define_insn "xop_pperm_pack_v4si_v8hi"
10973 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10976 (match_operand:V4SI 1 "register_operand" "x,x"))
10978 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10979 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10980 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10981 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10982 [(set_attr "type" "sse4arg")
10983 (set_attr "mode" "TI")])
;; Pack two V8HI inputs into one V16QI result using vpperm.  The V16QI
;; operand 3 is attached with (use ...) and printed as the last source.
;; Enabled for TARGET_XOP; operands 2 and 3 may not both be memory.
10985 (define_insn "xop_pperm_pack_v8hi_v16qi"
10986 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10989 (match_operand:V8HI 1 "register_operand" "x,x"))
10991 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10992 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10993 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10994 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10995 [(set_attr "type" "sse4arg")
10996 (set_attr "mode" "TI")])
10998 ;; XOP packed rotate instructions
;; Expander for vector rotate-left over the SSEMODE1248 modes.  When
;; operand 2 is not accepted by const_0_to_<sserotatemax>_operand, the count
;; is converted to the element mode if its mode differs, replicated into
;; every lane of a PARALLEL, loaded into a fresh vector register with
;; vec_init<mode>, and xop_vrotl<mode>3 is emitted with that register as
;; the per-element count.
10999 (define_expand "rotl<mode>3"
11000 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11001 (rotate:SSEMODE1248
11002 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11003 (match_operand:SI 2 "general_operand")))]
11006 /* If we were given a scalar, convert it to parallel */
11007 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11009 rtvec vs = rtvec_alloc (<ssescalarnum>);
11010 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11011 rtx reg = gen_reg_rtx (<MODE>mode);
11012 rtx op2 = operands[2];
11015 if (GET_MODE (op2) != <ssescalarmode>mode)
11017 op2 = gen_reg_rtx (<ssescalarmode>mode);
11018 convert_move (op2, operands[2], false);
11021 for (i = 0; i < <ssescalarnum>; i++)
11022 RTVEC_ELT (vs, i) = op2;
11024 emit_insn (gen_vec_init<mode> (reg, par));
11025 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right whose count (operand 2) is an SImode
;; general operand.  When the count is not accepted by
;; const_0_to_<sserotatemax>_operand, the scalar count is converted to
;; <ssescalarmode> if needed and broadcast into a vector register via
;; vec_init<mode>.  That vector is then negated with neg<mode>2 and the
;; rotate is emitted as xop_vrotl<mode>3 using the negated counts.
11030 (define_expand "rotr<mode>3"
11031 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11032 (rotatert:SSEMODE1248
11033 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11034 (match_operand:SI 2 "general_operand")))]
11037 /* If we were given a scalar, convert it to parallel */
11038 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11040 rtvec vs = rtvec_alloc (<ssescalarnum>);
11041 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11042 rtx neg = gen_reg_rtx (<MODE>mode);
11043 rtx reg = gen_reg_rtx (<MODE>mode);
11044 rtx op2 = operands[2];
11047 if (GET_MODE (op2) != <ssescalarmode>mode)
11049 op2 = gen_reg_rtx (<ssescalarmode>mode);
11050 convert_move (op2, operands[2], false);
11053 for (i = 0; i < <ssescalarnum>; i++)
11054 RTVEC_ELT (vs, i) = op2;
11056 emit_insn (gen_vec_init<mode> (reg, par));
11057 emit_insn (gen_neg<mode>2 (neg, reg));
11058 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left of each vector element by a constant count.  Operand 2 must
;; satisfy const_0_to_<sserotatemax>_operand and is emitted as the
;; immediate of vprot<ssevecsize> (length_immediate is 1).
11063 (define_insn "xop_rotl<mode>3"
11064 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11065 (rotate:SSEMODE1248
11066 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11067 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11069 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11070 [(set_attr "type" "sseishft")
11071 (set_attr "length_immediate" "1")
11072 (set_attr "mode" "TI")])
;; Rotate-right of each vector element by a constant count.  The output
;; code emits vprot<ssevecsize> (a left rotate) with the complementary
;; count stored in operands[3]: the element width in bits minus the
;; requested right-rotate count.  The width is taken from
;; GET_MODE_BITSIZE (<ssescalarmode>mode); deriving it from the element
;; count (<ssescalarnum> * 8) is wrong whenever the element size is not
;; 4 bytes, most visibly for V2DI, where it yields 16 instead of 64.
11074 (define_insn "xop_rotr<mode>3"
11075 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11076 (rotatert:SSEMODE1248
11077 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11078 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11081 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11082 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11084 [(set_attr "type" "sseishft")
11085 (set_attr "length_immediate" "1")
11086 (set_attr "mode" "TI")])
;; Expander for a rotate-right with a per-element vector count
;; (operand 2).  The count vector is negated into a fresh register with
;; neg<mode>2 and the operation is emitted as xop_vrotl<mode>3.
11088 (define_expand "vrotr<mode>3"
11089 [(match_operand:SSEMODE1248 0 "register_operand" "")
11090 (match_operand:SSEMODE1248 1 "register_operand" "")
11091 (match_operand:SSEMODE1248 2 "register_operand" "")]
11094 rtx reg = gen_reg_rtx (<MODE>mode);
11095 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11096 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a rotate-left with a per-element vector count
;; (operand 2); forwards all three operands unchanged to
;; xop_vrotl<mode>3.
11100 (define_expand "vrotl<mode>3"
11101 [(match_operand:SSEMODE1248 0 "register_operand" "")
11102 (match_operand:SSEMODE1248 1 "register_operand" "")
11103 (match_operand:SSEMODE1248 2 "register_operand" "")]
11106 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element rotate, emitted as vprot<ssevecsize>.  The RTL is
;; an if_then_else involving the count vector (operand 2) whose arms are a
;; rotate of operand 1 and a rotatert by the negated operand 2.  Requires
;; TARGET_XOP; operands 1 and 2 may not both be memory.
11110 (define_insn "xop_vrotl<mode>3"
11111 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11112 (if_then_else:SSEMODE1248
11114 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11116 (rotate:SSEMODE1248
11117 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11119 (rotatert:SSEMODE1248
11121 (neg:SSEMODE1248 (match_dup 2)))))]
11122 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11123 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11124 [(set_attr "type" "sseishft")
11125 (set_attr "prefix_data16" "0")
11126 (set_attr "prefix_extra" "2")
11127 (set_attr "mode" "TI")])
11129 ;; XOP packed shift instructions.
11130 ;; FIXME: add V2DI back in
;; Expander for a logical right shift with a per-element vector count
;; (operand 2).  The counts are negated with neg<mode>2 and the shift is
;; emitted through xop_lshl<mode>3.
11131 (define_expand "vlshr<mode>3"
11132 [(match_operand:SSEMODE124 0 "register_operand" "")
11133 (match_operand:SSEMODE124 1 "register_operand" "")
11134 (match_operand:SSEMODE124 2 "register_operand" "")]
11137 rtx neg = gen_reg_rtx (<MODE>mode);
11138 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11139 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for an arithmetic right shift with a per-element vector count
;; (operand 2).  The counts are negated with neg<mode>2 and the shift is
;; emitted through xop_ashl<mode>3.
11143 (define_expand "vashr<mode>3"
11144 [(match_operand:SSEMODE124 0 "register_operand" "")
11145 (match_operand:SSEMODE124 1 "register_operand" "")
11146 (match_operand:SSEMODE124 2 "register_operand" "")]
11149 rtx neg = gen_reg_rtx (<MODE>mode);
11150 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11151 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a left shift with a per-element vector count (operand 2);
;; forwards all three operands unchanged to xop_ashl<mode>3.
11155 (define_expand "vashl<mode>3"
11156 [(match_operand:SSEMODE124 0 "register_operand" "")
11157 (match_operand:SSEMODE124 1 "register_operand" "")
11158 (match_operand:SSEMODE124 2 "register_operand" "")]
11161 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element arithmetic shift, emitted as vpsha<ssevecsize>.
;; The RTL is an if_then_else involving the count vector (operand 2) whose
;; arms are an ashift of operand 1 and an ashiftrt by the negated
;; operand 2.  Requires TARGET_XOP; operands 1 and 2 may not both be
;; memory.
11165 (define_insn "xop_ashl<mode>3"
11166 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11167 (if_then_else:SSEMODE1248
11169 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11171 (ashift:SSEMODE1248
11172 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11174 (ashiftrt:SSEMODE1248
11176 (neg:SSEMODE1248 (match_dup 2)))))]
11177 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11178 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11179 [(set_attr "type" "sseishft")
11180 (set_attr "prefix_data16" "0")
11181 (set_attr "prefix_extra" "2")
11182 (set_attr "mode" "TI")])
;; Variable per-element logical shift, emitted as vpshl<ssevecsize>.  The
;; RTL is an if_then_else involving the count vector (operand 2) whose
;; arms are an ashift of operand 1 and an lshiftrt by the negated
;; operand 2.  Requires TARGET_XOP; operands 1 and 2 may not both be
;; memory.
11184 (define_insn "xop_lshl<mode>3"
11185 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11186 (if_then_else:SSEMODE1248
11188 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11190 (ashift:SSEMODE1248
11191 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11193 (lshiftrt:SSEMODE1248
11195 (neg:SSEMODE1248 (match_dup 2)))))]
11196 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11197 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11198 [(set_attr "type" "sseishft")
11199 (set_attr "prefix_data16" "0")
11200 (set_attr "prefix_extra" "2")
11201 (set_attr "mode" "TI")])
11203 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Expander for a V16QI left shift by a scalar SImode count (operand 2).
;; The count is replicated into all 16 elements of a PARALLEL, loaded into
;; a vector register with vec_initv16qi, and the shift is emitted through
;; xop_ashlv16qi3.
11204 (define_expand "ashlv16qi3"
11205 [(match_operand:V16QI 0 "register_operand" "")
11206 (match_operand:V16QI 1 "register_operand" "")
11207 (match_operand:SI 2 "nonmemory_operand" "")]
11210 rtvec vs = rtvec_alloc (16);
11211 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11212 rtx reg = gen_reg_rtx (V16QImode);
11214 for (i = 0; i < 16; i++)
11215 RTVEC_ELT (vs, i) = operands[2];
11217 emit_insn (gen_vec_initv16qi (reg, par));
11218 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI logical shift by a scalar SImode count
;; (operand 2).  The count is replicated into all 16 elements of a
;; PARALLEL, loaded into a vector register with vec_initv16qi, and the
;; shift is emitted through xop_lshlv16qi3.
11222 (define_expand "lshlv16qi3"
11223 [(match_operand:V16QI 0 "register_operand" "")
11224 (match_operand:V16QI 1 "register_operand" "")
11225 (match_operand:SI 2 "nonmemory_operand" "")]
11228 rtvec vs = rtvec_alloc (16);
11229 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11230 rtx reg = gen_reg_rtx (V16QImode);
11232 for (i = 0; i < 16; i++)
11233 RTVEC_ELT (vs, i) = operands[2];
11235 emit_insn (gen_vec_initv16qi (reg, par));
11236 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI arithmetic right shift by a scalar SImode count
;; (operand 2), implemented with xop_ashlv16qi3 and a negated count.
;; A CONST_INT count is negated at expand time before being broadcast
;; with vec_initv16qi.  A non-constant count is broadcast first and the
;; resulting vector is negated with negv16qi2.
11240 (define_expand "ashrv16qi3"
11241 [(match_operand:V16QI 0 "register_operand" "")
11242 (match_operand:V16QI 1 "register_operand" "")
11243 (match_operand:SI 2 "nonmemory_operand" "")]
11246 rtvec vs = rtvec_alloc (16);
11247 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11248 rtx reg = gen_reg_rtx (V16QImode);
11250 rtx ele = ((CONST_INT_P (operands[2]))
11251 ? GEN_INT (- INTVAL (operands[2]))
11254 for (i = 0; i < 16; i++)
11255 RTVEC_ELT (vs, i) = ele;
11257 emit_insn (gen_vec_initv16qi (reg, par));
11259 if (!CONST_INT_P (operands[2]))
11261 rtx neg = gen_reg_rtx (V16QImode);
11262 emit_insn (gen_negv16qi2 (neg, reg));
11263 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11266 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V2DI arithmetic right shift by a scalar DImode count
;; (operand 2), implemented with xop_ashlv2di3 and a negated count.
;; The negated scalar is obtained in one of three ways: a CONST_INT is
;; negated at expand time; a value whose mode is not DImode is first
;; converted with convert_move and then negated with negdi2; otherwise
;; negdi2 is applied directly.  The result is placed in both elements of
;; the count vector with vec_initv2di.
11271 (define_expand "ashrv2di3"
11272 [(match_operand:V2DI 0 "register_operand" "")
11273 (match_operand:V2DI 1 "register_operand" "")
11274 (match_operand:DI 2 "nonmemory_operand" "")]
11277 rtvec vs = rtvec_alloc (2);
11278 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11279 rtx reg = gen_reg_rtx (V2DImode);
11282 if (CONST_INT_P (operands[2]))
11283 ele = GEN_INT (- INTVAL (operands[2]));
11284 else if (GET_MODE (operands[2]) != DImode)
11286 rtx move = gen_reg_rtx (DImode);
11287 ele = gen_reg_rtx (DImode);
11288 convert_move (move, operands[2], false);
11289 emit_insn (gen_negdi2 (ele, move));
11293 ele = gen_reg_rtx (DImode);
11294 emit_insn (gen_negdi2 (ele, operands[2]));
11297 RTVEC_ELT (vs, 0) = ele;
11298 RTVEC_ELT (vs, 1) = ele;
11299 emit_insn (gen_vec_initv2di (reg, par));
11300 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11304 ;; XOP FRCZ support
;; XOP FRCZ on a full FMAMODE operand: one register-or-memory input
;; (operand 1), register result, emitted as vfrcz<ssemodesuffix>.
11305 (define_insn "xop_frcz<mode>2"
11306 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11308 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11311 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11312 [(set_attr "type" "ssecvt1")
11313 (set_attr "mode" "<MODE>")])
;; Expander for the vec_merge form of FRCZ.  The preparation statement
;; supplies operands[3] as the all-zero constant of <MODE>mode.
11316 (define_expand "xop_vmfrcz<mode>2"
11317 [(set (match_operand:SSEMODEF2P 0 "register_operand")
11318 (vec_merge:SSEMODEF2P
11320 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")]
11326 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for the vec_merge form of FRCZ: operand 1 is the register-or-memory
;; input and operand 2 must be a zero constant (const0_operand).  Emitted
;; as vfrcz<ssescalarmodesuffix>.
11329 (define_insn "*xop_vmfrcz_<mode>"
11330 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11331 (vec_merge:SSEMODEF2P
11333 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11335 (match_operand:SSEMODEF2P 2 "const0_operand")
11338 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11339 [(set_attr "type" "ssecvt1")
11340 (set_attr "mode" "<MODE>")])
;; Integer vector comparison producing a vector result.  Operand 1 is the
;; comparison operator (ix86_comparison_int_operator) applied to operands
;; 2 and 3; the operator is printed through %Y1 to form the
;; vpcom...<ssevecsize> mnemonic.
11342 (define_insn "xop_maskcmp<mode>3"
11343 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11344 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11345 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11346 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11348 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11349 [(set_attr "type" "sse4arg")
11350 (set_attr "prefix_data16" "0")
11351 (set_attr "prefix_rep" "0")
11352 (set_attr "prefix_extra" "2")
11353 (set_attr "length_immediate" "1")
11354 (set_attr "mode" "TI")])
;; Unsigned integer vector comparison producing a vector result.
;; Operand 1 is the comparison operator (ix86_comparison_uns_operator)
;; applied to operands 2 and 3; it is printed through %Y1 and followed by
;; a "u" to form the vpcom...u<ssevecsize> mnemonic.
11356 (define_insn "xop_maskcmp_uns<mode>3"
11357 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11358 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11359 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11360 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11362 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11363 [(set_attr "type" "ssecmp")
11364 (set_attr "prefix_data16" "0")
11365 (set_attr "prefix_rep" "0")
11366 (set_attr "prefix_extra" "2")
11367 (set_attr "length_immediate" "1")
11368 (set_attr "mode" "TI")])
11370 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11371 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11372 ;; the exact instruction generated for the intrinsic.
;; Same unsigned comparison as xop_maskcmp_uns<mode>3, but with the
;; comparison wrapped in an UNSPEC_XOP_UNSIGNED_CMP unspec.  The assembler
;; template is the same vpcom...u<ssevecsize> form.
11373 (define_insn "xop_maskcmp_uns2<mode>3"
11374 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11375 (unspec:SSEMODE1248
11376 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11377 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11378 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11379 UNSPEC_XOP_UNSIGNED_CMP))]
11381 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11382 [(set_attr "type" "ssecmp")
11383 (set_attr "prefix_data16" "0")
11384 (set_attr "prefix_extra" "2")
11385 (set_attr "length_immediate" "1")
11386 (set_attr "mode" "TI")])
11388 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11389 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE pattern.  Operand 3 is a constant selector: a
;; nonzero value emits vpcomtrue<ssevecsize>, zero emits
;; vpcomfalse<ssevecsize>.  Operands 1 and 2 are passed through as the
;; instruction's sources.
11390 (define_insn "xop_pcom_tf<mode>3"
11391 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11392 (unspec:SSEMODE1248
11393 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11394 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11395 (match_operand:SI 3 "const_int_operand" "n")]
11396 UNSPEC_XOP_TRUEFALSE))]
11399 return ((INTVAL (operands[3]) != 0)
11400 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11401 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11403 [(set_attr "type" "ssecmp")
11404 (set_attr "prefix_data16" "0")
11405 (set_attr "prefix_extra" "2")
11406 (set_attr "length_immediate" "1")
11407 (set_attr "mode" "TI")])
;; Two-source permute emitted as vpermil2<ssemodesuffix>.  Operands 1 and
;; 2 are the data sources, operand 3 is a selector vector in
;; <avxpermvecmode>, and operand 4 is an immediate restricted to 0..3 by
;; const_0_to_3_operand.
11409 (define_insn "xop_vpermil2<mode>3"
11410 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11412 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11413 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11414 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11415 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11418 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11419 [(set_attr "type" "sse4arg")
11420 (set_attr "length_immediate" "1")
11421 (set_attr "mode" "<MODE>")])
11423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded, three-operand form of AESENC (vaesenc), enabled when both
;; TARGET_AES and TARGET_AVX hold.  The destination is independent of the
;; first source register.
11424 (define_insn "*avx_aesenc"
11425 [(set (match_operand:V2DI 0 "register_operand" "=x")
11426 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11427 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11429 "TARGET_AES && TARGET_AVX"
11430 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11431 [(set_attr "type" "sselog1")
11432 (set_attr "prefix_extra" "1")
11433 (set_attr "prefix" "vex")
11434 (set_attr "mode" "TI")])
;; Two-operand form of AESENC.  Operand 1 is tied to the destination by
;; the "0" constraint, so the template prints only operands 0 and 2.
11436 (define_insn "aesenc"
11437 [(set (match_operand:V2DI 0 "register_operand" "=x")
11438 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11439 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11442 "aesenc\t{%2, %0|%0, %2}"
11443 [(set_attr "type" "sselog1")
11444 (set_attr "prefix_extra" "1")
11445 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand form of AESENCLAST (vaesenclast), modelled
;; as UNSPEC_AESENCLAST and enabled when both TARGET_AES and TARGET_AVX
;; hold.
11447 (define_insn "*avx_aesenclast"
11448 [(set (match_operand:V2DI 0 "register_operand" "=x")
11449 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11450 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11451 UNSPEC_AESENCLAST))]
11452 "TARGET_AES && TARGET_AVX"
11453 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11454 [(set_attr "type" "sselog1")
11455 (set_attr "prefix_extra" "1")
11456 (set_attr "prefix" "vex")
11457 (set_attr "mode" "TI")])
;; Two-operand form of AESENCLAST, modelled as UNSPEC_AESENCLAST.
;; Operand 1 is tied to the destination by the "0" constraint.
11459 (define_insn "aesenclast"
11460 [(set (match_operand:V2DI 0 "register_operand" "=x")
11461 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11462 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11463 UNSPEC_AESENCLAST))]
11465 "aesenclast\t{%2, %0|%0, %2}"
11466 [(set_attr "type" "sselog1")
11467 (set_attr "prefix_extra" "1")
11468 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand form of AESDEC (vaesdec), enabled when both
;; TARGET_AES and TARGET_AVX hold.
11470 (define_insn "*avx_aesdec"
11471 [(set (match_operand:V2DI 0 "register_operand" "=x")
11472 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11473 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11475 "TARGET_AES && TARGET_AVX"
11476 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11477 [(set_attr "type" "sselog1")
11478 (set_attr "prefix_extra" "1")
11479 (set_attr "prefix" "vex")
11480 (set_attr "mode" "TI")])
;; Two-operand form of AESDEC.  Operand 1 is tied to the destination by
;; the "0" constraint, so the template prints only operands 0 and 2.
11482 (define_insn "aesdec"
11483 [(set (match_operand:V2DI 0 "register_operand" "=x")
11484 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11485 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11488 "aesdec\t{%2, %0|%0, %2}"
11489 [(set_attr "type" "sselog1")
11490 (set_attr "prefix_extra" "1")
11491 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand form of AESDECLAST (vaesdeclast), modelled
;; as UNSPEC_AESDECLAST and enabled when both TARGET_AES and TARGET_AVX
;; hold.
11493 (define_insn "*avx_aesdeclast"
11494 [(set (match_operand:V2DI 0 "register_operand" "=x")
11495 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11496 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11497 UNSPEC_AESDECLAST))]
11498 "TARGET_AES && TARGET_AVX"
11499 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11500 [(set_attr "type" "sselog1")
11501 (set_attr "prefix_extra" "1")
11502 (set_attr "prefix" "vex")
11503 (set_attr "mode" "TI")])
;; Two-operand form of AESDECLAST, modelled as UNSPEC_AESDECLAST.
;; Operand 1 is tied to the destination by the "0" constraint.
11505 (define_insn "aesdeclast"
11506 [(set (match_operand:V2DI 0 "register_operand" "=x")
11507 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11508 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11509 UNSPEC_AESDECLAST))]
11511 "aesdeclast\t{%2, %0|%0, %2}"
11512 [(set_attr "type" "sselog1")
11513 (set_attr "prefix_extra" "1")
11514 (set_attr "mode" "TI")])
;; AESIMC: single register-or-memory source, register destination.  One
;; pattern serves both encodings: the template uses the %v mnemonic prefix
;; and the "prefix" attribute is maybe_vex.
11516 (define_insn "aesimc"
11517 [(set (match_operand:V2DI 0 "register_operand" "=x")
11518 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11521 "%vaesimc\t{%1, %0|%0, %1}"
11522 [(set_attr "type" "sselog1")
11523 (set_attr "prefix_extra" "1")
11524 (set_attr "prefix" "maybe_vex")
11525 (set_attr "mode" "TI")])
;; AESKEYGENASSIST, modelled as UNSPEC_AESKEYGENASSIST: operand 1 is the
;; register-or-memory source and operand 2 an 8-bit immediate
;; (const_0_to_255_operand).  Uses the %v mnemonic prefix with a
;; maybe_vex "prefix" attribute.
11527 (define_insn "aeskeygenassist"
11528 [(set (match_operand:V2DI 0 "register_operand" "=x")
11529 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11530 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11531 UNSPEC_AESKEYGENASSIST))]
11533 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11534 [(set_attr "type" "sselog1")
11535 (set_attr "prefix_extra" "1")
11536 (set_attr "length_immediate" "1")
11537 (set_attr "prefix" "maybe_vex")
11538 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand form of PCLMULQDQ (vpclmulqdq) with an
;; 8-bit immediate (operand 3).  Enabled when both TARGET_PCLMUL and
;; TARGET_AVX hold.
11540 (define_insn "*vpclmulqdq"
11541 [(set (match_operand:V2DI 0 "register_operand" "=x")
11542 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11543 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11544 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11546 "TARGET_PCLMUL && TARGET_AVX"
11547 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11548 [(set_attr "type" "sselog1")
11549 (set_attr "prefix_extra" "1")
11550 (set_attr "length_immediate" "1")
11551 (set_attr "prefix" "vex")
11552 (set_attr "mode" "TI")])
;; Two-operand form of PCLMULQDQ with an 8-bit immediate (operand 3).
;; Operand 1 is tied to the destination by the "0" constraint, so the
;; template prints operands 0, 2 and 3 only.
11554 (define_insn "pclmulqdq"
11555 [(set (match_operand:V2DI 0 "register_operand" "=x")
11556 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11557 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11558 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11561 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11562 [(set_attr "type" "sselog1")
11563 (set_attr "prefix_extra" "1")
11564 (set_attr "length_immediate" "1")
11565 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL describing vzeroall.  The number of
;; SSE registers covered is 16 when TARGET_64BIT and 8 otherwise.
;; Element 0 of the PARALLEL is an UNSPEC_VOLATILE; each following
;; element is a SET of one SSE register, viewed in V8SImode, to the zero
;; vector.
11567 (define_expand "avx_vzeroall"
11568 [(match_par_dup 0 [(const_int 0)])]
11571 int nregs = TARGET_64BIT ? 16 : 8;
11574 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11576 XVECEXP (operands[0], 0, 0)
11577 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11580 for (regno = 0; regno < nregs; regno++)
11581 XVECEXP (operands[0], 0, regno + 1)
11582 = gen_rtx_SET (VOIDmode,
11583 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11584 CONST0_RTX (V8SImode));
;; Insn matching a PARALLEL accepted by vzeroall_operation whose first
;; element is the UNSPECV_VZEROALL unspec_volatile.  Attributes mark it as
;; having no modrm byte and no memory access.
11587 (define_insn "*avx_vzeroall"
11588 [(match_parallel 0 "vzeroall_operation"
11589 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11592 [(set_attr "type" "sse")
11593 (set_attr "modrm" "0")
11594 (set_attr "memory" "none")
11595 (set_attr "prefix" "vex")
11596 (set_attr "mode" "OI")])
11598 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11599 ;; if the upper 128bits are unused.
;; Insn modelled as the UNSPECV_VZEROUPPER unspec_volatile, carrying a
;; single const_int operand.  Attributes mark it as having no modrm byte
;; and no memory access.
11600 (define_insn "avx_vzeroupper"
11601 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11602 UNSPECV_VZEROUPPER)]
11605 [(set_attr "type" "sse")
11606 (set_attr "modrm" "0")
11607 (set_attr "memory" "none")
11608 (set_attr "prefix" "vex")
11609 (set_attr "mode" "OI")])
;; Broadcast a scalar (operand 1) into every element of a 256-bit vector.
;; With a memory source the insn emits vbroadcast<ssescalarmodesuffix>.
;; With a register source it is split after reload into two steps: a
;; vec_duplicate into the half-width view of the destination register
;; (operands[2]), then a vec_concat of that half with itself.
11611 (define_insn_and_split "vec_dup<mode>"
11612 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11613 (vec_duplicate:AVX256MODE24P
11614 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11617 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11619 "&& reload_completed && REG_P (operands[1])"
11620 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11621 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11622 "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
11623 [(set_attr "type" "ssemov")
11624 (set_attr "prefix_extra" "1")
11625 (set_attr "prefix" "vex")
11626 (set_attr "mode" "V8SF")])
;; Duplicate a 128-bit half vector (operand 1) into both halves of a
;; 256-bit register.  Three alternatives:
;;   memory source              -> vbroadcastf128
;;   source tied to destination -> vinsertf128 with immediate 1
;;   other register source      -> vperm2f128 with immediate 0
11628 (define_insn "avx_vbroadcastf128_<mode>"
11629 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11630 (vec_concat:AVX256MODE
11631 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11635 vbroadcastf128\t{%1, %0|%0, %1}
11636 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11637 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11638 [(set_attr "type" "ssemov,sselog1,sselog1")
11639 (set_attr "prefix_extra" "1")
11640 (set_attr "length_immediate" "0,1,1")
11641 (set_attr "prefix" "vex")
11642 (set_attr "mode" "V4SF,V8SF,V8SF")])
11644 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11645 ;; If it so happens that the input is in memory, use vbroadcast.
11646 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of one element, selected by the parallel in operand 2
;; (checked by avx_vbroadcast_operand); operand 3 is the element index.
;; The output code dispatches on which_alternative.  One path rewrites
;; operand 1 to address the chosen SFmode element (offset elt * 4) and
;; emits vbroadcastss.  The other replaces operand 2 with the immediate
;; elt * 0x55 and emits vpermilps.
11647 (define_insn "*avx_vperm_broadcast_v4sf"
11648 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11650 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11651 (match_parallel 2 "avx_vbroadcast_operand"
11652 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11655 int elt = INTVAL (operands[3]);
11656 switch (which_alternative)
11660 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11661 return "vbroadcastss\t{%1, %0|%0, %1}";
11663 operands[2] = GEN_INT (elt * 0x55);
11664 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11666 gcc_unreachable ();
11669 [(set_attr "type" "ssemov,ssemov,sselog1")
11670 (set_attr "prefix_extra" "1")
11671 (set_attr "length_immediate" "0,0,1")
11672 (set_attr "prefix" "vex")
11673 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast of one element, selected by the parallel in operand 2
;; (checked by avx_vbroadcast_operand); operand 3 is the element index.
;; The pattern is split after reload.  The split code contains two
;; sequences:
;;   * an in-register shuffle: avx_vpermil<mode> replicates the element
;;     within its 128-bit lane (mask 15/0 for V4DF, (elt & 3) * 0x55
;;     otherwise), then avx_vperm2f128<mode>3 copies the lane holding the
;;     element into both lanes of the destination;
;;   * a rewrite of operand 1 to address just the selected scalar element,
;;     so that the vec_duplicate replacement pattern can be used.
;; NOTE(review): presumably the first sequence is used for a register
;; source and the second for a memory source -- confirm against the full
;; split body.
11675 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11676 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11677 (vec_select:AVX256MODEF2P
11678 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11679 (match_parallel 2 "avx_vbroadcast_operand"
11680 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11683 "&& reload_completed"
11684 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11686 rtx op0 = operands[0], op1 = operands[1];
11687 int elt = INTVAL (operands[3]);
11693 /* Shuffle element we care about into all elements of the 128-bit lane.
11694 The other lane gets shuffled too, but we don't care. */
11695 if (<MODE>mode == V4DFmode)
11696 mask = (elt & 1 ? 15 : 0);
11698 mask = (elt & 3) * 0x55;
11699 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11701 /* Shuffle the lane we care about into both lanes of the dest. */
11702 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11703 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11707 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11708 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; Expander for the AVXMODEFDP modes: converts the 8-bit immediate
;; (operand 2) into an explicit vec_select PARALLEL.  One mask bit is
;; consumed per element: bits 0 and 1 choose index 0 or 1 for the first
;; two elements.  For V4DF, bits 2 and 3 choose index 2 or 3 for the last
;; two elements.
11711 (define_expand "avx_vpermil<mode>"
11712 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11713 (vec_select:AVXMODEFDP
11714 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11715 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11718 int mask = INTVAL (operands[2]);
11719 rtx perm[<ssescalarnum>];
11721 perm[0] = GEN_INT (mask & 1);
11722 perm[1] = GEN_INT ((mask >> 1) & 1);
11723 if (<MODE>mode == V4DFmode)
11725 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11726 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11730 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the AVXMODEFSP modes: converts the 8-bit immediate
;; (operand 2) into an explicit vec_select PARALLEL.  Each 2-bit field of
;; the mask gives the source index for one of the first four elements.
;; For V8SF the same four fields are reused for elements 4..7, with the
;; indices offset by 4.
11733 (define_expand "avx_vpermil<mode>"
11734 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11735 (vec_select:AVXMODEFSP
11736 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11737 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11740 int mask = INTVAL (operands[2]);
11741 rtx perm[<ssescalarnum>];
11743 perm[0] = GEN_INT (mask & 3);
11744 perm[1] = GEN_INT ((mask >> 2) & 3);
11745 perm[2] = GEN_INT ((mask >> 4) & 3);
11746 perm[3] = GEN_INT ((mask >> 6) & 3);
11747 if (<MODE>mode == V8SFmode)
11749 perm[4] = GEN_INT ((mask & 3) + 4);
11750 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11751 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11752 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11756 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Insn matching a vec_select whose PARALLEL is accepted by
;; avx_vpermilp_<mode>_operand.  At output time the immediate is
;; recovered as avx_vpermilp_parallel (...) - 1, stored back into
;; operands[2], and printed as the immediate of vpermil<ssemodesuffix>.
11759 (define_insn "*avx_vpermilp<mode>"
11760 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11761 (vec_select:AVXMODEF2P
11762 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11763 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11764 [(match_operand 3 "const_int_operand" "")])))]
11767 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11768 operands[2] = GEN_INT (mask);
11769 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11771 [(set_attr "type" "sselog")
11772 (set_attr "prefix_extra" "1")
11773 (set_attr "length_immediate" "1")
11774 (set_attr "prefix" "vex")
11775 (set_attr "mode" "<MODE>")])
;; Variable-selector form of vpermil<ssemodesuffix>: operand 1 is the
;; data register and operand 2 a register-or-memory selector vector in
;; <avxpermvecmode>.
11777 (define_insn "avx_vpermilvar<mode>3"
11778 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11780 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11781 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11784 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11785 [(set_attr "type" "sselog")
11786 (set_attr "prefix_extra" "1")
11787 (set_attr "prefix" "vex")
11788 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128 with an 8-bit immediate (operand 3).  The
;; default form is the UNSPEC_VPERMIL2F128 unspec.  When neither bit 3 nor
;; bit 7 of the immediate is set ((mask & 0x88) == 0), the operation is
;; rewritten as a vec_select from the vec_concat of operands 1 and 2.
;; Bits 0-1 choose the half-vector (base index (mask & 3) * nelt2) that
;; fills the low half of the result.  Bits 4-5 choose the half-vector for
;; the high half.
11790 (define_expand "avx_vperm2f128<mode>3"
11791 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11792 (unspec:AVX256MODE2P
11793 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11794 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11795 (match_operand:SI 3 "const_0_to_255_operand" "")]
11796 UNSPEC_VPERMIL2F128))]
11799 int mask = INTVAL (operands[3]);
11800 if ((mask & 0x88) == 0)
11802 rtx perm[<ssescalarnum>], t1, t2;
11803 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11805 base = (mask & 3) * nelt2;
11806 for (i = 0; i < nelt2; ++i)
11807 perm[i] = GEN_INT (base + i);
11809 base = ((mask >> 4) & 3) * nelt2;
11810 for (i = 0; i < nelt2; ++i)
11811 perm[i + nelt2] = GEN_INT (base + i);
11813 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11814 operands[1], operands[2]);
11815 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11816 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11817 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11823 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11824 ;; means that in order to represent this properly in rtl we'd have to
11825 ;; nest *another* vec_concat with a zero operand and do the select from
11826 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec (UNSPEC_VPERMIL2F128) form of vperm2f128, accepting any 8-bit
;; immediate in operand 3 and printing it unchanged.
11827 (define_insn "*avx_vperm2f128<mode>_full"
11828 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11829 (unspec:AVX256MODE2P
11830 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11831 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11832 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11833 UNSPEC_VPERMIL2F128))]
11835 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11836 [(set_attr "type" "sselog")
11837 (set_attr "prefix_extra" "1")
11838 (set_attr "length_immediate" "1")
11839 (set_attr "prefix" "vex")
11840 (set_attr "mode" "V8SF")])
;; vec_select-of-vec_concat form of vperm2f128.  The PARALLEL in operand 3
;; must be accepted by avx_vperm2f128_<mode>_operand.  At output time the
;; immediate is recovered as avx_vperm2f128_parallel (...) - 1 and stored
;; back into operands[3] for printing.
11842 (define_insn "*avx_vperm2f128<mode>_nozero"
11843 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11844 (vec_select:AVX256MODE2P
11845 (vec_concat:<ssedoublesizemode>
11846 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11847 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11848 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
11849 [(match_operand 4 "const_int_operand" "")])))]
11852 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11853 operands[3] = GEN_INT (mask);
11854 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11856 [(set_attr "type" "sselog")
11857 (set_attr "prefix_extra" "1")
11858 (set_attr "length_immediate" "1")
11859 (set_attr "prefix" "vex")
11860 (set_attr "mode" "V8SF")])
;; Expander for inserting a 128-bit half vector (operand 2) into a
;; 256-bit vector (operand 1).  Operand 3 is restricted to 0 or 1 by
;; const_0_to_1_operand and selects between the vec_set_lo_<mode> and
;; vec_set_hi_<mode> patterns; any other value is unreachable.
11862 (define_expand "avx_vinsertf128<mode>"
11863 [(match_operand:AVX256MODE 0 "register_operand" "")
11864 (match_operand:AVX256MODE 1 "register_operand" "")
11865 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11866 (match_operand:SI 3 "const_0_to_1_operand" "")]
11869 switch (INTVAL (operands[3]))
11872 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11876 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11880 gcc_unreachable ();
;; Replace the low half of a 4-element 256-bit vector: the result is the
;; concatenation of operand 2 (new low half) with elements 2..3 of
;; operand 1.  Emitted as vinsertf128 with immediate 0.
11885 (define_insn "vec_set_lo_<mode>"
11886 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11887 (vec_concat:AVX256MODE4P
11888 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11889 (vec_select:<avxhalfvecmode>
11890 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11891 (parallel [(const_int 2) (const_int 3)]))))]
11893 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11894 [(set_attr "type" "sselog")
11895 (set_attr "prefix_extra" "1")
11896 (set_attr "length_immediate" "1")
11897 (set_attr "prefix" "vex")
11898 (set_attr "mode" "V8SF")])
;; Replace the high half of a 4-element 256-bit vector: the result is the
;; concatenation of elements 0..1 of operand 1 with operand 2 (new high
;; half).  Emitted as vinsertf128 with immediate 1.
11900 (define_insn "vec_set_hi_<mode>"
11901 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11902 (vec_concat:AVX256MODE4P
11903 (vec_select:<avxhalfvecmode>
11904 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11905 (parallel [(const_int 0) (const_int 1)]))
11906 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11908 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11909 [(set_attr "type" "sselog")
11910 (set_attr "prefix_extra" "1")
11911 (set_attr "length_immediate" "1")
11912 (set_attr "prefix" "vex")
11913 (set_attr "mode" "V8SF")])
;; Replace the low half of an 8-element 256-bit vector: the result is the
;; concatenation of operand 2 (new low half) with elements 4..7 of
;; operand 1.  Emitted as vinsertf128 with immediate 0.
11915 (define_insn "vec_set_lo_<mode>"
11916 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11917 (vec_concat:AVX256MODE8P
11918 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11919 (vec_select:<avxhalfvecmode>
11920 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11921 (parallel [(const_int 4) (const_int 5)
11922 (const_int 6) (const_int 7)]))))]
11924 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11925 [(set_attr "type" "sselog")
11926 (set_attr "prefix_extra" "1")
11927 (set_attr "length_immediate" "1")
11928 (set_attr "prefix" "vex")
11929 (set_attr "mode" "V8SF")])
;; Replace the high half of an 8-element 256-bit vector: the result is the
;; concatenation of elements 0..3 of operand 1 with operand 2 (new high
;; half).  Emitted as vinsertf128 with immediate 1.
11931 (define_insn "vec_set_hi_<mode>"
11932 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11933 (vec_concat:AVX256MODE8P
11934 (vec_select:<avxhalfvecmode>
11935 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11936 (parallel [(const_int 0) (const_int 1)
11937 (const_int 2) (const_int 3)]))
11938 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11940 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11941 [(set_attr "type" "sselog")
11942 (set_attr "prefix_extra" "1")
11943 (set_attr "length_immediate" "1")
11944 (set_attr "prefix" "vex")
11945 (set_attr "mode" "V8SF")])
;; Replace the low half of a V16HI vector: operand 2 (V8HI) is the new low
;; half and elements 8..15 of operand 1 are kept.  Emitted as vinsertf128
;; with immediate 0.
11947 (define_insn "vec_set_lo_v16hi"
11948 [(set (match_operand:V16HI 0 "register_operand" "=x")
11950 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11952 (match_operand:V16HI 1 "register_operand" "x")
11953 (parallel [(const_int 8) (const_int 9)
11954 (const_int 10) (const_int 11)
11955 (const_int 12) (const_int 13)
11956 (const_int 14) (const_int 15)]))))]
11958 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11959 [(set_attr "type" "sselog")
11960 (set_attr "prefix_extra" "1")
11961 (set_attr "length_immediate" "1")
11962 (set_attr "prefix" "vex")
11963 (set_attr "mode" "V8SF")])
;; Replace the high half of a V16HI vector: elements 0..7 of operand 1 are
;; kept and operand 2 (V8HI) is the new high half.  Emitted as vinsertf128
;; with immediate 1.
11965 (define_insn "vec_set_hi_v16hi"
11966 [(set (match_operand:V16HI 0 "register_operand" "=x")
11969 (match_operand:V16HI 1 "register_operand" "x")
11970 (parallel [(const_int 0) (const_int 1)
11971 (const_int 2) (const_int 3)
11972 (const_int 4) (const_int 5)
11973 (const_int 6) (const_int 7)]))
11974 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11976 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11977 [(set_attr "type" "sselog")
11978 (set_attr "prefix_extra" "1")
11979 (set_attr "length_immediate" "1")
11980 (set_attr "prefix" "vex")
11981 (set_attr "mode" "V8SF")])
;; Replace the low half of a V32QI vector: operand 2 (V16QI) is the new
;; low half and elements 16..31 of operand 1 are kept.  Emitted as
;; vinsertf128 with immediate 0.
11983 (define_insn "vec_set_lo_v32qi"
11984 [(set (match_operand:V32QI 0 "register_operand" "=x")
11986 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11988 (match_operand:V32QI 1 "register_operand" "x")
11989 (parallel [(const_int 16) (const_int 17)
11990 (const_int 18) (const_int 19)
11991 (const_int 20) (const_int 21)
11992 (const_int 22) (const_int 23)
11993 (const_int 24) (const_int 25)
11994 (const_int 26) (const_int 27)
11995 (const_int 28) (const_int 29)
11996 (const_int 30) (const_int 31)]))))]
11998 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11999 [(set_attr "type" "sselog")
12000 (set_attr "prefix_extra" "1")
12001 (set_attr "length_immediate" "1")
12002 (set_attr "prefix" "vex")
12003 (set_attr "mode" "V8SF")])
;; vec_set_hi_v32qi: operand 0 (V32QI register) is described in terms of
;; elements 0..15 of operand 1 (V32QI register) followed by operand 2
;; (V16QI, register or memory).  Going by the name and the 0x1 immediate,
;; the low half of operand 1 is kept and the high half is replaced by
;; operand 2.
;; NOTE(review): listing numbers 12007, 12008 and 12019 are absent from
;; this copy, so the RTL that wraps these operands and the insn condition
;; are not visible here -- confirm against the complete file.
12005 (define_insn "vec_set_hi_v32qi"
12006 [(set (match_operand:V32QI 0 "register_operand" "=x")
12009 (match_operand:V32QI 1 "register_operand" "x")
12010 (parallel [(const_int 0) (const_int 1)
12011 (const_int 2) (const_int 3)
12012 (const_int 4) (const_int 5)
12013 (const_int 6) (const_int 7)
12014 (const_int 8) (const_int 9)
12015 (const_int 10) (const_int 11)
12016 (const_int 12) (const_int 13)
12017 (const_int 14) (const_int 15)]))
12018 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
;; Output template: vinsertf128 with immediate 0x1; the {...|...} form
;; gives the operand order for each of the two assembler dialects.
12020 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
;; Attributes: type sselog, one extra prefix byte, one immediate byte,
;; VEX prefix; the mode attribute is recorded as V8SF.
12021 [(set_attr "type" "sselog")
12022 (set_attr "prefix_extra" "1")
12023 (set_attr "length_immediate" "1")
12024 (set_attr "prefix" "vex")
12025 (set_attr "mode" "V8SF")])
;; avx_maskload<ssemodesuffix><avxmodesuffix>: emits vmaskmov<ssemodesuffix>
;; with a memory source (operand 1), a register operand 2 and a register
;; destination (operand 0), all in the iterated AVXMODEF2P mode.
;; Operand 2 is presumably the element mask -- confirm against the
;; instruction reference.
;; NOTE(review): listing numbers 12029 and 12032-12034 are absent from
;; this copy; the operator enclosing the operand vector, its closing lines
;; and the insn condition are therefore not visible here.
12027 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
12028 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12030 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12031 (match_operand:AVXMODEF2P 2 "register_operand" "x")
;; Output template; the {...|...} form gives the operand order for each
;; of the two assembler dialects.
12035 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
;; Attributes: type sselog1, one extra prefix byte, VEX prefix, and the
;; mode attribute follows the iterator mode.
12036 [(set_attr "type" "sselog1")
12037 (set_attr "prefix_extra" "1")
12038 (set_attr "prefix" "vex")
12039 (set_attr "mode" "<MODE>")])
;; avx_maskstore<ssemodesuffix><avxmodesuffix>: emits vmaskmov<ssemodesuffix>
;; with a memory destination (operand 0) and two register inputs
;; (operands 1 and 2), all in the iterated AVXMODEF2P mode.  The operation
;; is tagged UNSPEC_MASKSTORE.
;; Which of operands 1 and 2 is the mask and which is the data is not
;; stated in the visible lines -- confirm against the instruction
;; reference.
;; NOTE(review): listing numbers 12043, 12046 and 12048 are absent from
;; this copy; the operator enclosing the operand vector, any further
;; vector element and the insn condition are therefore not visible here.
12041 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
12042 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12044 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12045 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12047 UNSPEC_MASKSTORE))]
;; Output template; the {...|...} form gives the operand order for each
;; of the two assembler dialects.
12049 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
;; Attributes: type sselog1, one extra prefix byte, VEX prefix, and the
;; mode attribute follows the iterator mode.
12050 [(set_attr "type" "sselog1")
12051 (set_attr "prefix_extra" "1")
12052 (set_attr "prefix" "vex")
12053 (set_attr "mode" "<MODE>")])
;; avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>: an insn over the
;; AVX256MODE2P iterator whose source (operand 1) has the half-width
;; vector mode <avxhalfvecmode> and whose destination (operand 0) has the
;; full mode.  Two alternatives are given: register destination with
;; register-or-memory source, and memory destination with register source.
;; Once reload has completed the insn is split: operand 1 is re-expressed
;; in <MODE>mode and a plain move into operand 0 is emitted.
;; NOTE(review): the two assignments to op1 below (gen_rtx_REG on the
;; register number, and gen_lowpart) are presumably the two arms of a
;; conditional whose lines are not in this copy; listing numbers
;; 12059-12061, 12063-12064, 12066, 12068 and 12071-12073 are absent, so
;; the unspec name, insn condition, output template and the end of the
;; split body cannot be seen here.
12055 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12056 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12057 (unspec:AVX256MODE2P
12058 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12062 "&& reload_completed"
12065 rtx op1 = operands[1];
12067 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12069 op1 = gen_lowpart (<MODE>mode, op1);
12070 emit_move_insn (operands[0], op1);
;; vec_init<mode>: expander over the AVX256MODE iterator.  Operand 0 is
;; the destination register; operand 1 carries no mode, predicate or
;; constraint.  The work is delegated to ix86_expand_vector_init, called
;; with `false' as its first argument followed by the two operands.
;; NOTE(review): listing numbers 12077-12078 and 12080-12082 are absent
;; from this copy, so the expander condition and the lines surrounding
;; the call are not visible here.
12074 (define_expand "vec_init<mode>"
12075 [(match_operand:AVX256MODE 0 "register_operand" "")
12076 (match_operand 1 "" "")]
12079 ix86_expand_vector_init (false, operands[0], operands[1]);
;; *vec_concat<mode>_avx: builds a 256-bit AVX256MODE register (operand 0)
;; as the vec_concat of two half-width operands.  Operand 1 is always a
;; register.  Operand 2 has two alternatives: register-or-memory ("xm"),
;; or the "C" constraint (presumably a constant-zero vector -- confirm in
;; the target's constraint definitions).
;; The output code switches on which_alternative.  One path emits
;; vinsertf128 with immediate 0x1, placing operand 2 next to operand 1
;; printed with the %t modifier.  The other path switches on the insn's
;; mode attribute and emits vmovaps, vmovapd or vmovdqa from operand 1
;; into operand 0 printed with the %x modifier.  Any other case reaches
;; gcc_unreachable.
;; NOTE(review): the case labels are not in this copy (listing numbers
;; 12088-12089, 12091-12092, 12094, 12096-12097, 12099, 12101,
;; 12103-12104 and 12106-12107 are absent).  The attribute lists below
;; pair sselog and a one-byte immediate with the first alternative and
;; ssemov with the second, which suggests vinsertf128 belongs to
;; alternative 0 and the moves to alternative 1 -- confirm.
12083 (define_insn "*vec_concat<mode>_avx"
12084 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12085 (vec_concat:AVX256MODE
12086 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12087 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12090 switch (which_alternative)
12093 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12095 switch (get_attr_mode (insn))
12098 return "vmovaps\t{%1, %x0|%x0, %1}";
12100 return "vmovapd\t{%1, %x0|%x0, %1}";
12102 return "vmovdqa\t{%1, %x0|%x0, %1}";
12105 gcc_unreachable ();
12108 [(set_attr "type" "sselog,ssemov")
12109 (set_attr "prefix_extra" "1,*")
12110 (set_attr "length_immediate" "1,*")
12111 (set_attr "prefix" "vex")
12112 (set_attr "mode" "<avxvecmode>")])
;; vcvtph2ps: convert the half-precision values held in V8HI register
;; operand 1 into a V4SF result in register operand 0.
;; The conversion is written as a V8SF unspec of operand 1 from which
;; four elements are selected.  The selection names elements 0, 1, 2 and
;; 3, i.e. the four low lanes in order; a vector of (0 1 1 2) would
;; repeat element 1 and never pick element 3, which does not describe
;; the instruction's result.
;; NOTE(review): listing numbers 12116, 12118 and 12121 are absent from
;; this copy, so the operator applying the selection, the unspec name and
;; the insn condition are not visible here -- confirm against the
;; complete file.
12114 (define_insn "vcvtph2ps"
12115 [(set (match_operand:V4SF 0 "register_operand" "=x")
12117 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12119 (parallel [(const_int 0) (const_int 1)
12120 (const_int 2) (const_int 3)])))]
;; Output template; the {...|...} form gives the operand order for each
;; of the two assembler dialects.
12122 "vcvtph2ps\t{%1, %0|%0, %1}"
;; Attributes: type ssecvt, VEX prefix, mode V4SF.
12123 [(set_attr "type" "ssecvt")
12124 (set_attr "prefix" "vex")
12125 (set_attr "mode" "V4SF")])
;; *vcvtph2ps_load: memory-source form of the half-to-single conversion.
;; Operand 1 is a V4HI memory operand; the V4SF result goes to register
;; operand 0.  The operation is an unspec tagged UNSPEC_VCVTPH2PS.
;; NOTE(review): the mode attribute below is V8SF although the result
;; mode is V4SF -- confirm this is intended.
;; NOTE(review): listing number 12131 is absent from this copy
;; (presumably the insn condition).
12127 (define_insn "*vcvtph2ps_load"
12128 [(set (match_operand:V4SF 0 "register_operand" "=x")
12129 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12130 UNSPEC_VCVTPH2PS))]
12132 "vcvtph2ps\t{%1, %0|%0, %1}"
12133 [(set_attr "type" "ssecvt")
12134 (set_attr "prefix" "vex")
12135 (set_attr "mode" "V8SF")])
;; vcvtph2ps256: 256-bit half-to-single conversion.  Operand 1 is a V8HI
;; register or memory operand; the V8SF result goes to register
;; operand 0.  The operation is an unspec tagged UNSPEC_VCVTPH2PS.
;; Attributes: type ssecvt, VEX prefix, mode V8SF.
;; NOTE(review): listing number 12141 is absent from this copy
;; (presumably the insn condition).
12137 (define_insn "vcvtph2ps256"
12138 [(set (match_operand:V8SF 0 "register_operand" "=x")
12139 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12140 UNSPEC_VCVTPH2PS))]
12142 "vcvtph2ps\t{%1, %0|%0, %1}"
12143 [(set_attr "type" "ssecvt")
12144 (set_attr "prefix" "vex")
12145 (set_attr "mode" "V8SF")])
;; vcvtps2ph: expander for the 128-bit single-to-half conversion.
;; Operand 0 is a V8HI register.  The visible part of the pattern is a
;; V4HI unspec over V4SF register operand 1 and SI immediate operand 2.
;; The preparation statement sets operands[3] to the V4HI zero constant.
;; NOTE(review): listing numbers 12149 and 12152-12154 are absent from
;; this copy, so how the V4HI unspec and operands[3] are combined into
;; the V8HI destination, the unspec name and the expander condition are
;; not visible here.
12147 (define_expand "vcvtps2ph"
12148 [(set (match_operand:V8HI 0 "register_operand" "")
12150 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12151 (match_operand:SI 2 "immediate_operand" "")]
12155 "operands[3] = CONST0_RTX (V4HImode);")
;; *vcvtps2ph: register-destination form of the 128-bit single-to-half
;; conversion.  Operand 0 is a V8HI register.  The visible part of the
;; pattern is a V4HI unspec over V4SF register operand 1 and SI immediate
;; operand 2, followed by operand 3, which must satisfy const0_operand in
;; V4HI mode.
;; Operand 2 uses constraint "N" (presumably a restricted integer
;; immediate range -- confirm in the target's constraint definitions).
;; NOTE(review): listing numbers 12159, 12162 and 12164 are absent from
;; this copy, so the operator joining the unspec with operand 3, the
;; unspec name and the insn condition are not visible here.
12157 (define_insn "*vcvtps2ph"
12158 [(set (match_operand:V8HI 0 "register_operand" "=x")
12160 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12161 (match_operand:SI 2 "immediate_operand" "N")]
12163 (match_operand:V4HI 3 "const0_operand" "")))]
;; Output template; operand 3 is not printed.
12165 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
;; Attributes: type ssecvt, VEX prefix, mode V4SF.
12166 [(set_attr "type" "ssecvt")
12167 (set_attr "prefix" "vex")
12168 (set_attr "mode" "V4SF")])
;; *vcvtps2ph_store: memory-destination form of the 128-bit
;; single-to-half conversion.  Operand 0 is a V4HI memory operand; the
;; source is a V4HI unspec tagged UNSPEC_VCVTPS2PH over V4SF register
;; operand 1 and SI immediate operand 2.
;; Operand 2 uses constraint "N" (presumably a restricted integer
;; immediate range -- confirm in the target's constraint definitions).
;; Attributes: type ssecvt, VEX prefix, mode V4SF.
;; NOTE(review): listing number 12175 is absent from this copy
;; (presumably the insn condition).
12170 (define_insn "*vcvtps2ph_store"
12171 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12172 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12173 (match_operand:SI 2 "immediate_operand" "N")]
12174 UNSPEC_VCVTPS2PH))]
12176 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12177 [(set_attr "type" "ssecvt")
12178 (set_attr "prefix" "vex")
12179 (set_attr "mode" "V4SF")])
;; vcvtps2ph256: 256-bit single-to-half conversion.  Operand 0 is a V8HI
;; register or memory destination; the source is a V8HI unspec tagged
;; UNSPEC_VCVTPS2PH over V8SF register operand 1 and SI immediate
;; operand 2.
;; Operand 2 uses constraint "N" (presumably a restricted integer
;; immediate range -- confirm in the target's constraint definitions).
;; Attributes: type ssecvt, VEX prefix, mode V8SF.
;; NOTE(review): listing number 12186 is absent from this copy
;; (presumably the insn condition).
12181 (define_insn "vcvtps2ph256"
12182 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12183 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12184 (match_operand:SI 2 "immediate_operand" "N")]
12185 UNSPEC_VCVTPS2PH))]
12187 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12188 [(set_attr "type" "ssecvt")
12189 (set_attr "prefix" "vex")
12190 (set_attr "mode" "V8SF")])