1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Instruction suffix for sign and zero extensions.
;; Code attribute: sign_extend maps to "sx" and zero_extend maps to "zx".
23 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
25 ;; 16 byte integral modes handled by SSE
;; SSEMODEI iterates over the four integer vector modes listed below.
26 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
28 ;; All 16-byte vector modes handled by SSE
;; SSEMODE is the SSEMODEI list plus V4SF and V2DF; SSEMODE16 is the same
;; list with V1TI added.
29 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
30 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
32 ;; 32 byte integral vector modes handled by AVX
33 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
35 ;; All 32-byte vector modes handled by AVX
;; AVX256MODE is the AVX256MODEI list plus V8SF and V4DF.
36 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
38 ;; All QI vector modes handled by AVX
39 (define_mode_iterator AVXMODEQI [V32QI V16QI])
41 ;; All DI vector modes handled by AVX
42 (define_mode_iterator AVXMODEDI [V4DI V2DI])
44 ;; All vector modes handled by AVX
;; AVXMODE lists the SSEMODE modes followed by the AVX256MODE modes;
;; AVXMODE16 is the same list with V1TI added.
45 (define_mode_iterator AVXMODE
46 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
47 (define_mode_iterator AVXMODE16
48 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte modes.  In the integer iterators the digits in
;; the name appear to be the element sizes in bytes of the listed modes
;; (e.g. SSEMODE124 lists V16QI, V8HI and V4SI) -- naming inferred from
;; the lists themselves.
51 (define_mode_iterator SSEMODE12 [V16QI V8HI])
52 (define_mode_iterator SSEMODE24 [V8HI V4SI])
53 (define_mode_iterator SSEMODE14 [V16QI V4SI])
54 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
55 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
56 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; SSEMODEF4 covers the scalar float modes and the two 16-byte float vector
;; modes; SSEMODEF2P covers only the two vector modes.
57 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
58 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Iterators over subsets of the AVX vector modes.  AVX256MODE* iterators
;; list only 32-byte modes; AVXMODE* iterators mix 16-byte and 32-byte modes.
60 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
61 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
62 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
63 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
64 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
65 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
66 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
67 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
68 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
69 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
70 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
;; FMAMODE covers scalar SF/DF plus the 16-byte and 32-byte float vectors.
72 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
74 ;; Int-float size matches
;; Each iterator pairs a float vector mode with the integer vector mode
;; that has the same element count (4 x SF/SI, 2 x DF/DI).
75 (define_mode_iterator SSEMODE4S [V4SF V4SI])
76 (define_mode_iterator SSEMODE2D [V2DF V2DI])
78 ;; Modes handled by integer vcond pattern
;; V16QI, V8HI and V4SI are unconditional; V2DI is enabled only under the
;; condition string "TARGET_SSE4_2".
79 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
80 (V2DI "TARGET_SSE4_2")])
82 ;; Modes handled by vec_extract_even/odd pattern.
;; Every visible entry is conditional: the 16-byte integer modes require
;; TARGET_SSE2 and the 32-byte float modes require TARGET_AVX.
;; NOTE(review): the opening of the mode list is not present in this
;; listing (embedded numbering jumps from 83 to 86) -- confirm the first
;; entries against the complete file.
83 (define_mode_iterator SSEMODE_EO
86 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
87 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
88 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
90 ;; Modes handled by storent patterns.
;; Scalar SF/DF entries are conditional on TARGET_SSE4A; SI, V2DI and V2DF
;; on TARGET_SSE2; V4DF and V8SF on TARGET_AVX.
;; NOTE(review): embedded numbering skips 94, so one list line may be
;; missing from this listing -- confirm against the complete file.
91 (define_mode_iterator STORENT_MODE
92 [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
93 (SI "TARGET_SSE2") (V2DI "TARGET_SSE2") (V2DF "TARGET_SSE2")
95 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
97 ;; Modes handled by vector float patterns.
;; V4SF needs TARGET_SSE, V2DF needs TARGET_SSE2, and the 32-byte modes
;; V4DF/V8SF need TARGET_AVX.
98 (define_mode_iterator VEC_FLOAT_MODE
99 [(V2DF "TARGET_SSE2") (V4SF "TARGET_SSE")
100 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
102 ;; Modes handled by vector extract patterns.
;; All 16-byte modes (integer and float) are enabled under TARGET_SSE;
;; the 32-byte float modes V4DF/V8SF are enabled under TARGET_AVX.
103 (define_mode_iterator VEC_EXTRACT_MODE
104 [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE")
105 (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")
106 (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
107 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
109 ;; Mapping from float mode to required SSE level
;; Single-precision modes (SF, V4SF) map to "sse"; double-precision modes
;; (DF, V2DF) map to "sse2".  Used below to form names such as <sse>_movu...
110 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
112 ;; Mapping from integer vector mode to mnemonic suffix
;; One letter per element width: b, w, d, q.
113 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
115 ;; Mapping of the insn mnemonic suffix
;; ssemodesuffix gives "ss"/"sd" for scalar modes and "ps"/"pd" for vector
;; modes; note the integer modes V8SI and V4DI also map to "ps" and "pd".
116 (define_mode_attr ssemodesuffix
117 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
118 (V8SI "ps") (V4DI "pd")])
;; ssescalarmodesuffix gives the scalar-form suffix for a mode ("ss" or
;; "sd"), except V4SI which maps to "d".
119 (define_mode_attr ssescalarmodesuffix
120 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V8SI "ss")
121 (V4DF "sd") (V4SI "d") (V4DI "sd")])
123 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
124 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
126 ;; Mapping of vector modes back to the scalar modes
;; Only the 16-byte vector modes are listed here.
127 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
128 (V16QI "QI") (V8HI "HI")
129 (V4SI "SI") (V2DI "DI")])
131 ;; Mapping of vector modes to a vector mode of double size
;; The element mode is kept and the element count doubled, for both the
;; 16-byte and the 32-byte source modes.
132 (define_mode_attr ssedoublesizemode
133 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
134 (V8HI "V16HI") (V16QI "V32QI")
135 (V4DF "V8DF") (V8SF "V16SF")
136 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
138 ;; Number of scalar elements in each vector type
;; The value is the element count as a decimal string.
139 (define_mode_attr ssescalarnum
140 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
141 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; avxvecmode: value used for the "mode" insn attribute of a vector mode.
;; 16-byte integer modes map to TI, 32-byte integer modes map to OI, and
;; float vector modes map to themselves.
144 (define_mode_attr avxvecmode
145 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
146 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
147 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; avxvecpsmode: single-precision float vector mode of the same total size
;; as the given integer vector mode.
148 (define_mode_attr avxvecpsmode
149 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
150 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: same element mode with half the element count.
151 (define_mode_attr avxhalfvecmode
152 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
153 (V8SF "V4SF") (V4DF "V2DF")
154 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; avxscalarmode: element (scalar) mode of each 16-byte and 32-byte vector.
155 (define_mode_attr avxscalarmode
156 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
157 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: swaps between SF and SI vectors of equal element count.
158 (define_mode_attr avxcvtvecmode
159 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: integer vector mode with the same element count and
;; width as the given float vector mode.
160 (define_mode_attr avxpermvecmode
161 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; avxmodesuffixp: "pd"/"ps"/"si" suffix per mode.
;; NOTE(review): the entry list below is not closed in this listing before
;; the next definition starts (embedded numbering skips 164) -- confirm the
;; final entry and closing brackets against the complete file.
162 (define_mode_attr avxmodesuffixp
163 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; avxmodesuffix: empty string for 16-byte modes, "256" for 32-byte modes;
;; used to build pattern names such as avx_movdqu<avxmodesuffix>.
165 (define_mode_attr avxmodesuffix
166 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
167 (V8SI "256") (V8SF "256") (V4DF "256")])
169 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for a vector of N elements, except V4DF which is
;; listed as "15".
170 (define_mode_attr blendbits
171 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
173 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 2^(N-1) for a vector of N elements.
174 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
176 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for the 32-byte AVX vector modes (AVX256MODE).  Both
;; operands accept any nonimmediate operand; the visible preparation code
;; hands the mode and operand array to ix86_expand_vector_move.
;; NOTE(review): the condition string and the closing lines of this
;; expander are not present in this listing.
184 (define_expand "mov<mode>"
185 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
186 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
189 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX move insn for every mode in AVXMODE16.  Three alternatives:
;;   0: constant (constraint C) -> register
;;   1: register or memory      -> register
;;   2: register                -> memory
;; The condition requires at least one operand to be a register.
;; Alternative 0 takes its opcode from standard_sse_constant_opcode.  The
;; other alternatives switch on the insn "mode" attribute: an unaligned
;; mnemonic (vmovups / vmovupd / vmovdqu) is returned when misaligned_operand
;; is true for either operand, vmovaps is returned for the non-PS cases when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, and otherwise the aligned
;; mnemonic (vmovaps / vmovapd / vmovdqa) is returned.
;; NOTE(review): the case labels and braces of the C body are not present
;; in this listing.
193 (define_insn "*avx_mov<mode>_internal"
194 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
195 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
197 && (register_operand (operands[0], <MODE>mode)
198 || register_operand (operands[1], <MODE>mode))"
200 switch (which_alternative)
203 return standard_sse_constant_opcode (insn, operands[1]);
206 switch (get_attr_mode (insn))
210 if (misaligned_operand (operands[0], <MODE>mode)
211 || misaligned_operand (operands[1], <MODE>mode))
212 return "vmovups\t{%1, %0|%0, %1}";
214 return "vmovaps\t{%1, %0|%0, %1}";
217 if (misaligned_operand (operands[0], <MODE>mode)
218 || misaligned_operand (operands[1], <MODE>mode))
219 return "vmovupd\t{%1, %0|%0, %1}";
220 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
221 return "vmovaps\t{%1, %0|%0, %1}";
223 return "vmovapd\t{%1, %0|%0, %1}";
225 if (misaligned_operand (operands[0], <MODE>mode)
226 || misaligned_operand (operands[1], <MODE>mode))
227 return "vmovdqu\t{%1, %0|%0, %1}";
228 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
229 return "vmovaps\t{%1, %0|%0, %1}";
231 return "vmovdqa\t{%1, %0|%0, %1}";
237 [(set_attr "type" "sselog1,ssemov,ssemov")
238 (set_attr "prefix" "vex")
239 (set_attr "mode" "<avxvecmode>")])
241 ;; All of these patterns are enabled for SSE1 as well as SSE2.
242 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte vector modes in SSEMODE16.  As with the
;; AVX256MODE expander, the visible preparation code delegates to
;; ix86_expand_vector_move.
;; NOTE(review): the condition string and closing lines are not present
;; in this listing.
244 (define_expand "mov<mode>"
245 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
246 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
249 ix86_expand_vector_move (<MODE>mode, operands);
;; Non-AVX move insn for the SSEMODE16 modes, with the same three
;; alternatives as the AVX version (constant -> reg, reg/mem -> reg,
;; reg -> mem) and the same requirement that one operand is a register.
;; Alternative 0 uses standard_sse_constant_opcode; the others switch on
;; the "mode" attribute and return movaps, movapd or movdqa, with movaps
;; substituted when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;; The "mode" attribute is computed by the cond at the end:
;;   - V4SF when optimizing the function for size, when TARGET_SSE2 is 0,
;;     or for alternative 2 under the TARGET_SSE_TYPELESS_STORES test;
;;   - V4SF for V4SFmode and V2DF for V2DFmode;
;;   - TI otherwise.
;; NOTE(review): several lines of the C body and of the attribute cond are
;; not present in this listing.
253 (define_insn "*mov<mode>_internal"
254 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
255 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
257 && (register_operand (operands[0], <MODE>mode)
258 || register_operand (operands[1], <MODE>mode))"
260 switch (which_alternative)
263 return standard_sse_constant_opcode (insn, operands[1]);
266 switch (get_attr_mode (insn))
269 return "movaps\t{%1, %0|%0, %1}";
271 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
272 return "movaps\t{%1, %0|%0, %1}";
274 return "movapd\t{%1, %0|%0, %1}";
276 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
277 return "movaps\t{%1, %0|%0, %1}";
279 return "movdqa\t{%1, %0|%0, %1}";
285 [(set_attr "type" "sselog1,ssemov,ssemov")
287 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
288 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
289 (and (eq_attr "alternative" "2")
290 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
292 (const_string "V4SF")
293 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
294 (const_string "V4SF")
295 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
296 (const_string "V2DF")
298 (const_string "TI")))])
300 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
301 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
302 ;; from memory, we'd prefer to load the memory directly into the %xmm
303 ;; register. To facilitate this happy circumstance, this pattern won't
304 ;; split until after register allocation. If the 64-bit value didn't
305 ;; come from memory, this is the best we can do. This is much better
306 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from it.
;; Places a DImode value (general register pair or memory) into a V4SI SSE
;; register.  Enabled only for 32-bit targets with SSE2 and inter-unit
;; moves; the split condition adds reload_completed, so the split happens
;; after register allocation.
;; Split code:
;;   - operand 1 in registers: the SImode subregs at byte offsets 0 and 4
;;     are each loaded with gen_sse2_loadld (against a V4SI zero vector),
;;     the first into operand 0 and the second into scratch operand 2, and
;;     the two are then combined with gen_vec_interleave_lowv4si;
;;   - operand 1 in memory: gen_vec_concatv2di is emitted on the V2DI
;;     lowpart of operand 0 with operand 1 and const0_rtx.
;; The scratch is early-clobbered ("=&x") for the register alternative and
;; unused ("X") for the memory alternative.
;; NOTE(review): parts of this definition (opening of the pattern vector,
;; output template, braces, trailing arguments) are not present in this
;; listing.
309 (define_insn_and_split "movdi_to_sse"
311 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
312 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
313 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
314 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
316 "&& reload_completed"
319 if (register_operand (operands[1], DImode))
321 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
322 Assemble the 64-bit DImode value in an xmm register. */
323 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
324 gen_rtx_SUBREG (SImode, operands[1], 0)));
325 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
326 gen_rtx_SUBREG (SImode, operands[1], 4)));
327 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
330 else if (memory_operand (operands[1], DImode))
331 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
332 operands[1], const0_rtx));
;; Post-reload rewrite (TARGET_SSE) of a V4SF register set whose source
;; satisfies zero_extended_scalar_load_operand.  The replacement uses a
;; vec_duplicate of operand 1; the preparation code turns operand 1 into
;; its SFmode subreg at offset 0 and sets operand 2 to the V4SF zero
;; constant.
;; NOTE(review): the line introducing this definition and parts of the
;; replacement pattern are not present in this listing -- presumably a
;; define_split; confirm against the complete file.
338 [(set (match_operand:V4SF 0 "register_operand" "")
339 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
340 "TARGET_SSE && reload_completed"
343 (vec_duplicate:V4SF (match_dup 1))
347 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
348 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite (TARGET_SSE2) of a V2DF register set whose source
;; satisfies zero_extended_scalar_load_operand.  The replacement is a
;; vec_concat of operand 1 and operand 2, where the preparation code turns
;; operand 1 into its DFmode subreg at offset 0 and sets operand 2 to the
;; DFmode zero constant.
;; NOTE(review): the line introducing this definition is not present in
;; this listing -- presumably a define_split; confirm against the complete
;; file.
352 [(set (match_operand:V2DF 0 "register_operand" "")
353 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
354 "TARGET_SSE2 && reload_completed"
355 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
357 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
358 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register operand in any AVX256MODE mode; the
;; visible preparation code calls ix86_expand_push with the mode and the
;; operand.
361 (define_expand "push<mode>1"
362 [(match_operand:AVX256MODE 0 "register_operand" "")]
365 ix86_expand_push (<MODE>mode, operands[0]);
;; Same push expander for the 16-byte SSEMODE16 modes.
369 (define_expand "push<mode>1"
370 [(match_operand:SSEMODE16 0 "register_operand" "")]
373 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the AVX256MODE modes; the visible
;; preparation code delegates to ix86_expand_vector_move_misalign.
377 (define_expand "movmisalign<mode>"
378 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
379 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
382 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Same misaligned-move expander for the 16-byte SSEMODE16 modes.
386 (define_expand "movmisalign<mode>"
387 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
388 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
391 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the AVX unaligned float vector move over AVXMODEF2P.
;; When both operands are memory, operand 1 is first forced into a
;; register so that the matching insn's "not both MEM" condition holds.
;; NOTE(review): the lines wrapping operand 1 (unspec and its code) are not
;; present in this listing.
395 (define_expand "avx_movu<ssemodesuffix><avxmodesuffix>"
396 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "")
398 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "")]
400 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
402 if (MEM_P (operands[0]) && MEM_P (operands[1]))
403 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Matching insn: alternatives are reg/mem -> reg and reg -> mem.  Emits
;; vmovu<ssemodesuffix> (vmovups / vmovupd) and sets the "movu" attribute.
406 (define_insn "*avx_movu<ssemodesuffix><avxmodesuffix>"
407 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
409 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
411 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
412 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
413 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
414 [(set_attr "type" "ssemov")
415 (set_attr "movu" "1")
416 (set_attr "prefix" "vex")
417 (set_attr "mode" "<MODE>")])
;; movq between V2DI operands: the source expression selects element 0 of
;; operand 1 (the parallel [(const_int 0)] below).  The "%v" prefix in the
;; template together with the "maybe_vex" prefix attribute lets the same
;; pattern serve the VEX and non-VEX encodings.
;; NOTE(review): the lines wrapping the element selection and the condition
;; string are not present in this listing.
419 (define_insn "sse2_movq128"
420 [(set (match_operand:V2DI 0 "register_operand" "=x")
423 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
424 (parallel [(const_int 0)]))
427 "%vmovq\t{%1, %0|%0, %1}"
428 [(set_attr "type" "ssemov")
429 (set_attr "prefix" "maybe_vex")
430 (set_attr "mode" "TI")])
;; Expander for the non-AVX unaligned float vector move over SSEMODEF2P
;; (named sse_movups / sse2_movupd through the <sse> attribute).  When both
;; operands are memory, operand 1 is forced into a register first.
;; NOTE(review): the lines wrapping operand 1 are not present in this
;; listing.
432 (define_expand "<sse>_movu<ssemodesuffix>"
433 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
435 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")]
437 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
439 if (MEM_P (operands[0]) && MEM_P (operands[1]))
440 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Matching insn: reg/mem -> reg or reg -> mem, never mem -> mem.  Emits
;; movu<ssemodesuffix> and sets the "movu" attribute.
443 (define_insn "*<sse>_movu<ssemodesuffix>"
444 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
446 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
448 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
449 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
450 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
451 [(set_attr "type" "ssemov")
452 (set_attr "movu" "1")
453 (set_attr "mode" "<MODE>")])
;; Expander for the AVX unaligned integer vector move over the QI vector
;; modes (V32QI, V16QI).  When both operands are memory, operand 1 is
;; forced into a register first.
;; NOTE(review): the lines wrapping operand 1 and the condition string are
;; not present in this listing.
455 (define_expand "avx_movdqu<avxmodesuffix>"
456 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "")
458 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "")]
462 if (MEM_P (operands[0]) && MEM_P (operands[1]))
463 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Matching insn: requires TARGET_AVX and that the operands are not both
;; memory.  Emits vmovdqu and sets the "movu" attribute.
466 (define_insn "*avx_movdqu<avxmodesuffix>"
467 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
469 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
471 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
472 "vmovdqu\t{%1, %0|%0, %1}"
473 [(set_attr "type" "ssemov")
474 (set_attr "movu" "1")
475 (set_attr "prefix" "vex")
476 (set_attr "mode" "<avxvecmode>")])
;; Expander for the SSE2 unaligned V16QI move, expressed as an unspec of
;; operand 1.  When both operands are memory, operand 1 is forced into a
;; register first.
;; NOTE(review): the unspec code and condition string are not present in
;; this listing.
478 (define_expand "sse2_movdqu"
479 [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
480 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "")]
484 if (MEM_P (operands[0]) && MEM_P (operands[1]))
485 operands[1] = force_reg (V16QImode, operands[1]);
;; Matching insn: requires TARGET_SSE2 and that the operands are not both
;; memory.  Emits movdqu; sets "movu" and "prefix_data16" to 1.
488 (define_insn "*sse2_movdqu"
489 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
490 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
492 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
493 "movdqu\t{%1, %0|%0, %1}"
494 [(set_attr "type" "ssemov")
495 (set_attr "movu" "1")
496 (set_attr "prefix_data16" "1")
497 (set_attr "mode" "TI")])
;; AVX store of a float vector register (AVXMODEF2P) to memory using
;; vmovnt<ssemodesuffix> (vmovntps / vmovntpd).  Destination must be
;; memory and source must be a register.
;; NOTE(review): the unspec wrapper lines are not present in this listing.
499 (define_insn "avx_movnt<mode>"
500 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
502 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
504 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
505 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
506 [(set_attr "type" "ssemov")
507 (set_attr "prefix" "vex")
508 (set_attr "mode" "<MODE>")])
;; Non-AVX counterpart over SSEMODEF2P, emitting movnt<ssemodesuffix>
;; (movntps / movntpd).
510 (define_insn "<sse>_movnt<mode>"
511 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
513 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
515 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
516 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
517 [(set_attr "type" "ssemov")
518 (set_attr "mode" "<MODE>")])
;; AVX store of a DI-element vector register (V4DI, V2DI) to memory using
;; vmovntdq.  Note the "type" attribute here is "ssecvt", unlike the
;; "ssemov" used by the sse2_movntv2di pattern that follows.
;; NOTE(review): the unspec wrapper and condition string are not present in
;; this listing.
520 (define_insn "avx_movnt<mode>"
521 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
523 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
526 "vmovntdq\t{%1, %0|%0, %1}"
527 [(set_attr "type" "ssecvt")
528 (set_attr "prefix" "vex")
529 (set_attr "mode" "<avxvecmode>")])
;; Non-AVX V2DI store to memory using movntdq, written as an unspec of the
;; source register; sets "prefix_data16" to 1.
531 (define_insn "sse2_movntv2di"
532 [(set (match_operand:V2DI 0 "memory_operand" "=m")
533 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
536 "movntdq\t{%1, %0|%0, %1}"
537 [(set_attr "type" "ssemov")
538 (set_attr "prefix_data16" "1")
539 (set_attr "mode" "TI")])
;; Store of an SImode general register (constraint "r") to memory using
;; movnti, written as an unspec of the source register.  "prefix_data16"
;; is set to 0 for this pattern.
;; NOTE(review): the "mode" attribute is V2DF although the operands are
;; SImode -- looks intentional but confirm against the attribute's users.
541 (define_insn "sse2_movntsi"
542 [(set (match_operand:SI 0 "memory_operand" "=m")
543 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
546 "movnti\t{%1, %0|%0, %1}"
547 [(set_attr "type" "ssemov")
548 (set_attr "prefix_data16" "0")
549 (set_attr "mode" "V2DF")])
;; AVX load of a QI vector (V32QI, V16QI) from memory into a register using
;; vlddqu.  The source must be a memory operand.  Sets the "movu"
;; attribute; "type" is "ssecvt" here versus "ssemov" in sse3_lddqu below.
;; NOTE(review): the unspec wrapper and condition string are not present in
;; this listing.
551 (define_insn "avx_lddqu<avxmodesuffix>"
552 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
554 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
557 "vlddqu\t{%1, %0|%0, %1}"
558 [(set_attr "type" "ssecvt")
559 (set_attr "movu" "1")
560 (set_attr "prefix" "vex")
561 (set_attr "mode" "<avxvecmode>")])
;; Non-AVX V16QI load from memory using lddqu, written as an unspec of the
;; memory operand.  Attributes record no data16 prefix and a rep prefix.
563 (define_insn "sse3_lddqu"
564 [(set (match_operand:V16QI 0 "register_operand" "=x")
565 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
568 "lddqu\t{%1, %0|%0, %1}"
569 [(set_attr "type" "ssemov")
570 (set_attr "movu" "1")
571 (set_attr "prefix_data16" "0")
572 (set_attr "prefix_rep" "1")
573 (set_attr "mode" "TI")])
575 ; Expand patterns for non-temporal stores. At the moment, only those
576 ; that directly map to insns are defined; it would be possible to
577 ; define patterns for other modes that would expand to several insns.
;; Non-temporal store expander over STORENT_MODE: memory destination,
;; register source.  Which modes are available is governed by the per-mode
;; conditions of the STORENT_MODE iterator.
;; NOTE(review): the unspec wrapper and the rest of this expander are not
;; present in this listing.
579 (define_expand "storent<mode>"
580 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
582 [(match_operand:STORENT_MODE 1 "register_operand" "")]
585 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
587 ;; Parallel floating point arithmetic
589 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the absneg code iterator over VEC_FLOAT_MODE, on register
;; operands.  The whole expansion is done by
;; ix86_expand_fp_absneg_operator, after which DONE ends the expander.
;; NOTE(review): the condition string is not present in this listing.
591 (define_expand "<code><mode>2"
592 [(set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
593 (absneg:VEC_FLOAT_MODE
594 (match_operand:VEC_FLOAT_MODE 1 "register_operand" "")))]
596 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; AVX abs/neg insn-and-split over AVXMODEF2P.  Operand 3 is the matched
;; abs/neg operator, operand 1 its input and operand 2 a (use) operand that
;; supplies the second input of the logic operation built by the split.
;; After reload the split builds XOR when operand 3 is NEG and AND
;; otherwise, then sets operand 0 to the result.  When operand 1 is in
;; memory the logic operation is built as (operand 2, operand 1) so the
;; memory operand comes second; otherwise as (operand 1, operand 2).  The
;; constraints ("x,m" vs "xm,x") ensure at most one input is memory.
;; NOTE(review): the output template, braces and final emit/DONE lines are
;; not present in this listing.
598 (define_insn_and_split "*avx_absneg<mode>2"
599 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
600 (match_operator:AVXMODEF2P 3 "absneg_operator"
601 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "x,m")]))
602 (use (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm,x"))]
603 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
605 "&& reload_completed"
610 if (MEM_P (operands[1]))
611 t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
612 <MODE>mode, operands[2], operands[1]);
614 t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
615 <MODE>mode, operands[1], operands[2]);
616 t = gen_rtx_SET (VOIDmode, operands[0], t);
;; Non-AVX abs/neg insn-and-split over SSEMODEF2P.  The constraints tie
;; either operand 1 or operand 2 to the destination ("0"), matching the
;; two-operand instruction form.
;; After reload the split picks the operand that is NOT the destination
;; (operand 2 when operand 0 equals operand 1, else operand 1), builds
;; XOR (for NEG) or AND (otherwise) of operand 0 with it, and sets
;; operand 0 to the result.
;; NOTE(review): the output template, braces and final emit/DONE lines are
;; not present in this listing.
621 (define_insn_and_split "*sse_absneg<mode>2"
622 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
623 (match_operator:SSEMODEF2P 3 "absneg_operator"
624 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,xm")]))
625 (use (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm,0"))]
626 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
628 "&& reload_completed"
633 t = operands[rtx_equal_p (operands[0], operands[1]) ? 2 : 1];
634 t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
635 <MODE>mode, operands[0], t);
636 t = gen_rtx_SET (VOIDmode, operands[0], t);
;; Add/subtract expander for the 32-byte float modes (AVX256MODEF2P).  The
;; preparation statement calls ix86_fixup_binary_operands_no_copy and then
;; falls through to emit the pattern above it.
641 (define_expand "<plusminus_insn><mode>3"
642 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
643 (plusminus:AVX256MODEF2P
644 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
645 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
646 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
647 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX add/subtract insn over AVXMODEF2P (16-byte and 32-byte float
;; modes).  Three-operand form: destination and first source are separate
;; registers; the second source may be memory.  The condition additionally
;; requires ix86_binary_operator_ok.
649 (define_insn "*avx_<plusminus_insn><mode>3"
650 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
651 (plusminus:AVXMODEF2P
652 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
653 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
654 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
655 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
656 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
657 [(set_attr "type" "sseadd")
658 (set_attr "prefix" "vex")
659 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for the 16-byte float modes (SSEMODEF2P); operands
;; are adjusted by ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
661 (define_expand "<plusminus_insn><mode>3"
662 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
663 (plusminus:SSEMODEF2P
664 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
665 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
666 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
667 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-AVX add/subtract insn.  Two-operand form: operand 1 is tied to the
;; destination (constraint "0"), so the template prints only %2 and %0.
669 (define_insn "*<plusminus_insn><mode>3"
670 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
671 (plusminus:SSEMODEF2P
672 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
673 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
674 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
675 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
676 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
677 [(set_attr "type" "sseadd")
678 (set_attr "mode" "<MODE>")])
;; AVX scalar-form add/subtract on a 16-byte float vector: a vec_merge of
;; the plus/minus result, emitted with the scalar suffix
;; (<ssescalarmodesuffix>, i.e. ss/sd) in three-operand VEX form.
;; NOTE(review): the remaining vec_merge arguments are not present in this
;; listing.
680 (define_insn "*avx_vm<plusminus_insn><mode>3"
681 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
682 (vec_merge:SSEMODEF2P
683 (plusminus:SSEMODEF2P
684 (match_operand:SSEMODEF2P 1 "register_operand" "x")
685 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
688 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
689 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
690 [(set_attr "type" "sseadd")
691 (set_attr "prefix" "vex")
692 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar-form add/subtract: same vec_merge shape, but operand 1 is
;; tied to the destination and the template is two-operand.
694 (define_insn "<sse>_vm<plusminus_insn><mode>3"
695 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
696 (vec_merge:SSEMODEF2P
697 (plusminus:SSEMODEF2P
698 (match_operand:SSEMODEF2P 1 "register_operand" "0")
699 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
702 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
703 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
704 [(set_attr "type" "sseadd")
705 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 32-byte float modes; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy (MULT, ...).
;; NOTE(review): the line carrying the mult rtx code is not present in this
;; listing.
707 (define_expand "mul<mode>3"
708 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
710 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
711 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
712 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
713 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; AVX multiply insn over AVXMODEF2P.  Operand 1 carries the "%" constraint
;; modifier (commutative with operand 2); three-operand vmul<suffix> form.
715 (define_insn "*avx_mul<mode>3"
716 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
718 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
719 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
720 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
721 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
722 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
723 [(set_attr "type" "ssemul")
724 (set_attr "prefix" "vex")
725 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 16-byte float modes; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy (MULT, ...).
;; NOTE(review): the line carrying the mult rtx code is not present in this
;; listing.
727 (define_expand "mul<mode>3"
728 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
730 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
731 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
732 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
733 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Non-AVX multiply insn: operand 1 is commutative ("%") and tied to the
;; destination ("0"); two-operand mul<suffix> form.
735 (define_insn "*mul<mode>3"
736 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
738 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
739 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
740 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
741 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
742 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
743 [(set_attr "type" "ssemul")
744 (set_attr "mode" "<MODE>")])
;; AVX scalar-form multiply on a 16-byte float vector (vec_merge shape),
;; emitted as vmul<ssescalarmodesuffix> in three-operand form.
;; NOTE(review): this pattern iterates over SSEMODEF2P but its condition is
;; AVX_VEC_FLOAT_MODE_P, whereas the sibling *avx_vm<plusminus_insn> and
;; *avx_vmdiv patterns use AVX128_VEC_FLOAT_MODE_P -- confirm whether the
;; difference is intended.  The mult line and the remaining vec_merge
;; arguments are not present in this listing.
746 (define_insn "*avx_vmmul<mode>3"
747 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
748 (vec_merge:SSEMODEF2P
750 (match_operand:SSEMODEF2P 1 "register_operand" "x")
751 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
754 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
755 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
756 [(set_attr "type" "ssemul")
757 (set_attr "prefix" "vex")
758 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar-form multiply: operand 1 tied to the destination,
;; two-operand mul<ssescalarmodesuffix> form.
760 (define_insn "<sse>_vmmul<mode>3"
761 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
762 (vec_merge:SSEMODEF2P
764 (match_operand:SSEMODEF2P 1 "register_operand" "0")
765 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
768 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
769 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
770 [(set_attr "type" "ssemul")
771 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  Operands are first adjusted with
;; ix86_fixup_binary_operands_no_copy.  When TARGET_SSE_MATH and
;; TARGET_RECIP are set, the insn is not being optimized for size, and the
;; finite-math-only / no-trapping-math / unsafe-math flags all allow it,
;; the division is emitted through ix86_emit_swdivsf instead of the plain
;; div pattern.
;; NOTE(review): the condition string, braces and DONE are not present in
;; this listing.
773 (define_expand "divv8sf3"
774 [(set (match_operand:V8SF 0 "register_operand" "")
775 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
776 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
779 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
781 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
782 && flag_finite_math_only && !flag_trapping_math
783 && flag_unsafe_math_optimizations)
785 ix86_emit_swdivsf (operands[0], operands[1],
786 operands[2], V8SFmode);
;; V4DF division expander: only the operand fix-up is performed; there is
;; no ix86_emit_swdivsf path for the double-precision mode.
791 (define_expand "divv4df3"
792 [(set (match_operand:V4DF 0 "register_operand" "")
793 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
794 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
796 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; AVX divide insn over AVXMODEF2P: register dividend, register-or-memory
;; divisor, three-operand vdiv<ssemodesuffix> form.
;; NOTE(review): the line carrying the div rtx code is not present in this
;; listing.
798 (define_insn "avx_div<mode>3"
799 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
801 (match_operand:AVXMODEF2P 1 "register_operand" "x")
802 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
803 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
804 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
805 [(set_attr "type" "ssediv")
806 (set_attr "prefix" "vex")
807 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  When TARGET_SSE_MATH and TARGET_RECIP are set,
;; the insn is being optimized for speed, and the finite-math-only /
;; no-trapping-math / unsafe-math flags all allow it, the division is
;; emitted through ix86_emit_swdivsf.  Unlike divv8sf3, no call to
;; ix86_fixup_binary_operands_no_copy is visible here, and the test is
;; optimize_insn_for_speed_p () rather than !optimize_insn_for_size_p ().
;; NOTE(review): the condition string, braces and DONE are not present in
;; this listing.
809 (define_expand "divv4sf3"
810 [(set (match_operand:V4SF 0 "register_operand" "")
811 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
812 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
815 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
816 && flag_finite_math_only && !flag_trapping_math
817 && flag_unsafe_math_optimizations)
819 ix86_emit_swdivsf (operands[0], operands[1],
820 operands[2], V4SFmode);
;; V2DF division expander; only the pattern is visible.
;; NOTE(review): its condition and closing lines are not present in this
;; listing.
825 (define_expand "divv2df3"
826 [(set (match_operand:V2DF 0 "register_operand" "")
827 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
828 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; AVX divide insn restricted to the 16-byte float modes (SSEMODEF2P under
;; AVX128_VEC_FLOAT_MODE_P); three-operand vdiv<ssemodesuffix> form.
;; NOTE(review): the line carrying the div rtx code is not present in this
;; listing.
831 (define_insn "*avx_div<mode>3"
832 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
834 (match_operand:SSEMODEF2P 1 "register_operand" "x")
835 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
836 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
837 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
838 [(set_attr "type" "ssediv")
839 (set_attr "prefix" "vex")
840 (set_attr "mode" "<MODE>")])
;; Non-AVX divide insn: the dividend is tied to the destination ("0");
;; two-operand div<ssemodesuffix> form.
842 (define_insn "<sse>_div<mode>3"
843 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
845 (match_operand:SSEMODEF2P 1 "register_operand" "0")
846 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
847 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
848 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
849 [(set_attr "type" "ssediv")
850 (set_attr "mode" "<MODE>")])
;; AVX scalar-form divide on a 16-byte float vector (vec_merge shape),
;; emitted as vdiv<ssescalarmodesuffix> in three-operand form.
;; NOTE(review): the div line and the remaining vec_merge arguments are not
;; present in this listing.
852 (define_insn "*avx_vmdiv<mode>3"
853 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
854 (vec_merge:SSEMODEF2P
856 (match_operand:SSEMODEF2P 1 "register_operand" "x")
857 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
860 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
861 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
862 [(set_attr "type" "ssediv")
863 (set_attr "prefix" "vex")
864 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar-form divide: operand 1 tied to the destination,
;; two-operand div<ssescalarmodesuffix> form.
866 (define_insn "<sse>_vmdiv<mode>3"
867 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
868 (vec_merge:SSEMODEF2P
870 (match_operand:SSEMODEF2P 1 "register_operand" "0")
871 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
874 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
875 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
876 [(set_attr "type" "ssediv")
877 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal insn, expressed as UNSPEC_RCP of operand 1 and emitted
;; as vrcpps.
;; NOTE(review): the unspec opening line and the condition string are not
;; present in this listing.
879 (define_insn "avx_rcpv8sf2"
880 [(set (match_operand:V8SF 0 "register_operand" "=x")
882 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
884 "vrcpps\t{%1, %0|%0, %1}"
885 [(set_attr "type" "sse")
886 (set_attr "prefix" "vex")
887 (set_attr "mode" "V8SF")])
;; V4SF reciprocal insn (UNSPEC_RCP).  The "%v" template prefix and the
;; "maybe_vex" attribute let it serve both VEX and non-VEX encodings; it
;; also carries atom_sse_attr "rcp".
889 (define_insn "sse_rcpv4sf2"
890 [(set (match_operand:V4SF 0 "register_operand" "=x")
892 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
894 "%vrcpps\t{%1, %0|%0, %1}"
895 [(set_attr "type" "sse")
896 (set_attr "atom_sse_attr" "rcp")
897 (set_attr "prefix" "maybe_vex")
898 (set_attr "mode" "V4SF")])
;; AVX scalar-form reciprocal on V4SF: an unspec of operand 1 combined with
;; register operand 2, emitted as vrcpss with operand 2 as the middle
;; (first source) operand.
;; NOTE(review): the vec_merge wrapper, unspec code and condition string
;; are not present in this listing.
900 (define_insn "*avx_vmrcpv4sf2"
901 [(set (match_operand:V4SF 0 "register_operand" "=x")
903 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
905 (match_operand:V4SF 2 "register_operand" "x")
908 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
909 [(set_attr "type" "sse")
910 (set_attr "prefix" "vex")
911 (set_attr "mode" "SF")])
;; Non-AVX scalar-form reciprocal: operand 2 is tied to the destination
;; ("0") and the template is the two-operand rcpss.
913 (define_insn "sse_vmrcpv4sf2"
914 [(set (match_operand:V4SF 0 "register_operand" "=x")
916 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
918 (match_operand:V4SF 2 "register_operand" "0")
921 "rcpss\t{%1, %0|%0, %1}"
922 [(set_attr "type" "sse")
923 (set_attr "atom_sse_attr" "rcp")
924 (set_attr "mode" "SF")])
;; V8SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; set, the insn is not being optimized for size, and the finite-math-only
;; / no-trapping-math / unsafe-math flags all allow it, the result is
;; produced by ix86_emit_swsqrtsf (final argument 0).
;; NOTE(review): the condition string, braces and DONE are not present in
;; this listing.
926 (define_expand "sqrtv8sf2"
927 [(set (match_operand:V8SF 0 "register_operand" "")
928 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
931 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
932 && flag_finite_math_only && !flag_trapping_math
933 && flag_unsafe_math_optimizations)
935 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; V8SF square-root insn, emitted as vsqrtps.
940 (define_insn "avx_sqrtv8sf2"
941 [(set (match_operand:V8SF 0 "register_operand" "=x")
942 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
944 "vsqrtps\t{%1, %0|%0, %1}"
945 [(set_attr "type" "sse")
946 (set_attr "prefix" "vex")
947 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  Same ix86_emit_swsqrtsf path as sqrtv8sf2
;; (final argument 0), but guarded by optimize_insn_for_speed_p () rather
;; than !optimize_insn_for_size_p ().
;; NOTE(review): the condition string, braces and DONE are not present in
;; this listing.
949 (define_expand "sqrtv4sf2"
950 [(set (match_operand:V4SF 0 "register_operand" "")
951 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
954 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
955 && flag_finite_math_only && !flag_trapping_math
956 && flag_unsafe_math_optimizations)
958 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF square-root insn; "%v" / "maybe_vex" cover both encodings, and the
;; pattern carries atom_sse_attr "sqrt".
963 (define_insn "sse_sqrtv4sf2"
964 [(set (match_operand:V4SF 0 "register_operand" "=x")
965 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
967 "%vsqrtps\t{%1, %0|%0, %1}"
968 [(set_attr "type" "sse")
969 (set_attr "atom_sse_attr" "sqrt")
970 (set_attr "prefix" "maybe_vex")
971 (set_attr "mode" "V4SF")])
;; V4DF square root: emits vsqrtpd with a register or memory source.
973 (define_insn "sqrtv4df2"
974 [(set (match_operand:V4DF 0 "register_operand" "=x")
975 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
977 "vsqrtpd\t{%1, %0|%0, %1}"
978 [(set_attr "type" "sse")
979 (set_attr "prefix" "vex")
980 (set_attr "mode" "V4DF")])
;; V2DF square root: template "%vsqrtpd" with prefix attribute "maybe_vex";
;; the source may be a register or memory.
982 (define_insn "sqrtv2df2"
983 [(set (match_operand:V2DF 0 "register_operand" "=x")
984 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
986 "%vsqrtpd\t{%1, %0|%0, %1}"
987 [(set_attr "type" "sse")
988 (set_attr "prefix" "maybe_vex")
989 (set_attr "mode" "V2DF")])
;; AVX vec_merge square-root pattern for each SSEMODEF2P mode.  Uses the
;; three-operand vsqrt<ssescalarmodesuffix> template: operand 1 may be a
;; register or memory, operand 2 is an independent register input.
991 (define_insn "*avx_vmsqrt<mode>2"
992 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
993 (vec_merge:SSEMODEF2P
995 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
996 (match_operand:SSEMODEF2P 2 "register_operand" "x")
998 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
999 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1000 [(set_attr "type" "sse")
1001 (set_attr "prefix" "vex")
1002 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX vec_merge square-root pattern for each SSEMODEF2P mode.  Operand 2
;; is tied to the destination (constraint "0"), so the template only names
;; operands 0 and 1.
1004 (define_insn "<sse>_vmsqrt<mode>2"
1005 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1006 (vec_merge:SSEMODEF2P
1008 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
1009 (match_operand:SSEMODEF2P 2 "register_operand" "0")
1011 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1012 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
1013 [(set_attr "type" "sse")
1014 (set_attr "atom_sse_attr" "sqrt")
1015 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the V8SF UNSPEC_RSQRT operation, enabled for
;; TARGET_AVX && TARGET_SSE_MATH.  The expansion calls ix86_emit_swsqrtsf
;; with a last argument of 1.
1017 (define_expand "rsqrtv8sf2"
1018 [(set (match_operand:V8SF 0 "register_operand" "")
1020 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
1021 "TARGET_AVX && TARGET_SSE_MATH"
1023 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; V8SF UNSPEC_RSQRT insn: emits vrsqrtps with a register or memory source.
1027 (define_insn "avx_rsqrtv8sf2"
1028 [(set (match_operand:V8SF 0 "register_operand" "=x")
1030 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1032 "vrsqrtps\t{%1, %0|%0, %1}"
1033 [(set_attr "type" "sse")
1034 (set_attr "prefix" "vex")
1035 (set_attr "mode" "V8SF")])
;; Expander for the V4SF UNSPEC_RSQRT operation.  The expansion calls
;; ix86_emit_swsqrtsf with a last argument of 1.
1037 (define_expand "rsqrtv4sf2"
1038 [(set (match_operand:V4SF 0 "register_operand" "")
1040 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
1043 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; V4SF UNSPEC_RSQRT insn: template "%vrsqrtps" with prefix attribute
;; "maybe_vex"; the source may be a register or memory.
1047 (define_insn "sse_rsqrtv4sf2"
1048 [(set (match_operand:V4SF 0 "register_operand" "=x")
1050 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1052 "%vrsqrtps\t{%1, %0|%0, %1}"
1053 [(set_attr "type" "sse")
1054 (set_attr "prefix" "maybe_vex")
1055 (set_attr "mode" "V4SF")])
;; AVX V4SF pattern that emits the three-operand "vrsqrtss".  Operand 1
;; (register or memory) is the unspec input; operand 2 is an independent
;; register input.
1057 (define_insn "*avx_vmrsqrtv4sf2"
1058 [(set (match_operand:V4SF 0 "register_operand" "=x")
1060 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1062 (match_operand:V4SF 2 "register_operand" "x")
1065 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1066 [(set_attr "type" "sse")
1067 (set_attr "prefix" "vex")
1068 (set_attr "mode" "SF")])
;; Non-AVX V4SF pattern that emits "rsqrtss".  Operand 1 (register or
;; memory) is the unspec input; operand 2 is tied to the destination
;; (constraint "0").
1070 (define_insn "sse_vmrsqrtv4sf2"
1071 [(set (match_operand:V4SF 0 "register_operand" "=x")
1073 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1075 (match_operand:V4SF 2 "register_operand" "0")
1078 "rsqrtss\t{%1, %0|%0, %1}"
1079 [(set_attr "type" "sse")
1080 (set_attr "mode" "SF")])
1082 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1083 ;; isn't really correct, as those rtl operators aren't defined when
1084 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander (smaxmin code iterator) for the 256-bit float vector
;; modes.  Unless -ffinite-math-only is in effect, operand 1 is forced into
;; a register before ix86_fixup_binary_operands_no_copy is applied.
1086 (define_expand "<code><mode>3"
1087 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1088 (smaxmin:AVX256MODEF2P
1089 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1090 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1091 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1093 if (!flag_finite_math_only)
1094 operands[1] = force_reg (<MODE>mode, operands[1]);
1095 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max expander for the SSEMODEF2P float vector modes.  Unless
;; -ffinite-math-only is in effect, operand 1 is forced into a register
;; before ix86_fixup_binary_operands_no_copy is applied.
1098 (define_expand "<code><mode>3"
1099 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1101 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1102 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1103 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1105 if (!flag_finite_math_only)
1106 operands[1] = force_reg (<MODE>mode, operands[1]);
1107 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX min/max insn used only when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 carries the "%"
;; modifier, so operands 1 and 2 are treated as commutative.
1110 (define_insn "*avx_<code><mode>3_finite"
1111 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1113 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1114 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1115 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1116 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1117 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1118 [(set_attr "type" "sseadd")
1119 (set_attr "prefix" "vex")
1120 (set_attr "mode" "<MODE>")])
;; Non-AVX min/max insn used only when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to the
;; destination and marked commutative with operand 2 ("%0").
1122 (define_insn "*<code><mode>3_finite"
1123 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1125 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1126 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1127 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1128 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1129 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1130 [(set_attr "type" "sseadd")
1131 (set_attr "mode" "<MODE>")])
;; AVX min/max insn for the case where the _finite variant above does not
;; apply.  The hardware min/max instructions return the second source when
;; an input is a NaN or when both inputs are zeros of either sign, so the
;; operands must not be swapped here: operand 1 is a plain register with
;; no "%" commutative modifier, matching the non-AVX *<code><mode>3 insn.
1133 (define_insn "*avx_<code><mode>3"
1134 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1136 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1137 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1138 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1139 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1140 [(set_attr "type" "sseadd")
1141 (set_attr "prefix" "vex")
1142 (set_attr "mode" "<avxvecmode>")])
;; Non-AVX min/max insn without the finite-math requirement.  Operand 1 is
;; a register tied to the destination and has no "%" modifier, so the
;; operand order is kept as written.
1144 (define_insn "*<code><mode>3"
1145 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1147 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1148 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1149 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1150 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1151 [(set_attr "type" "sseadd")
1152 (set_attr "mode" "<MODE>")])
;; AVX vec_merge min/max pattern: emits the three-operand scalar-suffixed
;; v<maxmin_float> instruction on operands 1 (register) and 2 (register or
;; memory).
;; NOTE(review): "type" is "sse" here but "sseadd" in the non-AVX
;; <sse>_vm<code><mode>3 pattern -- confirm which is intended.
1154 (define_insn "*avx_vm<code><mode>3"
1155 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1156 (vec_merge:SSEMODEF2P
1158 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1159 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1162 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1163 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1164 [(set_attr "type" "sse")
1165 (set_attr "prefix" "vex")
1166 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX vec_merge min/max pattern: emits the scalar-suffixed
;; <maxmin_float> instruction.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
1168 (define_insn "<sse>_vm<code><mode>3"
1169 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1170 (vec_merge:SSEMODEF2P
1172 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1173 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1176 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1177 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1178 [(set_attr "type" "sseadd")
1179 (set_attr "mode" "<ssescalarmode>")])
1181 ;; These versions of the min/max patterns implement exactly the operations
1182 ;; min = (op1 < op2 ? op1 : op2)
1183 ;; max = (!(op1 < op2) ? op1 : op2)
1184 ;; Their operands are not commutative, and thus they may be used in the
1185 ;; presence of -0.0 and NaN.
;; AVX non-commutative minimum: emits vmin<ssemodesuffix> with operand 1 in
;; a register and operand 2 in a register or memory, in that fixed order.
1187 (define_insn "*avx_ieee_smin<mode>3"
1188 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1190 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1191 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1193 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1194 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1195 [(set_attr "type" "sseadd")
1196 (set_attr "prefix" "vex")
1197 (set_attr "mode" "<avxvecmode>")])
;; AVX non-commutative maximum: emits vmax<ssemodesuffix> with operand 1 in
;; a register and operand 2 in a register or memory, in that fixed order.
1199 (define_insn "*avx_ieee_smax<mode>3"
1200 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1202 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1203 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1205 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1206 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1207 [(set_attr "type" "sseadd")
1208 (set_attr "prefix" "vex")
1209 (set_attr "mode" "<avxvecmode>")])
;; Non-AVX non-commutative minimum: emits min<ssemodesuffix>.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
1211 (define_insn "*ieee_smin<mode>3"
1212 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1214 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1215 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1217 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1218 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1219 [(set_attr "type" "sseadd")
1220 (set_attr "mode" "<MODE>")])
;; Non-AVX non-commutative maximum: emits max<ssemodesuffix>.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
1222 (define_insn "*ieee_smax<mode>3"
1223 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1225 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1226 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1228 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1229 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1230 [(set_attr "type" "sseadd")
1231 (set_attr "mode" "<MODE>")])
;; V8SF add/subtract insn: emits the three-operand vaddsubps.  The RTL
;; reuses operands 1 and 2 through match_dup for the subtraction half.
1233 (define_insn "avx_addsubv8sf3"
1234 [(set (match_operand:V8SF 0 "register_operand" "=x")
1237 (match_operand:V8SF 1 "register_operand" "x")
1238 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1239 (minus:V8SF (match_dup 1) (match_dup 2))
1242 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1243 [(set_attr "type" "sseadd")
1244 (set_attr "prefix" "vex")
1245 (set_attr "mode" "V8SF")])
;; V4DF add/subtract insn: emits the three-operand vaddsubpd.  The RTL
;; reuses operands 1 and 2 through match_dup for the subtraction half.
1247 (define_insn "avx_addsubv4df3"
1248 [(set (match_operand:V4DF 0 "register_operand" "=x")
1251 (match_operand:V4DF 1 "register_operand" "x")
1252 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1253 (minus:V4DF (match_dup 1) (match_dup 2))
1256 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1257 [(set_attr "type" "sseadd")
1258 (set_attr "prefix" "vex")
1259 (set_attr "mode" "V4DF")])
;; AVX V4SF add/subtract insn: emits the three-operand vaddsubps.  The RTL
;; reuses operands 1 and 2 through match_dup for the subtraction half.
1261 (define_insn "*avx_addsubv4sf3"
1262 [(set (match_operand:V4SF 0 "register_operand" "=x")
1265 (match_operand:V4SF 1 "register_operand" "x")
1266 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1267 (minus:V4SF (match_dup 1) (match_dup 2))
1270 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1271 [(set_attr "type" "sseadd")
1272 (set_attr "prefix" "vex")
1273 (set_attr "mode" "V4SF")])
;; SSE3 V4SF add/subtract insn: emits addsubps.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
1275 (define_insn "sse3_addsubv4sf3"
1276 [(set (match_operand:V4SF 0 "register_operand" "=x")
1279 (match_operand:V4SF 1 "register_operand" "0")
1280 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1281 (minus:V4SF (match_dup 1) (match_dup 2))
1284 "addsubps\t{%2, %0|%0, %2}"
1285 [(set_attr "type" "sseadd")
1286 (set_attr "prefix_rep" "1")
1287 (set_attr "mode" "V4SF")])
;; AVX V2DF add/subtract insn: emits the three-operand vaddsubpd.  The RTL
;; reuses operands 1 and 2 through match_dup for the subtraction half.
1289 (define_insn "*avx_addsubv2df3"
1290 [(set (match_operand:V2DF 0 "register_operand" "=x")
1293 (match_operand:V2DF 1 "register_operand" "x")
1294 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1295 (minus:V2DF (match_dup 1) (match_dup 2))
1298 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1299 [(set_attr "type" "sseadd")
1300 (set_attr "prefix" "vex")
1301 (set_attr "mode" "V2DF")])
;; SSE3 V2DF add/subtract insn: emits addsubpd.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
1303 (define_insn "sse3_addsubv2df3"
1304 [(set (match_operand:V2DF 0 "register_operand" "=x")
1307 (match_operand:V2DF 1 "register_operand" "0")
1308 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1309 (minus:V2DF (match_dup 1) (match_dup 2))
1312 "addsubpd\t{%2, %0|%0, %2}"
1313 [(set_attr "type" "sseadd")
1314 (set_attr "atom_unit" "complex")
1315 (set_attr "mode" "V2DF")])
;; V4DF horizontal add/subtract (instantiated per <plusminus_insn>).  The
;; RTL pairs elements 0/1 and 2/3 of operand 1 and of operand 2; the insn
;; is emitted as vh<plusminus_mnemonic>pd.
1317 (define_insn "avx_h<plusminus_insn>v4df3"
1318 [(set (match_operand:V4DF 0 "register_operand" "=x")
1323 (match_operand:V4DF 1 "register_operand" "x")
1324 (parallel [(const_int 0)]))
1325 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1327 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1328 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1332 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1333 (parallel [(const_int 0)]))
1334 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1336 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1337 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1339 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1340 [(set_attr "type" "sseadd")
1341 (set_attr "prefix" "vex")
1342 (set_attr "mode" "V4DF")])
;; V8SF horizontal add/subtract (instantiated per <plusminus_insn>).  The
;; RTL pairs adjacent elements: 0..3 of operand 1, 0..3 of operand 2, then
;; 4..7 of operand 1 and 4..7 of operand 2.  Emitted as
;; vh<plusminus_mnemonic>ps.
1344 (define_insn "avx_h<plusminus_insn>v8sf3"
1345 [(set (match_operand:V8SF 0 "register_operand" "=x")
1351 (match_operand:V8SF 1 "register_operand" "x")
1352 (parallel [(const_int 0)]))
1353 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1355 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1356 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1360 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1361 (parallel [(const_int 0)]))
1362 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1364 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1365 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1369 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1370 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1372 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1373 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1376 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1377 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1379 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1380 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1382 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1383 [(set_attr "type" "sseadd")
1384 (set_attr "prefix" "vex")
1385 (set_attr "mode" "V8SF")])
;; AVX V4SF horizontal add/subtract (instantiated per <plusminus_insn>).
;; The RTL pairs elements 0/1 and 2/3 of operand 1 and of operand 2; the
;; insn is emitted as the three-operand vh<plusminus_mnemonic>ps.
1387 (define_insn "*avx_h<plusminus_insn>v4sf3"
1388 [(set (match_operand:V4SF 0 "register_operand" "=x")
1393 (match_operand:V4SF 1 "register_operand" "x")
1394 (parallel [(const_int 0)]))
1395 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1397 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1398 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1402 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1403 (parallel [(const_int 0)]))
1404 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1406 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1407 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1409 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1410 [(set_attr "type" "sseadd")
1411 (set_attr "prefix" "vex")
1412 (set_attr "mode" "V4SF")])
;; SSE3 V4SF horizontal add/subtract (instantiated per <plusminus_insn>).
;; Operand 1 is tied to the destination; the RTL pairs elements 0/1 and 2/3
;; of operand 1 and of operand 2.  Emitted as h<plusminus_mnemonic>ps.
1414 (define_insn "sse3_h<plusminus_insn>v4sf3"
1415 [(set (match_operand:V4SF 0 "register_operand" "=x")
1420 (match_operand:V4SF 1 "register_operand" "0")
1421 (parallel [(const_int 0)]))
1422 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1424 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1425 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1429 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1430 (parallel [(const_int 0)]))
1431 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1433 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1434 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1436 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1437 [(set_attr "type" "sseadd")
1438 (set_attr "atom_unit" "complex")
1439 (set_attr "prefix_rep" "1")
1440 (set_attr "mode" "V4SF")])
;; AVX V2DF horizontal add/subtract (instantiated per <plusminus_insn>).
;; The RTL pairs elements 0 and 1 of operand 1 and of operand 2; the insn
;; is emitted as the three-operand vh<plusminus_mnemonic>pd.
1442 (define_insn "*avx_h<plusminus_insn>v2df3"
1443 [(set (match_operand:V2DF 0 "register_operand" "=x")
1447 (match_operand:V2DF 1 "register_operand" "x")
1448 (parallel [(const_int 0)]))
1449 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1452 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1453 (parallel [(const_int 0)]))
1454 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1456 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1457 [(set_attr "type" "sseadd")
1458 (set_attr "prefix" "vex")
1459 (set_attr "mode" "V2DF")])
;; SSE3 V2DF horizontal add/subtract (instantiated per <plusminus_insn>).
;; Operand 1 is tied to the destination; the RTL pairs elements 0 and 1 of
;; operand 1 and of operand 2.  Emitted as h<plusminus_mnemonic>pd.
1461 (define_insn "sse3_h<plusminus_insn>v2df3"
1462 [(set (match_operand:V2DF 0 "register_operand" "=x")
1466 (match_operand:V2DF 1 "register_operand" "0")
1467 (parallel [(const_int 0)]))
1468 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1471 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1472 (parallel [(const_int 0)]))
1473 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1475 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1476 [(set_attr "type" "sseadd")
1477 (set_attr "mode" "V2DF")])
;; Sum reduction of a V8SF vector (operand 1) into operand 0.  Sequence:
;; haddv8sf3 of the input with itself, haddv8sf3 of that result with itself,
;; vperm2f128 of the second result with selector 1, and a final addv8sf3 of
;; the permuted and unpermuted values.
1479 (define_expand "reduc_splus_v8sf"
1480 [(match_operand:V8SF 0 "register_operand" "")
1481 (match_operand:V8SF 1 "register_operand" "")]
1484 rtx tmp = gen_reg_rtx (V8SFmode);
1485 rtx tmp2 = gen_reg_rtx (V8SFmode);
1486 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1487 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1488 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1489 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector (operand 1) into operand 0.  Two
;; strategies appear in the body: a pair of sse3 haddv4sf3 insns through a
;; temporary, and a call to ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): presumably these are alternatives selected on SSE3
;; availability -- confirm against the surrounding condition.
1493 (define_expand "reduc_splus_v4sf"
1494 [(match_operand:V4SF 0 "register_operand" "")
1495 (match_operand:V4SF 1 "register_operand" "")]
1500 rtx tmp = gen_reg_rtx (V4SFmode);
1501 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1502 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1505 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V4DF vector (operand 1) into operand 0.  Sequence:
;; haddv4df3 of the input with itself, vperm2f128 of that result with
;; selector 1, and a final addv4df3 of the two values.
1509 (define_expand "reduc_splus_v4df"
1510 [(match_operand:V4DF 0 "register_operand" "")
1511 (match_operand:V4DF 1 "register_operand" "")]
1514 rtx tmp = gen_reg_rtx (V4DFmode);
1515 rtx tmp2 = gen_reg_rtx (V4DFmode);
1516 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1517 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1518 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single sse3 haddv2df3 of operand 1
;; with itself, written to operand 0.
1522 (define_expand "reduc_splus_v2df"
1523 [(match_operand:V2DF 0 "register_operand" "")
1524 (match_operand:V2DF 1 "register_operand" "")]
1527 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining operation.
1531 (define_expand "reduc_smax_v4sf"
1532 [(match_operand:V4SF 0 "register_operand" "")
1533 (match_operand:V4SF 1 "register_operand" "")]
1536 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining operation.
1540 (define_expand "reduc_smin_v4sf"
1541 [(match_operand:V4SF 0 "register_operand" "")
1542 (match_operand:V4SF 1 "register_operand" "")]
1545 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1549 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1551 ;; Parallel floating point comparisons
1553 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit immediate: operand 3 is an SImode
;; constant accepted by const_0_to_31_operand and is printed as the first
;; (AT&T) / last (Intel) operand of vcmp<ssemodesuffix>.
1555 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1556 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1558 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1559 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1560 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1563 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1564 [(set_attr "type" "ssecmp")
1565 (set_attr "length_immediate" "1")
1566 (set_attr "prefix" "vex")
1567 (set_attr "mode" "<MODE>")])
;; AVX scalar compare (vec_merge form) with an explicit immediate: operand 3
;; is an SImode constant accepted by const_0_to_31_operand.  The destination
;; is an output operand restricted to SSE registers ("=x"), like every other
;; insn in this group; an empty constraint would leave it without the "="
;; output modifier and without a register class.
1569 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1570 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1571 (vec_merge:SSEMODEF2P
1573 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1574 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1575 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1580 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1581 [(set_attr "type" "ssecmp")
1582 (set_attr "length_immediate" "1")
1583 (set_attr "prefix" "vex")
1584 (set_attr "mode" "<ssescalarmode>")])
1586 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1587 ;; may generate 256bit vector compare instructions.
;; AVX packed mask compare.  Operand 3 is a comparison operator accepted by
;; avx_comparison_float_operator; it is printed into the mnemonic through
;; "%D3" in the vcmp template.
1588 (define_insn "*avx_maskcmp<mode>3"
1589 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1590 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1591 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1592 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1593 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1594 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1595 [(set_attr "type" "ssecmp")
1596 (set_attr "prefix" "vex")
1597 (set_attr "length_immediate" "1")
1598 (set_attr "mode" "<avxvecmode>")])
;; Non-AVX mask compare over the SSEMODEF4 modes.  Operand 3 is a comparison
;; operator accepted by sse_comparison_operator and is printed through
;; "%D3"; operand 1 is tied to the destination.
1600 (define_insn "<sse>_maskcmp<mode>3"
1601 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1602 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1603 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1604 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1606 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1607 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1608 [(set_attr "type" "ssecmp")
1609 (set_attr "length_immediate" "1")
1610 (set_attr "mode" "<MODE>")])
;; AVX scalar mask compare (vec_merge form).  Operand 3 is a comparison
;; operator accepted by sse_comparison_operator and is printed through
;; "%D3" in the three-operand vcmp template.
;; NOTE(review): unlike the other compare insns in this group, no
;; "length_immediate" attribute is set here -- confirm whether that is
;; intended.
1612 (define_insn "*avx_vmmaskcmp<mode>3"
1613 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1614 (vec_merge:SSEMODEF2P
1615 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1616 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1617 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1620 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1621 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1622 [(set_attr "type" "ssecmp")
1623 (set_attr "prefix" "vex")
1624 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar mask compare (vec_merge form).  Operand 3 is a comparison
;; operator accepted by sse_comparison_operator and is printed through
;; "%D3"; operand 1 is tied to the destination.
1626 (define_insn "<sse>_vmmaskcmp<mode>3"
1627 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1628 (vec_merge:SSEMODEF2P
1629 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1630 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1634 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1635 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1636 [(set_attr "type" "ssecmp")
1637 (set_attr "length_immediate" "1")
1638 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode.  Element 0 of each
;; vector operand is selected; operand 0 must be a register and operand 1
;; may be a register or memory.  "prefix_data16" depends on whether the
;; insn mode is DF.
1640 (define_insn "<sse>_comi"
1641 [(set (reg:CCFP FLAGS_REG)
1644 (match_operand:<ssevecmode> 0 "register_operand" "x")
1645 (parallel [(const_int 0)]))
1647 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1648 (parallel [(const_int 0)]))))]
1649 "SSE_FLOAT_MODE_P (<MODE>mode)"
1650 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1651 [(set_attr "type" "ssecomi")
1652 (set_attr "prefix" "maybe_vex")
1653 (set_attr "prefix_rep" "0")
1654 (set (attr "prefix_data16")
1655 (if_then_else (eq_attr "mode" "DF")
1657 (const_string "0")))
1658 (set_attr "mode" "<MODE>")])
;; Scalar compare that sets FLAGS_REG in CCFPU mode.  Element 0 of each
;; vector operand is selected; operand 0 must be a register and operand 1
;; may be a register or memory.  "prefix_data16" depends on whether the
;; insn mode is DF.
1660 (define_insn "<sse>_ucomi"
1661 [(set (reg:CCFPU FLAGS_REG)
1664 (match_operand:<ssevecmode> 0 "register_operand" "x")
1665 (parallel [(const_int 0)]))
1667 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1668 (parallel [(const_int 0)]))))]
1669 "SSE_FLOAT_MODE_P (<MODE>mode)"
1670 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1671 [(set_attr "type" "ssecomi")
1672 (set_attr "prefix" "maybe_vex")
1673 (set_attr "prefix_rep" "0")
1674 (set (attr "prefix_data16")
1675 (if_then_else (eq_attr "mode" "DF")
1677 (const_string "0")))
1678 (set_attr "mode" "<MODE>")])
;; Vector conditional select for float vector modes: operand 0 receives
;; operand 1 or operand 2 according to comparison operator 3 applied to
;; operands 4 and 5.  The expansion is done by ix86_expand_fp_vcond, whose
;; boolean result is captured in "ok".
1680 (define_expand "vcond<mode>"
1681 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1682 (if_then_else:AVXMODEF2P
1683 (match_operator 3 ""
1684 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1685 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1686 (match_operand:AVXMODEF2P 1 "general_operand" "")
1687 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1688 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1689 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1691 bool ok = ix86_expand_fp_vcond (operands);
1696 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1698 ;; Parallel floating point logical operations
1700 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed and-not: emits the three-operand vandn<ssemodesuffix> with
;; operand 1 in a register and operand 2 in a register or memory.
1702 (define_insn "avx_andnot<mode>3"
1703 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1706 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1707 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1708 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1709 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1710 [(set_attr "type" "sselog")
1711 (set_attr "prefix" "vex")
1712 (set_attr "mode" "<avxvecmode>")])
;; Non-AVX packed and-not: emits andn<ssemodesuffix>.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
1714 (define_insn "<sse>_andnot<mode>3"
1715 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1718 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1719 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1720 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1721 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1722 [(set_attr "type" "sselog")
1723 (set_attr "mode" "<MODE>")])
;; Logical-operation expander (any_logic code iterator) for the 256-bit
;; float vector modes; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
1725 (define_expand "<code><mode>3"
1726 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1727 (any_logic:AVX256MODEF2P
1728 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1729 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1730 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1731 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed logical insn (any_logic), operands 1 and 2 commutative ("%").
;; When TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" form is
;; emitted regardless of mode; otherwise the mode's own suffix is used.
1733 (define_insn "*avx_<code><mode>3"
1734 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1735 (any_logic:AVXMODEF2P
1736 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1737 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1738 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1739 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1741 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1742 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1744 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1746 [(set_attr "type" "sselog")
1747 (set_attr "prefix" "vex")
1748 (set_attr "mode" "<avxvecmode>")])
;; Logical-operation expander (any_logic code iterator) for the SSEMODEF2P
;; float vector modes; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
1750 (define_expand "<code><mode>3"
1751 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1752 (any_logic:SSEMODEF2P
1753 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1754 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1755 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1756 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-AVX packed logical insn (any_logic).  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").  When
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" form is emitted
;; regardless of mode; otherwise the mode's own suffix is used.
1758 (define_insn "*<code><mode>3"
1759 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1760 (any_logic:SSEMODEF2P
1761 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1762 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1763 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1764 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1766 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1767 return "<logic>ps\t{%2, %0|%0, %2}";
1769 return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
1771 [(set_attr "type" "sselog")
1772 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  Operand 3 is the mask built by
;; ix86_build_signbit_mask (<MODE>mode, 1, 0).  Operand 1 is combined with
;; the inverted mask and operand 2 with the mask itself; the two fresh
;; temporaries (operands 4 and 5) are then ORed into operand 0.
1774 (define_expand "copysign<mode>3"
1777 (not:VEC_FLOAT_MODE (match_dup 3))
1778 (match_operand:VEC_FLOAT_MODE 1 "nonimmediate_operand" "")))
1780 (and:VEC_FLOAT_MODE (match_dup 3)
1781 (match_operand:VEC_FLOAT_MODE 2 "nonimmediate_operand" "")))
1782 (set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
1783 (ior:VEC_FLOAT_MODE (match_dup 4) (match_dup 5)))]
1786 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1788 operands[4] = gen_reg_rtx (<MODE>mode);
1789 operands[5] = gen_reg_rtx (<MODE>mode);
1792 ;; Also define scalar versions. These are used for abs, neg, and
1793 ;; conditional move. Using subregs into vector modes causes register
1794 ;; allocation lossage. These patterns do not allow memory operands
1795 ;; because the native instructions read the full 128-bits.
;; AVX and-not on scalar MODEF values held in SSE registers.  All operands
;; are registers; the packed vandnp<ssemodefsuffix> instruction is emitted
;; and the insn mode is the corresponding vector mode (<ssevecmode>).
1797 (define_insn "*avx_andnot<mode>3"
1798 [(set (match_operand:MODEF 0 "register_operand" "=x")
1801 (match_operand:MODEF 1 "register_operand" "x"))
1802 (match_operand:MODEF 2 "register_operand" "x")))]
1803 "AVX_FLOAT_MODE_P (<MODE>mode)"
1804 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1805 [(set_attr "type" "sselog")
1806 (set_attr "prefix" "vex")
1807 (set_attr "mode" "<ssevecmode>")])
;; Non-AVX and-not on scalar MODEF values held in SSE registers.  All
;; operands are registers, operand 1 tied to the destination; the packed
;; andnp<ssemodefsuffix> instruction is emitted.
1809 (define_insn "*andnot<mode>3"
1810 [(set (match_operand:MODEF 0 "register_operand" "=x")
1813 (match_operand:MODEF 1 "register_operand" "0"))
1814 (match_operand:MODEF 2 "register_operand" "x")))]
1815 "SSE_FLOAT_MODE_P (<MODE>mode)"
1816 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1817 [(set_attr "type" "sselog")
1818 (set_attr "mode" "<ssevecmode>")])
;; AVX logical insn on scalar MODEF values held in SSE registers (register
;; operands only).  Emits the packed "ps" form when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise the
;; p<ssemodefsuffix> form.
1820 (define_insn "*avx_<code><mode>3"
1821 [(set (match_operand:MODEF 0 "register_operand" "=x")
1823 (match_operand:MODEF 1 "register_operand" "x")
1824 (match_operand:MODEF 2 "register_operand" "x")))]
1825 "AVX_FLOAT_MODE_P (<MODE>mode)"
1827 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1828 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1830 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1832 [(set_attr "type" "sselog")
1833 (set_attr "prefix" "vex")
1834 (set_attr "mode" "<ssevecmode>")])
;; Non-AVX logical insn on scalar MODEF values held in SSE registers
;; (register operands only, operand 1 tied to the destination).  Emits the
;; packed "ps" form when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set,
;; otherwise the p<ssemodefsuffix> form.
1836 (define_insn "*<code><mode>3"
1837 [(set (match_operand:MODEF 0 "register_operand" "=x")
1839 (match_operand:MODEF 1 "register_operand" "0")
1840 (match_operand:MODEF 2 "register_operand" "x")))]
1841 "SSE_FLOAT_MODE_P (<MODE>mode)"
1843 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1844 return "<logic>ps\t{%2, %0|%0, %2}";
1846 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1848 [(set_attr "type" "sselog")
1849 (set_attr "mode" "<ssevecmode>")])
1851 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1853 ;; FMA4 floating point multiply/accumulate instructions. This
1854 ;; includes the scalar version of the instructions as well as the
1857 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1859 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1860 ;; combine to generate a multiply/add with two memory references. We then
1861 ;; split this insn, into loading up the destination register with one of the
1862 ;; memory operations. If we don't manage to split the insn, reload will
1863 ;; generate the appropriate moves. The reason this is needed, is that combine
1864 ;; has already folded one of the memory references into both the multiply and
1865 ;; add insns, and it can't generate a new pseudo. I.e.:
1866 ;; (set (reg1) (mem (addr1)))
1867 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1868 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1870 ;; ??? This is historic, pre-dating the gimple fma transformation.
1871 ;; We could now properly represent that only one memory operand is
1872 ;; allowed and not be penalized during optimization.
1874 ;; Intrinsic FMA operations.
1876 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name fused multiply-add expander over FMAMODE: three
;; nonimmediate inputs, none negated.  Enabled when FMA or FMA4 is
;; available and SSE math is on.
1877 (define_expand "fma<mode>4"
1878 [(set (match_operand:FMAMODE 0 "register_operand")
1880 (match_operand:FMAMODE 1 "nonimmediate_operand")
1881 (match_operand:FMAMODE 2 "nonimmediate_operand")
1882 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1883 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
;; Standard-name fused multiply-subtract expander over FMAMODE: operand 3
;; is wrapped in neg.  Enabled when FMA or FMA4 is available and SSE math
;; is on.
1886 (define_expand "fms<mode>4"
1887 [(set (match_operand:FMAMODE 0 "register_operand")
1889 (match_operand:FMAMODE 1 "nonimmediate_operand")
1890 (match_operand:FMAMODE 2 "nonimmediate_operand")
1891 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1892 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
;; Standard-name negated fused multiply-add expander over FMAMODE:
;; operand 1 is wrapped in neg.  Enabled when FMA or FMA4 is available and
;; SSE math is on.
1895 (define_expand "fnma<mode>4"
1896 [(set (match_operand:FMAMODE 0 "register_operand")
1898 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1899 (match_operand:FMAMODE 2 "nonimmediate_operand")
1900 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1901 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
;; Standard-name negated fused multiply-subtract expander over FMAMODE:
;; operands 1 and 3 are both wrapped in neg.  Enabled when FMA or FMA4 is
;; available and SSE math is on.
1904 (define_expand "fnms<mode>4"
1905 [(set (match_operand:FMAMODE 0 "register_operand")
1907 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1908 (match_operand:FMAMODE 2 "nonimmediate_operand")
1909 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1910 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
1913 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Intrinsic fused multiply-add expander over FMAMODE.  Its condition is
;; only "TARGET_FMA || TARGET_FMA4"; TARGET_SSE_MATH is not required.
1914 (define_expand "fma4i_fmadd_<mode>"
1915 [(set (match_operand:FMAMODE 0 "register_operand")
1917 (match_operand:FMAMODE 1 "nonimmediate_operand")
1918 (match_operand:FMAMODE 2 "nonimmediate_operand")
1919 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1920 "TARGET_FMA || TARGET_FMA4"
;; Four-operand vfmadd insn.  Two constraint alternatives: memory is
;; allowed in operand 3 (first alternative) or in operand 2 (second
;; alternative), never in both.  Operands 1 and 2 are commutative ("%").
1923 (define_insn "*fma4i_fmadd_<mode>"
1924 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1926 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1927 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1928 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1930 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1931 [(set_attr "type" "ssemuladd")
1932 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub insn.  Two constraint alternatives: memory is
;; allowed in operand 3 (first alternative) or in operand 2 (second
;; alternative), never in both.  Operands 1 and 2 are commutative ("%").
1934 (define_insn "*fma4i_fmsub_<mode>"
1935 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1937 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1938 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1940 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1942 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1943 [(set_attr "type" "ssemuladd")
1944 (set_attr "mode" "<MODE>")])
;; Four-operand insn emitting vfnmadd<ssemodesuffix>.  Two alternatives:
;; operand 3 in register-or-memory with operand 2 in a register, or
;; operand 2 in memory with operand 3 in a register.
1946 (define_insn "*fma4i_fnmadd_<mode>"
1947 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1950 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1951 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1952 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1954 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1955 [(set_attr "type" "ssemuladd")
1956 (set_attr "mode" "<MODE>")])
;; Four-operand insn emitting vfnmsub<ssemodesuffix>.  Two alternatives:
;; operand 3 in register-or-memory with operand 2 in a register, or
;; operand 2 in memory with operand 3 in a register.
1958 (define_insn "*fma4i_fnmsub_<mode>"
1959 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1962 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1963 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1965 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1967 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1968 [(set_attr "type" "ssemuladd")
1969 (set_attr "mode" "<MODE>")])
1971 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1972 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar ("vm") multiply-add over SSEMODEF2P.  The result
;; is a vec_merge; the preparation statement supplies operand 4 as the
;; all-zero constant of the vector mode (CONST0_RTX).
1974 (define_expand "fma4i_vmfmadd_<mode>"
1975 [(set (match_operand:SSEMODEF2P 0 "register_operand")
1976 (vec_merge:SSEMODEF2P
1978 (match_operand:SSEMODEF2P 1 "nonimmediate_operand")
1979 (match_operand:SSEMODEF2P 2 "nonimmediate_operand")
1980 (match_operand:SSEMODEF2P 3 "nonimmediate_operand"))
1985 operands[4] = CONST0_RTX (<MODE>mode);
;; Scalar four-operand insn emitting vfmadd<ssescalarmodesuffix>.  The
;; pattern is a vec_merge whose operand 4 must be a zero constant
;; (const0_operand).  Either operand 3 (alternative 0) or operand 2
;; (alternative 1) may come from memory.
1988 (define_insn "*fma4i_vmfmadd_<mode>"
1989 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1990 (vec_merge:SSEMODEF2P
1992 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1993 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1994 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1995 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1998 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1999 [(set_attr "type" "ssemuladd")
2000 (set_attr "mode" "<MODE>")])
;; Scalar four-operand insn emitting vfmsub<ssescalarmodesuffix>.  Same
;; vec_merge shape as *fma4i_vmfmadd_<mode>, with operand 4 required to be
;; a zero constant.
2002 (define_insn "*fma4i_vmfmsub_<mode>"
2003 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2004 (vec_merge:SSEMODEF2P
2006 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2007 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
2009 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
2010 (match_operand:SSEMODEF2P 4 "const0_operand" "")
2013 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2014 [(set_attr "type" "ssemuladd")
2015 (set_attr "mode" "<MODE>")])
;; Scalar four-operand insn emitting vfnmadd<ssescalarmodesuffix>.  The
;; pattern is a vec_merge with a zero-constant operand 4; one of operands
;; 2 or 3 may be a memory reference depending on the alternative.
2017 (define_insn "*fma4i_vmfnmadd_<mode>"
2018 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2019 (vec_merge:SSEMODEF2P
2022 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2023 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
2024 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2025 (match_operand:SSEMODEF2P 4 "const0_operand" "")
2028 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2029 [(set_attr "type" "ssemuladd")
2030 (set_attr "mode" "<MODE>")])
;; Scalar four-operand insn emitting vfnmsub<ssescalarmodesuffix>.  The
;; pattern is a vec_merge with a zero-constant operand 4; one of operands
;; 2 or 3 may be a memory reference depending on the alternative.
2032 (define_insn "*fma4i_vmfnmsub_<mode>"
2033 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2034 (vec_merge:SSEMODEF2P
2037 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2038 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
2040 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
2041 (match_operand:SSEMODEF2P 4 "const0_operand" "")
2044 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2045 [(set_attr "type" "ssemuladd")
2046 (set_attr "mode" "<MODE>")])
2048 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2050 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2052 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2054 ;; It would be possible to represent these without the UNSPEC as
2057 ;; (fma op1 op2 op3)
2058 ;; (fma op1 op2 (neg op3))
2061 ;; But this doesn't seem useful in practice.
;; Expander "fmaddsub_<mode>" over AVXMODEF2P.  The three inputs are given
;; as a bracketed operand vector (the enclosing rtx code is not visible in
;; this excerpt).  Enabled for TARGET_FMA || TARGET_FMA4.
2063 (define_expand "fmaddsub_<mode>"
2064 [(set (match_operand:AVXMODEF2P 0 "register_operand")
2066 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand")
2067 (match_operand:AVXMODEF2P 2 "nonimmediate_operand")
2068 (match_operand:AVXMODEF2P 3 "nonimmediate_operand")]
2070 "TARGET_FMA || TARGET_FMA4"
;; FMA4 (four-operand) alternating multiply-add/subtract over AVXMODEF2P.
;; The mnemonic suffix comes from <ssemodesuffix> so that it tracks the
;; iterated mode, exactly as the FMA3 *fma_fmaddsub_<mode> pattern does; a
;; hard-coded "ps" would be printed for every mode of the iterator even
;; though the "mode" attribute is <MODE>.
;; Alternative 0 lets operand 3 be register-or-memory; alternative 1 takes
;; operand 2 from memory.  Operands 1 and 2 are commutative ("%").
2073 (define_insn "*fma4_fmaddsub_<mode>"
2074 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
2076 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
2077 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
2078 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")]
2081 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2082 [(set_attr "type" "ssemuladd")
2083 (set_attr "mode" "<MODE>")])
;; FMA4 (four-operand) alternating multiply-subtract/add over AVXMODEF2P.
;; The mnemonic suffix comes from <ssemodesuffix> so that it tracks the
;; iterated mode, exactly as the FMA3 *fma_fmsubadd_<mode> pattern does; a
;; hard-coded "ps" would be printed for every mode of the iterator even
;; though the "mode" attribute is <MODE>.
;; Alternative 0 lets operand 3 be register-or-memory; alternative 1 takes
;; operand 2 from memory.  Operands 1 and 2 are commutative ("%").
2085 (define_insn "*fma4_fmsubadd_<mode>"
2086 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
2088 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
2089 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
2091 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2094 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2095 [(set_attr "type" "ssemuladd")
2096 (set_attr "mode" "<MODE>")])
2098 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2100 ;; FMA3 floating point multiply/accumulate instructions.
2102 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 (three-operand, destructive) multiply-add over FMAMODE.  One input
;; is always tied to the destination; the alternative selects the form:
;;   alt 0: operand 1 tied ("0"), operand 2 "xm", operand 3 "x"  -> 132
;;   alt 1: operand 1 tied ("0"), operand 2 "x",  operand 3 "xm" -> 213
;;   alt 2: operand 3 tied ("0"), operand 1 "x",  operand 2 "xm" -> 231
;; FMA3 defines only the 132, 213 and 231 operand orderings; the second
;; alternative prints (%0, %2, %3), which is the 213 form, matching the
;; *fma_fmaddsub_<mode> pattern that uses the same operand template.
2104 (define_insn "*fma_fmadd_<mode>"
2105 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2107 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2108 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2109 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2112 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2113 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2114 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2115 [(set_attr "type" "ssemuladd")
2116 (set_attr "mode" "<MODE>")])
;; FMA3 (three-operand, destructive) multiply-subtract over FMAMODE.  One
;; input is always tied to the destination; the alternative selects the
;; form:
;;   alt 0: operand 1 tied ("0"), operand 2 "xm", operand 3 "x"  -> 132
;;   alt 1: operand 1 tied ("0"), operand 2 "x",  operand 3 "xm" -> 213
;;   alt 2: operand 3 tied ("0"), operand 1 "x",  operand 2 "xm" -> 231
;; FMA3 defines only the 132, 213 and 231 operand orderings; the second
;; alternative prints (%0, %2, %3), which is the 213 form.
2118 (define_insn "*fma_fmsub_<mode>"
2119 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2121 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2122 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2124 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2127 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2128 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2129 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2130 [(set_attr "type" "ssemuladd")
2131 (set_attr "mode" "<MODE>")])
;; FMA3 (three-operand, destructive) negated multiply-add over FMAMODE.
;; The pattern name says "fnmadd" to match the vfnmadd mnemonics it emits
;; (names starting with "*" are only used for identification, so this does
;; not affect any generated gen_* function).
;;   alt 0: operand 1 tied ("0"), operand 2 "xm", operand 3 "x"  -> 132
;;   alt 1: operand 1 tied ("0"), operand 2 "x",  operand 3 "xm" -> 213
;;   alt 2: operand 3 tied ("0"), operand 1 "x",  operand 2 "xm" -> 231
;; FMA3 defines only the 132, 213 and 231 operand orderings; the second
;; alternative prints (%0, %2, %3), which is the 213 form.
2133 (define_insn "*fma_fnmadd_<mode>"
2134 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2137 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2138 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2139 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2142 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2143 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2144 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2145 [(set_attr "type" "ssemuladd")
2146 (set_attr "mode" "<MODE>")])
;; FMA3 (three-operand, destructive) negated multiply-subtract over
;; FMAMODE.  The pattern name says "fnmsub" to match the vfnmsub mnemonics
;; it emits (names starting with "*" are only used for identification, so
;; this does not affect any generated gen_* function).
;;   alt 0: operand 1 tied ("0"), operand 2 "xm", operand 3 "x"  -> 132
;;   alt 1: operand 1 tied ("0"), operand 2 "x",  operand 3 "xm" -> 213
;;   alt 2: operand 3 tied ("0"), operand 1 "x",  operand 2 "xm" -> 231
;; FMA3 defines only the 132, 213 and 231 operand orderings; the second
;; alternative prints (%0, %2, %3), which is the 213 form.
2148 (define_insn "*fma_fnmsub_<mode>"
2149 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2152 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2153 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2155 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2158 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2159 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2160 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2161 [(set_attr "type" "ssemuladd")
2162 (set_attr "mode" "<MODE>")])
;; FMA3 three-operand form of fmaddsub over AVXMODEF2P.  One input is tied
;; to the destination in every alternative: operand 1 in alternatives 0
;; and 1 (132 and 213 forms), operand 3 in alternative 2 (231 form).
2164 (define_insn "*fma_fmaddsub_<mode>"
2165 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
2167 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
2168 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
2169 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0")]
2173 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2174 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2175 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2176 [(set_attr "type" "ssemuladd")
2177 (set_attr "mode" "<MODE>")])
;; FMA3 three-operand form of fmsubadd over AVXMODEF2P.  One input is tied
;; to the destination in every alternative: operand 1 in alternatives 0
;; and 1 (132 and 213 forms), operand 3 in alternative 2 (231 form).
2179 (define_insn "*fma_fmsubadd_<mode>"
2180 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
2182 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
2183 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
2185 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0"))]
2189 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2190 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2191 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2192 [(set_attr "type" "ssemuladd")
2193 (set_attr "mode" "<MODE>")])
2195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2197 ;; Parallel single-precision floating point conversion operations
2199 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") with float:V2SF
;; into the V4SF destination.  Operand 1 is tied to the destination ("0"),
;; so the instruction is printed with two operands only.
2201 (define_insn "sse_cvtpi2ps"
2202 [(set (match_operand:V4SF 0 "register_operand" "=x")
2205 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2206 (match_operand:V4SF 1 "register_operand" "0")
2209 "cvtpi2ps\t{%2, %0|%0, %2}"
2210 [(set_attr "type" "ssecvt")
2211 (set_attr "mode" "V4SF")])
;; cvtps2pi: an unspec:V4SI of the V4SF operand 1, of which elements 0 and
;; 1 are selected into the V2SI destination (constraint "=y").  Tagged as
;; an MMX-unit instruction.
2213 (define_insn "sse_cvtps2pi"
2214 [(set (match_operand:V2SI 0 "register_operand" "=y")
2216 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2218 (parallel [(const_int 0) (const_int 1)])))]
2220 "cvtps2pi\t{%1, %0|%0, %1}"
2221 [(set_attr "type" "ssecvt")
2222 (set_attr "unit" "mmx")
2223 (set_attr "mode" "DI")])
;; cvttps2pi: fix:V4SI of the V4SF operand 1, of which elements 0 and 1
;; are selected into the V2SI destination (constraint "=y").  Tagged as an
;; MMX-unit instruction with prefix_rep explicitly set to "0".
2225 (define_insn "sse_cvttps2pi"
2226 [(set (match_operand:V2SI 0 "register_operand" "=y")
2228 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2229 (parallel [(const_int 0) (const_int 1)])))]
2231 "cvttps2pi\t{%1, %0|%0, %1}"
2232 [(set_attr "type" "ssecvt")
2233 (set_attr "unit" "mmx")
2234 (set_attr "prefix_rep" "0")
2235 (set_attr "mode" "SF")])
;; VEX three-operand vcvtsi2ss: float:SF of the SImode operand 2 (register
;; or memory) combined with the V4SF operand 1, which is a separate source
;; register here ("x") rather than being tied to the destination.
2237 (define_insn "*avx_cvtsi2ss"
2238 [(set (match_operand:V4SF 0 "register_operand" "=x")
2241 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2242 (match_operand:V4SF 1 "register_operand" "x")
2245 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2246 [(set_attr "type" "sseicvt")
2247 (set_attr "prefix" "vex")
2248 (set_attr "mode" "SF")])
;; Two-operand cvtsi2ss: float:SF of the SImode operand 2, with operand 1
;; tied to the destination ("0").  The register ("r") and memory ("m")
;; sources are separate alternatives so the per-CPU decode attributes can
;; differ between them.
2250 (define_insn "sse_cvtsi2ss"
2251 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2254 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2255 (match_operand:V4SF 1 "register_operand" "0,0")
2258 "cvtsi2ss\t{%2, %0|%0, %2}"
2259 [(set_attr "type" "sseicvt")
2260 (set_attr "athlon_decode" "vector,double")
2261 (set_attr "amdfam10_decode" "vector,double")
2262 (set_attr "bdver1_decode" "double,direct")
2263 (set_attr "mode" "SF")])
;; VEX three-operand vcvtsi2ssq: float:SF of the DImode operand 2.  Only
;; available for TARGET_AVX && TARGET_64BIT; length_vex is fixed at 4.
2265 (define_insn "*avx_cvtsi2ssq"
2266 [(set (match_operand:V4SF 0 "register_operand" "=x")
2269 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2270 (match_operand:V4SF 1 "register_operand" "x")
2272 "TARGET_AVX && TARGET_64BIT"
2273 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2274 [(set_attr "type" "sseicvt")
2275 (set_attr "length_vex" "4")
2276 (set_attr "prefix" "vex")
2277 (set_attr "mode" "SF")])
;; Two-operand cvtsi2ssq: float:SF of the DImode operand 2, with operand 1
;; tied to the destination.  Requires TARGET_SSE && TARGET_64BIT and is
;; marked as needing a REX prefix (prefix_rex "1").
;; NOTE(review): the second alternative of operand 2 is "rm" here, whereas
;; the SImode sibling sse_cvtsi2ss uses "m" -- confirm this is intended.
2279 (define_insn "sse_cvtsi2ssq"
2280 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2283 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2284 (match_operand:V4SF 1 "register_operand" "0,0")
2286 "TARGET_SSE && TARGET_64BIT"
2287 "cvtsi2ssq\t{%2, %0|%0, %2}"
2288 [(set_attr "type" "sseicvt")
2289 (set_attr "prefix_rex" "1")
2290 (set_attr "athlon_decode" "vector,double")
2291 (set_attr "amdfam10_decode" "vector,double")
2292 (set_attr "bdver1_decode" "double,direct")
2293 (set_attr "mode" "SF")])
;; cvtss2si (VEX form chosen by the "%v" prefix / maybe_vex): element 0 of
;; the V4SF operand 1 is converted under UNSPEC_FIX_NOTRUNC into an SImode
;; general register.  Source may be an SSE register or memory.
2295 (define_insn "sse_cvtss2si"
2296 [(set (match_operand:SI 0 "register_operand" "=r,r")
2299 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2300 (parallel [(const_int 0)]))]
2301 UNSPEC_FIX_NOTRUNC))]
2303 "%vcvtss2si\t{%1, %0|%0, %1}"
2304 [(set_attr "type" "sseicvt")
2305 (set_attr "athlon_decode" "double,vector")
2306 (set_attr "bdver1_decode" "double,double")
2307 (set_attr "prefix_rep" "1")
2308 (set_attr "prefix" "maybe_vex")
2309 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose input is a plain SFmode operand instead
;; of an element selected from a V4SF vector; same mnemonic and the same
;; UNSPEC_FIX_NOTRUNC wrapping.
2311 (define_insn "sse_cvtss2si_2"
2312 [(set (match_operand:SI 0 "register_operand" "=r,r")
2313 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2314 UNSPEC_FIX_NOTRUNC))]
2316 "%vcvtss2si\t{%1, %0|%0, %1}"
2317 [(set_attr "type" "sseicvt")
2318 (set_attr "athlon_decode" "double,vector")
2319 (set_attr "amdfam10_decode" "double,double")
2320 (set_attr "bdver1_decode" "double,double")
2321 (set_attr "prefix_rep" "1")
2322 (set_attr "prefix" "maybe_vex")
2323 (set_attr "mode" "SI")])
;; 64-bit form of sse_cvtss2si: DImode destination, TARGET_SSE &&
;; TARGET_64BIT only.  The size suffix is written as "{q}", a
;; single-alternative dialect group.
2325 (define_insn "sse_cvtss2siq"
2326 [(set (match_operand:DI 0 "register_operand" "=r,r")
2329 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2330 (parallel [(const_int 0)]))]
2331 UNSPEC_FIX_NOTRUNC))]
2332 "TARGET_SSE && TARGET_64BIT"
2333 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2334 [(set_attr "type" "sseicvt")
2335 (set_attr "athlon_decode" "double,vector")
2336 (set_attr "bdver1_decode" "double,double")
2337 (set_attr "prefix_rep" "1")
2338 (set_attr "prefix" "maybe_vex")
2339 (set_attr "mode" "DI")])
;; 64-bit form of sse_cvtss2si_2: plain SFmode input, DImode destination,
;; TARGET_SSE && TARGET_64BIT only.
2341 (define_insn "sse_cvtss2siq_2"
2342 [(set (match_operand:DI 0 "register_operand" "=r,r")
2343 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2344 UNSPEC_FIX_NOTRUNC))]
2345 "TARGET_SSE && TARGET_64BIT"
2346 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2347 [(set_attr "type" "sseicvt")
2348 (set_attr "athlon_decode" "double,vector")
2349 (set_attr "amdfam10_decode" "double,double")
2350 (set_attr "bdver1_decode" "double,double")
2351 (set_attr "prefix_rep" "1")
2352 (set_attr "prefix" "maybe_vex")
2353 (set_attr "mode" "DI")])
;; cvttss2si (VEX form chosen by "%v"): takes element 0 of the V4SF
;; operand 1 and writes an SImode general register.  Unlike sse_cvtss2si,
;; no UNSPEC_FIX_NOTRUNC appears in this pattern.
2355 (define_insn "sse_cvttss2si"
2356 [(set (match_operand:SI 0 "register_operand" "=r,r")
2359 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2360 (parallel [(const_int 0)]))))]
2362 "%vcvttss2si\t{%1, %0|%0, %1}"
2363 [(set_attr "type" "sseicvt")
2364 (set_attr "athlon_decode" "double,vector")
2365 (set_attr "amdfam10_decode" "double,double")
2366 (set_attr "bdver1_decode" "double,double")
2367 (set_attr "prefix_rep" "1")
2368 (set_attr "prefix" "maybe_vex")
2369 (set_attr "mode" "SI")])
;; 64-bit form of sse_cvttss2si: DImode destination, TARGET_SSE &&
;; TARGET_64BIT only; the size suffix is written as "{q}".
2371 (define_insn "sse_cvttss2siq"
2372 [(set (match_operand:DI 0 "register_operand" "=r,r")
2375 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2376 (parallel [(const_int 0)]))))]
2377 "TARGET_SSE && TARGET_64BIT"
2378 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2379 [(set_attr "type" "sseicvt")
2380 (set_attr "athlon_decode" "double,vector")
2381 (set_attr "amdfam10_decode" "double,double")
2382 (set_attr "bdver1_decode" "double,double")
2383 (set_attr "prefix_rep" "1")
2384 (set_attr "prefix" "maybe_vex")
2385 (set_attr "mode" "DI")])
;; vcvtdq2ps for each mode in AVXMODEDCVTDQ2PS: float conversion of
;; operand 1, whose mode is given by the <avxcvtvecmode> attribute.
2387 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2388 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2389 (float:AVXMODEDCVTDQ2PS
2390 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2392 "vcvtdq2ps\t{%1, %0|%0, %1}"
2393 [(set_attr "type" "ssecvt")
2394 (set_attr "prefix" "vex")
2395 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX cvtdq2ps: float:V4SF of a V4SI register or memory operand.
2397 (define_insn "sse2_cvtdq2ps"
2398 [(set (match_operand:V4SF 0 "register_operand" "=x")
2399 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2401 "cvtdq2ps\t{%1, %0|%0, %1}"
2402 [(set_attr "type" "ssecvt")
2403 (set_attr "mode" "V4SF")])
;; Expander built from four steps: a float:V4SF conversion of the V4SI
;; operand 1, an lt comparison of operand 5 against operand 3, an and of
;; operand 6 with operand 4, and a final plus of operands 5 and 7 into the
;; destination.  The preparation code sets operand 3 to a zero vector,
;; operand 4 to a vector built from the SFmode constant ldexp (1, 32),
;; i.e. 2^32, and operands 5..7 to fresh V4SF pseudos.
;; NOTE(review): the destinations of the three intermediate sets are not
;; visible in this excerpt; presumably they are operands 5, 6 and 7 in
;; that order -- confirm against the full file.
2405 (define_expand "sse2_cvtudq2ps"
2407 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2409 (lt:V4SF (match_dup 5) (match_dup 3)))
2411 (and:V4SF (match_dup 6) (match_dup 4)))
2412 (set (match_operand:V4SF 0 "register_operand" "")
2413 (plus:V4SF (match_dup 5) (match_dup 7)))]
2416 REAL_VALUE_TYPE TWO32r;
2420 real_ldexp (&TWO32r, &dconst1, 32);
2421 x = const_double_from_real_value (TWO32r, SFmode);
2423 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2424 operands[4] = force_reg (V4SFmode,
2425 ix86_build_const_vector (V4SFmode, 1, x));
2427 for (i = 5; i < 8; i++)
2428 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq for each mode in AVXMODEDCVTPS2DQ: operand 1 (mode
;; <avxcvtvecmode>) is wrapped in UNSPEC_FIX_NOTRUNC.
2431 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2432 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2433 (unspec:AVXMODEDCVTPS2DQ
2434 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2435 UNSPEC_FIX_NOTRUNC))]
2437 "vcvtps2dq\t{%1, %0|%0, %1}"
2438 [(set_attr "type" "ssecvt")
2439 (set_attr "prefix" "vex")
2440 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX cvtps2dq: V4SF operand 1 wrapped in UNSPEC_FIX_NOTRUNC, V4SI
;; result.  Marked as carrying a data16 prefix.
2442 (define_insn "sse2_cvtps2dq"
2443 [(set (match_operand:V4SI 0 "register_operand" "=x")
2444 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2445 UNSPEC_FIX_NOTRUNC))]
2447 "cvtps2dq\t{%1, %0|%0, %1}"
2448 [(set_attr "type" "ssecvt")
2449 (set_attr "prefix_data16" "1")
2450 (set_attr "mode" "TI")])
;; vcvttps2dq for each mode in AVXMODEDCVTPS2DQ: expressed with the fix
;; rtx code applied to operand 1 (mode <avxcvtvecmode>).
2452 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2453 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2454 (fix:AVXMODEDCVTPS2DQ
2455 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2457 "vcvttps2dq\t{%1, %0|%0, %1}"
2458 [(set_attr "type" "ssecvt")
2459 (set_attr "prefix" "vex")
2460 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX cvttps2dq: fix:V4SI of a V4SF register or memory operand.
;; Marked with a rep prefix and explicitly without a data16 prefix.
2462 (define_insn "sse2_cvttps2dq"
2463 [(set (match_operand:V4SI 0 "register_operand" "=x")
2464 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2466 "cvttps2dq\t{%1, %0|%0, %1}"
2467 [(set_attr "type" "ssecvt")
2468 (set_attr "prefix_rep" "1")
2469 (set_attr "prefix_data16" "0")
2470 (set_attr "mode" "TI")])
2472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2474 ;; Parallel double-precision floating point conversion operations
2476 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of a V2SI operand.  The source is either "y"
;; (alternative 0, tagged unit "mmx" with a data16 prefix) or memory
;; (alternative 1, attributes left at their defaults via "*").
2478 (define_insn "sse2_cvtpi2pd"
2479 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2480 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2482 "cvtpi2pd\t{%1, %0|%0, %1}"
2483 [(set_attr "type" "ssecvt")
2484 (set_attr "unit" "mmx,*")
2485 (set_attr "prefix_data16" "1,*")
2486 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 wrapped in UNSPEC_FIX_NOTRUNC, V2SI result in
;; a "y" register.  Tagged unit "mmx" with a data16 prefix.
2488 (define_insn "sse2_cvtpd2pi"
2489 [(set (match_operand:V2SI 0 "register_operand" "=y")
2490 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2491 UNSPEC_FIX_NOTRUNC))]
2493 "cvtpd2pi\t{%1, %0|%0, %1}"
2494 [(set_attr "type" "ssecvt")
2495 (set_attr "unit" "mmx")
2496 (set_attr "prefix_data16" "1")
2497 (set_attr "mode" "DI")
2498 (set_attr "bdver1_decode" "double")])
;; cvttpd2pi: fix:V2SI of the V2DF operand 1, result in a "y" register.
;; Tagged unit "mmx" with a data16 prefix.
2500 (define_insn "sse2_cvttpd2pi"
2501 [(set (match_operand:V2SI 0 "register_operand" "=y")
2502 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2504 "cvttpd2pi\t{%1, %0|%0, %1}"
2505 [(set_attr "type" "ssecvt")
2506 (set_attr "unit" "mmx")
2507 (set_attr "prefix_data16" "1")
2508 (set_attr "mode" "TI")
2509 (set_attr "bdver1_decode" "double")])
;; VEX three-operand vcvtsi2sd: float:DF of the SImode operand 2 combined
;; with the V2DF operand 1, which is a separate source register ("x").
2511 (define_insn "*avx_cvtsi2sd"
2512 [(set (match_operand:V2DF 0 "register_operand" "=x")
2515 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2516 (match_operand:V2DF 1 "register_operand" "x")
2519 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2520 [(set_attr "type" "sseicvt")
2521 (set_attr "prefix" "vex")
2522 (set_attr "mode" "DF")])
;; Two-operand cvtsi2sd: float:DF of the SImode operand 2, with operand 1
;; tied to the destination.  Register and memory sources are separate
;; alternatives with different per-CPU decode attributes.
2524 (define_insn "sse2_cvtsi2sd"
2525 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2528 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2529 (match_operand:V2DF 1 "register_operand" "0,0")
2532 "cvtsi2sd\t{%2, %0|%0, %2}"
2533 [(set_attr "type" "sseicvt")
2534 (set_attr "mode" "DF")
2535 (set_attr "athlon_decode" "double,direct")
2536 (set_attr "amdfam10_decode" "vector,double")
2537 (set_attr "bdver1_decode" "double,direct")])
;; VEX three-operand vcvtsi2sdq: float:DF of the DImode operand 2.  Only
;; available for TARGET_AVX && TARGET_64BIT; length_vex is fixed at 4.
2539 (define_insn "*avx_cvtsi2sdq"
2540 [(set (match_operand:V2DF 0 "register_operand" "=x")
2543 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2544 (match_operand:V2DF 1 "register_operand" "x")
2546 "TARGET_AVX && TARGET_64BIT"
2547 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2548 [(set_attr "type" "sseicvt")
2549 (set_attr "length_vex" "4")
2550 (set_attr "prefix" "vex")
2551 (set_attr "mode" "DF")])
;; Two-operand cvtsi2sdq: float:DF of the DImode operand 2, with operand 1
;; tied to the destination.  Requires TARGET_SSE2 && TARGET_64BIT and is
;; marked as needing a REX prefix.
2553 (define_insn "sse2_cvtsi2sdq"
2554 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2557 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2558 (match_operand:V2DF 1 "register_operand" "0,0")
2560 "TARGET_SSE2 && TARGET_64BIT"
2561 "cvtsi2sdq\t{%2, %0|%0, %2}"
2562 [(set_attr "type" "sseicvt")
2563 (set_attr "prefix_rex" "1")
2564 (set_attr "mode" "DF")
2565 (set_attr "athlon_decode" "double,direct")
2566 (set_attr "amdfam10_decode" "vector,double")
2567 (set_attr "bdver1_decode" "double,direct")])
;; cvtsd2si (VEX form chosen by "%v"): element 0 of the V2DF operand 1 is
;; converted under UNSPEC_FIX_NOTRUNC into an SImode general register.
2569 (define_insn "sse2_cvtsd2si"
2570 [(set (match_operand:SI 0 "register_operand" "=r,r")
2573 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2574 (parallel [(const_int 0)]))]
2575 UNSPEC_FIX_NOTRUNC))]
2577 "%vcvtsd2si\t{%1, %0|%0, %1}"
2578 [(set_attr "type" "sseicvt")
2579 (set_attr "athlon_decode" "double,vector")
2580 (set_attr "bdver1_decode" "double,double")
2581 (set_attr "prefix_rep" "1")
2582 (set_attr "prefix" "maybe_vex")
2583 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose input is a plain DFmode operand instead
;; of an element selected from a V2DF vector.
2585 (define_insn "sse2_cvtsd2si_2"
2586 [(set (match_operand:SI 0 "register_operand" "=r,r")
2587 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2588 UNSPEC_FIX_NOTRUNC))]
2590 "%vcvtsd2si\t{%1, %0|%0, %1}"
2591 [(set_attr "type" "sseicvt")
2592 (set_attr "athlon_decode" "double,vector")
2593 (set_attr "amdfam10_decode" "double,double")
2594 (set_attr "bdver1_decode" "double,double")
2595 (set_attr "prefix_rep" "1")
2596 (set_attr "prefix" "maybe_vex")
2597 (set_attr "mode" "SI")])
;; 64-bit form of sse2_cvtsd2si: DImode destination, TARGET_SSE2 &&
;; TARGET_64BIT only.  The operand-size suffix is written as "{q}" so it
;; is printed only in the AT&T dialect (output there is unchanged); the
;; Intel dialect gets the bare mnemonic, the same convention used by
;; sse_cvtss2siq.
2599 (define_insn "sse2_cvtsd2siq"
2600 [(set (match_operand:DI 0 "register_operand" "=r,r")
2603 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2604 (parallel [(const_int 0)]))]
2605 UNSPEC_FIX_NOTRUNC))]
2606 "TARGET_SSE2 && TARGET_64BIT"
2607 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2608 [(set_attr "type" "sseicvt")
2609 (set_attr "athlon_decode" "double,vector")
2610 (set_attr "bdver1_decode" "double,double")
2611 (set_attr "prefix_rep" "1")
2612 (set_attr "prefix" "maybe_vex")
2613 (set_attr "mode" "DI")])
;; 64-bit form of sse2_cvtsd2si_2: plain DFmode input, DImode destination,
;; TARGET_SSE2 && TARGET_64BIT only.  The operand-size suffix is written
;; as "{q}" so it is printed only in the AT&T dialect (output there is
;; unchanged); the Intel dialect gets the bare mnemonic, the same
;; convention used by sse_cvtss2siq_2.
2615 (define_insn "sse2_cvtsd2siq_2"
2616 [(set (match_operand:DI 0 "register_operand" "=r,r")
2617 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2618 UNSPEC_FIX_NOTRUNC))]
2619 "TARGET_SSE2 && TARGET_64BIT"
2620 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2621 [(set_attr "type" "sseicvt")
2622 (set_attr "athlon_decode" "double,vector")
2623 (set_attr "amdfam10_decode" "double,double")
2624 (set_attr "bdver1_decode" "double,double")
2625 (set_attr "prefix_rep" "1")
2626 (set_attr "prefix" "maybe_vex")
2627 (set_attr "mode" "DI")])
;; cvttsd2si (VEX form chosen by "%v"): takes element 0 of the V2DF
;; operand 1 and writes an SImode general register.  No
;; UNSPEC_FIX_NOTRUNC appears in this pattern.
2629 (define_insn "sse2_cvttsd2si"
2630 [(set (match_operand:SI 0 "register_operand" "=r,r")
2633 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2634 (parallel [(const_int 0)]))))]
2636 "%vcvttsd2si\t{%1, %0|%0, %1}"
2637 [(set_attr "type" "sseicvt")
2638 (set_attr "prefix_rep" "1")
2639 (set_attr "prefix" "maybe_vex")
2640 (set_attr "mode" "SI")
2641 (set_attr "athlon_decode" "double,vector")
2642 (set_attr "amdfam10_decode" "double,double")
2643 (set_attr "bdver1_decode" "double,double")])
;; 64-bit form of sse2_cvttsd2si: DImode destination, TARGET_SSE2 &&
;; TARGET_64BIT only.  The operand-size suffix is written as "{q}" so it
;; is printed only in the AT&T dialect (output there is unchanged); the
;; Intel dialect gets the bare mnemonic, the same convention used by
;; sse_cvttss2siq.
2645 (define_insn "sse2_cvttsd2siq"
2646 [(set (match_operand:DI 0 "register_operand" "=r,r")
2649 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2650 (parallel [(const_int 0)]))))]
2651 "TARGET_SSE2 && TARGET_64BIT"
2652 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2653 [(set_attr "type" "sseicvt")
2654 (set_attr "prefix_rep" "1")
2655 (set_attr "prefix" "maybe_vex")
2656 (set_attr "mode" "DI")
2657 (set_attr "athlon_decode" "double,vector")
2658 (set_attr "amdfam10_decode" "double,double")
2659 (set_attr "bdver1_decode" "double,double")])
;; vcvtdq2pd, 256-bit result: float:V4DF of a V4SI register or memory
;; operand.
2661 (define_insn "avx_cvtdq2pd256"
2662 [(set (match_operand:V4DF 0 "register_operand" "=x")
2663 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2665 "vcvtdq2pd\t{%1, %0|%0, %1}"
2666 [(set_attr "type" "ssecvt")
2667 (set_attr "prefix" "vex")
2668 (set_attr "mode" "V4DF")])
;; vcvtdq2pd applied to elements 0..3 of a V8SI operand.  Operand 1 is
;; printed with the "%x1" modifier rather than plain "%1".
2670 (define_insn "*avx_cvtdq2pd256_2"
2671 [(set (match_operand:V4DF 0 "register_operand" "=x")
2674 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2675 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2677 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2678 [(set_attr "type" "ssecvt")
2679 (set_attr "prefix" "vex")
2680 (set_attr "mode" "V4DF")])
;; cvtdq2pd (VEX form chosen by "%v"): uses elements 0 and 1 of the V4SI
;; operand 1 and produces a V2DF result.
2682 (define_insn "sse2_cvtdq2pd"
2683 [(set (match_operand:V2DF 0 "register_operand" "=x")
2686 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2687 (parallel [(const_int 0) (const_int 1)]))))]
2689 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2690 [(set_attr "type" "ssecvt")
2691 (set_attr "prefix" "maybe_vex")
2692 (set_attr "mode" "V2DF")])
;; vcvtpd2dq on a V4DF source wrapped in UNSPEC_FIX_NOTRUNC, V4SI result.
;; The "{y}" suffix is a single-alternative dialect group.
2694 (define_insn "avx_cvtpd2dq256"
2695 [(set (match_operand:V4SI 0 "register_operand" "=x")
2696 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2697 UNSPEC_FIX_NOTRUNC))]
2699 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2700 [(set_attr "type" "ssecvt")
2701 (set_attr "prefix" "vex")
2702 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: a V2SI unspec of the V2DF operand 1 feeds the
;; V4SI destination; the preparation statement sets operand 2 to the V2SI
;; zero constant.
2704 (define_expand "sse2_cvtpd2dq"
2705 [(set (match_operand:V4SI 0 "register_operand" "")
2707 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2711 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for sse2_cvtpd2dq.  Operand 2 must be a V2SI zero
;; constant.  The output is chosen in C: "vcvtpd2dq{x}" when TARGET_AVX,
;; plain "cvtpd2dq" otherwise.
2713 (define_insn "*sse2_cvtpd2dq"
2714 [(set (match_operand:V4SI 0 "register_operand" "=x")
2716 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2718 (match_operand:V2SI 2 "const0_operand" "")))]
2720 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2721 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2722 [(set_attr "type" "ssecvt")
2723 (set_attr "prefix_rep" "1")
2724 (set_attr "prefix_data16" "0")
2725 (set_attr "prefix" "maybe_vex")
2726 (set_attr "mode" "TI")
2727 (set_attr "amdfam10_decode" "double")
2728 (set_attr "athlon_decode" "vector")
2729 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq{y}: fix:V4SI of a V4DF register or memory operand.
2731 (define_insn "avx_cvttpd2dq256"
2732 [(set (match_operand:V4SI 0 "register_operand" "=x")
2733 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2735 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2736 [(set_attr "type" "ssecvt")
2737 (set_attr "prefix" "vex")
2738 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: fix:V2SI of the V2DF operand 1 feeds the V4SI
;; destination; the preparation statement sets operand 2 to the V2SI zero
;; constant.
2740 (define_expand "sse2_cvttpd2dq"
2741 [(set (match_operand:V4SI 0 "register_operand" "")
2743 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2746 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for sse2_cvttpd2dq.  Operand 2 must be a V2SI zero
;; constant.  The output is chosen in C: "vcvttpd2dq{x}" when TARGET_AVX,
;; plain "cvttpd2dq" otherwise.
2748 (define_insn "*sse2_cvttpd2dq"
2749 [(set (match_operand:V4SI 0 "register_operand" "=x")
2751 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2752 (match_operand:V2SI 2 "const0_operand" "")))]
2754 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2755 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2756 [(set_attr "type" "ssecvt")
2757 (set_attr "prefix" "maybe_vex")
2758 (set_attr "mode" "TI")
2759 (set_attr "amdfam10_decode" "double")
2760 (set_attr "athlon_decode" "vector")
2761 (set_attr "bdver1_decode" "double")])
;; VEX three-operand vcvtsd2ss: float_truncate:V2SF of the V2DF operand 2
;; combined with the V4SF operand 1, a separate source register ("x").
2763 (define_insn "*avx_cvtsd2ss"
2764 [(set (match_operand:V4SF 0 "register_operand" "=x")
2767 (float_truncate:V2SF
2768 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2769 (match_operand:V4SF 1 "register_operand" "x")
2772 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2773 [(set_attr "type" "ssecvt")
2774 (set_attr "prefix" "vex")
2775 (set_attr "mode" "SF")])
;; Two-operand cvtsd2ss: float_truncate:V2SF of the V2DF operand 2, with
;; operand 1 tied to the destination.  Register and memory sources are
;; separate alternatives with different per-CPU decode attributes.
2777 (define_insn "sse2_cvtsd2ss"
2778 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2781 (float_truncate:V2SF
2782 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2783 (match_operand:V4SF 1 "register_operand" "0,0")
2786 "cvtsd2ss\t{%2, %0|%0, %2}"
2787 [(set_attr "type" "ssecvt")
2788 (set_attr "athlon_decode" "vector,double")
2789 (set_attr "amdfam10_decode" "vector,double")
2790 (set_attr "bdver1_decode" "direct,direct")
2791 (set_attr "mode" "SF")])
;; VEX three-operand vcvtss2sd: uses elements 0 and 1 of the V4SF operand
;; 2 together with the V2DF operand 1, a separate source register ("x").
2793 (define_insn "*avx_cvtss2sd"
2794 [(set (match_operand:V2DF 0 "register_operand" "=x")
2798 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2799 (parallel [(const_int 0) (const_int 1)])))
2800 (match_operand:V2DF 1 "register_operand" "x")
2803 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2804 [(set_attr "type" "ssecvt")
2805 (set_attr "prefix" "vex")
2806 (set_attr "mode" "DF")])
;; Two-operand cvtss2sd: uses elements 0 and 1 of the V4SF operand 2, with
;; the V2DF operand 1 tied to the destination.
2808 (define_insn "sse2_cvtss2sd"
2809 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2813 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2814 (parallel [(const_int 0) (const_int 1)])))
2815 (match_operand:V2DF 1 "register_operand" "0,0")
2818 "cvtss2sd\t{%2, %0|%0, %2}"
2819 [(set_attr "type" "ssecvt")
2820 (set_attr "amdfam10_decode" "vector,double")
2821 (set_attr "athlon_decode" "direct,direct")
2822 (set_attr "bdver1_decode" "direct,direct")
2823 (set_attr "mode" "DF")])
;; vcvtpd2ps{y}: float_truncate:V4SF of a V4DF register or memory operand.
2825 (define_insn "avx_cvtpd2ps256"
2826 [(set (match_operand:V4SF 0 "register_operand" "=x")
2827 (float_truncate:V4SF
2828 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2830 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2831 [(set_attr "type" "ssecvt")
2832 (set_attr "prefix" "vex")
2833 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: float_truncate:V2SF of the V2DF operand 1 feeds
;; the V4SF destination; the preparation statement sets operand 2 to the
;; V2SF zero constant.
2835 (define_expand "sse2_cvtpd2ps"
2836 [(set (match_operand:V4SF 0 "register_operand" "")
2838 (float_truncate:V2SF
2839 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2842 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for sse2_cvtpd2ps.  Operand 2 must be a V2SF zero
;; constant.  The output is chosen in C: "vcvtpd2ps{x}" when TARGET_AVX,
;; plain "cvtpd2ps" otherwise.
2844 (define_insn "*sse2_cvtpd2ps"
2845 [(set (match_operand:V4SF 0 "register_operand" "=x")
2847 (float_truncate:V2SF
2848 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2849 (match_operand:V2SF 2 "const0_operand" "")))]
2851 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2852 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2853 [(set_attr "type" "ssecvt")
2854 (set_attr "prefix_data16" "1")
2855 (set_attr "prefix" "maybe_vex")
2856 (set_attr "mode" "V4SF")
2857 (set_attr "amdfam10_decode" "double")
2858 (set_attr "athlon_decode" "vector")
2859 (set_attr "bdver1_decode" "double")])
;; vcvtps2pd, 256-bit result: V4SF register or memory source, V4DF
;; destination.
2861 (define_insn "avx_cvtps2pd256"
2862 [(set (match_operand:V4DF 0 "register_operand" "=x")
2864 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2866 "vcvtps2pd\t{%1, %0|%0, %1}"
2867 [(set_attr "type" "ssecvt")
2868 (set_attr "prefix" "vex")
2869 (set_attr "mode" "V4DF")])
;; vcvtps2pd applied to elements 0..3 of a V8SF operand.  Operand 1 is
;; printed with the "%x1" modifier rather than plain "%1".
2871 (define_insn "*avx_cvtps2pd256_2"
2872 [(set (match_operand:V4DF 0 "register_operand" "=x")
2875 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2876 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2878 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2879 [(set_attr "type" "ssecvt")
2880 (set_attr "prefix" "vex")
2881 (set_attr "mode" "V4DF")])
;; cvtps2pd (VEX form chosen by "%v"): uses elements 0 and 1 of the V4SF
;; operand 1 and produces a V2DF result.  Explicitly marked as having no
;; data16 prefix.
2883 (define_insn "sse2_cvtps2pd"
2884 [(set (match_operand:V2DF 0 "register_operand" "=x")
2887 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2888 (parallel [(const_int 0) (const_int 1)]))))]
2890 "%vcvtps2pd\t{%1, %0|%0, %1}"
2891 [(set_attr "type" "ssecvt")
2892 (set_attr "prefix" "maybe_vex")
2893 (set_attr "mode" "V2DF")
2894 (set_attr "prefix_data16" "0")
2895 (set_attr "amdfam10_decode" "direct")
2896 (set_attr "athlon_decode" "double")
2897 (set_attr "bdver1_decode" "double")])
;; Two-step expander producing a V2DF result from the V4SF operand 1.
;; A V4SFmode temporary (operand 2) is allocated in the preparation
;; statement; the second step reads elements 0 and 1 of its source.
;; NOTE(review): the element-selection list of the first step is only
;; partly visible in this excerpt (it starts at const_int 6).
2899 (define_expand "vec_unpacks_hi_v4sf"
2904 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2905 (parallel [(const_int 6)
2909 (set (match_operand:V2DF 0 "register_operand" "")
2913 (parallel [(const_int 0) (const_int 1)]))))]
2915 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander producing a V4DF result from the V8SF operand 1.
;; A V4SFmode temporary (operand 2) is allocated in the preparation code.
;; NOTE(review): the element-selection list is only partly visible in this
;; excerpt (it starts at const_int 4).
2917 (define_expand "vec_unpacks_hi_v8sf"
2920 (match_operand:V8SF 1 "nonimmediate_operand" "")
2921 (parallel [(const_int 4)
2925 (set (match_operand:V4DF 0 "register_operand" "")
2930 operands[2] = gen_reg_rtx (V4SFmode);
;; Single-step expander: V2DF result from elements 0 and 1 of the V4SF
;; operand 1.
2933 (define_expand "vec_unpacks_lo_v4sf"
2934 [(set (match_operand:V2DF 0 "register_operand" "")
2937 (match_operand:V4SF 1 "nonimmediate_operand" "")
2938 (parallel [(const_int 0) (const_int 1)]))))]
;; Single-step expander: V4DF result from elements 0..3 of the V8SF
;; operand 1.
2941 (define_expand "vec_unpacks_lo_v8sf"
2942 [(set (match_operand:V4DF 0 "register_operand" "")
2945 (match_operand:V8SF 1 "nonimmediate_operand" "")
2946 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacks_hi_v8hi fills a
;; V4SImode temporary from operand 1, then gen_sse2_cvtdq2ps converts that
;; temporary into operand 0.
2949 (define_expand "vec_unpacks_float_hi_v8hi"
2950 [(match_operand:V4SF 0 "register_operand" "")
2951 (match_operand:V8HI 1 "register_operand" "")]
2954 rtx tmp = gen_reg_rtx (V4SImode);
2956 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2957 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacks_lo_v8hi fills a
;; V4SImode temporary from operand 1, then gen_sse2_cvtdq2ps converts that
;; temporary into operand 0.
2961 (define_expand "vec_unpacks_float_lo_v8hi"
2962 [(match_operand:V4SF 0 "register_operand" "")
2963 (match_operand:V8HI 1 "register_operand" "")]
2966 rtx tmp = gen_reg_rtx (V4SImode);
2968 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2969 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander vec_unpacku_float_hi_v8hi: V8HI operand 1 -> V4SF operand 0.
;; Same shape as the vec_unpacks_float_* expanders but the first step
;; uses gen_vec_unpacku_hi_v8hi to fill the V4SI temporary before
;; gen_sse2_cvtdq2ps converts it into operand 0.
2973 (define_expand "vec_unpacku_float_hi_v8hi"
2974 [(match_operand:V4SF 0 "register_operand" "")
2975 (match_operand:V8HI 1 "register_operand" "")]
2978 rtx tmp = gen_reg_rtx (V4SImode);
2980 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2981 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander vec_unpacku_float_lo_v8hi: V8HI operand 1 -> V4SF operand 0.
;; gen_vec_unpacku_lo_v8hi fills a V4SI temporary from operand 1, then
;; gen_sse2_cvtdq2ps converts that temporary into operand 0.
2985 (define_expand "vec_unpacku_float_lo_v8hi"
2986 [(match_operand:V4SF 0 "register_operand" "")
2987 (match_operand:V8HI 1 "register_operand" "")]
2990 rtx tmp = gen_reg_rtx (V4SImode);
2992 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2993 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander vec_unpacks_float_hi_v4si: V4SI operand 1 -> V2DF operand 0.
;; A selection starting at element 2 is taken from operand 1; the final
;; set uses elements 0 and 1.  operands[2] is a fresh V4SI pseudo
;; (presumably the intermediate between the two steps -- confirm against
;; the full pattern).
2997 (define_expand "vec_unpacks_float_hi_v4si"
3000 (match_operand:V4SI 1 "nonimmediate_operand" "")
3001 (parallel [(const_int 2)
3005 (set (match_operand:V2DF 0 "register_operand" "")
3009 (parallel [(const_int 0) (const_int 1)]))))]
3011 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander vec_unpacks_float_lo_v4si: V2DF operand 0 is computed from
;; elements 0 and 1 selected out of V4SI operand 1.
3013 (define_expand "vec_unpacks_float_lo_v4si"
3014 [(set (match_operand:V2DF 0 "register_operand" "")
3017 (match_operand:V4SI 1 "nonimmediate_operand" "")
3018 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander vec_unpacks_float_hi_v8si: V8SI operand 1 -> V4DF operand 0.
;; A selection starting at element 4 is taken from operand 1, and
;; operands[2] is allocated as a fresh V4SI pseudo.
3021 (define_expand "vec_unpacks_float_hi_v8si"
3024 (match_operand:V8SI 1 "nonimmediate_operand" "")
3025 (parallel [(const_int 4)
3029 (set (match_operand:V4DF 0 "register_operand" "")
3033 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander vec_unpacks_float_lo_v8si: V4DF operand 0 is computed from
;; elements 0..3 selected out of V8SI operand 1.
3035 (define_expand "vec_unpacks_float_lo_v8si"
3036 [(set (match_operand:V4DF 0 "register_operand" "")
3039 (match_operand:V8SI 1 "nonimmediate_operand" "")
3040 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
;; Expander vec_unpacku_float_hi_v4si: V4SI operand 1 -> V2DF operand 0.
;; Operands set up by the preparation code:
;;   operands[3]     V2DF zero vector, forced into a register
;;   operands[4]     V2DF constant built from 1.0 scaled by 2**32
;;                   (real_ldexp), forced into a register
;;   operands[5]     V4SI temporary
;;   operands[6..8]  V2DF temporaries
;; Visible RTL steps: compare operand 6 < operand 3, AND operand 7 with
;; operand 4, and set operand 0 = operand 6 + operand 8.
;; NOTE(review): presumably this adds 2**32 to lanes whose signed
;; conversion came out negative -- confirm against the full pattern.
3043 (define_expand "vec_unpacku_float_hi_v4si"
3046 (match_operand:V4SI 1 "nonimmediate_operand" "")
3047 (parallel [(const_int 2)
3055 (parallel [(const_int 0) (const_int 1)]))))
3057 (lt:V2DF (match_dup 6) (match_dup 3)))
3059 (and:V2DF (match_dup 7) (match_dup 4)))
3060 (set (match_operand:V2DF 0 "register_operand" "")
3061 (plus:V2DF (match_dup 6) (match_dup 8)))]
3064 REAL_VALUE_TYPE TWO32r;
3068 real_ldexp (&TWO32r, &dconst1, 32);
3069 x = const_double_from_real_value (TWO32r, DFmode);
3071 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3072 operands[4] = force_reg (V2DFmode,
3073 ix86_build_const_vector (V2DFmode, 1, x));
3075 operands[5] = gen_reg_rtx (V4SImode);
3077 for (i = 6; i < 9; i++)
3078 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander vec_unpacku_float_lo_v4si: V4SI operand 1 (elements 0 and 1)
;; -> V2DF operand 0.  Operands set up by the preparation code:
;;   operands[3]     V2DF zero vector, forced into a register
;;   operands[4]     V2DF constant built from 1.0 scaled by 2**32
;;                   (real_ldexp), forced into a register
;;   operands[5..7]  V2DF temporaries
;; Visible RTL steps: compare operand 5 < operand 3, AND operand 6 with
;; operand 4, and set operand 0 = operand 5 + operand 7.
;; NOTE(review): presumably this adds 2**32 to lanes whose signed
;; conversion came out negative -- confirm against the full pattern.
3081 (define_expand "vec_unpacku_float_lo_v4si"
3085 (match_operand:V4SI 1 "nonimmediate_operand" "")
3086 (parallel [(const_int 0) (const_int 1)]))))
3088 (lt:V2DF (match_dup 5) (match_dup 3)))
3090 (and:V2DF (match_dup 6) (match_dup 4)))
3091 (set (match_operand:V2DF 0 "register_operand" "")
3092 (plus:V2DF (match_dup 5) (match_dup 7)))]
3095 REAL_VALUE_TYPE TWO32r;
3099 real_ldexp (&TWO32r, &dconst1, 32);
3100 x = const_double_from_real_value (TWO32r, DFmode);
3102 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3103 operands[4] = force_reg (V2DFmode,
3104 ix86_build_const_vector (V2DFmode, 1, x));
3106 for (i = 5; i < 8; i++)
3107 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander vec_pack_trunc_v4df: two V4DF inputs (operands 1 and 2) are
;; each narrowed with float_truncate to V4SF, and V8SF operand 0 is set
;; from the results.  operands[3] and operands[4] are fresh V4SF pseudos
;; (presumably the destinations of the two truncations -- confirm against
;; the full pattern).
3110 (define_expand "vec_pack_trunc_v4df"
3112 (float_truncate:V4SF
3113 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3115 (float_truncate:V4SF
3116 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3117 (set (match_operand:V8SF 0 "register_operand" "")
3123 operands[3] = gen_reg_rtx (V4SFmode);
3124 operands[4] = gen_reg_rtx (V4SFmode);
;; Expander vec_pack_trunc_v2df: V2DF operands 1 and 2 -> V4SF operand 0.
;; Each input is converted with gen_sse2_cvtpd2ps into its own V4SF
;; temporary (r1, r2); gen_sse_movlhps then combines r1 and r2 into
;; operand 0.
3127 (define_expand "vec_pack_trunc_v2df"
3128 [(match_operand:V4SF 0 "register_operand" "")
3129 (match_operand:V2DF 1 "nonimmediate_operand" "")
3130 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3135 r1 = gen_reg_rtx (V4SFmode);
3136 r2 = gen_reg_rtx (V4SFmode);
3138 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3139 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3140 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander vec_pack_sfix_trunc_v2df: V2DF operands 1 and 2 -> V4SI
;; operand 0.  Each input is converted with gen_sse2_cvttpd2dq into a
;; V4SI temporary; the two temporaries and the destination are then
;; viewed as V2DI (gen_lowpart) and combined with
;; gen_vec_interleave_lowv2di.
3144 (define_expand "vec_pack_sfix_trunc_v2df"
3145 [(match_operand:V4SI 0 "register_operand" "")
3146 (match_operand:V2DF 1 "nonimmediate_operand" "")
3147 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3152 r1 = gen_reg_rtx (V4SImode);
3153 r2 = gen_reg_rtx (V4SImode);
3155 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3156 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3157 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3158 gen_lowpart (V2DImode, r1),
3159 gen_lowpart (V2DImode, r2)));
;; Expander vec_pack_sfix_v2df: V2DF operands 1 and 2 -> V4SI operand 0.
;; Identical in shape to vec_pack_sfix_trunc_v2df except that the
;; conversion step uses gen_sse2_cvtpd2dq instead of gen_sse2_cvttpd2dq;
;; the two V4SI temporaries are then combined through their V2DI views
;; with gen_vec_interleave_lowv2di.
3163 (define_expand "vec_pack_sfix_v2df"
3164 [(match_operand:V4SI 0 "register_operand" "")
3165 (match_operand:V2DF 1 "nonimmediate_operand" "")
3166 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3171 r1 = gen_reg_rtx (V4SImode);
3172 r2 = gen_reg_rtx (V4SImode);
3174 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3175 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3176 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3177 gen_lowpart (V2DImode, r1),
3178 gen_lowpart (V2DImode, r2)));
3182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3184 ;; Parallel single-precision floating point element swizzling
3186 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around the sse_movhlps insn.  All three V4SF operands
;; may be registers or memory, so ix86_fixup_binary_operands is called to
;; legitimize them and return the destination to use; gen_sse_movhlps is
;; emitted on that destination, and if it differs from operands[0] the
;; result is copied back with a move.
3188 (define_expand "sse_movhlps_exp"
3189 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3192 (match_operand:V4SF 1 "nonimmediate_operand" "")
3193 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3194 (parallel [(const_int 6)
3200 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3202 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3204 /* Fix up the destination if needed. */
3205 if (dst != operands[0])
3206 emit_move_insn (operands[0], dst);
;; AVX form of movhlps on V4SF operands.  Enabled for TARGET_AVX when
;; operands 1 and 2 are not both in memory.  Three alternatives:
;;   0: register destination, register sources  -> vmovhlps
;;   1: register destination, offsettable memory operand 2, addressed
;;      through %H2                              -> vmovlps
;;   2: memory destination tied to operand 1    -> vmovhps store
3211 (define_insn "*avx_movhlps"
3212 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3215 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3216 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3217 (parallel [(const_int 6)
3221 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3223 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3224 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3225 vmovhps\t{%2, %0|%0, %2}"
3226 [(set_attr "type" "ssemov")
3227 (set_attr "prefix" "vex")
3228 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE form of movhlps on V4SF operands; operand 1 is tied to the
;; destination in every alternative.  Enabled for TARGET_SSE when
;; operands 1 and 2 are not both in memory.  Three alternatives:
;;   0: register operand 2                       -> movhlps
;;   1: offsettable memory operand 2, via %H2    -> movlps
;;   2: memory destination                       -> movhps store
3230 (define_insn "sse_movhlps"
3231 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3234 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3235 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3236 (parallel [(const_int 6)
3240 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3242 movhlps\t{%2, %0|%0, %2}
3243 movlps\t{%H2, %0|%0, %H2}
3244 movhps\t{%2, %0|%0, %2}"
3245 [(set_attr "type" "ssemov")
3246 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander wrapper around the sse_movlhps insn.  The V4SF operands are
;; legitimized with ix86_fixup_binary_operands, which returns the
;; destination to use; gen_sse_movlhps is emitted on that destination,
;; and if it differs from operands[0] the result is copied back.
3248 (define_expand "sse_movlhps_exp"
3249 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3252 (match_operand:V4SF 1 "nonimmediate_operand" "")
3253 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3254 (parallel [(const_int 0)
3260 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3262 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3264 /* Fix up the destination if needed. */
3265 if (dst != operands[0])
3266 emit_move_insn (operands[0], dst);
;; AVX form of movlhps on V4SF operands.  Enabled for TARGET_AVX when
;; ix86_binary_operator_ok accepts the operands.  Three alternatives:
;;   0: register destination, register sources -> vmovlhps
;;   1: register destination, memory operand 2 -> vmovhps load
;;   2: offsettable memory destination tied to operand 1; operand 2 is
;;      stored through %H0                     -> vmovlps
3271 (define_insn "*avx_movlhps"
3272 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3275 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3276 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3277 (parallel [(const_int 0)
3281 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3283 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3284 vmovhps\t{%2, %1, %0|%0, %1, %2}
3285 vmovlps\t{%2, %H0|%H0, %2}"
3286 [(set_attr "type" "ssemov")
3287 (set_attr "prefix" "vex")
3288 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE form of movlhps on V4SF operands; operand 1 is tied to the
;; destination in every alternative.  Enabled for TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands.  Three alternatives:
;;   0: register operand 2                     -> movlhps
;;   1: memory operand 2                       -> movhps load
;;   2: offsettable memory destination, stored
;;      through %H0                            -> movlps
3290 (define_insn "sse_movlhps"
3291 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3294 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3295 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3296 (parallel [(const_int 0)
3300 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3302 movlhps\t{%2, %0|%0, %2}
3303 movhps\t{%2, %0|%0, %2}
3304 movlps\t{%2, %H0|%H0, %2}"
3305 [(set_attr "type" "ssemov")
3306 (set_attr "mode" "V4SF,V2SF,V2SF")])
3308 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps.  V8SF operand 0 receives, in order, elements
;; 2,10,3,11,6,14,7,15 of the selection over operands 1 and 2 (indices
;; 8..15 refer to operand 2).  Operand 1 must be a register; operand 2
;; may be a register or memory.
3309 (define_insn "avx_unpckhps256"
3310 [(set (match_operand:V8SF 0 "register_operand" "=x")
3313 (match_operand:V8SF 1 "register_operand" "x")
3314 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3315 (parallel [(const_int 2) (const_int 10)
3316 (const_int 3) (const_int 11)
3317 (const_int 6) (const_int 14)
3318 (const_int 7) (const_int 15)])))]
3320 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3321 [(set_attr "type" "sselog")
3322 (set_attr "prefix" "vex")
3323 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpckhps.  V4SF operand 0 receives elements 2,6,3,7 of
;; the selection over operands 1 and 2 (indices 4..7 refer to operand 2).
;; Three-operand form: operand 1 is not tied to the destination.
3325 (define_insn "*avx_interleave_highv4sf"
3326 [(set (match_operand:V4SF 0 "register_operand" "=x")
3329 (match_operand:V4SF 1 "register_operand" "x")
3330 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3331 (parallel [(const_int 2) (const_int 6)
3332 (const_int 3) (const_int 7)])))]
3334 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3335 [(set_attr "type" "sselog")
3336 (set_attr "prefix" "vex")
3337 (set_attr "mode" "V4SF")])
;; Expander vec_interleave_highv8sf on V8SF operands.  Because the
;; 256-bit unpack patterns work within lanes, the expansion has three
;; steps: one selection with the 0,8,1,9,4,12,5,13 index set, one with
;; the 2,10,3,11,6,14,7,15 index set, and a final selection of elements
;; 4..7 and 12..15 into operand 0.  operands[3] and operands[4] are fresh
;; V8SF pseudos (presumably holding the two intermediate results --
;; confirm against the full pattern).
3339 (define_expand "vec_interleave_highv8sf"
3343 (match_operand:V8SF 1 "register_operand" "x")
3344 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3345 (parallel [(const_int 0) (const_int 8)
3346 (const_int 1) (const_int 9)
3347 (const_int 4) (const_int 12)
3348 (const_int 5) (const_int 13)])))
3354 (parallel [(const_int 2) (const_int 10)
3355 (const_int 3) (const_int 11)
3356 (const_int 6) (const_int 14)
3357 (const_int 7) (const_int 15)])))
3358 (set (match_operand:V8SF 0 "register_operand" "")
3363 (parallel [(const_int 4) (const_int 5)
3364 (const_int 6) (const_int 7)
3365 (const_int 12) (const_int 13)
3366 (const_int 14) (const_int 15)])))]
3369 operands[3] = gen_reg_rtx (V8SFmode);
3370 operands[4] = gen_reg_rtx (V8SFmode);
;; Legacy-SSE unpckhps.  V4SF operand 0 receives elements 2,6,3,7 of the
;; selection over operands 1 and 2 (indices 4..7 refer to operand 2).
;; Two-operand form: operand 1 is tied to the destination.
3373 (define_insn "vec_interleave_highv4sf"
3374 [(set (match_operand:V4SF 0 "register_operand" "=x")
3377 (match_operand:V4SF 1 "register_operand" "0")
3378 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3379 (parallel [(const_int 2) (const_int 6)
3380 (const_int 3) (const_int 7)])))]
3382 "unpckhps\t{%2, %0|%0, %2}"
3383 [(set_attr "type" "sselog")
3384 (set_attr "mode" "V4SF")])
3386 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  V8SF operand 0 receives, in order, elements
;; 0,8,1,9,4,12,5,13 of the selection over operands 1 and 2 (indices
;; 8..15 refer to operand 2).  Operand 1 must be a register; operand 2
;; may be a register or memory.
3387 (define_insn "avx_unpcklps256"
3388 [(set (match_operand:V8SF 0 "register_operand" "=x")
3391 (match_operand:V8SF 1 "register_operand" "x")
3392 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3393 (parallel [(const_int 0) (const_int 8)
3394 (const_int 1) (const_int 9)
3395 (const_int 4) (const_int 12)
3396 (const_int 5) (const_int 13)])))]
3398 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3399 [(set_attr "type" "sselog")
3400 (set_attr "prefix" "vex")
3401 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpcklps.  V4SF operand 0 receives elements 0,4,1,5 of
;; the selection over operands 1 and 2 (indices 4..7 refer to operand 2).
;; Three-operand form: operand 1 is not tied to the destination.
3403 (define_insn "*avx_interleave_lowv4sf"
3404 [(set (match_operand:V4SF 0 "register_operand" "=x")
3407 (match_operand:V4SF 1 "register_operand" "x")
3408 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3409 (parallel [(const_int 0) (const_int 4)
3410 (const_int 1) (const_int 5)])))]
3412 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3413 [(set_attr "type" "sselog")
3414 (set_attr "prefix" "vex")
3415 (set_attr "mode" "V4SF")])
;; Expander vec_interleave_lowv8sf on V8SF operands.  Mirrors
;; vec_interleave_highv8sf: one selection with the 0,8,1,9,4,12,5,13
;; index set, one with the 2,10,3,11,6,14,7,15 index set, then a final
;; selection of elements 0..3 and 8..11 into operand 0.  operands[3] and
;; operands[4] are fresh V8SF pseudos (presumably holding the two
;; intermediate results -- confirm against the full pattern).
3417 (define_expand "vec_interleave_lowv8sf"
3421 (match_operand:V8SF 1 "register_operand" "x")
3422 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3423 (parallel [(const_int 0) (const_int 8)
3424 (const_int 1) (const_int 9)
3425 (const_int 4) (const_int 12)
3426 (const_int 5) (const_int 13)])))
3432 (parallel [(const_int 2) (const_int 10)
3433 (const_int 3) (const_int 11)
3434 (const_int 6) (const_int 14)
3435 (const_int 7) (const_int 15)])))
3436 (set (match_operand:V8SF 0 "register_operand" "")
3441 (parallel [(const_int 0) (const_int 1)
3442 (const_int 2) (const_int 3)
3443 (const_int 8) (const_int 9)
3444 (const_int 10) (const_int 11)])))]
3447 operands[3] = gen_reg_rtx (V8SFmode);
3448 operands[4] = gen_reg_rtx (V8SFmode);
;; Legacy-SSE unpcklps.  V4SF operand 0 receives elements 0,4,1,5 of the
;; selection over operands 1 and 2 (indices 4..7 refer to operand 2).
;; Two-operand form: operand 1 is tied to the destination.
3451 (define_insn "vec_interleave_lowv4sf"
3452 [(set (match_operand:V4SF 0 "register_operand" "=x")
3455 (match_operand:V4SF 1 "register_operand" "0")
3456 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3457 (parallel [(const_int 0) (const_int 4)
3458 (const_int 1) (const_int 5)])))]
3460 "unpcklps\t{%2, %0|%0, %2}"
3461 [(set_attr "type" "sselog")
3462 (set_attr "mode" "V4SF")])
3464 ;; These are modeled with the same vec_concat as the others so that we
3465 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: each odd-indexed element of V8SF operand 1 is
;; duplicated into a pair (selection 1,1,3,3,5,5,7,7).  Operand 1 may be
;; a register or memory.
3466 (define_insn "avx_movshdup256"
3467 [(set (match_operand:V8SF 0 "register_operand" "=x")
3470 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3472 (parallel [(const_int 1) (const_int 1)
3473 (const_int 3) (const_int 3)
3474 (const_int 5) (const_int 5)
3475 (const_int 7) (const_int 7)])))]
3477 "vmovshdup\t{%1, %0|%0, %1}"
3478 [(set_attr "type" "sse")
3479 (set_attr "prefix" "vex")
3480 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF; the selection starts at element 1.  The %v
;; in the template together with prefix "maybe_vex" lets one pattern
;; serve both the legacy and the VEX encoding.
3482 (define_insn "sse3_movshdup"
3483 [(set (match_operand:V4SF 0 "register_operand" "=x")
3486 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3488 (parallel [(const_int 1)
3493 "%vmovshdup\t{%1, %0|%0, %1}"
3494 [(set_attr "type" "sse")
3495 (set_attr "prefix_rep" "1")
3496 (set_attr "prefix" "maybe_vex")
3497 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: each even-indexed element of V8SF operand 1 is
;; duplicated into a pair (selection 0,0,2,2,4,4,6,6).  Operand 1 may be
;; a register or memory.
3499 (define_insn "avx_movsldup256"
3500 [(set (match_operand:V8SF 0 "register_operand" "=x")
3503 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3505 (parallel [(const_int 0) (const_int 0)
3506 (const_int 2) (const_int 2)
3507 (const_int 4) (const_int 4)
3508 (const_int 6) (const_int 6)])))]
3510 "vmovsldup\t{%1, %0|%0, %1}"
3511 [(set_attr "type" "sse")
3512 (set_attr "prefix" "vex")
3513 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF; the selection starts at element 0.  The %v
;; in the template together with prefix "maybe_vex" lets one pattern
;; serve both the legacy and the VEX encoding.
3515 (define_insn "sse3_movsldup"
3516 [(set (match_operand:V4SF 0 "register_operand" "=x")
3519 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3521 (parallel [(const_int 0)
3526 "%vmovsldup\t{%1, %0|%0, %1}"
3527 [(set_attr "type" "sse")
3528 (set_attr "prefix_rep" "1")
3529 (set_attr "prefix" "maybe_vex")
3530 (set_attr "mode" "V4SF")])
;; Expander for 256-bit shufps taking the 8-bit immediate as operand 3.
;; The immediate is split into four 2-bit fields and turned into the
;; eight explicit element indices expected by avx_shufps256_1:
;;   fields 0 and 1 index operand 1 (offsets +0 and +4),
;;   fields 2 and 3 index operand 2 (offsets +8 and +12).
;; The same four fields are used for both groups of four indices.
3532 (define_expand "avx_shufps256"
3533 [(match_operand:V8SF 0 "register_operand" "")
3534 (match_operand:V8SF 1 "register_operand" "")
3535 (match_operand:V8SF 2 "nonimmediate_operand" "")
3536 (match_operand:SI 3 "const_int_operand" "")]
3539 int mask = INTVAL (operands[3]);
3540 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3541 GEN_INT ((mask >> 0) & 3),
3542 GEN_INT ((mask >> 2) & 3),
3543 GEN_INT (((mask >> 4) & 3) + 8),
3544 GEN_INT (((mask >> 6) & 3) + 8),
3545 GEN_INT (((mask >> 0) & 3) + 4),
3546 GEN_INT (((mask >> 2) & 3) + 4),
3547 GEN_INT (((mask >> 4) & 3) + 12),
3548 GEN_INT (((mask >> 6) & 3) + 12)));
3552 ;; One bit in mask selects 2 elements.
;; 256-bit vshufps with the selection spelled out as eight index
;; operands (3..10).  The visible part of the condition requires
;; operands 7..10 to equal operands 3..6 plus 4, i.e. both groups of four
;; use the same selectors.  The output code rebuilds the 8-bit immediate
;; from operands 3..6 (subtracting the +8 bias of operands 5 and 6),
;; stores it back in operands[3], and prints vshufps.
3553 (define_insn "avx_shufps256_1"
3554 [(set (match_operand:V8SF 0 "register_operand" "=x")
3557 (match_operand:V8SF 1 "register_operand" "x")
3558 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3559 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3560 (match_operand 4 "const_0_to_3_operand" "")
3561 (match_operand 5 "const_8_to_11_operand" "")
3562 (match_operand 6 "const_8_to_11_operand" "")
3563 (match_operand 7 "const_4_to_7_operand" "")
3564 (match_operand 8 "const_4_to_7_operand" "")
3565 (match_operand 9 "const_12_to_15_operand" "")
3566 (match_operand 10 "const_12_to_15_operand" "")])))]
3568 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3569 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3570 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3571 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3574 mask = INTVAL (operands[3]);
3575 mask |= INTVAL (operands[4]) << 2;
3576 mask |= (INTVAL (operands[5]) - 8) << 4;
3577 mask |= (INTVAL (operands[6]) - 8) << 6;
3578 operands[3] = GEN_INT (mask);
3580 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3582 [(set_attr "type" "sselog")
3583 (set_attr "length_immediate" "1")
3584 (set_attr "prefix" "vex")
3585 (set_attr "mode" "V8SF")])
;; Expander for 128-bit shufps taking the 8-bit immediate as operand 3.
;; The immediate is split into four 2-bit fields; the upper two get a +4
;; bias (they index operand 2), and the four indices are passed to
;; gen_sse_shufps_v4sf.
3587 (define_expand "sse_shufps"
3588 [(match_operand:V4SF 0 "register_operand" "")
3589 (match_operand:V4SF 1 "register_operand" "")
3590 (match_operand:V4SF 2 "nonimmediate_operand" "")
3591 (match_operand:SI 3 "const_int_operand" "")]
3594 int mask = INTVAL (operands[3]);
3595 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3596 GEN_INT ((mask >> 0) & 3),
3597 GEN_INT ((mask >> 2) & 3),
3598 GEN_INT (((mask >> 4) & 3) + 4),
3599 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX three-operand vshufps over the SSEMODE4S modes.  Operands 3 and 4
;; select from operand 1 (indices 0..3); operands 5 and 6 select from
;; operand 2 (indices 4..7).  The output code packs the four selectors
;; into the 8-bit immediate, removing the +4 bias from operands 5 and 6,
;; and stores it in operands[3] before printing.
3603 (define_insn "*avx_shufps_<mode>"
3604 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3605 (vec_select:SSEMODE4S
3606 (vec_concat:<ssedoublesizemode>
3607 (match_operand:SSEMODE4S 1 "register_operand" "x")
3608 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3609 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3610 (match_operand 4 "const_0_to_3_operand" "")
3611 (match_operand 5 "const_4_to_7_operand" "")
3612 (match_operand 6 "const_4_to_7_operand" "")])))]
3616 mask |= INTVAL (operands[3]) << 0;
3617 mask |= INTVAL (operands[4]) << 2;
3618 mask |= (INTVAL (operands[5]) - 4) << 4;
3619 mask |= (INTVAL (operands[6]) - 4) << 6;
3620 operands[3] = GEN_INT (mask);
3622 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3624 [(set_attr "type" "sselog")
3625 (set_attr "length_immediate" "1")
3626 (set_attr "prefix" "vex")
3627 (set_attr "mode" "V4SF")])
;; Legacy-SSE two-operand shufps over the SSEMODE4S modes; operand 1 is
;; tied to the destination.  Operands 3 and 4 select from operand 1
;; (indices 0..3); operands 5 and 6 select from operand 2 (indices
;; 4..7).  The output code packs the four selectors into the 8-bit
;; immediate, removing the +4 bias from operands 5 and 6, and stores it
;; in operands[3] before printing.
3629 (define_insn "sse_shufps_<mode>"
3630 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3631 (vec_select:SSEMODE4S
3632 (vec_concat:<ssedoublesizemode>
3633 (match_operand:SSEMODE4S 1 "register_operand" "0")
3634 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3635 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3636 (match_operand 4 "const_0_to_3_operand" "")
3637 (match_operand 5 "const_4_to_7_operand" "")
3638 (match_operand 6 "const_4_to_7_operand" "")])))]
3642 mask |= INTVAL (operands[3]) << 0;
3643 mask |= INTVAL (operands[4]) << 2;
3644 mask |= (INTVAL (operands[5]) - 4) << 4;
3645 mask |= (INTVAL (operands[6]) - 4) << 6;
3646 operands[3] = GEN_INT (mask);
3648 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3650 [(set_attr "type" "sselog")
3651 (set_attr "length_immediate" "1")
3652 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Three alternatives:
;;   0: memory destination, register source           -> movhps store
;;   1: register destination, register source         -> movhlps
;;   2: register destination, offsettable memory
;;      source addressed through %H1                  -> movlps load
;; The %v template prefix with prefix "maybe_vex" covers both the legacy
;; and the VEX encoding.
3654 (define_insn "sse_storehps"
3655 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3657 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3658 (parallel [(const_int 2) (const_int 3)])))]
3661 %vmovhps\t{%1, %0|%0, %1}
3662 %vmovhlps\t{%1, %d0|%d0, %1}
3663 %vmovlps\t{%H1, %d0|%d0, %H1}"
3664 [(set_attr "type" "ssemov")
3665 (set_attr "prefix" "maybe_vex")
3666 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around the sse_loadhps insn: V4SF operand 0 is built
;; from elements 0 and 1 of V4SF operand 1 and the V2SF operand 2.
;; ix86_fixup_binary_operands legitimizes the operands and returns the
;; destination to use; gen_sse_loadhps is emitted on it, and the result
;; is copied to operands[0] if the two differ.
3668 (define_expand "sse_loadhps_exp"
3669 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3672 (match_operand:V4SF 1 "nonimmediate_operand" "")
3673 (parallel [(const_int 0) (const_int 1)]))
3674 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3677 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3679 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3681 /* Fix up the destination if needed. */
3682 if (dst != operands[0])
3683 emit_move_insn (operands[0], dst);
;; AVX form of loadhps: V4SF operand 0 is built from elements 0 and 1 of
;; V4SF operand 1 and the V2SF operand 2.  Three alternatives:
;;   0: register destination, memory operand 2    -> vmovhps load
;;   1: register destination, register operand 2  -> vmovlhps
;;   2: offsettable memory destination tied to operand 1; operand 2 is
;;      stored through %H0                        -> vmovlps
3688 (define_insn "*avx_loadhps"
3689 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3692 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3693 (parallel [(const_int 0) (const_int 1)]))
3694 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3697 vmovhps\t{%2, %1, %0|%0, %1, %2}
3698 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3699 vmovlps\t{%2, %H0|%H0, %2}"
3700 [(set_attr "type" "ssemov")
3701 (set_attr "prefix" "vex")
3702 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy-SSE form of loadhps: V4SF operand 0 is built from elements 0
;; and 1 of V4SF operand 1 (tied to the destination) and the V2SF
;; operand 2.  Three alternatives:
;;   0: memory operand 2                          -> movhps load
;;   1: register operand 2                        -> movlhps
;;   2: offsettable memory destination, stored
;;      through %H0                               -> movlps
3704 (define_insn "sse_loadhps"
3705 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3708 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3709 (parallel [(const_int 0) (const_int 1)]))
3710 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3713 movhps\t{%2, %0|%0, %2}
3714 movlhps\t{%2, %0|%0, %2}
3715 movlps\t{%2, %H0|%H0, %2}"
3716 [(set_attr "type" "ssemov")
3717 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX form of storelps: extract elements 0 and 1 of V4SF operand 1 into
;; V2SF operand 0.  Three alternatives:
;;   0: memory destination, register source     -> vmovlps store
;;   1: register destination, register source   -> vmovaps (whole
;;      register copy)
;;   2: register destination, memory source     -> vmovlps load, with
;;      the destination also named as the merge source
;; The "mode" attribute of alternative 1 is V4SF because vmovaps is a
;; packed-single move; this matches the sse_storelps pattern.
3719 (define_insn "*avx_storelps"
3720 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3722 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3723 (parallel [(const_int 0) (const_int 1)])))]
3726 vmovlps\t{%1, %0|%0, %1}
3727 vmovaps\t{%1, %0|%0, %1}
3728 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3729 [(set_attr "type" "ssemov")
3730 (set_attr "prefix" "vex")
3731 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy-SSE form of storelps: extract elements 0 and 1 of V4SF operand
;; 1 into V2SF operand 0.  Three alternatives:
;;   0: memory destination, register source     -> movlps store
;;   1: register destination, register source   -> movaps
;;   2: register destination, memory source     -> movlps load
3733 (define_insn "sse_storelps"
3734 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3736 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3737 (parallel [(const_int 0) (const_int 1)])))]
3740 movlps\t{%1, %0|%0, %1}
3741 movaps\t{%1, %0|%0, %1}
3742 movlps\t{%1, %0|%0, %1}"
3743 [(set_attr "type" "ssemov")
3744 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around the sse_loadlps insn: V4SF operand 0 is built
;; from the V2SF operand 2 and elements 2 and 3 of V4SF operand 1.
;; ix86_fixup_binary_operands legitimizes the operands and returns the
;; destination to use; gen_sse_loadlps is emitted on it, and the result
;; is copied to operands[0] if the two differ.
3746 (define_expand "sse_loadlps_exp"
3747 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3749 (match_operand:V2SF 2 "nonimmediate_operand" "")
3751 (match_operand:V4SF 1 "nonimmediate_operand" "")
3752 (parallel [(const_int 2) (const_int 3)]))))]
3755 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3757 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3759 /* Fix up the destination if needed. */
3760 if (dst != operands[0])
3761 emit_move_insn (operands[0], dst);
;; AVX form of loadlps: V4SF operand 0 is built from the V2SF operand 2
;; and elements 2 and 3 of V4SF operand 1.  Three alternatives:
;;   0: all registers                           -> vshufps $0xe4
;;   1: register destination, memory operand 2  -> vmovlps load
;;   2: memory destination tied to operand 1    -> vmovlps store
;; Alternative 0 must use the VEX mnemonic vshufps: its operand list is
;; the three-operand form (plus immediate) and the pattern's "prefix"
;; attribute is "vex", neither of which fits the legacy shufps mnemonic.
3766 (define_insn "*avx_loadlps"
3767 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3769 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3771 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3772 (parallel [(const_int 2) (const_int 3)]))))]
3775 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3776 vmovlps\t{%2, %1, %0|%0, %1, %2}
3777 vmovlps\t{%2, %0|%0, %2}"
3778 [(set_attr "type" "sselog,ssemov,ssemov")
3779 (set_attr "length_immediate" "1,*,*")
3780 (set_attr "prefix" "vex")
3781 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE form of loadlps: V4SF operand 0 is built from the V2SF
;; operand 2 and elements 2 and 3 of V4SF operand 1.  Three alternatives:
;;   0: operand 2 tied to the destination, register operand 1
;;                                              -> shufps $0xe4
;;   1: operand 1 tied to the destination, memory operand 2
;;                                              -> movlps load
;;   2: memory destination tied to operand 1    -> movlps store
3783 (define_insn "sse_loadlps"
3784 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3786 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3788 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3789 (parallel [(const_int 2) (const_int 3)]))))]
3792 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3793 movlps\t{%2, %0|%0, %2}
3794 movlps\t{%2, %0|%0, %2}"
3795 [(set_attr "type" "sselog,ssemov,ssemov")
3796 (set_attr "length_immediate" "1,*,*")
3797 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX three-operand vmovss on V4SF registers: operands 1 and 2 are both
;; plain register inputs, neither tied to the destination.
3799 (define_insn "*avx_movss"
3800 [(set (match_operand:V4SF 0 "register_operand" "=x")
3802 (match_operand:V4SF 2 "register_operand" "x")
3803 (match_operand:V4SF 1 "register_operand" "x")
3806 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3807 [(set_attr "type" "ssemov")
3808 (set_attr "prefix" "vex")
3809 (set_attr "mode" "SF")])
;; Legacy-SSE two-operand movss on V4SF registers: operand 1 is tied to
;; the destination and operand 2 is the register source named in the
;; template.
3811 (define_insn "sse_movss"
3812 [(set (match_operand:V4SF 0 "register_operand" "=x")
3814 (match_operand:V4SF 2 "register_operand" "x")
3815 (match_operand:V4SF 1 "register_operand" "0")
3818 "movss\t{%2, %0|%0, %2}"
3819 [(set_attr "type" "ssemov")
3820 (set_attr "mode" "SF")])
;; Expander for the standard name vec_dupv4sf: V4SF operand 0 is built
;; from the SF scalar operand 1.  When the scalar is forced into a
;; register it must be requested in its own mode, SFmode; asking
;; force_reg for a V4SFmode register would copy an SF value into a
;; pseudo of a different mode.
3822 (define_expand "vec_dupv4sf"
3823 [(set (match_operand:V4SF 0 "register_operand" "")
3825 (match_operand:SF 1 "nonimmediate_operand" "")))]
3829 operands[1] = force_reg (SFmode, operands[1]);
;; AVX broadcast of SF operand 1 into V4SF operand 0.  Two alternatives:
;;   0: register source -> vshufps $0 with the source used twice
;;   1: memory source   -> vbroadcastss
3832 (define_insn "*vec_dupv4sf_avx"
3833 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3835 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3838 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3839 vbroadcastss\t{%1, %0|%0, %1}"
3840 [(set_attr "type" "sselog1,ssemov")
3841 (set_attr "length_immediate" "1,0")
3842 (set_attr "prefix_extra" "0,1")
3843 (set_attr "prefix" "vex")
3844 (set_attr "mode" "V4SF")])
;; Legacy-SSE broadcast of SF operand 1 into V4SF operand 0.  The scalar
;; must already be in the destination register (constraint "0"); the
;; insn is a shufps $0 of that register with itself.
3846 (define_insn "*vec_dupv4sf"
3847 [(set (match_operand:V4SF 0 "register_operand" "=x")
3849 (match_operand:SF 1 "register_operand" "0")))]
3851 "shufps\t{$0, %0, %0|%0, %0, 0}"
3852 [(set_attr "type" "sselog1")
3853 (set_attr "length_immediate" "1")
3854 (set_attr "mode" "V4SF")])
;; AVX-enabled pattern building V2SF operand 0 from the two SF operands
;; 1 and 2.  Five alternatives; the first three target SSE registers and
;; use VEX mnemonics (vunpcklps, vinsertps $0x10, vmovss), the last two
;; target MMX registers (*y) and use punpckldq / movd.  The "prefix"
;; attribute is therefore "orig" for alternatives 3 and 4 and "vex" for
;; the others.
3856 (define_insn "*vec_concatv2sf_avx"
3857 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3859 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3860 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3863 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3864 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3865 vmovss\t{%1, %0|%0, %1}
3866 punpckldq\t{%2, %0|%0, %2}
3867 movd\t{%1, %0|%0, %1}"
3868 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3869 (set_attr "length_immediate" "*,1,*,*,*")
3870 (set_attr "prefix_extra" "*,1,*,*,*")
3871 (set (attr "prefix")
3872 (if_then_else (eq_attr "alternative" "3,4")
3873 (const_string "orig")
3874 (const_string "vex")))
3875 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3877 ;; Although insertps takes register source, we prefer
3878 ;; unpcklps with register source since it is shorter.
;; Pattern building V2SF operand 0 from the two SF operands 1 and 2 with
;; legacy encodings, including insertps.  Five alternatives: unpcklps
;; (register operand 2), insertps $0x10 (memory operand 2), movss
;; (memory operand 1 with constraint C on operand 2), and two MMX
;; alternatives using punpckldq / movd.  Where operand 1 is a register it
;; is tied to the destination.
3879 (define_insn "*vec_concatv2sf_sse4_1"
3880 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3882 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3883 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3886 unpcklps\t{%2, %0|%0, %2}
3887 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3888 movss\t{%1, %0|%0, %1}
3889 punpckldq\t{%2, %0|%0, %2}
3890 movd\t{%1, %0|%0, %1}"
3891 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3892 (set_attr "prefix_data16" "*,1,*,*,*")
3893 (set_attr "prefix_extra" "*,1,*,*,*")
3894 (set_attr "length_immediate" "*,1,*,*,*")
3895 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3897 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3898 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3899 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pattern building V2SF operand 0 from the two SF operands 1 and 2
;; without insertps.  Four alternatives: unpcklps and movss for SSE
;; registers, punpckldq and movd for MMX registers (*y).  Operand 2 is
;; restricted to a register or zero (reg_or_0_operand).
3900 (define_insn "*vec_concatv2sf_sse"
3901 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3903 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3904 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3907 unpcklps\t{%2, %0|%0, %2}
3908 movss\t{%1, %0|%0, %1}
3909 punpckldq\t{%2, %0|%0, %2}
3910 movd\t{%1, %0|%0, %1}"
3911 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3912 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX pattern building V4SF operand 0 from the two V2SF operands 1 and
;; 2.  Two alternatives: vmovlhps when operand 2 is a register, vmovhps
;; when it is in memory.
3914 (define_insn "*vec_concatv4sf_avx"
3915 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3917 (match_operand:V2SF 1 "register_operand" " x,x")
3918 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3921 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3922 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3923 [(set_attr "type" "ssemov")
3924 (set_attr "prefix" "vex")
3925 (set_attr "mode" "V4SF,V2SF")])
;; Legacy-SSE pattern building V4SF operand 0 from the two V2SF operands
;; 1 and 2; operand 1 is tied to the destination.  Two alternatives:
;; movlhps when operand 2 is a register, movhps when it is in memory.
3927 (define_insn "*vec_concatv4sf_sse"
3928 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3930 (match_operand:V2SF 1 "register_operand" " 0,0")
3931 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3934 movlhps\t{%2, %0|%0, %2}
3935 movhps\t{%2, %0|%0, %2}"
3936 [(set_attr "type" "ssemov")
3937 (set_attr "mode" "V4SF,V2SF")])
;; Expander vec_init<mode> for every mode in SSEMODE.  All the work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, the destination register (operand 0) and the unconstrained
;; initializer (operand 1).
3939 (define_expand "vec_init<mode>"
3940 [(match_operand:SSEMODE 0 "register_operand" "")
3941 (match_operand 1 "" "")]
3944 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX pattern merging a duplicated scalar (operand 2) into the
;; SSEMODE4S vector operand 1, giving operand 0.  Six alternatives; the
;; five visible templates are vinsertps $0xe, vmov<ssescalarmodesuffix>,
;; vmovd, vmovss and vpinsrd $0.  The sixth alternative has a memory
;; destination tied to operand 1 and carries "*" for its type and mode
;; attributes.
;; NOTE(review): the name suggests only element 0 is replaced; the merge
;; mask is not visible here -- confirm.
3948 (define_insn "*vec_set<mode>_0_avx"
3949 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3950 (vec_merge:SSEMODE4S
3951 (vec_duplicate:SSEMODE4S
3952 (match_operand:<ssescalarmode> 2
3953 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3954 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3958 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3959 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3960 vmovd\t{%2, %0|%0, %2}
3961 vmovss\t{%2, %1, %0|%0, %1, %2}
3962 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3964 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3965 (set_attr "prefix_extra" "*,*,*,*,1,*")
3966 (set_attr "length_immediate" "*,*,*,*,1,*")
3967 (set_attr "prefix" "vex")
3968 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Legacy-encoding counterpart of *vec_set<mode>_0_avx that may use
;; insertps and pinsrd.  A duplicated scalar (operand 2) is merged into
;; the SSEMODE4S vector operand 1.  Six alternatives; the five visible
;; templates are insertps $0xe, mov<ssescalarmodesuffix>, movd, movss
;; and pinsrd $0.  Where operand 1 is a register it is tied to the
;; destination; the sixth alternative has a memory destination.
3970 (define_insn "*vec_set<mode>_0_sse4_1"
3971 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3972 (vec_merge:SSEMODE4S
3973 (vec_duplicate:SSEMODE4S
3974 (match_operand:<ssescalarmode> 2
3975 "general_operand" " x,m,*r,x,*rm,*rfF"))
3976 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
3980 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
3981 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3982 movd\t{%2, %0|%0, %2}
3983 movss\t{%2, %0|%0, %2}
3984 pinsrd\t{$0, %2, %0|%0, %2, 0}
3986 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3987 (set_attr "prefix_extra" "*,*,*,*,1,*")
3988 (set_attr "length_immediate" "*,*,*,*,1,*")
3989 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Variant of the vec_set<mode>_0 family with four alternatives and no
;; insertps/pinsrd.  A duplicated scalar (operand 2) is merged into the
;; SSEMODE4S vector operand 1.  Visible templates:
;; mov<ssescalarmodesuffix> (memory scalar), movd (general register
;; scalar) and movss (SSE register scalar, operand 1 tied to the
;; destination).  The fourth alternative has a memory destination and
;; mode "*".
3991 (define_insn "*vec_set<mode>_0_sse2"
3992 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
3993 (vec_merge:SSEMODE4S
3994 (vec_duplicate:SSEMODE4S
3995 (match_operand:<ssescalarmode> 2
3996 "general_operand" " m,*r,x,x*rfF"))
3997 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4001 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4002 movd\t{%2, %0|%0, %2}
4003 movss\t{%2, %0|%0, %2}
4005 [(set_attr "type" "ssemov")
4006 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Baseline variant of the vec_set<mode>_0 family with three
;; alternatives.  A duplicated scalar (operand 2) is merged into the
;; SSEMODE4S vector operand 1.  The two register-destination
;; alternatives both print movss (memory scalar, or SSE register scalar
;; with operand 1 tied to the destination); the third alternative has a
;; memory destination and mode "*".
4008 (define_insn "vec_set<mode>_0"
4009 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4010 (vec_merge:SSEMODE4S
4011 (vec_duplicate:SSEMODE4S
4012 (match_operand:<ssescalarmode> 2
4013 "general_operand" " m,x,x*rfF"))
4014 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4018 movss\t{%2, %0|%0, %2}
4019 movss\t{%2, %0|%0, %2}
4021 [(set_attr "type" "ssemov")
4022 (set_attr "mode" "SF,SF,*")])
4024 ;; A subset is vec_setv4sf.
;; AVX insertion of SF operand 2 into V4SF operand 1 at the lane given
;; by the one-hot mask operand 3 (a power of two from 1 to 8).  The
;; output code turns that mask into the vinsertps immediate by taking
;; its log2 and shifting it left by 4, then prints the three-operand
;; vinsertps.
4025 (define_insn "*vec_setv4sf_avx"
4026 [(set (match_operand:V4SF 0 "register_operand" "=x")
4029 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4030 (match_operand:V4SF 1 "register_operand" "x")
4031 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4034 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4035 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4037 [(set_attr "type" "sselog")
4038 (set_attr "prefix_extra" "1")
4039 (set_attr "length_immediate" "1")
4040 (set_attr "prefix" "vex")
4041 (set_attr "mode" "V4SF")])
;; Legacy-encoding insertion of SF operand 2 into V4SF operand 1 (tied
;; to the destination) at the lane given by the one-hot mask operand 3
;; (a power of two from 1 to 8).  The output code turns that mask into
;; the insertps immediate by taking its log2 and shifting it left by 4.
4043 (define_insn "*vec_setv4sf_sse4_1"
4044 [(set (match_operand:V4SF 0 "register_operand" "=x")
4047 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4048 (match_operand:V4SF 1 "register_operand" "0")
4049 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4052 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4053 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4055 [(set_attr "type" "sselog")
4056 (set_attr "prefix_data16" "1")
4057 (set_attr "prefix_extra" "1")
4058 (set_attr "length_immediate" "1")
4059 (set_attr "mode" "V4SF")])
;; AVX vinsertps modelled as an unspec over V4SF operands 2 and 1 and
;; the 8-bit immediate operand 3, which is printed unchanged.  Operand 2
;; may be a register or memory.
;; NOTE(review): the ';' after the template string starts a comment in
;; the machine-description syntax, so it appears to be harmless --
;; confirm.
4061 (define_insn "*avx_insertps"
4062 [(set (match_operand:V4SF 0 "register_operand" "=x")
4063 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4064 (match_operand:V4SF 1 "register_operand" "x")
4065 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4068 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4069 [(set_attr "type" "sselog")
4070 (set_attr "prefix" "vex")
4071 (set_attr "prefix_extra" "1")
4072 (set_attr "length_immediate" "1")
4073 (set_attr "mode" "V4SF")])
;; Legacy-encoding insertps modelled as an unspec over V4SF operands 2
;; and 1 and the 8-bit immediate operand 3, which is printed unchanged.
;; Operand 1 is tied to the destination; operand 2 must be a register.
;; NOTE(review): the ';' after the template string starts a comment in
;; the machine-description syntax, so it appears to be harmless --
;; confirm.
4075 (define_insn "sse4_1_insertps"
4076 [(set (match_operand:V4SF 0 "register_operand" "=x")
4077 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4078 (match_operand:V4SF 1 "register_operand" "0")
4079 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4082 "insertps\t{%3, %2, %0|%0, %2, %3}";
4083 [(set_attr "type" "sselog")
4084 (set_attr "prefix_data16" "1")
4085 (set_attr "prefix_extra" "1")
4086 (set_attr "length_immediate" "1")
4087 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE && reload_completed) for a vec_merge of
;; a duplicated non-memory scalar (operand 1) into a memory destination
;; (operand 0).  The replacement is a plain scalar move to the
;; destination address at offset 0, re-typed to the scalar mode with
;; adjust_address.
4090 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
4091 (vec_merge:SSEMODE4S
4092 (vec_duplicate:SSEMODE4S
4093 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4096 "TARGET_SSE && reload_completed"
4099 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Expander vec_set<mode> for every mode in SSEMODE.  All the work is
;; delegated to ix86_expand_vector_set, called with false as its first
;; argument, the vector (operand 0), the scalar register (operand 1) and
;; the integer value of the constant operand 2.
4104 (define_expand "vec_set<mode>"
4105 [(match_operand:SSEMODE 0 "register_operand" "")
4106 (match_operand:<ssescalarmode> 1 "register_operand" "")
4107 (match_operand 2 "const_int_operand" "")]
4110 ix86_expand_vector_set (false, operands[0], operands[1],
4111 INTVAL (operands[2]));
;; Extract element 0 of a V4SF as an SF value; at most one of the two
;; operands may be memory.  After reload the insn is split into a plain
;; SF move whose source is operand 1 re-expressed in SFmode, either as a
;; fresh REG with the same register number or through gen_lowpart.
;; NOTE(review): the test that chooses between those two forms is not
;; visible in this excerpt.
4115 (define_insn_and_split "*vec_extractv4sf_0"
4116 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4118 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4119 (parallel [(const_int 0)])))]
4120 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4122 "&& reload_completed"
4125 rtx op1 = operands[1];
4127 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4129 op1 = gen_lowpart (SFmode, op1);
4130 emit_move_insn (operands[0], op1);
;; Expander for extracting one 128-bit half of a 256-bit vector.  The
;; constant operand 2 (0 or 1) selects between the vec_extract_lo_<mode>
;; and vec_extract_hi_<mode> generators; the chosen generator is then
;; called with operands 0 and 1.
;; NOTE(review): the case labels are not visible here; presumably 0 maps
;; to "lo" and 1 to "hi".
4134 (define_expand "avx_vextractf128<mode>"
4135 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4136 (match_operand:AVX256MODE 1 "register_operand" "")
4137 (match_operand:SI 2 "const_0_to_1_operand" "")]
4140 rtx (*insn)(rtx, rtx);
4142 switch (INTVAL (operands[2]))
4145 insn = gen_vec_extract_lo_<mode>;
4148 insn = gen_vec_extract_hi_<mode>;
4154 emit_insn (insn (operands[0], operands[1]));
;; Low half (elements 0 and 1) of an AVX256MODE4P vector.  After reload
;; the insn is split into an ordinary move of operand 1 viewed in the
;; half-width mode (via gen_rtx_REG on the same register number, or via
;; gen_lowpart).
;; NOTE(review): the test selecting between the two is not visible here.
4158 (define_insn_and_split "vec_extract_lo_<mode>"
4159 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4160 (vec_select:<avxhalfvecmode>
4161 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
4162 (parallel [(const_int 0) (const_int 1)])))]
4165 "&& reload_completed"
4168 rtx op1 = operands[1];
4170 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4172 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4173 emit_move_insn (operands[0], op1);
;; High half (elements 2 and 3) of an AVX256MODE4P register.  Emits
;; vextractf128 with immediate 1; the destination is either a register or
;; memory, which is why the "memory" attribute is "none,store".
4177 (define_insn "vec_extract_hi_<mode>"
4178 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4179 (vec_select:<avxhalfvecmode>
4180 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4181 (parallel [(const_int 2) (const_int 3)])))]
4183 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4184 [(set_attr "type" "sselog")
4185 (set_attr "prefix_extra" "1")
4186 (set_attr "length_immediate" "1")
4187 (set_attr "memory" "none,store")
4188 (set_attr "prefix" "vex")
4189 (set_attr "mode" "V8SF")])
;; Low half (elements 0..3) of an AVX256MODE8P vector.  After reload the
;; insn is split into an ordinary move of operand 1 viewed in the
;; half-width mode (via gen_rtx_REG on the same register number, or via
;; gen_lowpart).
;; NOTE(review): the test selecting between the two is not visible here.
4191 (define_insn_and_split "vec_extract_lo_<mode>"
4192 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4193 (vec_select:<avxhalfvecmode>
4194 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
4195 (parallel [(const_int 0) (const_int 1)
4196 (const_int 2) (const_int 3)])))]
4199 "&& reload_completed"
4202 rtx op1 = operands[1];
4204 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4206 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4207 emit_move_insn (operands[0], op1);
;; High half (elements 4..7) of an AVX256MODE8P register.  Emits
;; vextractf128 with immediate 1 into a register or memory destination.
4211 (define_insn "vec_extract_hi_<mode>"
4212 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4213 (vec_select:<avxhalfvecmode>
4214 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4215 (parallel [(const_int 4) (const_int 5)
4216 (const_int 6) (const_int 7)])))]
4218 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4219 [(set_attr "type" "sselog")
4220 (set_attr "prefix_extra" "1")
4221 (set_attr "length_immediate" "1")
4222 (set_attr "memory" "none,store")
4223 (set_attr "prefix" "vex")
4224 (set_attr "mode" "V8SF")])
;; Low half (elements 0..7) of a V16HI vector as V8HI.  After reload the
;; insn is split into an ordinary V8HI move of operand 1 (re-expressed via
;; gen_rtx_REG on the same register number, or via gen_lowpart).
;; NOTE(review): the test selecting between the two is not visible here.
4226 (define_insn_and_split "vec_extract_lo_v16hi"
4227 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4229 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4230 (parallel [(const_int 0) (const_int 1)
4231 (const_int 2) (const_int 3)
4232 (const_int 4) (const_int 5)
4233 (const_int 6) (const_int 7)])))]
4236 "&& reload_completed"
4239 rtx op1 = operands[1];
4241 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4243 op1 = gen_lowpart (V8HImode, op1);
4244 emit_move_insn (operands[0], op1);
;; High half (elements 8..15) of a V16HI register as V8HI.  Emits
;; vextractf128 with immediate 1 into a register or memory destination.
4248 (define_insn "vec_extract_hi_v16hi"
4249 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4251 (match_operand:V16HI 1 "register_operand" "x,x")
4252 (parallel [(const_int 8) (const_int 9)
4253 (const_int 10) (const_int 11)
4254 (const_int 12) (const_int 13)
4255 (const_int 14) (const_int 15)])))]
4257 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4258 [(set_attr "type" "sselog")
4259 (set_attr "prefix_extra" "1")
4260 (set_attr "length_immediate" "1")
4261 (set_attr "memory" "none,store")
4262 (set_attr "prefix" "vex")
4263 (set_attr "mode" "V8SF")])
;; Low half (elements 0..15) of a V32QI vector as V16QI.  After reload the
;; insn is split into an ordinary V16QI move of operand 1 (re-expressed
;; via gen_rtx_REG on the same register number, or via gen_lowpart).
;; NOTE(review): the test selecting between the two is not visible here.
4265 (define_insn_and_split "vec_extract_lo_v32qi"
4266 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4268 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4269 (parallel [(const_int 0) (const_int 1)
4270 (const_int 2) (const_int 3)
4271 (const_int 4) (const_int 5)
4272 (const_int 6) (const_int 7)
4273 (const_int 8) (const_int 9)
4274 (const_int 10) (const_int 11)
4275 (const_int 12) (const_int 13)
4276 (const_int 14) (const_int 15)])))]
4279 "&& reload_completed"
4282 rtx op1 = operands[1];
4284 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4286 op1 = gen_lowpart (V16QImode, op1);
4287 emit_move_insn (operands[0], op1);
;; High half (elements 16..31) of a V32QI register as V16QI.  Emits
;; vextractf128 with immediate 1 into a register or memory destination.
4291 (define_insn "vec_extract_hi_v32qi"
4292 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4294 (match_operand:V32QI 1 "register_operand" "x,x")
4295 (parallel [(const_int 16) (const_int 17)
4296 (const_int 18) (const_int 19)
4297 (const_int 20) (const_int 21)
4298 (const_int 22) (const_int 23)
4299 (const_int 24) (const_int 25)
4300 (const_int 26) (const_int 27)
4301 (const_int 28) (const_int 29)
4302 (const_int 30) (const_int 31)])))]
4304 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4305 [(set_attr "type" "sselog")
4306 (set_attr "prefix_extra" "1")
4307 (set_attr "length_immediate" "1")
4308 (set_attr "memory" "none,store")
4309 (set_attr "prefix" "vex")
4310 (set_attr "mode" "V8SF")])
;; Extract the element of V4SF register operand 1 chosen by the constant
;; operand 2 (0..3) into a general register or memory ("=rm").  The "%v"
;; template prefix pairs with the "maybe_vex" prefix attribute.
4312 (define_insn "*sse4_1_extractps"
4313 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4315 (match_operand:V4SF 1 "register_operand" "x")
4316 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4318 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4319 [(set_attr "type" "sselog")
4320 (set_attr "prefix_data16" "1")
4321 (set_attr "prefix_extra" "1")
4322 (set_attr "length_immediate" "1")
4323 (set_attr "prefix" "maybe_vex")
4324 (set_attr "mode" "V4SF")])
;; Extract element operand 2 (0..3) of a V4SF that lives in offsettable
;; memory.  The split replaces the insn with an SF load from operand 1 at
;; byte offset 4 * index.
;; NOTE(review): the insn and split conditions are not visible here.
4326 (define_insn_and_split "*vec_extract_v4sf_mem"
4327 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4329 (match_operand:V4SF 1 "memory_operand" "o")
4330 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4336 int i = INTVAL (operands[2]);
4338 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for extracting one element of a VEC_EXTRACT_MODE vector.  All
;; work is delegated to ix86_expand_vector_extract, which receives the
;; scalar destination (operand 0), the vector (operand 1) and the constant
;; index (operand 2).
4342 (define_expand "vec_extract<mode>"
4343 [(match_operand:<avxscalarmode> 0 "register_operand" "")
4344 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4345 (match_operand 2 "const_int_operand" "")]
4348 ix86_expand_vector_extract (false, operands[0], operands[1],
4349 INTVAL (operands[2]));
4353 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4355 ;; Parallel double-precision floating point element swizzling
4357 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4359 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd.  The selection vector is {1, 5, 3, 7} over the two
;; V4DF inputs (operand 1 in a register, operand 2 in register or memory).
4360 (define_insn "avx_unpckhpd256"
4361 [(set (match_operand:V4DF 0 "register_operand" "=x")
4364 (match_operand:V4DF 1 "register_operand" "x")
4365 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4366 (parallel [(const_int 1) (const_int 5)
4367 (const_int 3) (const_int 7)])))]
4369 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4370 [(set_attr "type" "sselog")
4371 (set_attr "prefix" "vex")
4372 (set_attr "mode" "V4DF")])
;; Three-step expander.  Two selections over operands 1 and 2, {0,4,2,6}
;; and {1,5,3,7}, are followed by a final selection {2,3,6,7} into
;; operand 0.  Operands 3 and 4 are fresh V4DF pseudos created in the
;; preparation code.
;; NOTE(review): the set destinations of the first two steps and the
;; inputs of the last are not visible here; presumably operands 3 and 4
;; hold the two intermediate results -- confirm against the file.
4374 (define_expand "vec_interleave_highv4df"
4378 (match_operand:V4DF 1 "register_operand" "x")
4379 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4380 (parallel [(const_int 0) (const_int 4)
4381 (const_int 2) (const_int 6)])))
4387 (parallel [(const_int 1) (const_int 5)
4388 (const_int 3) (const_int 7)])))
4389 (set (match_operand:V4DF 0 "register_operand" "")
4394 (parallel [(const_int 2) (const_int 3)
4395 (const_int 6) (const_int 7)])))]
4398 operands[3] = gen_reg_rtx (V4DFmode);
4399 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the high interleave of two V2DF values.  When
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination
;; (second argument 1 = high variant), operand 2 is forced into a register.
4403 (define_expand "vec_interleave_highv2df"
4404 [(set (match_operand:V2DF 0 "register_operand" "")
4407 (match_operand:V2DF 1 "nonimmediate_operand" "")
4408 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4409 (parallel [(const_int 1)
4413 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4414 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX high interleave of two V2DF values.  Alternatives, in order:
;;   0: reg,reg -> vunpckhpd
;;   1: both inputs the same offsettable memory -> vmovddup from %H1
;;   2: memory operand 1, register operand 2 -> vmovlpd from %H1
;;   3: memory destination tied to operand 2 -> vmovhpd store of operand 1
4417 (define_insn "*avx_interleave_highv2df"
4418 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4421 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4422 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4423 (parallel [(const_int 1)
4425 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4427 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4428 vmovddup\t{%H1, %0|%0, %H1}
4429 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4430 vmovhpd\t{%1, %0|%0, %1}"
4431 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4432 (set_attr "prefix" "vex")
4433 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 high interleave of two V2DF values (two-operand encodings, so the
;; destination is tied to an input where required).  Alternatives:
;;   0: unpckhpd    1: movddup from %H1    2: movlpd from %H1
;;   3: movhpd store of operand 1 to the memory destination
4435 (define_insn "*sse3_interleave_highv2df"
4436 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4439 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4440 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4441 (parallel [(const_int 1)
4443 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4445 unpckhpd\t{%2, %0|%0, %2}
4446 movddup\t{%H1, %0|%0, %H1}
4447 movlpd\t{%H1, %0|%0, %H1}
4448 movhpd\t{%1, %0|%0, %1}"
4449 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4450 (set_attr "prefix_data16" "*,*,1,1")
4451 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 high interleave of two V2DF values; same as the SSE3 variant but
;; without the movddup alternative.  Alternatives:
;;   0: unpckhpd    1: movlpd from %H1    2: movhpd store to memory
4453 (define_insn "*sse2_interleave_highv2df"
4454 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4457 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4458 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4459 (parallel [(const_int 1)
4461 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4463 unpckhpd\t{%2, %0|%0, %2}
4464 movlpd\t{%H1, %0|%0, %H1}
4465 movhpd\t{%1, %0|%0, %1}"
4466 [(set_attr "type" "sselog,ssemov,ssemov")
4467 (set_attr "prefix_data16" "*,1,1")
4468 (set_attr "mode" "V2DF,V1DF,V1DF")])
4470 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single V4DF input (operand 1, register or memory) and
;; the selection vector {0, 4, 2, 6}.
;; NOTE(review): the second vec_concat input is not visible here;
;; presumably operand 1 is used for both halves -- confirm against the file.
4471 (define_expand "avx_movddup256"
4472 [(set (match_operand:V4DF 0 "register_operand" "")
4475 (match_operand:V4DF 1 "nonimmediate_operand" "")
4477 (parallel [(const_int 0) (const_int 4)
4478 (const_int 2) (const_int 6)])))]
;; Named expander for the 256-bit low unpack: selection vector
;; {0, 4, 2, 6} over register operand 1 and register-or-memory operand 2.
4481 (define_expand "avx_unpcklpd256"
4482 [(set (match_operand:V4DF 0 "register_operand" "")
4485 (match_operand:V4DF 1 "register_operand" "")
4486 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4487 (parallel [(const_int 0) (const_int 4)
4488 (const_int 2) (const_int 6)])))]
;; Matcher for selection {0, 4, 2, 6} over two V4DF inputs.
;;   Alternative 0: operand 2 is the same as operand 1 -> vmovddup
;;   Alternative 1: distinct inputs -> vunpcklpd
;; The visible part of the condition only allows a memory operand 1 when
;; it is rtx-equal to operand 2.
4491 (define_insn "*avx_unpcklpd256"
4492 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4495 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4496 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4497 (parallel [(const_int 0) (const_int 4)
4498 (const_int 2) (const_int 6)])))]
4500 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4502 vmovddup\t{%1, %0|%0, %1}
4503 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4504 [(set_attr "type" "sselog")
4505 (set_attr "prefix" "vex")
4506 (set_attr "mode" "V4DF")])
;; Three-step expander.  Two selections over operands 1 and 2, {0,4,2,6}
;; and {1,5,3,7}, are followed by a final selection {0,1,4,5} into
;; operand 0.  Operands 3 and 4 are fresh V4DF pseudos created in the
;; preparation code.
;; NOTE(review): the set destinations of the first two steps and the
;; inputs of the last are not visible here; presumably operands 3 and 4
;; hold the two intermediate results -- confirm against the file.
4508 (define_expand "vec_interleave_lowv4df"
4512 (match_operand:V4DF 1 "register_operand" "x")
4513 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4514 (parallel [(const_int 0) (const_int 4)
4515 (const_int 2) (const_int 6)])))
4521 (parallel [(const_int 1) (const_int 5)
4522 (const_int 3) (const_int 7)])))
4523 (set (match_operand:V4DF 0 "register_operand" "")
4528 (parallel [(const_int 0) (const_int 1)
4529 (const_int 4) (const_int 5)])))]
4532 operands[3] = gen_reg_rtx (V4DFmode);
4533 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the low interleave of two V2DF values.  When
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination
;; (second argument 0 = low variant), operand 1 is forced into a register.
4536 (define_expand "vec_interleave_lowv2df"
4537 [(set (match_operand:V2DF 0 "register_operand" "")
4540 (match_operand:V2DF 1 "nonimmediate_operand" "")
4541 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4542 (parallel [(const_int 0)
4546 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4547 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX low interleave of two V2DF values.  Alternatives, in order:
;;   0: reg,reg -> vunpcklpd
;;   1: both inputs the same memory -> vmovddup
;;   2: register operand 1, memory operand 2 -> vmovhpd load
;;   3: offsettable memory destination tied to operand 1 -> vmovlpd store
;;      of operand 2 to %H0
4550 (define_insn "*avx_interleave_lowv2df"
4551 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4554 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4555 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4556 (parallel [(const_int 0)
4558 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4560 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4561 vmovddup\t{%1, %0|%0, %1}
4562 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4563 vmovlpd\t{%2, %H0|%H0, %2}"
4564 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4565 (set_attr "prefix" "vex")
4566 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 low interleave of two V2DF values (two-operand encodings).
;; Alternatives:
;;   0: unpcklpd    1: movddup from memory    2: movhpd load of operand 2
;;   3: movlpd store of operand 2 to %H0 of the memory destination
4568 (define_insn "*sse3_interleave_lowv2df"
4569 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4572 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4573 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4574 (parallel [(const_int 0)
4576 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4578 unpcklpd\t{%2, %0|%0, %2}
4579 movddup\t{%1, %0|%0, %1}
4580 movhpd\t{%2, %0|%0, %2}
4581 movlpd\t{%2, %H0|%H0, %2}"
4582 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4583 (set_attr "prefix_data16" "*,*,1,1")
4584 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 low interleave of two V2DF values; same as the SSE3 variant but
;; without the movddup alternative.  Alternatives:
;;   0: unpcklpd    1: movhpd load of operand 2    2: movlpd store to %H0
4586 (define_insn "*sse2_interleave_lowv2df"
4587 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4590 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4591 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4592 (parallel [(const_int 0)
4594 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4596 unpcklpd\t{%2, %0|%0, %2}
4597 movhpd\t{%2, %0|%0, %2}
4598 movlpd\t{%2, %H0|%H0, %2}"
4599 [(set_attr "type" "sselog,ssemov,ssemov")
4600 (set_attr "prefix_data16" "*,1,1")
4601 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; After reload, a selection from register operand 1 whose result goes to
;; memory is replaced by two DF stores of the register's low element
;; (same register number, viewed in DFmode): one at byte offset 0 and one
;; at byte offset 8 of the destination.
;; NOTE(review): the full selection vector is not visible in this excerpt.
4604 [(set (match_operand:V2DF 0 "memory_operand" "")
4607 (match_operand:V2DF 1 "register_operand" "")
4609 (parallel [(const_int 0)
4611 "TARGET_SSE3 && reload_completed"
4614 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4615 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4616 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; A selection {i, i + 2} (i = operand 2, 0 or 1; operand 3 must equal
;; i + 2) from memory operand 1 becomes a vec_duplicate of the DF found at
;; byte offset 8 * i of that memory.
4621 [(set (match_operand:V2DF 0 "register_operand" "")
4624 (match_operand:V2DF 1 "memory_operand" "")
4626 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4627 (match_operand:SI 3 "const_int_operand" "")])))]
4628 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4629 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4631 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander that decodes the shuffle immediate (operand 3) into explicit
;; element indices for avx_shufpd256_1: bit 1 picks 5 over 4, bit 2 picks
;; 3 over 2, bit 3 picks 7 over 6.
;; NOTE(review): the argument derived from bit 0 is not visible in this
;; excerpt.
4634 (define_expand "avx_shufpd256"
4635 [(match_operand:V4DF 0 "register_operand" "")
4636 (match_operand:V4DF 1 "register_operand" "")
4637 (match_operand:V4DF 2 "nonimmediate_operand" "")
4638 (match_operand:SI 3 "const_int_operand" "")]
4641 int mask = INTVAL (operands[3]);
4642 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4644 GEN_INT (mask & 2 ? 5 : 4),
4645 GEN_INT (mask & 4 ? 3 : 2),
4646 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the selection spelled out as four index operands.
;; The output code rebuilds the instruction immediate from them:
;;   bit 0 = op3, bit 1 = op4 - 4, bit 2 = op5 - 2, bit 3 = op6 - 6
;; and stores it back into operands[3] for printing.
4650 (define_insn "avx_shufpd256_1"
4651 [(set (match_operand:V4DF 0 "register_operand" "=x")
4654 (match_operand:V4DF 1 "register_operand" "x")
4655 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4656 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4657 (match_operand 4 "const_4_to_5_operand" "")
4658 (match_operand 5 "const_2_to_3_operand" "")
4659 (match_operand 6 "const_6_to_7_operand" "")])))]
4663 mask = INTVAL (operands[3]);
4664 mask |= (INTVAL (operands[4]) - 4) << 1;
4665 mask |= (INTVAL (operands[5]) - 2) << 2;
4666 mask |= (INTVAL (operands[6]) - 6) << 3;
4667 operands[3] = GEN_INT (mask);
4669 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4671 [(set_attr "type" "sselog")
4672 (set_attr "length_immediate" "1")
4673 (set_attr "prefix" "vex")
4674 (set_attr "mode" "V4DF")])
;; Expander that decodes the shuffle immediate (operand 3) into explicit
;; element indices for sse2_shufpd_v2df: bit 1 picks 3 over 2 as the
;; second index.
;; NOTE(review): the argument derived from bit 0 is not visible in this
;; excerpt.
4676 (define_expand "sse2_shufpd"
4677 [(match_operand:V2DF 0 "register_operand" "")
4678 (match_operand:V2DF 1 "register_operand" "")
4679 (match_operand:V2DF 2 "nonimmediate_operand" "")
4680 (match_operand:SI 3 "const_int_operand" "")]
4683 int mask = INTVAL (operands[3]);
4684 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4686 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander that hands operands 0..2 to ix86_expand_vec_extract_even_odd
;; with a final argument of 0 (the "even" variant, per the pattern name).
4690 (define_expand "vec_extract_even<mode>"
4691 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4692 (match_operand:SSEMODE_EO 1 "register_operand" "")
4693 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4696 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander that hands operands 0..2 to ix86_expand_vec_extract_even_odd
;; with a final argument of 1 (the "odd" variant, per the pattern name).
4700 (define_expand "vec_extract_odd<mode>"
4701 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4702 (match_operand:SSEMODE_EO 1 "register_operand" "")
4703 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4706 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4710 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand high interleave of two V2DI values; emits
;; vpunpckhqdq with operand 2 allowed in memory.
4711 (define_insn "*avx_interleave_highv2di"
4712 [(set (match_operand:V2DI 0 "register_operand" "=x")
4715 (match_operand:V2DI 1 "register_operand" "x")
4716 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4717 (parallel [(const_int 1)
4720 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4721 [(set_attr "type" "sselog")
4722 (set_attr "prefix" "vex")
4723 (set_attr "mode" "TI")])
;; Two-operand high interleave of two V2DI values; the destination is
;; tied to operand 1 and the insn emits punpckhqdq.
4725 (define_insn "vec_interleave_highv2di"
4726 [(set (match_operand:V2DI 0 "register_operand" "=x")
4729 (match_operand:V2DI 1 "register_operand" "0")
4730 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4731 (parallel [(const_int 1)
4734 "punpckhqdq\t{%2, %0|%0, %2}"
4735 [(set_attr "type" "sselog")
4736 (set_attr "prefix_data16" "1")
4737 (set_attr "mode" "TI")])
;; AVX three-operand low interleave of two V2DI values; emits
;; vpunpcklqdq with operand 2 allowed in memory.
4739 (define_insn "*avx_interleave_lowv2di"
4740 [(set (match_operand:V2DI 0 "register_operand" "=x")
4743 (match_operand:V2DI 1 "register_operand" "x")
4744 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4745 (parallel [(const_int 0)
4748 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4749 [(set_attr "type" "sselog")
4750 (set_attr "prefix" "vex")
4751 (set_attr "mode" "TI")])
;; Two-operand low interleave of two V2DI values; the destination is tied
;; to operand 1 and the insn emits punpcklqdq.
4753 (define_insn "vec_interleave_lowv2di"
4754 [(set (match_operand:V2DI 0 "register_operand" "=x")
4757 (match_operand:V2DI 1 "register_operand" "0")
4758 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4759 (parallel [(const_int 0)
4762 "punpcklqdq\t{%2, %0|%0, %2}"
4763 [(set_attr "type" "sselog")
4764 (set_attr "prefix_data16" "1")
4765 (set_attr "mode" "TI")])
;; AVX vshufpd on two-element vectors (SSEMODE2D).  Operand 3 (0 or 1)
;; indexes into operand 1 and operand 4 (2 or 3) into operand 2 of the
;; concatenation.  The output code folds them into the immediate as
;; op3 | ((op4 - 2) << 1) and stores it back into operands[3].
4767 (define_insn "*avx_shufpd_<mode>"
4768 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4769 (vec_select:SSEMODE2D
4770 (vec_concat:<ssedoublesizemode>
4771 (match_operand:SSEMODE2D 1 "register_operand" "x")
4772 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4773 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4774 (match_operand 4 "const_2_to_3_operand" "")])))]
4778 mask = INTVAL (operands[3]);
4779 mask |= (INTVAL (operands[4]) - 2) << 1;
4780 operands[3] = GEN_INT (mask);
4782 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4784 [(set_attr "type" "sselog")
4785 (set_attr "length_immediate" "1")
4786 (set_attr "prefix" "vex")
4787 (set_attr "mode" "V2DF")])
;; Two-operand shufpd on two-element vectors (SSEMODE2D); the destination
;; is tied to operand 1.  The immediate is rebuilt from the two index
;; operands as op3 | ((op4 - 2) << 1) and stored back into operands[3].
4789 (define_insn "sse2_shufpd_<mode>"
4790 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4791 (vec_select:SSEMODE2D
4792 (vec_concat:<ssedoublesizemode>
4793 (match_operand:SSEMODE2D 1 "register_operand" "0")
4794 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4795 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4796 (match_operand 4 "const_2_to_3_operand" "")])))]
4800 mask = INTVAL (operands[3]);
4801 mask |= (INTVAL (operands[4]) - 2) << 1;
4802 operands[3] = GEN_INT (mask);
4804 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4806 [(set_attr "type" "sselog")
4807 (set_attr "length_immediate" "1")
4808 (set_attr "mode" "V2DF")])
4810 ;; Avoid combining registers from different units in a single alternative,
4811 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF as DF; at most one operand may
;; be memory.  Alternative 0 stores to memory with vmovhpd; alternative 1
;; is register to register via vunpckhpd with operand 1 as both sources.
;; NOTE(review): the templates of the three memory-source alternatives
;; are not visible in this excerpt.
4812 (define_insn "*avx_storehpd"
4813 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4815 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4816 (parallel [(const_int 1)])))]
4817 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4819 vmovhpd\t{%1, %0|%0, %1}
4820 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4824 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4825 (set_attr "prefix" "vex")
4826 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF as DF; at most one operand may
;; be memory.  Alternative 0 stores to memory with movhpd; in alternative
;; 1 the destination register is tied to operand 1.
;; NOTE(review): the templates of alternatives 1..4 are not visible in
;; this excerpt.
4828 (define_insn "sse2_storehpd"
4829 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4831 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4832 (parallel [(const_int 1)])))]
4833 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4835 movhpd\t{%1, %0|%0, %1}
4840 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4841 (set_attr "prefix_data16" "1,*,*,*,*")
4842 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload, extracting element 1 of a V2DF held in memory into a DF
;; register becomes a plain DF load from byte offset 8 of that memory.
4845 [(set (match_operand:DF 0 "register_operand" "")
4847 (match_operand:V2DF 1 "memory_operand" "")
4848 (parallel [(const_int 1)])))]
4849 "TARGET_SSE2 && reload_completed"
4850 [(set (match_dup 0) (match_dup 1))]
4851 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4853 ;; Avoid combining registers from different units in a single alternative,
4854 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF as DF; at most one operand may be
;; memory.  Alternative 0 stores to memory with (v)movlpd.
;; NOTE(review): the templates of alternatives 1..4 are not visible in
;; this excerpt.
4855 (define_insn "sse2_storelpd"
4856 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4858 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4859 (parallel [(const_int 0)])))]
4860 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4862 %vmovlpd\t{%1, %0|%0, %1}
4867 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4868 (set_attr "prefix_data16" "1,*,*,*,*")
4869 (set_attr "prefix" "maybe_vex")
4870 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload, extracting element 0 of a V2DF into a DF register becomes
;; a plain DF move whose source is operand 1 re-expressed in DFmode
;; (gen_rtx_REG on the same register number, or gen_lowpart).
;; NOTE(review): the test selecting between the two is not visible here.
4873 [(set (match_operand:DF 0 "register_operand" "")
4875 (match_operand:V2DF 1 "nonimmediate_operand" "")
4876 (parallel [(const_int 0)])))]
4877 "TARGET_SSE2 && reload_completed"
4880 rtx op1 = operands[1];
4882 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4884 op1 = gen_lowpart (DFmode, op1);
4885 emit_move_insn (operands[0], op1);
;; Expander wrapper around sse2_loadhpd.  ix86_fixup_binary_operands
;; returns the destination to use; the insn is emitted into it, and if
;; that is not operand 0 itself the result is copied to operand 0.
4889 (define_expand "sse2_loadhpd_exp"
4890 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4893 (match_operand:V2DF 1 "nonimmediate_operand" "")
4894 (parallel [(const_int 0)]))
4895 (match_operand:DF 2 "nonimmediate_operand" "")))]
4898 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4900 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4902 /* Fix up the destination if needed. */
4903 if (dst != operands[0])
4904 emit_move_insn (operands[0], dst);
4909 ;; Avoid combining registers from different units in a single alternative,
4910 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form combining element 0 of V2DF operand 1 with DF operand 2.
;; Alternative 0 loads operand 2 from memory with vmovhpd; alternative 1
;; takes it from a register via vunpcklpd.
;; NOTE(review): the templates of the three memory-destination
;; alternatives are not visible in this excerpt.
4911 (define_insn "*avx_loadhpd"
4912 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4915 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4916 (parallel [(const_int 0)]))
4917 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4918 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4920 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4921 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4925 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4926 (set_attr "prefix" "vex")
4927 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 form combining element 0 of V2DF operand 1 with DF operand 2.
;; Register-destination alternatives: movhpd (operand 2 in memory),
;; unpcklpd (operand 2 in a register), and shufpd $1 when the destination
;; is tied to operand 2 rather than operand 1.
;; NOTE(review): the templates of the three memory-destination
;; alternatives are not visible in this excerpt.
4929 (define_insn "sse2_loadhpd"
4930 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4933 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4934 (parallel [(const_int 0)]))
4935 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4936 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4938 movhpd\t{%2, %0|%0, %2}
4939 unpcklpd\t{%2, %0|%0, %2}
4940 shufpd\t{$1, %1, %0|%0, %1, 1}
4944 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4945 (set_attr "prefix_data16" "1,*,*,*,*,*")
4946 (set_attr "length_immediate" "*,*,1,*,*,*")
4947 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; After reload, writing (element 0 of the same memory, DF register
;; operand 1) back to a V2DF in memory only changes the second element, so
;; the split is a single DF store to byte offset 8.
4950 [(set (match_operand:V2DF 0 "memory_operand" "")
4952 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4953 (match_operand:DF 1 "register_operand" "")))]
4954 "TARGET_SSE2 && reload_completed"
4955 [(set (match_dup 0) (match_dup 1))]
4956 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  ix86_fixup_binary_operands
;; returns the destination to use; the insn is emitted into it, and if
;; that is not operand 0 itself the result is copied to operand 0.
4958 (define_expand "sse2_loadlpd_exp"
4959 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4961 (match_operand:DF 2 "nonimmediate_operand" "")
4963 (match_operand:V2DF 1 "nonimmediate_operand" "")
4964 (parallel [(const_int 1)]))))]
4967 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4969 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4971 /* Fix up the destination if needed. */
4972 if (dst != operands[0])
4973 emit_move_insn (operands[0], dst);
4978 ;; Avoid combining registers from different units in a single alternative,
4979 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form combining DF operand 2 with element 1 of V2DF operand 1.
;; Register-destination alternatives, in order: vmovsd load (operand 1
;; matches constraint "C"), vmovlpd load, three-operand vmovsd between
;; registers, and vmovhpd from %H1 when operand 1 is in memory.
;; NOTE(review): the templates of the three memory-destination
;; alternatives are not visible in this excerpt.
4980 (define_insn "*avx_loadlpd"
4981 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4983 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4985 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4986 (parallel [(const_int 1)]))))]
4987 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4989 vmovsd\t{%2, %0|%0, %2}
4990 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4991 vmovsd\t{%2, %1, %0|%0, %1, %2}
4992 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4996 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4997 (set_attr "prefix" "vex")
4998 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 form combining DF operand 2 with element 1 of V2DF operand 1.
;; Register-destination alternatives, in order: movsd load (operand 1
;; matches constraint "C"), movlpd load, movsd between registers,
;; shufpd $2 when the destination is tied to operand 2, and movhpd from
;; %H1 when operand 1 is in memory.
;; NOTE(review): the templates of the three memory-destination
;; alternatives are not visible in this excerpt.
5000 (define_insn "sse2_loadlpd"
5001 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
5003 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
5005 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
5006 (parallel [(const_int 1)]))))]
5007 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5009 movsd\t{%2, %0|%0, %2}
5010 movlpd\t{%2, %0|%0, %2}
5011 movsd\t{%2, %0|%0, %2}
5012 shufpd\t{$2, %1, %0|%0, %1, 2}
5013 movhpd\t{%H1, %0|%0, %H1}
5017 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
5018 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
5019 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
5020 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; After reload, writing (DF register operand 1, element 1 of the same
;; memory) back to a V2DF in memory only changes the FIRST element, so the
;; split is a single DF store to byte offset 0.  Element 1, which this
;; pattern requires to be preserved, lives at byte offset 8 and must not
;; be touched; the high-element counterpart of this split is the one that
;; stores at offset 8.
5023 [(set (match_operand:V2DF 0 "memory_operand" "")
5025 (match_operand:DF 1 "register_operand" "")
5026 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5027 "TARGET_SSE2 && reload_completed"
5028 [(set (match_dup 0) (match_dup 1))]
5029 "operands[0] = adjust_address (operands[0], DFmode, 0);")
5031 ;; Not sure these two are ever used, but it doesn't hurt to have
;; Extraction of element 1 of a V2DF as DF when SSE is available but SSE2
;; is not.  Alternatives: movhps store to memory, movhlps between
;; registers, movlps load from %H1 of an offsettable memory source.
5033 (define_insn "*vec_extractv2df_1_sse"
5034 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5036 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
5037 (parallel [(const_int 1)])))]
5038 "!TARGET_SSE2 && TARGET_SSE
5039 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5041 movhps\t{%1, %0|%0, %1}
5042 movhlps\t{%1, %0|%0, %1}
5043 movlps\t{%H1, %0|%0, %H1}"
5044 [(set_attr "type" "ssemov")
5045 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extraction of element 0 of a V2DF as DF when SSE is available but SSE2
;; is not.  Alternatives: movlps store to memory, movaps between
;; registers, movlps load from memory.
5047 (define_insn "*vec_extractv2df_0_sse"
5048 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5050 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5051 (parallel [(const_int 0)])))]
5052 "!TARGET_SSE2 && TARGET_SSE
5053 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5055 movlps\t{%1, %0|%0, %1}
5056 movaps\t{%1, %0|%0, %1}
5057 movlps\t{%1, %0|%0, %1}"
5058 [(set_attr "type" "ssemov")
5059 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX merge of two V2DF operands with five alternatives, in order:
;; vmovsd (reg,reg), vmovlpd load of operand 2, vmovlpd store of
;; operand 2, vmovhps load from %H1, vmovhps store of operand 1 to %H0.
;; NOTE(review): the enclosing rtx code and the insn condition are not
;; visible in this excerpt.
5061 (define_insn "*avx_movsd"
5062 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5064 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5065 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5069 vmovsd\t{%2, %1, %0|%0, %1, %2}
5070 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5071 vmovlpd\t{%2, %0|%0, %2}
5072 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5073 vmovhps\t{%1, %H0|%H0, %1}"
5074 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5075 (set_attr "prefix" "vex")
5076 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 merge of two V2DF operands with six alternatives, in order:
;; movsd (reg,reg), movlpd load, movlpd store, shufpd $2 (destination
;; tied to operand 2), movhps load from %H1, movhps store to %H0.
;; NOTE(review): the enclosing rtx code and the insn condition are not
;; visible in this excerpt.
5078 (define_insn "sse2_movsd"
5079 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5081 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5082 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5086 movsd\t{%2, %0|%0, %2}
5087 movlpd\t{%2, %0|%0, %2}
5088 movlpd\t{%2, %0|%0, %2}
5089 shufpd\t{$2, %1, %0|%0, %1, 2}
5090 movhps\t{%H1, %0|%0, %H1}
5091 movhps\t{%1, %H0|%H0, %1}"
5092 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5093 (set_attr "prefix_data16" "*,1,1,*,*,*")
5094 (set_attr "length_immediate" "*,*,*,1,*,*")
5095 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Build a V2DF from a single DF operand (register or memory) with
;; (v)movddup.
;; NOTE(review): the insn condition is not visible in this excerpt.
5097 (define_insn "*vec_dupv2df_sse3"
5098 [(set (match_operand:V2DF 0 "register_operand" "=x")
5100 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5102 "%vmovddup\t{%1, %0|%0, %1}"
5103 [(set_attr "type" "sselog1")
5104 (set_attr "prefix" "maybe_vex")
5105 (set_attr "mode" "DF")])
;; Build a V2DF from a single DF register; the destination is tied to the
;; input (constraint "0").
;; NOTE(review): the insn condition and output template are not visible
;; in this excerpt.
5107 (define_insn "vec_dupv2df"
5108 [(set (match_operand:V2DF 0 "register_operand" "=x")
5110 (match_operand:DF 1 "register_operand" "0")))]
5113 [(set_attr "type" "sselog1")
5114 (set_attr "mode" "V2DF")])
;; Build a V2DF from DF operand 1 (register or memory) with (v)movddup.
;; NOTE(review): the second vec_concat input and the insn condition are
;; not visible here; presumably operand 1 is used twice -- confirm against
;; the file.
5116 (define_insn "*vec_concatv2df_sse3"
5117 [(set (match_operand:V2DF 0 "register_operand" "=x")
5119 (match_operand:DF 1 "nonimmediate_operand" "xm")
5122 "%vmovddup\t{%1, %0|%0, %1}"
5123 [(set_attr "type" "sselog1")
5124 (set_attr "prefix" "maybe_vex")
5125 (set_attr "mode" "DF")])
;; AVX construction of a V2DF from two DF operands.  Alternatives:
;;   0: both in registers -> vunpcklpd
;;   1: operand 2 in memory -> vmovhpd
;;   2: operand 1 in memory, operand 2 matching constraint "C" -> vmovsd
;; NOTE(review): the insn condition is not visible in this excerpt.
5127 (define_insn "*vec_concatv2df_avx"
5128 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5130 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5131 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5134 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5135 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5136 vmovsd\t{%1, %0|%0, %1}"
5137 [(set_attr "type" "ssemov")
5138 (set_attr "prefix" "vex")
5139 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX construction of a V2DF from two DF operands.  The first three
;; alternatives use the "Y2" register constraint and emit unpcklpd,
;; movhpd and movsd; the last two use plain "x" registers and emit
;; movlhps and movhps.
;; NOTE(review): the insn condition is not visible in this excerpt.
5141 (define_insn "*vec_concatv2df"
5142 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5144 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5145 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5148 unpcklpd\t{%2, %0|%0, %2}
5149 movhpd\t{%2, %0|%0, %2}
5150 movsd\t{%1, %0|%0, %1}
5151 movlhps\t{%2, %0|%0, %2}
5152 movhps\t{%2, %0|%0, %2}"
5153 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5154 (set_attr "prefix_data16" "*,1,*,*,*")
5155 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5157 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5159 ;; Parallel integral arithmetic
5161 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the 16-byte integer vector modes.  The
;; preparation statement creates operand 2 as an all-zero vector forced
;; into a register.
;; NOTE(review): the arithmetic rtx that uses operand 2 is not visible
;; here; presumably the negation is expressed as 0 - operand 1.
5163 (define_expand "neg<mode>2"
5164 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5167 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5169 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for the 16-byte integer vector modes.  Operand
;; legitimization is delegated to ix86_fixup_binary_operands_no_copy.
5171 (define_expand "<plusminus_insn><mode>3"
5172 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5174 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5175 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5177 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer vector add/subtract.  The mnemonic is
;; assembled from "vp", the operation name and the element-size suffix;
;; operand combinations are vetted by ix86_binary_operator_ok.
5179 (define_insn "*avx_<plusminus_insn><mode>3"
5180 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5182 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5183 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5184 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5185 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5186 [(set_attr "type" "sseiadd")
5187 (set_attr "prefix" "vex")
5188 (set_attr "mode" "TI")])
;; SSE2 two-operand integer vector add/subtract; the destination is tied
;; to operand 1.  The mnemonic is assembled from "p", the operation name
;; and the element-size suffix.
5190 (define_insn "*<plusminus_insn><mode>3"
5191 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5193 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5194 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5195 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5196 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5197 [(set_attr "type" "sseiadd")
5198 (set_attr "prefix_data16" "1")
5199 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus (saturating add/subtract) codes on the
;; byte and word vector modes in SSEMODE12.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
5201 (define_expand "sse2_<plusminus_insn><mode>3"
5202 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5203 (sat_plusminus:SSEMODE12
5204 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5205 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5207 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX form of the saturating add/subtract on SSEMODE12: three-operand
;; vp<op><size> with a VEX prefix; operand 2 may be memory.
5209 (define_insn "*avx_<plusminus_insn><mode>3"
5210 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5211 (sat_plusminus:SSEMODE12
5212 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5213 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5214 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5215 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5216 [(set_attr "type" "sseiadd")
5217 (set_attr "prefix" "vex")
5218 (set_attr "mode" "TI")])
;; SSE2 form of the saturating add/subtract on SSEMODE12: two-operand
;; p<op><size> with operand 1 tied to the destination ("0").
5220 (define_insn "*sse2_<plusminus_insn><mode>3"
5221 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5222 (sat_plusminus:SSEMODE12
5223 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5224 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5225 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5226 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5227 [(set_attr "type" "sseiadd")
5228 (set_attr "prefix_data16" "1")
5229 (set_attr "mode" "TI")])
;; V16QI multiply, implemented by splitting into word multiplies.
;; Each input is interleaved with itself (high and low halves) so that
;; every 16-bit word holds a source byte in its low byte; the halves are
;; multiplied with mulv8hi3 and the even bytes of the two products are
;; merged into operand 0 by ix86_expand_vec_extract_even_odd.
;; The split allocates six V16QI temporaries, hence the
;; can_create_pseudo_p () requirement in the condition.
5231 (define_insn_and_split "mulv16qi3"
5232 [(set (match_operand:V16QI 0 "register_operand" "")
5233 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5234 (match_operand:V16QI 2 "register_operand" "")))]
5236 && can_create_pseudo_p ()"
5244 for (i = 0; i < 6; ++i)
5245 t[i] = gen_reg_rtx (V16QImode);
5247 /* Unpack data such that we've got a source byte in each low byte of
5248 each word. We don't care what goes into the high byte of each word.
5249 Rather than trying to get zero in there, most convenient is to let
5250 it be a copy of the low byte. */
5251 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5252 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5253 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5254 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5256 /* Multiply words. The end-of-line annotations here give a picture of what
5257 the output of that instruction looks like. Dot means don't care; the
5258 letters are the bytes of the result with A being the most significant. */
5259 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5260 gen_lowpart (V8HImode, t[0]),
5261 gen_lowpart (V8HImode, t[1])));
5262 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5263 gen_lowpart (V8HImode, t[2]),
5264 gen_lowpart (V8HImode, t[3])));
5266 /* Extract the even bytes and merge them back together. */
5267 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Named expander for the V8HI element-wise multiply.  Inputs may be in
;; memory; operands go through ix86_fixup_binary_operands_no_copy with
;; code MULT before the insn is generated.
5271 (define_expand "mulv8hi3"
5272 [(set (match_operand:V8HI 0 "register_operand" "")
5273 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5274 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5276 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX V8HI multiply: three-operand vpmullw with a VEX prefix.  The "%"
;; on operand 1 marks operands 1 and 2 as commutative.
5278 (define_insn "*avx_mulv8hi3"
5279 [(set (match_operand:V8HI 0 "register_operand" "=x")
5280 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5281 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5282 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5283 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5284 [(set_attr "type" "sseimul")
5285 (set_attr "prefix" "vex")
5286 (set_attr "mode" "TI")])
;; SSE2 V8HI multiply: two-operand pmullw, operand 1 tied to the
;; destination ("%0", commutative with operand 2).
5288 (define_insn "*mulv8hi3"
5289 [(set (match_operand:V8HI 0 "register_operand" "=x")
5290 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5291 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5292 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5293 "pmullw\t{%2, %0|%0, %2}"
5294 [(set_attr "type" "sseimul")
5295 (set_attr "prefix_data16" "1")
5296 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply; the <s> prefix in the name
;; selects the signed or unsigned variant.  Operands go through
;; ix86_fixup_binary_operands_no_copy with code MULT.
5298 (define_expand "<s>mulv8hi3_highpart"
5299 [(set (match_operand:V8HI 0 "register_operand" "")
5304 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5306 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5309 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX V8HI high-part multiply: three-operand vpmulh<u>w with a VEX
;; prefix; <u> inserts the "u" of the unsigned mnemonic.
5311 (define_insn "*avx_<s>mulv8hi3_highpart"
5312 [(set (match_operand:V8HI 0 "register_operand" "=x")
5317 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5319 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5321 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5322 "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5323 [(set_attr "type" "sseimul")
5324 (set_attr "prefix" "vex")
5325 (set_attr "mode" "TI")])
;; SSE2 V8HI high-part multiply: two-operand pmulh<u>w with operand 1
;; tied to the destination.
5327 (define_insn "*<s>mulv8hi3_highpart"
5328 [(set (match_operand:V8HI 0 "register_operand" "=x")
5333 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5335 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5337 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5338 "pmulh<u>w\t{%2, %0|%0, %2}"
5339 [(set_attr "type" "sseimul")
5340 (set_attr "prefix_data16" "1")
5341 (set_attr "mode" "TI")])
;; Expander for the widening multiply that takes elements 0 and 2 of
;; each V4SI input and produces a V2DI result (unsigned, going by the
;; pattern name).  Operands are fixed up as a V4SI MULT.
5343 (define_expand "sse2_umulv2siv2di3"
5344 [(set (match_operand:V2DI 0 "register_operand" "")
5348 (match_operand:V4SI 1 "nonimmediate_operand" "")
5349 (parallel [(const_int 0) (const_int 2)])))
5352 (match_operand:V4SI 2 "nonimmediate_operand" "")
5353 (parallel [(const_int 0) (const_int 2)])))))]
5355 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form: vpmuludq on elements 0 and 2 of both V4SI inputs, giving a
;; V2DI result; three-operand, VEX-prefixed.
5357 (define_insn "*avx_umulv2siv2di3"
5358 [(set (match_operand:V2DI 0 "register_operand" "=x")
5362 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5363 (parallel [(const_int 0) (const_int 2)])))
5366 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5367 (parallel [(const_int 0) (const_int 2)])))))]
5368 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5369 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5370 [(set_attr "type" "sseimul")
5371 (set_attr "prefix" "vex")
5372 (set_attr "mode" "TI")])
;; SSE2 form: two-operand pmuludq on elements 0 and 2 of both V4SI
;; inputs; operand 1 is tied to the V2DI destination.
5374 (define_insn "*sse2_umulv2siv2di3"
5375 [(set (match_operand:V2DI 0 "register_operand" "=x")
5379 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5380 (parallel [(const_int 0) (const_int 2)])))
5383 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5384 (parallel [(const_int 0) (const_int 2)])))))]
5385 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5386 "pmuludq\t{%2, %0|%0, %2}"
5387 [(set_attr "type" "sseimul")
5388 (set_attr "prefix_data16" "1")
5389 (set_attr "mode" "TI")])
;; Expander for the SSE4.1 counterpart of sse2_umulv2siv2di3: elements
;; 0 and 2 of each V4SI input are multiplied into a V2DI result.
;; NOTE(review): presumably the signed variant (the insns below emit
;; pmuldq); the extension operator lines are not visible here.
5391 (define_expand "sse4_1_mulv2siv2di3"
5392 [(set (match_operand:V2DI 0 "register_operand" "")
5396 (match_operand:V4SI 1 "nonimmediate_operand" "")
5397 (parallel [(const_int 0) (const_int 2)])))
5400 (match_operand:V4SI 2 "nonimmediate_operand" "")
5401 (parallel [(const_int 0) (const_int 2)])))))]
5403 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form: vpmuldq on elements 0 and 2 of both V4SI inputs, V2DI
;; result; three-operand, VEX-prefixed, prefix_extra set to 1.
5405 (define_insn "*avx_mulv2siv2di3"
5406 [(set (match_operand:V2DI 0 "register_operand" "=x")
5410 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5411 (parallel [(const_int 0) (const_int 2)])))
5414 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5415 (parallel [(const_int 0) (const_int 2)])))))]
5416 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5417 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5418 [(set_attr "type" "sseimul")
5419 (set_attr "prefix_extra" "1")
5420 (set_attr "prefix" "vex")
5421 (set_attr "mode" "TI")])
;; SSE4.1 form: two-operand pmuldq on elements 0 and 2 of both V4SI
;; inputs; operand 1 is tied to the V2DI destination.
5423 (define_insn "*sse4_1_mulv2siv2di3"
5424 [(set (match_operand:V2DI 0 "register_operand" "=x")
5428 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5429 (parallel [(const_int 0) (const_int 2)])))
5432 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5433 (parallel [(const_int 0) (const_int 2)])))))]
5434 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5435 "pmuldq\t{%2, %0|%0, %2}"
5436 [(set_attr "type" "sseimul")
5437 (set_attr "prefix_extra" "1")
5438 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V8HI inputs, V4SI result.  The RTL combines
;; V4HI selections of both inputs that start at element 0 with
;; selections of the same inputs (match_dup 1 / match_dup 2) that start
;; at element 1 and end at element 7.  Operands are fixed up as a V8HI
;; MULT.
5440 (define_expand "sse2_pmaddwd"
5441 [(set (match_operand:V4SI 0 "register_operand" "")
5446 (match_operand:V8HI 1 "nonimmediate_operand" "")
5447 (parallel [(const_int 0)
5453 (match_operand:V8HI 2 "nonimmediate_operand" "")
5454 (parallel [(const_int 0)
5460 (vec_select:V4HI (match_dup 1)
5461 (parallel [(const_int 1)
5466 (vec_select:V4HI (match_dup 2)
5467 (parallel [(const_int 1)
5470 (const_int 7)]))))))]
5472 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of pmaddwd: three-operand vpmaddwd with a VEX prefix,
;; matching the same element-0 / element-1 selections as the expander.
5474 (define_insn "*avx_pmaddwd"
5475 [(set (match_operand:V4SI 0 "register_operand" "=x")
5480 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5481 (parallel [(const_int 0)
5487 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5488 (parallel [(const_int 0)
5494 (vec_select:V4HI (match_dup 1)
5495 (parallel [(const_int 1)
5500 (vec_select:V4HI (match_dup 2)
5501 (parallel [(const_int 1)
5504 (const_int 7)]))))))]
5505 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5506 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5507 [(set_attr "type" "sseiadd")
5508 (set_attr "prefix" "vex")
5509 (set_attr "mode" "TI")])
;; SSE2 form of pmaddwd: two-operand, operand 1 tied to the destination.
;; Unlike the AVX form it also sets atom_unit to "simul".
5511 (define_insn "*sse2_pmaddwd"
5512 [(set (match_operand:V4SI 0 "register_operand" "=x")
5517 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5518 (parallel [(const_int 0)
5524 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5525 (parallel [(const_int 0)
5531 (vec_select:V4HI (match_dup 1)
5532 (parallel [(const_int 1)
5537 (vec_select:V4HI (match_dup 2)
5538 (parallel [(const_int 1)
5541 (const_int 7)]))))))]
5542 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5543 "pmaddwd\t{%2, %0|%0, %2}"
5544 [(set_attr "type" "sseiadd")
5545 (set_attr "atom_unit" "simul")
5546 (set_attr "prefix_data16" "1")
5547 (set_attr "mode" "TI")])
;; Named expander for the V4SI multiply; both inputs must be registers.
;; Operand fix-up is applied only when TARGET_SSE4_1 or TARGET_AVX is
;; set, i.e. for the targets served by the pmulld/vpmulld insns below;
;; plain SSE2 is handled by the *sse2_mulv4si3 split.
5549 (define_expand "mulv4si3"
5550 [(set (match_operand:V4SI 0 "register_operand" "")
5551 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5552 (match_operand:V4SI 2 "register_operand" "")))]
5555 if (TARGET_SSE4_1 || TARGET_AVX)
5556 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX V4SI multiply: three-operand vpmulld with a VEX prefix and
;; prefix_extra set to 1.
5559 (define_insn "*avx_mulv4si3"
5560 [(set (match_operand:V4SI 0 "register_operand" "=x")
5561 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5562 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5563 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5564 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5565 [(set_attr "type" "sseimul")
5566 (set_attr "prefix_extra" "1")
5567 (set_attr "prefix" "vex")
5568 (set_attr "mode" "TI")])
;; SSE4.1 V4SI multiply: two-operand pmulld with operand 1 tied to the
;; destination.
5570 (define_insn "*sse4_1_mulv4si3"
5571 [(set (match_operand:V4SI 0 "register_operand" "=x")
5572 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5573 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5574 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5575 "pmulld\t{%2, %0|%0, %2}"
5576 [(set_attr "type" "sseimul")
5577 (set_attr "prefix_extra" "1")
5578 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX.
;; Split sequence: multiply elements 0 and 2 with sse2_umulv2siv2di3,
;; shift both inputs down as V1TI values so elements 1 and 3 take their
;; place, multiply again, then use pshufd to move each product's element
;; 2 down to element 1 and interleave the two low halves into op0.
;; Six V4SI temporaries are created, so the pattern is only valid while
;; can_create_pseudo_p () holds.
5580 (define_insn_and_split "*sse2_mulv4si3"
5581 [(set (match_operand:V4SI 0 "register_operand" "")
5582 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5583 (match_operand:V4SI 2 "register_operand" "")))]
5584 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5585 && can_create_pseudo_p ()"
5590 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5596 t1 = gen_reg_rtx (V4SImode);
5597 t2 = gen_reg_rtx (V4SImode);
5598 t3 = gen_reg_rtx (V4SImode);
5599 t4 = gen_reg_rtx (V4SImode);
5600 t5 = gen_reg_rtx (V4SImode);
5601 t6 = gen_reg_rtx (V4SImode);
5602 thirtytwo = GEN_INT (32);
5604 /* Multiply elements 2 and 0. */
5605 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5608 /* Shift both input vectors down one element, so that elements 3
5609 and 1 are now in the slots for elements 2 and 0. For K8, at
5610 least, this is faster than using a shuffle. */
5611 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5612 gen_lowpart (V1TImode, op1),
5614 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5615 gen_lowpart (V1TImode, op2),
5617 /* Multiply elements 3 and 1. */
5618 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5621 /* Move the results in element 2 down to element 1; we don't care
5622 what goes in elements 2 and 3. */
5623 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5624 const0_rtx, const0_rtx));
5625 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5626 const0_rtx, const0_rtx));
5628 /* Merge the parts back together. */
5629 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, split into 32-bit multiplies.  Two code paths are
;; visible (the condition selecting between them is not shown here):
;;  - an XOP sequence: pshufd, mulv4si3, xop_phadddq, a left shift by
;;    32 and a final xop_pmacsdql into op0;
;;  - a generic SSE2 sequence: low*low via sse2_umulv2siv2di3, the two
;;    cross products obtained after shifting each input right by 32,
;;    both cross products shifted left by 32 and the three parts summed.
;; Temporaries are created with gen_reg_rtx, hence the
;; can_create_pseudo_p () requirement.
5633 (define_insn_and_split "mulv2di3"
5634 [(set (match_operand:V2DI 0 "register_operand" "")
5635 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5636 (match_operand:V2DI 2 "register_operand" "")))]
5638 && can_create_pseudo_p ()"
5643 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5652 /* op1: A,B,C,D, op2: E,F,G,H */
5653 op1 = gen_lowpart (V4SImode, op1);
5654 op2 = gen_lowpart (V4SImode, op2);
5656 t1 = gen_reg_rtx (V4SImode);
5657 t2 = gen_reg_rtx (V4SImode);
5658 t3 = gen_reg_rtx (V2DImode);
5659 t4 = gen_reg_rtx (V2DImode);
5662 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5668 /* t2: (B*E),(A*F),(D*G),(C*H) */
5669 emit_insn (gen_mulv4si3 (t2, t1, op2));
5671 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5672 emit_insn (gen_xop_phadddq (t3, t2));
5674 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5675 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5677 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5678 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5682 t1 = gen_reg_rtx (V2DImode);
5683 t2 = gen_reg_rtx (V2DImode);
5684 t3 = gen_reg_rtx (V2DImode);
5685 t4 = gen_reg_rtx (V2DImode);
5686 t5 = gen_reg_rtx (V2DImode);
5687 t6 = gen_reg_rtx (V2DImode);
5688 thirtytwo = GEN_INT (32);
5690 /* Multiply low parts. */
5691 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5692 gen_lowpart (V4SImode, op2)));
5694 /* Shift input vectors right 32 bits so we can multiply high parts. */
5695 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5696 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5698 /* Multiply high parts by low parts. */
5699 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5700 gen_lowpart (V4SImode, t3)));
5701 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5702 gen_lowpart (V4SImode, t2)));
5704 /* Shift them back. */
5705 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5706 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5708 /* Add the three parts together. */
5709 emit_insn (gen_addv2di3 (t6, t1, t4));
5710 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply of V8HI inputs into a V4SI result, high
;; half.  Computes the low parts (mulv8hi3) and the signed high parts
;; (smulv8hi3_highpart) of all products, then interleaves the high
;; halves of the two vectors into the destination viewed as V8HI.
5715 (define_expand "vec_widen_smult_hi_v8hi"
5716 [(match_operand:V4SI 0 "register_operand" "")
5717 (match_operand:V8HI 1 "register_operand" "")
5718 (match_operand:V8HI 2 "register_operand" "")]
5721 rtx op1, op2, t1, t2, dest;
5725 t1 = gen_reg_rtx (V8HImode);
5726 t2 = gen_reg_rtx (V8HImode);
5727 dest = gen_lowpart (V8HImode, operands[0]);
5729 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5730 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5731 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply of V8HI inputs into a V4SI result, low half.
;; Same sequence as the _hi_ variant, except that the low halves of the
;; low-part and high-part product vectors are interleaved.
5735 (define_expand "vec_widen_smult_lo_v8hi"
5736 [(match_operand:V4SI 0 "register_operand" "")
5737 (match_operand:V8HI 1 "register_operand" "")
5738 (match_operand:V8HI 2 "register_operand" "")]
5741 rtx op1, op2, t1, t2, dest;
5745 t1 = gen_reg_rtx (V8HImode);
5746 t2 = gen_reg_rtx (V8HImode);
5747 dest = gen_lowpart (V8HImode, operands[0]);
5749 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5750 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5751 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply of V8HI inputs into a V4SI result, high
;; half.  Low parts come from mulv8hi3, high parts from
;; umulv8hi3_highpart; the high halves of both vectors are interleaved
;; into the destination viewed as V8HI.
5755 (define_expand "vec_widen_umult_hi_v8hi"
5756 [(match_operand:V4SI 0 "register_operand" "")
5757 (match_operand:V8HI 1 "register_operand" "")
5758 (match_operand:V8HI 2 "register_operand" "")]
5761 rtx op1, op2, t1, t2, dest;
5765 t1 = gen_reg_rtx (V8HImode);
5766 t2 = gen_reg_rtx (V8HImode);
5767 dest = gen_lowpart (V8HImode, operands[0]);
5769 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5770 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5771 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply of V8HI inputs into a V4SI result, low
;; half.  Same sequence as the _hi_ variant, except that the low halves
;; of the two product vectors are interleaved.
5775 (define_expand "vec_widen_umult_lo_v8hi"
5776 [(match_operand:V4SI 0 "register_operand" "")
5777 (match_operand:V8HI 1 "register_operand" "")
5778 (match_operand:V8HI 2 "register_operand" "")]
5781 rtx op1, op2, t1, t2, dest;
5785 t1 = gen_reg_rtx (V8HImode);
5786 t2 = gen_reg_rtx (V8HImode);
5787 dest = gen_lowpart (V8HImode, operands[0]);
5789 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5790 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5791 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply of V4SI inputs into a V2DI result, high
;; half.  Both inputs are first permuted with pshufd into temporaries
;; (the selector arguments are not visible here) and the result is
;; produced by the XOP pattern xop_mulv2div2di3_high.
5795 (define_expand "vec_widen_smult_hi_v4si"
5796 [(match_operand:V2DI 0 "register_operand" "")
5797 (match_operand:V4SI 1 "register_operand" "")
5798 (match_operand:V4SI 2 "register_operand" "")]
5803 t1 = gen_reg_rtx (V4SImode);
5804 t2 = gen_reg_rtx (V4SImode);
5806 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5811 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5816 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply of V4SI inputs into a V2DI result, low half.
;; Both inputs are permuted with pshufd into temporaries (selector
;; arguments not visible here) and then multiplied by the XOP pattern
;; xop_mulv2div2di3_low.
5820 (define_expand "vec_widen_smult_lo_v4si"
5821 [(match_operand:V2DI 0 "register_operand" "")
5822 (match_operand:V4SI 1 "register_operand" "")
5823 (match_operand:V4SI 2 "register_operand" "")]
5828 t1 = gen_reg_rtx (V4SImode);
5829 t2 = gen_reg_rtx (V4SImode);
5831 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5836 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5841 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Unsigned widening multiply of V4SI inputs into a V2DI result, high
;; half.  Each input is interleaved with itself (high half), which
;; places the two upper source elements in positions 0 and 2, the
;; positions read by sse2_umulv2siv2di3.
5845 (define_expand "vec_widen_umult_hi_v4si"
5846 [(match_operand:V2DI 0 "register_operand" "")
5847 (match_operand:V4SI 1 "register_operand" "")
5848 (match_operand:V4SI 2 "register_operand" "")]
5851 rtx op1, op2, t1, t2;
5855 t1 = gen_reg_rtx (V4SImode);
5856 t2 = gen_reg_rtx (V4SImode);
5858 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5859 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5860 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply of V4SI inputs into a V2DI result, low
;; half.  Each input is interleaved with itself (low half), which places
;; the two lower source elements in positions 0 and 2 before
;; sse2_umulv2siv2di3 multiplies them.
5864 (define_expand "vec_widen_umult_lo_v4si"
5865 [(match_operand:V2DI 0 "register_operand" "")
5866 (match_operand:V4SI 1 "register_operand" "")
5867 (match_operand:V4SI 2 "register_operand" "")]
5870 rtx op1, op2, t1, t2;
5874 t1 = gen_reg_rtx (V4SImode);
5875 t2 = gen_reg_rtx (V4SImode);
5877 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5878 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5879 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product step on V8HI inputs with a V4SI accumulator (signed, per
;; the pattern name): pmaddwd of operands 1 and 2 goes into a fresh
;; temporary, which is then added to operand 3 to form operand 0.
5883 (define_expand "sdot_prodv8hi"
5884 [(match_operand:V4SI 0 "register_operand" "")
5885 (match_operand:V8HI 1 "register_operand" "")
5886 (match_operand:V8HI 2 "register_operand" "")
5887 (match_operand:V4SI 3 "register_operand" "")]
5890 rtx t = gen_reg_rtx (V4SImode);
5891 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5892 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product step on V4SI inputs with a V2DI accumulator (unsigned,
;; per the pattern name).  Sequence:
;;   t1 = umul (op1, op2) on elements 0 and 2, plus accumulator op3;
;;   t2, t3 = op1, op2 shifted right as V1TI values (the shift amount is
;;            not visible here; presumably one element, so that elements
;;            1 and 3 land in positions 0 and 2 -- confirm);
;;   t4 = umul (t2, t3);
;;   op0 = t1 + t4.
5896 (define_expand "udot_prodv4si"
5897 [(match_operand:V2DI 0 "register_operand" "")
5898 (match_operand:V4SI 1 "register_operand" "")
5899 (match_operand:V4SI 2 "register_operand" "")
5900 (match_operand:V2DI 3 "register_operand" "")]
5905 t1 = gen_reg_rtx (V2DImode);
5906 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5907 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5909 t2 = gen_reg_rtx (V4SImode);
5910 t3 = gen_reg_rtx (V4SImode);
5911 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5912 gen_lowpart (V1TImode, operands[1]),
5914 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5915 gen_lowpart (V1TImode, operands[2]),
5918 t4 = gen_reg_rtx (V2DImode);
5919 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5921 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX shift for the V8HI/V4SI modes of SSEMODE24 (arithmetic right
;; shift, going by the pattern name and the vpsra mnemonic).  The SImode
;; count is either an xmm register or an "N" immediate;
;; length_immediate is chosen by whether operand 2 is a const_int.
5925 (define_insn "*avx_ashr<mode>3"
5926 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5928 (match_operand:SSEMODE24 1 "register_operand" "x")
5929 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5931 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5932 [(set_attr "type" "sseishft")
5933 (set_attr "prefix" "vex")
5934 (set (attr "length_immediate")
5935 (if_then_else (match_operand 2 "const_int_operand" "")
5937 (const_string "0")))
5938 (set_attr "mode" "TI")])
;; SSE2 shift for the V8HI/V4SI modes of SSEMODE24 (arithmetic right
;; shift, going by the pattern name and the psra mnemonic).
;; Two-operand form with operand 1 tied to the destination; the count is
;; an xmm register or an "N" immediate.
5940 (define_insn "ashr<mode>3"
5941 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5943 (match_operand:SSEMODE24 1 "register_operand" "0")
5944 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5946 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5947 [(set_attr "type" "sseishft")
5948 (set_attr "prefix_data16" "1")
5949 (set (attr "length_immediate")
5950 (if_then_else (match_operand 2 "const_int_operand" "")
5952 (const_string "0")))
5953 (set_attr "mode" "TI")])
;; AVX V1TI shift emitted as vpsrldq.  Operand 2 is a bit count
;; accepted by const_0_to_255_mul_8_operand; the output code divides it
;; by 8 before printing, so the instruction receives the quotient as its
;; immediate.
5955 (define_insn "*avx_lshrv1ti3"
5956 [(set (match_operand:V1TI 0 "register_operand" "=x")
5958 (match_operand:V1TI 1 "register_operand" "x")
5959 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5962 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5963 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5965 [(set_attr "type" "sseishft")
5966 (set_attr "prefix" "vex")
5967 (set_attr "length_immediate" "1")
5968 (set_attr "mode" "TI")])
;; AVX logical right shift (lshiftrt) for the SSEMODE248 modes:
;; three-operand vpsrl<size> with a VEX prefix.  The count is an xmm
;; register or an "N" immediate; length_immediate is chosen by whether
;; operand 2 is a const_int.
5970 (define_insn "*avx_lshr<mode>3"
5971 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5972 (lshiftrt:SSEMODE248
5973 (match_operand:SSEMODE248 1 "register_operand" "x")
5974 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5976 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5977 [(set_attr "type" "sseishft")
5978 (set_attr "prefix" "vex")
5979 (set (attr "length_immediate")
5980 (if_then_else (match_operand 2 "const_int_operand" "")
5982 (const_string "0")))
5983 (set_attr "mode" "TI")])
;; SSE2 V1TI shift emitted as psrldq, with operand 1 tied to the
;; destination.  Operand 2 is a bit count accepted by
;; const_0_to_255_mul_8_operand and is divided by 8 before printing.
;; Other expanders in this file call this pattern to move vector
;; elements down.
5985 (define_insn "sse2_lshrv1ti3"
5986 [(set (match_operand:V1TI 0 "register_operand" "=x")
5988 (match_operand:V1TI 1 "register_operand" "0")
5989 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5992 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5993 return "psrldq\t{%2, %0|%0, %2}";
5995 [(set_attr "type" "sseishft")
5996 (set_attr "prefix_data16" "1")
5997 (set_attr "length_immediate" "1")
5998 (set_attr "atom_unit" "sishuf")
5999 (set_attr "mode" "TI")])
;; SSE2 logical right shift (lshiftrt) for the SSEMODE248 modes:
;; two-operand psrl<size> with operand 1 tied to the destination.  The
;; count is an xmm register or an "N" immediate.
6001 (define_insn "lshr<mode>3"
6002 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6003 (lshiftrt:SSEMODE248
6004 (match_operand:SSEMODE248 1 "register_operand" "0")
6005 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6007 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6008 [(set_attr "type" "sseishft")
6009 (set_attr "prefix_data16" "1")
6010 (set (attr "length_immediate")
6011 (if_then_else (match_operand 2 "const_int_operand" "")
6013 (const_string "0")))
6014 (set_attr "mode" "TI")])
;; AVX V1TI left shift (ashift) emitted as vpslldq.  Operand 2 is a bit
;; count accepted by const_0_to_255_mul_8_operand; it is divided by 8
;; before printing.
6016 (define_insn "*avx_ashlv1ti3"
6017 [(set (match_operand:V1TI 0 "register_operand" "=x")
6018 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
6019 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6022 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6023 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
6025 [(set_attr "type" "sseishft")
6026 (set_attr "prefix" "vex")
6027 (set_attr "length_immediate" "1")
6028 (set_attr "mode" "TI")])
;; AVX shift for the SSEMODE248 modes (left shift, going by the pattern
;; name and the vpsll mnemonic): three-operand, VEX-prefixed; the count
;; is an xmm register or an "N" immediate.
6030 (define_insn "*avx_ashl<mode>3"
6031 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6033 (match_operand:SSEMODE248 1 "register_operand" "x")
6034 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6036 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6037 [(set_attr "type" "sseishft")
6038 (set_attr "prefix" "vex")
6039 (set (attr "length_immediate")
6040 (if_then_else (match_operand 2 "const_int_operand" "")
6042 (const_string "0")))
6043 (set_attr "mode" "TI")])
;; SSE2 V1TI left shift (ashift) emitted as pslldq, with operand 1 tied
;; to the destination.  Operand 2 is a bit count accepted by
;; const_0_to_255_mul_8_operand and is divided by 8 before printing.
6045 (define_insn "sse2_ashlv1ti3"
6046 [(set (match_operand:V1TI 0 "register_operand" "=x")
6047 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
6048 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6051 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6052 return "pslldq\t{%2, %0|%0, %2}";
6054 [(set_attr "type" "sseishft")
6055 (set_attr "prefix_data16" "1")
6056 (set_attr "length_immediate" "1")
6057 (set_attr "mode" "TI")])
;; SSE2 shift for the SSEMODE248 modes (left shift, going by the pattern
;; name and the psll mnemonic): two-operand form with operand 1 tied to
;; the destination; the count is an xmm register or an "N" immediate.
6059 (define_insn "ashl<mode>3"
6060 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6062 (match_operand:SSEMODE248 1 "register_operand" "0")
6063 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6065 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6066 [(set_attr "type" "sseishft")
6067 (set_attr "prefix_data16" "1")
6068 (set (attr "length_immediate")
6069 (if_then_else (match_operand 2 "const_int_operand" "")
6071 (const_string "0")))
6072 (set_attr "mode" "TI")])
;; Whole-vector shift expander for every SSEMODEI mode (left shift, per
;; the pattern name).  The count must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code re-views
;; operands 0 and 1 as V1TI with gen_lowpart.
;; NOTE(review): presumably this lets the V1TI shift insns in this file
;; match the expansion -- the RTL operator line is not visible here.
6074 (define_expand "vec_shl_<mode>"
6075 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6077 (match_operand:SSEMODEI 1 "register_operand" "")
6078 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6081 operands[0] = gen_lowpart (V1TImode, operands[0]);
6082 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-vector shift expander for every SSEMODEI mode (right shift, per
;; the pattern name).  The count must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code re-views
;; operands 0 and 1 as V1TI with gen_lowpart.
;; NOTE(review): presumably this lets the V1TI shift insns in this file
;; match the expansion -- the RTL operator line is not visible here.
6085 (define_expand "vec_shr_<mode>"
6086 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6088 (match_operand:SSEMODEI 1 "register_operand" "")
6089 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6092 operands[0] = gen_lowpart (V1TImode, operands[0]);
6093 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX integer min/max over SSEMODE124: three-operand
;; vp<maxmin_int><size> with a VEX prefix.  prefix_extra is "1" whenever
;; operand 0 is not V16QI.
;; NOTE(review): the code iterator line is not visible here; the V16QI
;; special case suggests this is the unsigned min/max family -- confirm.
6096 (define_insn "*avx_<code><mode>3"
6097 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6099 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6100 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6101 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6102 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6103 [(set_attr "type" "sseiadd")
6104 (set (attr "prefix_extra")
6105 (if_then_else (match_operand:V16QI 0 "" "")
6107 (const_string "1")))
6108 (set_attr "prefix" "vex")
6109 (set_attr "mode" "TI")])
;; Named expander for the V16QI min/max codes supplied by the <code>
;; iterator; operands go through ix86_fixup_binary_operands_no_copy.
6111 (define_expand "<code>v16qi3"
6112 [(set (match_operand:V16QI 0 "register_operand" "")
6114 (match_operand:V16QI 1 "nonimmediate_operand" "")
6115 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6117 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI min/max: two-operand p<maxmin_int>b with operand 1 tied to
;; the destination.
6119 (define_insn "*<code>v16qi3"
6120 [(set (match_operand:V16QI 0 "register_operand" "=x")
6122 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6123 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6124 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6125 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6126 [(set_attr "type" "sseiadd")
6127 (set_attr "prefix_data16" "1")
6128 (set_attr "mode" "TI")])
;; Second AVX integer min/max pattern over SSEMODE124, textually the
;; same as the earlier one except that prefix_extra is "1" whenever
;; operand 0 is not V8HI.
;; NOTE(review): the code iterator line is not visible here; the V8HI
;; special case suggests this is the signed min/max family -- confirm.
6130 (define_insn "*avx_<code><mode>3"
6131 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6133 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6134 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6135 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6136 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6137 [(set_attr "type" "sseiadd")
6138 (set (attr "prefix_extra")
6139 (if_then_else (match_operand:V8HI 0 "" "")
6141 (const_string "1")))
6142 (set_attr "prefix" "vex")
6143 (set_attr "mode" "TI")])
;; Named expander for the V8HI min/max codes supplied by the <code>
;; iterator; operands go through ix86_fixup_binary_operands_no_copy.
6145 (define_expand "<code>v8hi3"
6146 [(set (match_operand:V8HI 0 "register_operand" "")
6148 (match_operand:V8HI 1 "nonimmediate_operand" "")
6149 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6151 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI min/max: two-operand p<maxmin_int>w with operand 1 tied to
;; the destination.
6153 (define_insn "*<code>v8hi3"
6154 [(set (match_operand:V8HI 0 "register_operand" "=x")
6156 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6157 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6158 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6159 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6160 [(set_attr "type" "sseiadd")
6161 (set_attr "prefix_data16" "1")
6162 (set_attr "mode" "TI")])
;; Expander for unsigned V8HI maximum.  Two paths are visible (the
;; condition choosing between them is not shown here):
;;  - fix up the operands for a direct UMAX insn;
;;  - emit sse2_ussubv8hi3 (unsigned saturating subtract, going by the
;;    pattern name) of operand 1 and operand 2, then add operand 2 back.
;;    A fresh temporary replaces op0 as the intermediate when op0 and
;;    op2 are the same rtx, so op2 is still intact for the add.
6164 (define_expand "umaxv8hi3"
6165 [(set (match_operand:V8HI 0 "register_operand" "")
6166 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6167 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6171 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6174 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6175 if (rtx_equal_p (op3, op2))
6176 op3 = gen_reg_rtx (V8HImode);
6177 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6178 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for signed maximum on the V16QI/V4SI modes of SSEMODE14.
;; Two paths are visible (the selecting condition is not shown here):
;; a direct SMAX insn after operand fix-up, or a conditional select
;; built through ix86_expand_int_vcond with the comparison op1 > op2.
;; NOTE(review): presumably xops[1] / xops[2] are the values chosen when
;; the comparison is true / false -- confirm against the helper.
6183 (define_expand "smax<mode>3"
6184 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6185 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6186 (match_operand:SSEMODE14 2 "register_operand" "")))]
6190 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6196 xops[0] = operands[0];
6197 xops[1] = operands[1];
6198 xops[2] = operands[2];
6199 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6200 xops[4] = operands[1];
6201 xops[5] = operands[2];
6202 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 min/max insn for the V16QI/V4SI modes of SSEMODE14:
;; two-operand p<maxmin_int><size> with operand 1 tied to the
;; destination and prefix_extra set to 1.
6208 (define_insn "*sse4_1_<code><mode>3"
6209 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6211 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6212 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6213 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6214 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6215 [(set_attr "type" "sseiadd")
6216 (set_attr "prefix_extra" "1")
6217 (set_attr "mode" "TI")])
;; Expander for signed V2DI maximum.  Only a conditional-select path is
;; visible: the comparison op1 > op2 (GT) is handed to
;; ix86_expand_int_vcond together with operands 1 and 2 as the two
;; candidate values.
6219 (define_expand "smaxv2di3"
6220 [(set (match_operand:V2DI 0 "register_operand" "")
6221 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6222 (match_operand:V2DI 2 "register_operand" "")))]
6228 xops[0] = operands[0];
6229 xops[1] = operands[1];
6230 xops[2] = operands[2];
6231 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6232 xops[4] = operands[1];
6233 xops[5] = operands[2];
6234 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned V4SI maximum.  Two paths are visible (the
;; selecting condition is not shown here): a direct UMAX insn after
;; operand fix-up, or a conditional select through
;; ix86_expand_int_vcond using the unsigned comparison op1 > op2 (GTU).
6239 (define_expand "umaxv4si3"
6240 [(set (match_operand:V4SI 0 "register_operand" "")
6241 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6242 (match_operand:V4SI 2 "register_operand" "")))]
6246 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6252 xops[0] = operands[0];
6253 xops[1] = operands[1];
6254 xops[2] = operands[2];
6255 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6256 xops[4] = operands[1];
6257 xops[5] = operands[2];
6258 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 min/max insn for the V8HI/V4SI modes of SSEMODE24:
;; two-operand p<maxmin_int><size> with operand 1 tied to the
;; destination and prefix_extra set to 1.
6264 (define_insn "*sse4_1_<code><mode>3"
6265 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6267 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6268 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6269 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6270 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6271 [(set_attr "type" "sseiadd")
6272 (set_attr "prefix_extra" "1")
6273 (set_attr "mode" "TI")])
;; Expander for unsigned V2DI maximum.  Only a conditional-select path
;; is visible: the unsigned comparison op1 > op2 (GTU) is handed to
;; ix86_expand_int_vcond together with operands 1 and 2 as the two
;; candidate values.
6275 (define_expand "umaxv2di3"
6276 [(set (match_operand:V2DI 0 "register_operand" "")
6277 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6278 (match_operand:V2DI 2 "register_operand" "")))]
6284 xops[0] = operands[0];
6285 xops[1] = operands[1];
6286 xops[2] = operands[2];
6287 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6288 xops[4] = operands[1];
6289 xops[5] = operands[2];
6290 ok = ix86_expand_int_vcond (xops);
;; Expander for signed minimum on the V16QI/V4SI modes of SSEMODE14.
;; Two paths are visible (the selecting condition is not shown here):
;; a direct SMIN insn after operand fix-up, or a conditional select via
;; ix86_expand_int_vcond.  The select reuses the op1 > op2 comparison of
;; the max expanders but passes the candidate values swapped
;; (xops[1] = op2, xops[2] = op1).
6295 (define_expand "smin<mode>3"
6296 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6297 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6298 (match_operand:SSEMODE14 2 "register_operand" "")))]
6302 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6308 xops[0] = operands[0];
6309 xops[1] = operands[2];
6310 xops[2] = operands[1];
6311 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6312 xops[4] = operands[1];
6313 xops[5] = operands[2];
6314 ok = ix86_expand_int_vcond (xops);
;; Expander for signed V2DI minimum.  Only a conditional-select path is
;; visible: ix86_expand_int_vcond is called with the comparison
;; op1 > op2 (GT) and the candidate values swapped relative to
;; smaxv2di3 (xops[1] = op2, xops[2] = op1).
6320 (define_expand "sminv2di3"
6321 [(set (match_operand:V2DI 0 "register_operand" "")
6322 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6323 (match_operand:V2DI 2 "register_operand" "")))]
6329 xops[0] = operands[0];
6330 xops[1] = operands[2];
6331 xops[2] = operands[1];
6332 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6333 xops[4] = operands[1];
6334 xops[5] = operands[2];
6335 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned minimum on the SSEMODE24 vector modes.
;; The body holds two lowerings: a call to
;; ix86_fixup_binary_operands_no_copy for UMIN, and a vector-conditional
;; form handed to ix86_expand_int_vcond.  In the conditional form the
;; comparison is GTU (operand 1, operand 2); operand 2 is the value chosen
;; when it holds and operand 1 the value chosen otherwise.
;; NOTE(review): confirm which target condition selects between the two
;; lowerings.
6340 (define_expand "umin<mode>3"
6341 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6342 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6343 (match_operand:SSEMODE24 2 "register_operand" "")))]
6347 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6353 xops[0] = operands[0];
6354 xops[1] = operands[2];
6355 xops[2] = operands[1];
6356 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6357 xops[4] = operands[1];
6358 xops[5] = operands[2];
6359 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned minimum on V2DI, lowered to a vector conditional.
;; The comparison is GTU (operand 1, operand 2); operand 2 fills the slot
;; chosen when the comparison holds and operand 1 the slot chosen
;; otherwise.  The xops array is passed to ix86_expand_int_vcond.
6365 (define_expand "uminv2di3"
6366 [(set (match_operand:V2DI 0 "register_operand" "")
6367 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6368 (match_operand:V2DI 2 "register_operand" "")))]
6374 xops[0] = operands[0];
6375 xops[1] = operands[2];
6376 xops[2] = operands[1];
6377 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6378 xops[4] = operands[1];
6379 xops[5] = operands[2];
6380 ok = ix86_expand_int_vcond (xops);
6385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6387 ;; Parallel integral comparisons
6389 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for the SSE2 element-wise equality compare on the
;; SSEMODE124 modes.  Available with SSE2 unless XOP is enabled.  Both
;; inputs may be registers or memory; the preparation statement
;; canonicalizes them for EQ through ix86_fixup_binary_operands_no_copy.
6391 (define_expand "sse2_eq<mode>3"
6392 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6394 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6395 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6396 "TARGET_SSE2 && !TARGET_XOP "
6397 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX three-operand equality compare (vpcmpeq<ssevecsize>) on the
;; SSEMODE1248 modes.  Requires TARGET_AVX and operands accepted by
;; ix86_binary_operator_ok for EQ.  Operand 1 is commutative with
;; operand 2 and no input is tied to the destination.  The prefix_extra
;; attribute is computed by testing whether operand 0 is V2DI.
6399 (define_insn "*avx_eq<mode>3"
6400 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6402 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6403 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6404 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6405 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6406 [(set_attr "type" "ssecmp")
6407 (set (attr "prefix_extra")
6408 (if_then_else (match_operand:V2DI 0 "" "")
6410 (const_string "*")))
6411 (set_attr "prefix" "vex")
6412 (set_attr "mode" "TI")])
;; SSE2 two-operand equality compare (pcmpeq<ssevecsize>) on the
;; SSEMODE124 modes.  Requires SSE2 without XOP and operands accepted by
;; ix86_binary_operator_ok for EQ.  Operand 1 is tied to the destination
;; and commutative with operand 2, which may be a register or memory.
6414 (define_insn "*sse2_eq<mode>3"
6415 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6417 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6418 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6419 "TARGET_SSE2 && !TARGET_XOP
6420 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6421 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6422 [(set_attr "type" "ssecmp")
6423 (set_attr "prefix_data16" "1")
6424 (set_attr "mode" "TI")])
;; Named expander for the V2DI equality compare.  Both inputs may be
;; registers or memory; the preparation statement canonicalizes them for
;; EQ in V2DImode through ix86_fixup_binary_operands_no_copy.
6426 (define_expand "sse4_1_eqv2di3"
6427 [(set (match_operand:V2DI 0 "register_operand" "")
6429 (match_operand:V2DI 1 "nonimmediate_operand" "")
6430 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6432 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand quadword equality compare (pcmpeqq) on V2DI.
;; Requires TARGET_SSE4_1 and operands accepted by
;; ix86_binary_operator_ok for EQ.  Operand 1 is tied to the destination
;; and commutative with operand 2, which may be a register or memory.
6434 (define_insn "*sse4_1_eqv2di3"
6435 [(set (match_operand:V2DI 0 "register_operand" "=x")
6437 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6438 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6439 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6440 "pcmpeqq\t{%2, %0|%0, %2}"
6441 [(set_attr "type" "ssecmp")
6442 (set_attr "prefix_extra" "1")
6443 (set_attr "mode" "TI")])
;; AVX three-operand greater-than compare (vpcmpgt<ssevecsize>) on the
;; SSEMODE1248 modes.  Operand 1 must be a register and is not marked
;; commutative; operand 2 may be a register or memory.  No input is tied
;; to the destination.  The prefix_extra attribute is computed by testing
;; whether operand 0 is V2DI.
6445 (define_insn "*avx_gt<mode>3"
6446 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6448 (match_operand:SSEMODE1248 1 "register_operand" "x")
6449 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6451 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6452 [(set_attr "type" "ssecmp")
6453 (set (attr "prefix_extra")
6454 (if_then_else (match_operand:V2DI 0 "" "")
6456 (const_string "*")))
6457 (set_attr "prefix" "vex")
6458 (set_attr "mode" "TI")])
;; SSE2 two-operand greater-than compare (pcmpgt<ssevecsize>) on the
;; SSEMODE124 modes.  Available with SSE2 unless XOP is enabled.
;; Operand 1 must be a register tied to the destination and is not
;; commutative; operand 2 may be a register or memory.
6460 (define_insn "sse2_gt<mode>3"
6461 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6463 (match_operand:SSEMODE124 1 "register_operand" "0")
6464 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6465 "TARGET_SSE2 && !TARGET_XOP"
6466 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6467 [(set_attr "type" "ssecmp")
6468 (set_attr "prefix_data16" "1")
6469 (set_attr "mode" "TI")])
;; Two-operand quadword greater-than compare (pcmpgtq) on V2DI.
;; Operand 1 must be a register tied to the destination; operand 2 may be
;; a register or memory.
6471 (define_insn "sse4_2_gtv2di3"
6472 [(set (match_operand:V2DI 0 "register_operand" "=x")
6474 (match_operand:V2DI 1 "register_operand" "0")
6475 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6477 "pcmpgtq\t{%2, %0|%0, %2}"
6478 [(set_attr "type" "ssecmp")
6479 (set_attr "prefix_extra" "1")
6480 (set_attr "mode" "TI")])
;; Vector conditional expander for the SSEMODE124C8 modes.
;; Operand 0 receives operand 1 where the comparison in operand 3 (applied
;; to operands 4 and 5) holds, and operand 2 elsewhere.  The whole operand
;; array is handed to ix86_expand_int_vcond, whose result is kept in ok.
6482 (define_expand "vcond<mode>"
6483 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6484 (if_then_else:SSEMODE124C8
6485 (match_operator 3 ""
6486 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6487 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6488 (match_operand:SSEMODE124C8 1 "general_operand" "")
6489 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6492 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional expander under the vcondu name for the
;; SSEMODE124C8 modes.  Its operand layout matches vcond<mode>: operand 3
;; compares operands 4 and 5, selecting operand 1 or operand 2 into
;; operand 0.  Expansion is delegated to ix86_expand_int_vcond.
6497 (define_expand "vcondu<mode>"
6498 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6499 (if_then_else:SSEMODE124C8
6500 (match_operator 3 ""
6501 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6502 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6503 (match_operand:SSEMODE124C8 1 "general_operand" "")
6504 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6507 bool ok = ix86_expand_int_vcond (operands);
6512 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6514 ;; Parallel bitwise logical operations
6516 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement expander for the SSEMODEI modes, expressed as an XOR
;; of operand 1.  The preparation code builds a CONST_VECTOR with one
;; constm1_rtx per element of the mode, forces it into a register, and
;; stores it in operands[2].
6518 (define_expand "one_cmpl<mode>2"
6519 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6520 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6524 int i, n = GET_MODE_NUNITS (<MODE>mode);
6525 rtvec v = rtvec_alloc (n);
6527 for (i = 0; i < n; ++i)
6528 RTVEC_ELT (v, i) = constm1_rtx;
6530 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not for the 256-bit integer modes (AVX256MODEI), emitted as the
;; floating-point form vandnps.  Operand 1 is the complemented input and
;; must be a register; operand 2 may be a register or memory.  The insn
;; mode attribute comes from <avxvecpsmode>.
6533 (define_insn "*avx_andnot<mode>3"
6534 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6536 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6537 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6539 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6540 [(set_attr "type" "sselog")
6541 (set_attr "prefix" "vex")
6542 (set_attr "mode" "<avxvecpsmode>")])
;; And-not on the SSEMODEI integer modes for targets that have SSE but not
;; SSE2, emitted as the single-precision form andnps (mode V4SF).
;; Operand 1 is the complemented input, tied to the destination; operand 2
;; may be a register or memory.
6544 (define_insn "*sse_andnot<mode>3"
6545 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6547 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6548 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6549 "(TARGET_SSE && !TARGET_SSE2)"
6550 "andnps\t{%2, %0|%0, %2}"
6551 [(set_attr "type" "sselog")
6552 (set_attr "mode" "V4SF")])
;; AVX three-operand and-not (vpandn) on the 128-bit integer modes
;; (SSEMODEI).  Operand 1 is the complemented input and must be a
;; register; operand 2 may be a register or memory.  No input is tied to
;; the destination.
6554 (define_insn "*avx_andnot<mode>3"
6555 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6557 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6558 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6560 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6561 [(set_attr "type" "sselog")
6562 (set_attr "prefix" "vex")
6563 (set_attr "mode" "TI")])
;; Two-operand integer and-not (pandn) on the SSEMODEI modes.
;; Operand 1 is the complemented input, tied to the destination;
;; operand 2 may be a register or memory.
6565 (define_insn "sse2_andnot<mode>3"
6566 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6568 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6569 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6571 "pandn\t{%2, %0|%0, %2}"
6572 [(set_attr "type" "sselog")
6573 (set_attr "prefix_data16" "1")
6574 (set_attr "mode" "TI")])
;; And-not on TFmode values carried in SSE registers, emitted with the
;; integer instruction pandn.  Operand 1 is the complemented input, tied
;; to the destination; operand 2 may be a register or memory.
6576 (define_insn "*andnottf3"
6577 [(set (match_operand:TF 0 "register_operand" "=x")
6579 (not:TF (match_operand:TF 1 "register_operand" "0"))
6580 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6582 "pandn\t{%2, %0|%0, %2}"
6583 [(set_attr "type" "sselog")
6584 (set_attr "prefix_data16" "1")
6585 (set_attr "mode" "TI")])
;; Named expander for the <code> binary operation on the SSEMODEI modes.
;; Both inputs may be registers or memory; the preparation statement
;; canonicalizes them through ix86_fixup_binary_operands_no_copy.
6587 (define_expand "<code><mode>3"
6588 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6590 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6591 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6593 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX logical operation (any_logic) on the 256-bit integer modes
;; (AVX256MODEI), emitted as the floating-point form v<logic>ps.
;; Operand 1 is commutative with operand 2; no input is tied to the
;; destination.  The operands must pass ix86_binary_operator_ok, and the
;; insn mode attribute comes from <avxvecpsmode>.
6595 (define_insn "*avx_<code><mode>3"
6596 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6597 (any_logic:AVX256MODEI
6598 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6599 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6601 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6602 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6603 [(set_attr "type" "sselog")
6604 (set_attr "prefix" "vex")
6605 (set_attr "mode" "<avxvecpsmode>")])
;; Logical operation on the SSEMODEI integer modes for targets that have
;; SSE but not SSE2, emitted as the single-precision form <logic>ps
;; (mode V4SF).  Operand 1 is tied to the destination and commutative with
;; operand 2; the operands must pass ix86_binary_operator_ok.
6607 (define_insn "*sse_<code><mode>3"
6608 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6610 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6611 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6612 "(TARGET_SSE && !TARGET_SSE2)
6613 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6614 "<logic>ps\t{%2, %0|%0, %2}"
6615 [(set_attr "type" "sselog")
6616 (set_attr "mode" "V4SF")])
;; AVX three-operand integer logical operation (vp<logic>) on the 128-bit
;; integer modes (SSEMODEI).  Operand 1 is commutative with operand 2 and
;; no input is tied to the destination.  The operands must pass
;; ix86_binary_operator_ok.
6618 (define_insn "*avx_<code><mode>3"
6619 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6621 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6622 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6624 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6625 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6626 [(set_attr "type" "sselog")
6627 (set_attr "prefix" "vex")
6628 (set_attr "mode" "TI")])
;; SSE2 two-operand integer logical operation (p<logic>) on the SSEMODEI
;; modes.  Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok.  Operand 1 is tied to the destination and
;; commutative with operand 2, which may be a register or memory.
6630 (define_insn "*sse2_<code><mode>3"
6631 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6633 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6634 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6635 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6636 "p<logic>\t{%2, %0|%0, %2}"
6637 [(set_attr "type" "sselog")
6638 (set_attr "prefix_data16" "1")
6639 (set_attr "mode" "TI")])
;; Named expander for the <code> binary operation on TFmode values.
;; Both inputs may be registers or memory; the preparation statement
;; canonicalizes them through ix86_fixup_binary_operands_no_copy.
6641 (define_expand "<code>tf3"
6642 [(set (match_operand:TF 0 "register_operand" "")
6644 (match_operand:TF 1 "nonimmediate_operand" "")
6645 (match_operand:TF 2 "nonimmediate_operand" "")))]
6647 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 logical operation on TFmode values, emitted with the integer
;; instruction p<logic>.  Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok.  Operand 1 is tied to the destination and
;; commutative with operand 2.
6649 (define_insn "*<code>tf3"
6650 [(set (match_operand:TF 0 "register_operand" "=x")
6652 (match_operand:TF 1 "nonimmediate_operand" "%0")
6653 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6654 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6655 "p<logic>\t{%2, %0|%0, %2}"
6656 [(set_attr "type" "sselog")
6657 (set_attr "prefix_data16" "1")
6658 (set_attr "mode" "TI")])
6660 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6662 ;; Parallel integral element swizzling
6664 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander packing two V8HI inputs into one V16QI result.
;; Both inputs are reinterpreted as V16QI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered elements --
;; confirm against the helper.
6666 (define_expand "vec_pack_trunc_v8hi"
6667 [(match_operand:V16QI 0 "register_operand" "")
6668 (match_operand:V8HI 1 "register_operand" "")
6669 (match_operand:V8HI 2 "register_operand" "")]
6672 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6673 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6674 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Expander packing two V4SI inputs into one V8HI result.
;; Both inputs are reinterpreted as V8HI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered elements --
;; confirm against the helper.
6678 (define_expand "vec_pack_trunc_v4si"
6679 [(match_operand:V8HI 0 "register_operand" "")
6680 (match_operand:V4SI 1 "register_operand" "")
6681 (match_operand:V4SI 2 "register_operand" "")]
6684 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6685 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6686 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Expander packing two V2DI inputs into one V4SI result.
;; Both inputs are reinterpreted as V4SI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered elements --
;; confirm against the helper.
6690 (define_expand "vec_pack_trunc_v2di"
6691 [(match_operand:V4SI 0 "register_operand" "")
6692 (match_operand:V2DI 1 "register_operand" "")
6693 (match_operand:V2DI 2 "register_operand" "")]
6696 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6697 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6698 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; AVX three-operand vpacksswb: two V8HI inputs produce a V16QI result.
;; Operand 1 must be a register, operand 2 may be a register or memory,
;; and no input is tied to the destination.
6702 (define_insn "*avx_packsswb"
6703 [(set (match_operand:V16QI 0 "register_operand" "=x")
6706 (match_operand:V8HI 1 "register_operand" "x"))
6708 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6710 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6711 [(set_attr "type" "sselog")
6712 (set_attr "prefix" "vex")
6713 (set_attr "mode" "TI")])
;; Two-operand packsswb: two V8HI inputs produce a V16QI result.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
6715 (define_insn "sse2_packsswb"
6716 [(set (match_operand:V16QI 0 "register_operand" "=x")
6719 (match_operand:V8HI 1 "register_operand" "0"))
6721 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6723 "packsswb\t{%2, %0|%0, %2}"
6724 [(set_attr "type" "sselog")
6725 (set_attr "prefix_data16" "1")
6726 (set_attr "mode" "TI")])
;; AVX three-operand vpackssdw: two V4SI inputs produce a V8HI result.
;; Operand 1 must be a register, operand 2 may be a register or memory,
;; and no input is tied to the destination.
6728 (define_insn "*avx_packssdw"
6729 [(set (match_operand:V8HI 0 "register_operand" "=x")
6732 (match_operand:V4SI 1 "register_operand" "x"))
6734 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6736 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6737 [(set_attr "type" "sselog")
6738 (set_attr "prefix" "vex")
6739 (set_attr "mode" "TI")])
;; Two-operand packssdw: two V4SI inputs produce a V8HI result.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
6741 (define_insn "sse2_packssdw"
6742 [(set (match_operand:V8HI 0 "register_operand" "=x")
6745 (match_operand:V4SI 1 "register_operand" "0"))
6747 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6749 "packssdw\t{%2, %0|%0, %2}"
6750 [(set_attr "type" "sselog")
6751 (set_attr "prefix_data16" "1")
6752 (set_attr "mode" "TI")])
;; AVX three-operand vpackuswb: two V8HI inputs produce a V16QI result.
;; Operand 1 must be a register, operand 2 may be a register or memory,
;; and no input is tied to the destination.
6754 (define_insn "*avx_packuswb"
6755 [(set (match_operand:V16QI 0 "register_operand" "=x")
6758 (match_operand:V8HI 1 "register_operand" "x"))
6760 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6762 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6763 [(set_attr "type" "sselog")
6764 (set_attr "prefix" "vex")
6765 (set_attr "mode" "TI")])
;; Two-operand packuswb: two V8HI inputs produce a V16QI result.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
6767 (define_insn "sse2_packuswb"
6768 [(set (match_operand:V16QI 0 "register_operand" "=x")
6771 (match_operand:V8HI 1 "register_operand" "0"))
6773 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6775 "packuswb\t{%2, %0|%0, %2}"
6776 [(set_attr "type" "sselog")
6777 (set_attr "prefix_data16" "1")
6778 (set_attr "mode" "TI")])
;; AVX three-operand byte interleave, high part (vpunpckhbw), on V16QI.
;; The selector alternates indices 8..15 with 24..31.
;; NOTE(review): presumably indices 16 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 must be a register; operand 2 may be a register or memory.
6780 (define_insn "*avx_interleave_highv16qi"
6781 [(set (match_operand:V16QI 0 "register_operand" "=x")
6784 (match_operand:V16QI 1 "register_operand" "x")
6785 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6786 (parallel [(const_int 8) (const_int 24)
6787 (const_int 9) (const_int 25)
6788 (const_int 10) (const_int 26)
6789 (const_int 11) (const_int 27)
6790 (const_int 12) (const_int 28)
6791 (const_int 13) (const_int 29)
6792 (const_int 14) (const_int 30)
6793 (const_int 15) (const_int 31)])))]
6795 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6796 [(set_attr "type" "sselog")
6797 (set_attr "prefix" "vex")
6798 (set_attr "mode" "TI")])
;; Two-operand byte interleave, high part (punpckhbw), on V16QI.
;; The selector alternates indices 8..15 with 24..31.
;; NOTE(review): presumably indices 16 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
6800 (define_insn "vec_interleave_highv16qi"
6801 [(set (match_operand:V16QI 0 "register_operand" "=x")
6804 (match_operand:V16QI 1 "register_operand" "0")
6805 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6806 (parallel [(const_int 8) (const_int 24)
6807 (const_int 9) (const_int 25)
6808 (const_int 10) (const_int 26)
6809 (const_int 11) (const_int 27)
6810 (const_int 12) (const_int 28)
6811 (const_int 13) (const_int 29)
6812 (const_int 14) (const_int 30)
6813 (const_int 15) (const_int 31)])))]
6815 "punpckhbw\t{%2, %0|%0, %2}"
6816 [(set_attr "type" "sselog")
6817 (set_attr "prefix_data16" "1")
6818 (set_attr "mode" "TI")])
;; AVX three-operand byte interleave, low part (vpunpcklbw), on V16QI.
;; The selector alternates indices 0..7 with 16..23.
;; NOTE(review): presumably indices 16 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 must be a register; operand 2 may be a register or memory.
6820 (define_insn "*avx_interleave_lowv16qi"
6821 [(set (match_operand:V16QI 0 "register_operand" "=x")
6824 (match_operand:V16QI 1 "register_operand" "x")
6825 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6826 (parallel [(const_int 0) (const_int 16)
6827 (const_int 1) (const_int 17)
6828 (const_int 2) (const_int 18)
6829 (const_int 3) (const_int 19)
6830 (const_int 4) (const_int 20)
6831 (const_int 5) (const_int 21)
6832 (const_int 6) (const_int 22)
6833 (const_int 7) (const_int 23)])))]
6835 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6836 [(set_attr "type" "sselog")
6837 (set_attr "prefix" "vex")
6838 (set_attr "mode" "TI")])
;; Two-operand byte interleave, low part (punpcklbw), on V16QI.
;; The selector alternates indices 0..7 with 16..23.
;; NOTE(review): presumably indices 16 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
6840 (define_insn "vec_interleave_lowv16qi"
6841 [(set (match_operand:V16QI 0 "register_operand" "=x")
6844 (match_operand:V16QI 1 "register_operand" "0")
6845 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6846 (parallel [(const_int 0) (const_int 16)
6847 (const_int 1) (const_int 17)
6848 (const_int 2) (const_int 18)
6849 (const_int 3) (const_int 19)
6850 (const_int 4) (const_int 20)
6851 (const_int 5) (const_int 21)
6852 (const_int 6) (const_int 22)
6853 (const_int 7) (const_int 23)])))]
6855 "punpcklbw\t{%2, %0|%0, %2}"
6856 [(set_attr "type" "sselog")
6857 (set_attr "prefix_data16" "1")
6858 (set_attr "mode" "TI")])
;; AVX three-operand word interleave, high part (vpunpckhwd), on V8HI.
;; The selector alternates indices 4..7 with 12..15.
;; NOTE(review): presumably indices 8 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 must be a register; operand 2 may be a register or memory.
6860 (define_insn "*avx_interleave_highv8hi"
6861 [(set (match_operand:V8HI 0 "register_operand" "=x")
6864 (match_operand:V8HI 1 "register_operand" "x")
6865 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6866 (parallel [(const_int 4) (const_int 12)
6867 (const_int 5) (const_int 13)
6868 (const_int 6) (const_int 14)
6869 (const_int 7) (const_int 15)])))]
6871 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6872 [(set_attr "type" "sselog")
6873 (set_attr "prefix" "vex")
6874 (set_attr "mode" "TI")])
;; Two-operand word interleave, high part (punpckhwd), on V8HI.
;; The selector alternates indices 4..7 with 12..15.
;; NOTE(review): presumably indices 8 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
6876 (define_insn "vec_interleave_highv8hi"
6877 [(set (match_operand:V8HI 0 "register_operand" "=x")
6880 (match_operand:V8HI 1 "register_operand" "0")
6881 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6882 (parallel [(const_int 4) (const_int 12)
6883 (const_int 5) (const_int 13)
6884 (const_int 6) (const_int 14)
6885 (const_int 7) (const_int 15)])))]
6887 "punpckhwd\t{%2, %0|%0, %2}"
6888 [(set_attr "type" "sselog")
6889 (set_attr "prefix_data16" "1")
6890 (set_attr "mode" "TI")])
;; AVX three-operand word interleave, low part (vpunpcklwd), on V8HI.
;; The selector alternates indices 0..3 with 8..11.
;; NOTE(review): presumably indices 8 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 must be a register; operand 2 may be a register or memory.
6892 (define_insn "*avx_interleave_lowv8hi"
6893 [(set (match_operand:V8HI 0 "register_operand" "=x")
6896 (match_operand:V8HI 1 "register_operand" "x")
6897 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6898 (parallel [(const_int 0) (const_int 8)
6899 (const_int 1) (const_int 9)
6900 (const_int 2) (const_int 10)
6901 (const_int 3) (const_int 11)])))]
6903 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6904 [(set_attr "type" "sselog")
6905 (set_attr "prefix" "vex")
6906 (set_attr "mode" "TI")])
;; Two-operand word interleave, low part (punpcklwd), on V8HI.
;; The selector alternates indices 0..3 with 8..11.
;; NOTE(review): presumably indices 8 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
6908 (define_insn "vec_interleave_lowv8hi"
6909 [(set (match_operand:V8HI 0 "register_operand" "=x")
6912 (match_operand:V8HI 1 "register_operand" "0")
6913 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6914 (parallel [(const_int 0) (const_int 8)
6915 (const_int 1) (const_int 9)
6916 (const_int 2) (const_int 10)
6917 (const_int 3) (const_int 11)])))]
6919 "punpcklwd\t{%2, %0|%0, %2}"
6920 [(set_attr "type" "sselog")
6921 (set_attr "prefix_data16" "1")
6922 (set_attr "mode" "TI")])
;; AVX three-operand doubleword interleave, high part (vpunpckhdq), on
;; V4SI.  The selector is 2, 6, 3, 7.
;; NOTE(review): presumably indices 4 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 must be a register; operand 2 may be a register or memory.
6924 (define_insn "*avx_interleave_highv4si"
6925 [(set (match_operand:V4SI 0 "register_operand" "=x")
6928 (match_operand:V4SI 1 "register_operand" "x")
6929 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6930 (parallel [(const_int 2) (const_int 6)
6931 (const_int 3) (const_int 7)])))]
6933 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6934 [(set_attr "type" "sselog")
6935 (set_attr "prefix" "vex")
6936 (set_attr "mode" "TI")])
;; Two-operand doubleword interleave, high part (punpckhdq), on V4SI.
;; The selector is 2, 6, 3, 7.
;; NOTE(review): presumably indices 4 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
6938 (define_insn "vec_interleave_highv4si"
6939 [(set (match_operand:V4SI 0 "register_operand" "=x")
6942 (match_operand:V4SI 1 "register_operand" "0")
6943 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6944 (parallel [(const_int 2) (const_int 6)
6945 (const_int 3) (const_int 7)])))]
6947 "punpckhdq\t{%2, %0|%0, %2}"
6948 [(set_attr "type" "sselog")
6949 (set_attr "prefix_data16" "1")
6950 (set_attr "mode" "TI")])
;; AVX three-operand doubleword interleave, low part (vpunpckldq), on
;; V4SI.  The selector is 0, 4, 1, 5.
;; NOTE(review): presumably indices 4 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 must be a register; operand 2 may be a register or memory.
6952 (define_insn "*avx_interleave_lowv4si"
6953 [(set (match_operand:V4SI 0 "register_operand" "=x")
6956 (match_operand:V4SI 1 "register_operand" "x")
6957 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6958 (parallel [(const_int 0) (const_int 4)
6959 (const_int 1) (const_int 5)])))]
6961 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6962 [(set_attr "type" "sselog")
6963 (set_attr "prefix" "vex")
6964 (set_attr "mode" "TI")])
;; Two-operand doubleword interleave, low part (punpckldq), on V4SI.
;; The selector is 0, 4, 1, 5.
;; NOTE(review): presumably indices 4 and up address operand 2 in a
;; concatenation of the two inputs -- confirm.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
6966 (define_insn "vec_interleave_lowv4si"
6967 [(set (match_operand:V4SI 0 "register_operand" "=x")
6970 (match_operand:V4SI 1 "register_operand" "0")
6971 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6972 (parallel [(const_int 0) (const_int 4)
6973 (const_int 1) (const_int 5)])))]
6975 "punpckldq\t{%2, %0|%0, %2}"
6976 [(set_attr "type" "sselog")
6977 (set_attr "prefix_data16" "1")
6978 (set_attr "mode" "TI")])
;; AVX element insert (vpinsr<ssevecsize>) for the SSEMODE124 modes,
;; modelled as a vec_merge of a duplicated scalar (operand 2, general
;; register or memory) into the vector operand 1.  Operand 3 arrives as a
;; power-of-two merge mask and is converted to an element index with
;; exact_log2 before printing.  A memory source is printed as is; a
;; register source is printed with the k modifier.  The prefix_extra
;; attribute is computed by testing whether operand 0 is V8HI.
6980 (define_insn "*avx_pinsr<ssevecsize>"
6981 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6982 (vec_merge:SSEMODE124
6983 (vec_duplicate:SSEMODE124
6984 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6985 (match_operand:SSEMODE124 1 "register_operand" "x")
6986 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6989 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6990 if (MEM_P (operands[2]))
6991 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6993 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6995 [(set_attr "type" "sselog")
6996 (set (attr "prefix_extra")
6997 (if_then_else (match_operand:V8HI 0 "" "")
6999 (const_string "1")))
7000 (set_attr "length_immediate" "1")
7001 (set_attr "prefix" "vex")
7002 (set_attr "mode" "TI")])
;; Byte insert (pinsrb) into a V16QI vector.  Operand 2 is the QImode
;; scalar (general register or memory), operand 1 the vector tied to the
;; destination.  Operand 3 arrives as a power-of-two mask in the range
;; 1..32768 and is converted to an element index with exact_log2 before
;; printing.  A register source is printed with the k modifier.
7004 (define_insn "*sse4_1_pinsrb"
7005 [(set (match_operand:V16QI 0 "register_operand" "=x")
7007 (vec_duplicate:V16QI
7008 (match_operand:QI 2 "nonimmediate_operand" "rm"))
7009 (match_operand:V16QI 1 "register_operand" "0")
7010 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
7013 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7014 if (MEM_P (operands[2]))
7015 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
7017 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
7019 [(set_attr "type" "sselog")
7020 (set_attr "prefix_extra" "1")
7021 (set_attr "length_immediate" "1")
7022 (set_attr "mode" "TI")])
;; Word insert (pinsrw) into a V8HI vector.  Operand 2 is the HImode
;; scalar (general register or memory), operand 1 the vector tied to the
;; destination.  Operand 3 arrives as a power-of-two mask in the range
;; 1..128 and is converted to an element index with exact_log2 before
;; printing.  A register source is printed with the k modifier.
7024 (define_insn "*sse2_pinsrw"
7025 [(set (match_operand:V8HI 0 "register_operand" "=x")
7028 (match_operand:HI 2 "nonimmediate_operand" "rm"))
7029 (match_operand:V8HI 1 "register_operand" "0")
7030 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
7033 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7034 if (MEM_P (operands[2]))
7035 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
7037 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
7039 [(set_attr "type" "sselog")
7040 (set_attr "prefix_data16" "1")
7041 (set_attr "length_immediate" "1")
7042 (set_attr "mode" "TI")])
7044 ;; This pattern must come before sse2_loadld because it is the preferred match.
;; Doubleword insert (pinsrd) into a V4SI vector.  Operand 2 is the
;; SImode scalar (general register or memory), operand 1 the vector tied
;; to the destination.  Operand 3 arrives as a power-of-two mask in the
;; range 1..8 and is converted to an element index with exact_log2
;; before printing.
7045 (define_insn "*sse4_1_pinsrd"
7046 [(set (match_operand:V4SI 0 "register_operand" "=x")
7049 (match_operand:SI 2 "nonimmediate_operand" "rm"))
7050 (match_operand:V4SI 1 "register_operand" "0")
7051 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
7054 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7055 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
7057 [(set_attr "type" "sselog")
7058 (set_attr "prefix_extra" "1")
7059 (set_attr "length_immediate" "1")
7060 (set_attr "mode" "TI")])
;; AVX quadword insert (vpinsrq) into a V2DI vector; 64-bit targets with
;; AVX only.  Operand 2 is the DImode scalar (general register or memory)
;; and operand 1 the source vector, which is not tied to the destination.
;; Operand 3 arrives as a power-of-two mask in the range 1..2 and is
;; converted to an element index with exact_log2 before printing.
7062 (define_insn "*avx_pinsrq"
7063 [(set (match_operand:V2DI 0 "register_operand" "=x")
7066 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7067 (match_operand:V2DI 1 "register_operand" "x")
7068 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7069 "TARGET_AVX && TARGET_64BIT"
7071 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7072 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7074 [(set_attr "type" "sselog")
7075 (set_attr "prefix_extra" "1")
7076 (set_attr "length_immediate" "1")
7077 (set_attr "prefix" "vex")
7078 (set_attr "mode" "TI")])
;; SSE4.1 quadword insert (pinsrq) into a V2DI vector; 64-bit targets
;; only.  Operand 2 is the DImode scalar (general register or memory) and
;; operand 1 the vector tied to the destination.  Operand 3 arrives as a
;; power-of-two mask in the range 1..2 and is converted to an element
;; index with exact_log2 before printing.  The prefix_rex attribute is
;; set to 1.
7080 (define_insn "*sse4_1_pinsrq"
7081 [(set (match_operand:V2DI 0 "register_operand" "=x")
7084 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7085 (match_operand:V2DI 1 "register_operand" "0")
7086 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7087 "TARGET_SSE4_1 && TARGET_64BIT"
7089 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7090 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7092 [(set_attr "type" "sselog")
7093 (set_attr "prefix_rex" "1")
7094 (set_attr "prefix_extra" "1")
7095 (set_attr "length_immediate" "1")
7096 (set_attr "mode" "TI")])
;; Byte extract (pextrb, VEX form when applicable) from a V16QI register
;; into a general register of an SWI48 mode.  Operand 2 is the element
;; index, 0..15.  The destination is printed with the k modifier.
7098 (define_insn "*sse4_1_pextrb_<mode>"
7099 [(set (match_operand:SWI48 0 "register_operand" "=r")
7102 (match_operand:V16QI 1 "register_operand" "x")
7103 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7105 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7106 [(set_attr "type" "sselog")
7107 (set_attr "prefix_extra" "1")
7108 (set_attr "length_immediate" "1")
7109 (set_attr "prefix" "maybe_vex")
7110 (set_attr "mode" "TI")])
;; Byte extract (pextrb, VEX form when applicable) from a V16QI register
;; straight into a QImode memory destination.  Operand 2 is the element
;; index, 0..15.
7112 (define_insn "*sse4_1_pextrb_memory"
7113 [(set (match_operand:QI 0 "memory_operand" "=m")
7115 (match_operand:V16QI 1 "register_operand" "x")
7116 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7118 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7119 [(set_attr "type" "sselog")
7120 (set_attr "prefix_extra" "1")
7121 (set_attr "length_immediate" "1")
7122 (set_attr "prefix" "maybe_vex")
7123 (set_attr "mode" "TI")])
;; Word extract (pextrw, VEX form when applicable) from a V8HI register
;; into a general register of an SWI48 mode.  Operand 2 is the element
;; index, 0..7.  The destination is printed with the k modifier.
7125 (define_insn "*sse2_pextrw_<mode>"
7126 [(set (match_operand:SWI48 0 "register_operand" "=r")
7129 (match_operand:V8HI 1 "register_operand" "x")
7130 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7132 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7133 [(set_attr "type" "sselog")
7134 (set_attr "prefix_data16" "1")
7135 (set_attr "length_immediate" "1")
7136 (set_attr "prefix" "maybe_vex")
7137 (set_attr "mode" "TI")])
;; Word extract (pextrw, VEX form when applicable) from a V8HI register
;; straight into an HImode memory destination.  Operand 2 is the element
;; index, 0..7.
7139 (define_insn "*sse4_1_pextrw_memory"
7140 [(set (match_operand:HI 0 "memory_operand" "=m")
7142 (match_operand:V8HI 1 "register_operand" "x")
7143 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7145 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7146 [(set_attr "type" "sselog")
7147 (set_attr "prefix_extra" "1")
7148 (set_attr "length_immediate" "1")
7149 (set_attr "prefix" "maybe_vex")
7150 (set_attr "mode" "TI")])
;; Doubleword extract (pextrd, VEX form when applicable) from a V4SI
;; register into an SImode general register or memory destination.
;; Operand 2 is the element index, 0..3.
7152 (define_insn "*sse4_1_pextrd"
7153 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7155 (match_operand:V4SI 1 "register_operand" "x")
7156 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7158 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7159 [(set_attr "type" "sselog")
7160 (set_attr "prefix_extra" "1")
7161 (set_attr "length_immediate" "1")
7162 (set_attr "prefix" "maybe_vex")
7163 (set_attr "mode" "TI")])
;; Doubleword extract (pextrd, VEX form when applicable) from a V4SI
;; register with a DImode general-register destination; 64-bit targets
;; with SSE4.1 only.  Operand 2 is the element index, 0..3.  The
;; destination is printed with the k modifier.
7165 (define_insn "*sse4_1_pextrd_zext"
7166 [(set (match_operand:DI 0 "register_operand" "=r")
7169 (match_operand:V4SI 1 "register_operand" "x")
7170 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7171 "TARGET_64BIT && TARGET_SSE4_1"
7172 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7173 [(set_attr "type" "sselog")
7174 (set_attr "prefix_extra" "1")
7175 (set_attr "length_immediate" "1")
7176 (set_attr "prefix" "maybe_vex")
7177 (set_attr "mode" "TI")])
7179 ;; This pattern must come before *vec_extractv2di_1_sse because it is the preferred match.
;; Quadword extract (pextrq, VEX form when applicable) from a V2DI
;; register into a DImode general register or memory destination;
;; 64-bit targets with SSE4.1 only.  Operand 2 is the element index,
;; 0..1.  The prefix_rex attribute is set to 1.
7180 (define_insn "*sse4_1_pextrq"
7181 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7183 (match_operand:V2DI 1 "register_operand" "x")
7184 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7185 "TARGET_SSE4_1 && TARGET_64BIT"
7186 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7187 [(set_attr "type" "sselog")
7188 (set_attr "prefix_rex" "1")
7189 (set_attr "prefix_extra" "1")
7190 (set_attr "length_immediate" "1")
7191 (set_attr "prefix" "maybe_vex")
7192 (set_attr "mode" "TI")])
;; Expander for pshufd taking the shuffle control as one integer
;; (operand 2).  The control is split into four 2-bit fields, lowest
;; bits first, and each field becomes a separate selector argument to
;; gen_sse2_pshufd_1.
7194 (define_expand "sse2_pshufd"
7195 [(match_operand:V4SI 0 "register_operand" "")
7196 (match_operand:V4SI 1 "nonimmediate_operand" "")
7197 (match_operand:SI 2 "const_int_operand" "")]
7200 int mask = INTVAL (operands[2]);
7201 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7202 GEN_INT ((mask >> 0) & 3),
7203 GEN_INT ((mask >> 2) & 3),
7204 GEN_INT ((mask >> 4) & 3),
7205 GEN_INT ((mask >> 6) & 3)));
;; pshufd on V4SI with the four element selectors (each 0..3) as separate
;; operands 2 through 5.  At output time the selectors are packed back
;; into one immediate, two bits each with operand 2 in the lowest bits,
;; and the result replaces operands[2] for printing.  The source may be a
;; register or memory.
7209 (define_insn "sse2_pshufd_1"
7210 [(set (match_operand:V4SI 0 "register_operand" "=x")
7212 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7213 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7214 (match_operand 3 "const_0_to_3_operand" "")
7215 (match_operand 4 "const_0_to_3_operand" "")
7216 (match_operand 5 "const_0_to_3_operand" "")])))]
7220 mask |= INTVAL (operands[2]) << 0;
7221 mask |= INTVAL (operands[3]) << 2;
7222 mask |= INTVAL (operands[4]) << 4;
7223 mask |= INTVAL (operands[5]) << 6;
7224 operands[2] = GEN_INT (mask);
7226 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7228 [(set_attr "type" "sselog1")
7229 (set_attr "prefix_data16" "1")
7230 (set_attr "prefix" "maybe_vex")
7231 (set_attr "length_immediate" "1")
7232 (set_attr "mode" "TI")])
;; Expander for pshuflw taking the shuffle control as one integer
;; (operand 2).  The control is split into four 2-bit fields, lowest
;; bits first, and each field becomes a separate selector argument to
;; gen_sse2_pshuflw_1.
7234 (define_expand "sse2_pshuflw"
7235 [(match_operand:V8HI 0 "register_operand" "")
7236 (match_operand:V8HI 1 "nonimmediate_operand" "")
7237 (match_operand:SI 2 "const_int_operand" "")]
7240 int mask = INTVAL (operands[2]);
7241 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7242 GEN_INT ((mask >> 0) & 3),
7243 GEN_INT ((mask >> 2) & 3),
7244 GEN_INT ((mask >> 4) & 3),
7245 GEN_INT ((mask >> 6) & 3)));
;; pshuflw on V8HI with four element selectors (each 0..3) as separate
;; operands 2 through 5.  At output time the selectors are packed back
;; into one immediate, two bits each with operand 2 in the lowest bits,
;; and the result replaces operands[2] for printing.  Attributes mark a
;; rep prefix and no data16 prefix.
7249 (define_insn "sse2_pshuflw_1"
7250 [(set (match_operand:V8HI 0 "register_operand" "=x")
7252 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7253 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7254 (match_operand 3 "const_0_to_3_operand" "")
7255 (match_operand 4 "const_0_to_3_operand" "")
7256 (match_operand 5 "const_0_to_3_operand" "")
7264 mask |= INTVAL (operands[2]) << 0;
7265 mask |= INTVAL (operands[3]) << 2;
7266 mask |= INTVAL (operands[4]) << 4;
7267 mask |= INTVAL (operands[5]) << 6;
7268 operands[2] = GEN_INT (mask);
7270 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7272 [(set_attr "type" "sselog")
7273 (set_attr "prefix_data16" "0")
7274 (set_attr "prefix_rep" "1")
7275 (set_attr "prefix" "maybe_vex")
7276 (set_attr "length_immediate" "1")
7277 (set_attr "mode" "TI")])
;; Expander for pshufhw taking the shuffle control as one integer
;; (operand 2).  The control is split into four 2-bit fields, lowest
;; bits first; each field is biased by 4, giving selectors in the range
;; 4..7, before being passed to gen_sse2_pshufhw_1.
7279 (define_expand "sse2_pshufhw"
7280 [(match_operand:V8HI 0 "register_operand" "")
7281 (match_operand:V8HI 1 "nonimmediate_operand" "")
7282 (match_operand:SI 2 "const_int_operand" "")]
7285 int mask = INTVAL (operands[2]);
7286 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7287 GEN_INT (((mask >> 0) & 3) + 4),
7288 GEN_INT (((mask >> 2) & 3) + 4),
7289 GEN_INT (((mask >> 4) & 3) + 4),
7290 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw on V8HI with four element selectors (each 4..7) as separate
;; operands 2 through 5; the selector list starts with a fixed index 0.
;; At output time the bias of 4 is removed from each selector and the
;; values are packed into one immediate, two bits each with operand 2 in
;; the lowest bits; the result replaces operands[2] for printing.
7294 (define_insn "sse2_pshufhw_1"
7295 [(set (match_operand:V8HI 0 "register_operand" "=x")
7297 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7298 (parallel [(const_int 0)
7302 (match_operand 2 "const_4_to_7_operand" "")
7303 (match_operand 3 "const_4_to_7_operand" "")
7304 (match_operand 4 "const_4_to_7_operand" "")
7305 (match_operand 5 "const_4_to_7_operand" "")])))]
7309 mask |= (INTVAL (operands[2]) - 4) << 0;
7310 mask |= (INTVAL (operands[3]) - 4) << 2;
7311 mask |= (INTVAL (operands[4]) - 4) << 4;
7312 mask |= (INTVAL (operands[5]) - 4) << 6;
7313 operands[2] = GEN_INT (mask);
7315 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7317 [(set_attr "type" "sselog")
7318 (set_attr "prefix_rep" "1")
7319 (set_attr "prefix_data16" "0")
7320 (set_attr "prefix" "maybe_vex")
7321 (set_attr "length_immediate" "1")
7322 (set_attr "mode" "TI")])
;; Expander loading an SImode value (register or memory) into a V4SI
;; register.  The preparation statement sets operands[2] to the all-zero
;; V4SI constant.
7324 (define_expand "sse2_loadd"
7325 [(set (match_operand:V4SI 0 "register_operand" "")
7328 (match_operand:SI 1 "nonimmediate_operand" ""))
7332 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX insert of an SImode scalar (operand 2) into a V4SI vector
;; (operand 1).  There are three alternatives: vmovd from memory with a
;; zero vector, vmovd from a general register with a zero vector, and
;; vmovss merging an SSE-register scalar into an SSE-register vector.
;; The mode attribute is TI for the vmovd forms and V4SF for vmovss.
7334 (define_insn "*avx_loadld"
7335 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7338 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7339 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7343 vmovd\t{%2, %0|%0, %2}
7344 vmovd\t{%2, %0|%0, %2}
7345 vmovss\t{%2, %1, %0|%0, %1, %2}"
7346 [(set_attr "type" "ssemov")
7347 (set_attr "prefix" "vex")
7348 (set_attr "mode" "TI,TI,V4SF")])
;; Insert of an SImode scalar (operand 2) into a V4SI vector (operand 1).
;; There are four alternatives: movd from memory with a zero vector, movd
;; from a general register with a zero vector, movss from memory with a
;; zero vector, and movss from an SSE register with operand 1 tied to the
;; destination.  The mode attribute per alternative is TI, TI, V4SF, SF.
7350 (define_insn "sse2_loadld"
7351 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7354 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7355 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7359 movd\t{%2, %0|%0, %2}
7360 movd\t{%2, %0|%0, %2}
7361 movss\t{%2, %0|%0, %2}
7362 movss\t{%2, %0|%0, %2}"
7363 [(set_attr "type" "ssemov")
7364 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store of element 0 of a V4SI register to an SImode destination.
;; After reload it is split into a plain move when inter-unit moves are
;; enabled, when the destination is memory, or when the destination is
;; not a general register.  The split rewrites operand 1 as an SImode REG
;; with the same register number.
7366 (define_insn_and_split "sse2_stored"
7367 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7369 (match_operand:V4SI 1 "register_operand" "x,Yi")
7370 (parallel [(const_int 0)])))]
7373 "&& reload_completed
7374 && (TARGET_INTER_UNIT_MOVES
7375 || MEM_P (operands [0])
7376 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7377 [(set (match_dup 0) (match_dup 1))]
7378 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operands[2] (0..3) of a V4SI held in offsettable memory
;; into a general register.  The split code emits an ordinary SI move from
;; operand 1 re-addressed in SImode at offset i*4.
7380 (define_insn_and_split "*vec_ext_v4si_mem"
7381 [(set (match_operand:SI 0 "register_operand" "=r")
7383 (match_operand:V4SI 1 "memory_operand" "o")
7384 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7390 int i = INTVAL (operands[2]);
7392 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: DI destination (operand 0) receives element 0 of the V2DI
;; register in operand 1.
7396 (define_expand "sse_storeq"
7397 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7399 (match_operand:V2DI 1 "register_operand" "")
7400 (parallel [(const_int 0)])))]
;; 64-bit-only extraction of element 0 of a V2DI into a DI destination;
;; memory-to-memory is rejected by the condition.  Three alternatives are
;; declared.  Only the last one (general register from offsettable memory)
;; has a visible template, %vmov{q}, and concrete type/prefix/mode attributes.
7403 (define_insn "*sse2_storeq_rex64"
7404 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7406 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7407 (parallel [(const_int 0)])))]
7408 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7412 %vmov{q}\t{%1, %0|%0, %1}"
7413 [(set_attr "type" "*,*,imov")
7414 (set_attr "prefix" "*,*,maybe_vex")
7415 (set_attr "mode" "*,*,DI")])
;; Element 0 of a V2DI SSE register stored to a DI memory or SSE-register
;; destination.  The same RTL is then repeated with empty constraints,
;; followed by a condition fragment and a replacement that moves the DImode
;; view of hard register REGNO (operands[1]) into operand 0.
;; NOTE(review): the second operand list looks like a separate split pattern
;; whose header line is not visible in this listing -- confirm.
7417 (define_insn "*sse2_storeq"
7418 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7420 (match_operand:V2DI 1 "register_operand" "x")
7421 (parallel [(const_int 0)])))]
7426 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7428 (match_operand:V2DI 1 "register_operand" "")
7429 (parallel [(const_int 0)])))]
7432 && (TARGET_INTER_UNIT_MOVES
7433 || MEM_P (operands [0])
7434 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7435 [(set (match_dup 0) (match_dup 1))]
7436 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; VEX form extracting element 1 of a V2DI; memory-to-memory is rejected.
;; Alternatives: vmovhps store to memory, vpsrldq by 8 between SSE
;; registers, vmovq into an SSE register and vmov{q} into a general register,
;; the last two reading %H1 of an offsettable memory source.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes -- confirm.
7438 (define_insn "*vec_extractv2di_1_rex64_avx"
7439 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7441 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7442 (parallel [(const_int 1)])))]
7445 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7447 vmovhps\t{%1, %0|%0, %1}
7448 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7449 vmovq\t{%H1, %0|%0, %H1}
7450 vmov{q}\t{%H1, %0|%0, %H1}"
7451 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7452 (set_attr "length_immediate" "*,1,*,*")
7453 (set_attr "memory" "*,none,*,*")
7454 (set_attr "prefix" "vex")
7455 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Legacy-encoded 64-bit form extracting element 1 of a V2DI.  Same four
;; alternatives as the AVX variant, except that the register-to-register
;; case ties the source to the destination (constraint 0) and uses the
;; two-operand psrldq by 8.
7457 (define_insn "*vec_extractv2di_1_rex64"
7458 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7460 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7461 (parallel [(const_int 1)])))]
7462 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7464 movhps\t{%1, %0|%0, %1}
7465 psrldq\t{$8, %0|%0, 8}
7466 movq\t{%H1, %0|%0, %H1}
7467 mov{q}\t{%H1, %0|%0, %H1}"
7468 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7469 (set_attr "length_immediate" "*,1,*,*")
7470 (set_attr "memory" "*,none,*,*")
7471 (set_attr "mode" "V2SF,TI,TI,DI")])
;; VEX form extracting element 1 of a V2DI without a general-register
;; alternative: vmovhps to memory, vpsrldq by 8 between SSE registers, or
;; vmovq from %H1 of an offsettable memory source.  Memory-to-memory is
;; rejected by the visible part of the condition.
7473 (define_insn "*vec_extractv2di_1_avx"
7474 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7476 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7477 (parallel [(const_int 1)])))]
7480 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7482 vmovhps\t{%1, %0|%0, %1}
7483 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7484 vmovq\t{%H1, %0|%0, %H1}"
7485 [(set_attr "type" "ssemov,sseishft1,ssemov")
7486 (set_attr "length_immediate" "*,1,*")
7487 (set_attr "memory" "*,none,*")
7488 (set_attr "prefix" "vex")
7489 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 form extracting element 1 of a V2DI: movhps to memory, in-place
;; psrldq by 8 (source tied to destination), or movq from %H1 of an
;; offsettable memory source.  The visible condition requires TARGET_SSE2
;; and rejects memory-to-memory.
7491 (define_insn "*vec_extractv2di_1_sse2"
7492 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7494 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7495 (parallel [(const_int 1)])))]
7497 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7499 movhps\t{%1, %0|%0, %1}
7500 psrldq\t{$8, %0|%0, 8}
7501 movq\t{%H1, %0|%0, %H1}"
7502 [(set_attr "type" "ssemov,sseishft1,ssemov")
7503 (set_attr "length_immediate" "*,1,*")
7504 (set_attr "memory" "*,none,*")
7505 (set_attr "mode" "V2SF,TI,TI")])
;; SSE1-only fallback (enabled when TARGET_SSE is set and TARGET_SSE2 is not)
;; extracting element 1 of a V2DI with float-domain moves: movhps to memory,
;; movhlps between SSE registers, or movlps from %H1 of offsettable memory.
7507 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7508 (define_insn "*vec_extractv2di_1_sse"
7509 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7511 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7512 (parallel [(const_int 1)])))]
7513 "!TARGET_SSE2 && TARGET_SSE
7514 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7516 movhps\t{%1, %0|%0, %1}
7517 movhlps\t{%1, %0|%0, %1}
7518 movlps\t{%H1, %0|%0, %H1}"
7519 [(set_attr "type" "ssemov")
7520 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX form producing a V4SI from SI operand 1.  Two alternatives: vpshufd
;; with immediate 0 when the source is an SSE register, or vbroadcastss when
;; the source is in memory.
7522 (define_insn "*vec_dupv4si_avx"
7523 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7525 (match_operand:SI 1 "register_operand" "x,m")))]
7528 vpshufd\t{$0, %1, %0|%0, %1, 0}
7529 vbroadcastss\t{%1, %0|%0, %1}"
7530 [(set_attr "type" "sselog1,ssemov")
7531 (set_attr "length_immediate" "1,0")
7532 (set_attr "prefix_extra" "0,1")
7533 (set_attr "prefix" "vex")
7534 (set_attr "mode" "TI,V4SF")])
;; V4SI from SI register operand 1.  The Y2 alternative emits %vpshufd with
;; immediate 0; the other alternative ties the source to the destination and
;; emits shufps with immediate 0 on that single register.
7536 (define_insn "*vec_dupv4si"
7537 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7539 (match_operand:SI 1 "register_operand" " Y2,0")))]
7542 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7543 shufps\t{$0, %0, %0|%0, %0, 0}"
7544 [(set_attr "type" "sselog1")
7545 (set_attr "length_immediate" "1")
7546 (set_attr "mode" "TI,V4SF")])
;; VEX form producing a V2DI from DI operand 1: vpunpcklqdq of the source
;; register with itself, or vmovddup when the source is in memory.
7548 (define_insn "*vec_dupv2di_avx"
7549 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7551 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7554 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7555 vmovddup\t{%1, %0|%0, %1}"
7556 [(set_attr "type" "sselog1")
7557 (set_attr "prefix" "vex")
7558 (set_attr "mode" "TI,DF")])
;; V2DI from DI operand 1 with two alternatives: source tied to the
;; destination, or source in memory.  Only the second template (movddup) is
;; visible in this listing.
7560 (define_insn "*vec_dupv2di_sse3"
7561 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7563 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7567 movddup\t{%1, %0|%0, %1}"
7568 [(set_attr "type" "sselog1")
7569 (set_attr "mode" "TI,DF")])
;; V2DI from DI register operand 1, which is tied to the destination in both
;; alternatives (Y2 and x).  The attributes give type sselog1 / mode TI for
;; the first and type ssemov / mode V4SF for the second; the condition and
;; output templates are not visible in this listing.
7571 (define_insn "*vec_dupv2di"
7572 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7574 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7579 [(set_attr "type" "sselog1,ssemov")
7580 (set_attr "mode" "TI,V4SF")])
;; V2SI built from SI operands 1 and 2, five alternatives.  The first three
;; target SSE registers and are VEX-encoded (vpinsrd with immediate 1,
;; vpunpckldq, vmovd when operand 2 uses constraint C).  The last two target
;; MMX registers (punpckldq, movd); the prefix attribute is orig for those.
7582 (define_insn "*vec_concatv2si_avx"
7583 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7585 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7586 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7589 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7590 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7591 vmovd\t{%1, %0|%0, %1}
7592 punpckldq\t{%2, %0|%0, %2}
7593 movd\t{%1, %0|%0, %1}"
7594 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7595 (set_attr "prefix_extra" "1,*,*,*,*")
7596 (set_attr "length_immediate" "1,*,*,*,*")
7597 (set (attr "prefix")
7598 (if_then_else (eq_attr "alternative" "3,4")
7599 (const_string "orig")
7600 (const_string "vex")))
7601 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Legacy-encoded V2SI concat of SI operands 1 and 2, five alternatives:
;; pinsrd with immediate 1, punpckldq, movd for the SSE-register cases, then
;; punpckldq and movd for the MMX-register cases.  The two-operand SSE forms
;; tie operand 1 to the destination.
7603 (define_insn "*vec_concatv2si_sse4_1"
7604 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7606 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7607 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7610 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7611 punpckldq\t{%2, %0|%0, %2}
7612 movd\t{%1, %0|%0, %1}
7613 punpckldq\t{%2, %0|%0, %2}
7614 movd\t{%1, %0|%0, %1}"
7615 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7616 (set_attr "prefix_extra" "1,*,*,*,*")
7617 (set_attr "length_immediate" "1,*,*,*,*")
7618 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; V2SI concat of SI operands 1 and 2, four alternatives: punpckldq / movd
;; on SSE registers, then punpckldq / movd on MMX registers.  Operand 2 is
;; restricted to a register or zero (reg_or_0_operand), for the reason given
;; in the note below.
7620 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7621 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7622 ;; alternatives pretty much forces the MMX alternative to be chosen.
7623 (define_insn "*vec_concatv2si_sse2"
7624 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7626 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7627 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7630 punpckldq\t{%2, %0|%0, %2}
7631 movd\t{%1, %0|%0, %1}
7632 punpckldq\t{%2, %0|%0, %2}
7633 movd\t{%1, %0|%0, %1}"
7634 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7635 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI concat of SI operands 1 and 2 using float-domain instructions for
;; the SSE-register alternatives (unpcklps, and movss from memory) and
;; punpckldq / movd for the MMX-register alternatives.
7637 (define_insn "*vec_concatv2si_sse"
7638 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7640 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7641 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7644 unpcklps\t{%2, %0|%0, %2}
7645 movss\t{%1, %0|%0, %1}
7646 punpckldq\t{%2, %0|%0, %2}
7647 movd\t{%1, %0|%0, %1}"
7648 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7649 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; VEX form building a V4SI from two V2SI operands: three-operand
;; vpunpcklqdq when operand 2 is an SSE register, vmovhps when it is in
;; memory.
7651 (define_insn "*vec_concatv4si_1_avx"
7652 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7654 (match_operand:V2SI 1 "register_operand" " x,x")
7655 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7658 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7659 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7660 [(set_attr "type" "sselog,ssemov")
7661 (set_attr "prefix" "vex")
7662 (set_attr "mode" "TI,V2SF")])
;; Legacy-encoded V4SI from two V2SI operands; operand 1 is always tied to
;; the destination.  Alternatives: punpcklqdq (Y2 registers), movlhps
;; (SSE register) and movhps (operand 2 in memory).
7664 (define_insn "*vec_concatv4si_1"
7665 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7667 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7668 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7671 punpcklqdq\t{%2, %0|%0, %2}
7672 movlhps\t{%2, %0|%0, %2}
7673 movhps\t{%2, %0|%0, %2}"
7674 [(set_attr "type" "sselog,ssemov,ssemov")
7675 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX form building a V2DI from DI operands 1 and 2.  Alternatives:
;; vmovq from memory and movq2dq from an MMX register (operand 2 constraint
;; C in both), vpunpcklqdq of two SSE registers, and vmovhps with operand 2
;; in memory.  The movq2dq alternative keeps the non-VEX (orig) prefix.
7677 (define_insn "*vec_concatv2di_avx"
7678 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7680 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7681 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7682 "!TARGET_64BIT && TARGET_AVX"
7684 vmovq\t{%1, %0|%0, %1}
7685 movq2dq\t{%1, %0|%0, %1}
7686 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7687 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7688 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7689 (set (attr "prefix")
7690 (if_then_else (eq_attr "alternative" "1")
7691 (const_string "orig")
7692 (const_string "vex")))
7693 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit legacy-encoded V2DI from DI operands 1 and 2, five alternatives:
;; movq and movq2dq when operand 2 uses constraint C, then punpcklqdq,
;; movlhps and movhps with operand 1 tied to the destination.
7695 (define_insn "vec_concatv2di"
7696 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7698 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7699 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7700 "!TARGET_64BIT && TARGET_SSE"
7702 movq\t{%1, %0|%0, %1}
7703 movq2dq\t{%1, %0|%0, %1}
7704 punpcklqdq\t{%2, %0|%0, %2}
7705 movlhps\t{%2, %0|%0, %2}
7706 movhps\t{%2, %0|%0, %2}"
7707 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7708 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX form building a V2DI from DI operands 1 and 2, six
;; alternatives: vpinsrq with immediate 1, two vmovq forms and movq2dq for
;; operand 2 with constraint C, then vpunpcklqdq and vmovhps.  Only the
;; movq2dq alternative (index 3) keeps the non-VEX (orig) prefix.
7710 (define_insn "*vec_concatv2di_rex64_avx"
7711 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7713 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7714 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7715 "TARGET_64BIT && TARGET_AVX"
7717 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7718 vmovq\t{%1, %0|%0, %1}
7719 vmovq\t{%1, %0|%0, %1}
7720 movq2dq\t{%1, %0|%0, %1}
7721 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7722 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7723 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7724 (set_attr "prefix_extra" "1,*,*,*,*,*")
7725 (set_attr "length_immediate" "1,*,*,*,*,*")
7726 (set (attr "prefix")
7727 (if_then_else (eq_attr "alternative" "3")
7728 (const_string "orig")
7729 (const_string "vex")))
7730 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 form building a V2DI from DI operands 1 and 2, seven
;; alternatives: pinsrq with immediate 1, movq, movd from a general register,
;; movq2dq, punpcklqdq, movlhps and movhps.  prefix_rex is forced to 1 for
;; the pinsrq and general-register movd alternatives.
7732 (define_insn "*vec_concatv2di_rex64_sse4_1"
7733 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7735 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7736 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7737 "TARGET_64BIT && TARGET_SSE4_1"
7739 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7740 movq\t{%1, %0|%0, %1}
7741 movd\t{%1, %0|%0, %1}
7742 movq2dq\t{%1, %0|%0, %1}
7743 punpcklqdq\t{%2, %0|%0, %2}
7744 movlhps\t{%2, %0|%0, %2}
7745 movhps\t{%2, %0|%0, %2}"
7746 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7747 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7748 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7749 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7750 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit SSE form building a V2DI from DI operands 1 and 2, six
;; alternatives: movq, movd from a general register, movq2dq, punpcklqdq,
;; movlhps and movhps.  prefix_rex is forced to 1 only for the
;; general-register movd alternative.
7752 (define_insn "*vec_concatv2di_rex64_sse"
7753 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7755 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7756 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7757 "TARGET_64BIT && TARGET_SSE"
7759 movq\t{%1, %0|%0, %1}
7760 movd\t{%1, %0|%0, %1}
7761 movq2dq\t{%1, %0|%0, %1}
7762 punpcklqdq\t{%2, %0|%0, %2}
7763 movlhps\t{%2, %0|%0, %2}
7764 movhps\t{%2, %0|%0, %2}"
7765 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7766 (set_attr "prefix_rex" "*,1,*,*,*,*")
7767 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander from a V16QI register (operand 1) to a V8HI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean unsigned / high half -- confirm in i386.c.
7769 (define_expand "vec_unpacku_hi_v16qi"
7770 [(match_operand:V8HI 0 "register_operand" "")
7771 (match_operand:V16QI 1 "register_operand" "")]
7775 ix86_expand_sse4_unpack (operands, true, true);
7777 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V16QI register (operand 1) to a V8HI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean signed / high half -- confirm in i386.c.
7781 (define_expand "vec_unpacks_hi_v16qi"
7782 [(match_operand:V8HI 0 "register_operand" "")
7783 (match_operand:V16QI 1 "register_operand" "")]
7787 ix86_expand_sse4_unpack (operands, false, true);
7789 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V16QI register (operand 1) to a V8HI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean unsigned / low half -- confirm in i386.c.
7793 (define_expand "vec_unpacku_lo_v16qi"
7794 [(match_operand:V8HI 0 "register_operand" "")
7795 (match_operand:V16QI 1 "register_operand" "")]
7799 ix86_expand_sse4_unpack (operands, true, false);
7801 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V16QI register (operand 1) to a V8HI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean signed / low half -- confirm in i386.c.
7805 (define_expand "vec_unpacks_lo_v16qi"
7806 [(match_operand:V8HI 0 "register_operand" "")
7807 (match_operand:V16QI 1 "register_operand" "")]
7811 ix86_expand_sse4_unpack (operands, false, false);
7813 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V8HI register (operand 1) to a V4SI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean unsigned / high half -- confirm in i386.c.
7817 (define_expand "vec_unpacku_hi_v8hi"
7818 [(match_operand:V4SI 0 "register_operand" "")
7819 (match_operand:V8HI 1 "register_operand" "")]
7823 ix86_expand_sse4_unpack (operands, true, true);
7825 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V8HI register (operand 1) to a V4SI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean signed / high half -- confirm in i386.c.
7829 (define_expand "vec_unpacks_hi_v8hi"
7830 [(match_operand:V4SI 0 "register_operand" "")
7831 (match_operand:V8HI 1 "register_operand" "")]
7835 ix86_expand_sse4_unpack (operands, false, true);
7837 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V8HI register (operand 1) to a V4SI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean unsigned / low half -- confirm in i386.c.
7841 (define_expand "vec_unpacku_lo_v8hi"
7842 [(match_operand:V4SI 0 "register_operand" "")
7843 (match_operand:V8HI 1 "register_operand" "")]
7847 ix86_expand_sse4_unpack (operands, true, false);
7849 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V8HI register (operand 1) to a V4SI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean signed / low half -- confirm in i386.c.
7853 (define_expand "vec_unpacks_lo_v8hi"
7854 [(match_operand:V4SI 0 "register_operand" "")
7855 (match_operand:V8HI 1 "register_operand" "")]
7859 ix86_expand_sse4_unpack (operands, false, false);
7861 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V4SI register (operand 1) to a V2DI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean unsigned / high half -- confirm in i386.c.
7865 (define_expand "vec_unpacku_hi_v4si"
7866 [(match_operand:V2DI 0 "register_operand" "")
7867 (match_operand:V4SI 1 "register_operand" "")]
7871 ix86_expand_sse4_unpack (operands, true, true);
7873 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V4SI register (operand 1) to a V2DI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean signed / high half -- confirm in i386.c.
7877 (define_expand "vec_unpacks_hi_v4si"
7878 [(match_operand:V2DI 0 "register_operand" "")
7879 (match_operand:V4SI 1 "register_operand" "")]
7883 ix86_expand_sse4_unpack (operands, false, true);
7885 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V4SI register (operand 1) to a V2DI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean unsigned / low half -- confirm in i386.c.
7889 (define_expand "vec_unpacku_lo_v4si"
7890 [(match_operand:V2DI 0 "register_operand" "")
7891 (match_operand:V4SI 1 "register_operand" "")]
7895 ix86_expand_sse4_unpack (operands, true, false);
7897 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V4SI register (operand 1) to a V2DI register (operand 0).
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here; the flags presumably mean signed / low half -- confirm in i386.c.
7901 (define_expand "vec_unpacks_lo_v4si"
7902 [(match_operand:V2DI 0 "register_operand" "")
7903 (match_operand:V4SI 1 "register_operand" "")]
7907 ix86_expand_sse4_unpack (operands, false, false);
7909 ix86_expand_sse_unpack (operands, false, false);
7913 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7917 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI average pattern.  The visible RTL combines V16QI
;; operands 1 and 2 with a constant vector of sixteen 1s; the preparation
;; statement canonicalizes the operands for a PLUS via
;; ix86_fixup_binary_operands_no_copy.
7919 (define_expand "sse2_uavgv16qi3"
7920 [(set (match_operand:V16QI 0 "register_operand" "")
7926 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7928 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7929 (const_vector:V16QI [(const_int 1) (const_int 1)
7930 (const_int 1) (const_int 1)
7931 (const_int 1) (const_int 1)
7932 (const_int 1) (const_int 1)
7933 (const_int 1) (const_int 1)
7934 (const_int 1) (const_int 1)
7935 (const_int 1) (const_int 1)
7936 (const_int 1) (const_int 1)]))
7939 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX matcher for the V16QI average pattern: operands 1 and 2 are marked
;; commutative (%), combined with a constant vector of sixteen 1s, and
;; emitted as the three-operand vpavgb.  Requires TARGET_AVX and
;; ix86_binary_operator_ok for PLUS.
7941 (define_insn "*avx_uavgv16qi3"
7942 [(set (match_operand:V16QI 0 "register_operand" "=x")
7948 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7950 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7951 (const_vector:V16QI [(const_int 1) (const_int 1)
7952 (const_int 1) (const_int 1)
7953 (const_int 1) (const_int 1)
7954 (const_int 1) (const_int 1)
7955 (const_int 1) (const_int 1)
7956 (const_int 1) (const_int 1)
7957 (const_int 1) (const_int 1)
7958 (const_int 1) (const_int 1)]))
7960 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7961 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7962 [(set_attr "type" "sseiadd")
7963 (set_attr "prefix" "vex")
7964 (set_attr "mode" "TI")])
;; SSE2 matcher for the V16QI average pattern: operand 1 is commutative and
;; tied to the destination (%0), and the insn is emitted as the two-operand
;; pavgb.  Requires TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
7966 (define_insn "*sse2_uavgv16qi3"
7967 [(set (match_operand:V16QI 0 "register_operand" "=x")
7973 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7975 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7976 (const_vector:V16QI [(const_int 1) (const_int 1)
7977 (const_int 1) (const_int 1)
7978 (const_int 1) (const_int 1)
7979 (const_int 1) (const_int 1)
7980 (const_int 1) (const_int 1)
7981 (const_int 1) (const_int 1)
7982 (const_int 1) (const_int 1)
7983 (const_int 1) (const_int 1)]))
7985 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7986 "pavgb\t{%2, %0|%0, %2}"
7987 [(set_attr "type" "sseiadd")
7988 (set_attr "prefix_data16" "1")
7989 (set_attr "mode" "TI")])
;; Expander for the V8HI average pattern.  The visible RTL combines V8HI
;; operands 1 and 2 with a constant vector of eight 1s; the preparation
;; statement canonicalizes the operands for a PLUS via
;; ix86_fixup_binary_operands_no_copy.
7991 (define_expand "sse2_uavgv8hi3"
7992 [(set (match_operand:V8HI 0 "register_operand" "")
7998 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8000 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8001 (const_vector:V8HI [(const_int 1) (const_int 1)
8002 (const_int 1) (const_int 1)
8003 (const_int 1) (const_int 1)
8004 (const_int 1) (const_int 1)]))
8007 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX matcher for the V8HI average pattern: commutative operands 1 and 2
;; combined with a constant vector of eight 1s, emitted as the three-operand
;; vpavgw.  Requires TARGET_AVX and ix86_binary_operator_ok for PLUS.
8009 (define_insn "*avx_uavgv8hi3"
8010 [(set (match_operand:V8HI 0 "register_operand" "=x")
8016 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8018 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8019 (const_vector:V8HI [(const_int 1) (const_int 1)
8020 (const_int 1) (const_int 1)
8021 (const_int 1) (const_int 1)
8022 (const_int 1) (const_int 1)]))
8024 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8025 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
8026 [(set_attr "type" "sseiadd")
8027 (set_attr "prefix" "vex")
8028 (set_attr "mode" "TI")])
;; SSE2 matcher for the V8HI average pattern: operand 1 is commutative and
;; tied to the destination (%0), and the insn is emitted as the two-operand
;; pavgw.  Requires TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
8030 (define_insn "*sse2_uavgv8hi3"
8031 [(set (match_operand:V8HI 0 "register_operand" "=x")
8037 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8039 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8040 (const_vector:V8HI [(const_int 1) (const_int 1)
8041 (const_int 1) (const_int 1)
8042 (const_int 1) (const_int 1)
8043 (const_int 1) (const_int 1)]))
8045 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8046 "pavgw\t{%2, %0|%0, %2}"
8047 [(set_attr "type" "sseiadd")
8048 (set_attr "prefix_data16" "1")
8049 (set_attr "mode" "TI")])
;; vpsadbw modelled as an opaque V2DI unspec over two V16QI inputs rather
;; than as explicit RTL arithmetic (see the note below).  Operand 1 must be
;; a register; operand 2 may be a register or memory.
8051 ;; The correct representation for this is absolutely enormous, and
8052 ;; surely not generally useful.
8053 (define_insn "*avx_psadbw"
8054 [(set (match_operand:V2DI 0 "register_operand" "=x")
8055 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
8056 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8059 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8060 [(set_attr "type" "sseiadd")
8061 (set_attr "prefix" "vex")
8062 (set_attr "mode" "TI")])
;; Legacy-encoded psadbw as a V2DI unspec over two V16QI inputs; operand 1
;; is tied to the destination and the two-operand form is emitted.
8064 (define_insn "sse2_psadbw"
8065 [(set (match_operand:V2DI 0 "register_operand" "=x")
8066 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8067 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8070 "psadbw\t{%2, %0|%0, %2}"
8071 [(set_attr "type" "sseiadd")
8072 (set_attr "atom_unit" "simul")
8073 (set_attr "prefix_data16" "1")
8074 (set_attr "mode" "TI")])
;; vmovmsk<ssemodesuffix> from a 256-bit float vector register (iterator
;; AVX256MODEF2P) into an SI general register.  Enabled when
;; AVX256_VEC_FLOAT_MODE_P holds for the mode.
8076 (define_insn "avx_movmsk<ssemodesuffix>256"
8077 [(set (match_operand:SI 0 "register_operand" "=r")
8079 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8081 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8082 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8083 [(set_attr "type" "ssecvt")
8084 (set_attr "prefix" "vex")
8085 (set_attr "mode" "<MODE>")])
;; %vmovmsk<ssemodesuffix> from a float vector register (iterator
;; SSEMODEF2P) into an SI general register.  Enabled when
;; SSE_VEC_FLOAT_MODE_P holds for the mode; the prefix is maybe_vex.
8087 (define_insn "<sse>_movmsk<ssemodesuffix>"
8088 [(set (match_operand:SI 0 "register_operand" "=r")
8090 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8092 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8093 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8094 [(set_attr "type" "ssemov")
8095 (set_attr "prefix" "maybe_vex")
8096 (set_attr "mode" "<MODE>")])
;; %vpmovmskb: SI general-register result computed, as an unspec, from a
;; V16QI SSE register.
8098 (define_insn "sse2_pmovmskb"
8099 [(set (match_operand:SI 0 "register_operand" "=r")
8100 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8103 "%vpmovmskb\t{%1, %0|%0, %1}"
8104 [(set_attr "type" "ssemov")
8105 (set_attr "prefix_data16" "1")
8106 (set_attr "prefix" "maybe_vex")
8107 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination (operand 0) set from
;; an unspec whose visible inputs are V16QI registers 1 and 2.
8109 (define_expand "sse2_maskmovdqu"
8110 [(set (match_operand:V16QI 0 "memory_operand" "")
8111 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8112 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The destination is memory addressed by SI register
;; operand 0 (constraint D), and the old contents of that memory are also an
;; input to the unspec.  The address register is implicit in the template,
;; which prints only operands 1 and 2; length_vex is therefore fixed at 3.
8117 (define_insn "*sse2_maskmovdqu"
8118 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8119 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8120 (match_operand:V16QI 2 "register_operand" "x")
8121 (mem:V16QI (match_dup 0))]
8123 "TARGET_SSE2 && !TARGET_64BIT"
8124 ;; @@@ check ordering of operands in intel/nonintel syntax
8125 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8126 [(set_attr "type" "ssemov")
8127 (set_attr "prefix_data16" "1")
8128 ;; The implicit %rdi operand confuses default length_vex computation.
8129 (set_attr "length_vex" "3")
8130 (set_attr "prefix" "maybe_vex")
8131 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu: same shape as the 32-bit pattern but with a DI address
;; register (constraint D).  length_vex is computed by hand: 3 + 1 when
;; operand 2 is FIRST_REX_SSE_REG or above, otherwise 2 + 1.
8133 (define_insn "*sse2_maskmovdqu_rex64"
8134 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8135 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8136 (match_operand:V16QI 2 "register_operand" "x")
8137 (mem:V16QI (match_dup 0))]
8139 "TARGET_SSE2 && TARGET_64BIT"
8140 ;; @@@ check ordering of operands in intel/nonintel syntax
8141 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8142 [(set_attr "type" "ssemov")
8143 (set_attr "prefix_data16" "1")
8144 ;; The implicit %rdi operand confuses default length_vex computation.
8145 (set (attr "length_vex")
8146 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8147 (set_attr "prefix" "maybe_vex")
8148 (set_attr "mode" "TI")])
;; unspec_volatile insn taking an SI memory operand, tagged with the mxcsr
;; Atom attribute and memory type load.  The condition and output template
;; are not visible in this listing.
8150 (define_insn "sse_ldmxcsr"
8151 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8155 [(set_attr "type" "sse")
8156 (set_attr "atom_sse_attr" "mxcsr")
8157 (set_attr "prefix" "maybe_vex")
8158 (set_attr "memory" "load")])
;; Sets an SI memory operand from the volatile unspec UNSPECV_STMXCSR;
;; tagged with the mxcsr Atom attribute and memory type store.  The
;; condition and output template are not visible in this listing.
8160 (define_insn "sse_stmxcsr"
8161 [(set (match_operand:SI 0 "memory_operand" "=m")
8162 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8165 [(set_attr "type" "sse")
8166 (set_attr "atom_sse_attr" "mxcsr")
8167 (set_attr "prefix" "maybe_vex")
8168 (set_attr "memory" "store")])
;; Expander for UNSPEC_SFENCE, available with TARGET_SSE or TARGET_3DNOW_A.
;; The preparation code makes operand 0 a BLKmode MEM on a scratch address
;; and marks it volatile.
8170 (define_expand "sse_sfence"
8172 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8173 "TARGET_SSE || TARGET_3DNOW_A"
8175 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8176 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for UNSPEC_SFENCE: a BLK operand set from an unspec of itself.
;; length_address is 0 and memory is unknown; the output template is not
;; visible in this listing.
8179 (define_insn "*sse_sfence"
8180 [(set (match_operand:BLK 0 "" "")
8181 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8182 "TARGET_SSE || TARGET_3DNOW_A"
8184 [(set_attr "type" "sse")
8185 (set_attr "length_address" "0")
8186 (set_attr "atom_sse_attr" "fence")
8187 (set_attr "memory" "unknown")])
;; unspec_volatile insn over an address operand (constraint p), tagged with
;; the fence Atom attribute and memory unknown.  The condition and output
;; template are not visible in this listing.
8189 (define_insn "sse2_clflush"
8190 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8194 [(set_attr "type" "sse")
8195 (set_attr "atom_sse_attr" "fence")
8196 (set_attr "memory" "unknown")])
;; Expander for UNSPEC_MFENCE.  The preparation code makes operand 0 a
;; BLKmode MEM on a scratch address and marks it volatile.
8198 (define_expand "sse2_mfence"
8200 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8203 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8204 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for UNSPEC_MFENCE: a BLK operand set from an unspec of itself.
;; Enabled with TARGET_64BIT or TARGET_SSE2; length_address is 0 and memory
;; is unknown.  The output template is not visible in this listing.
8207 (define_insn "*sse2_mfence"
8208 [(set (match_operand:BLK 0 "" "")
8209 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8210 "TARGET_64BIT || TARGET_SSE2"
8212 [(set_attr "type" "sse")
8213 (set_attr "length_address" "0")
8214 (set_attr "atom_sse_attr" "fence")
8215 (set_attr "memory" "unknown")])
;; Expander for UNSPEC_LFENCE.  The preparation code makes operand 0 a
;; BLKmode MEM on a scratch address and marks it volatile.
8217 (define_expand "sse2_lfence"
8219 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8222 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8223 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for UNSPEC_LFENCE: a BLK operand set from an unspec of itself.
;; Uses the dedicated lfence Atom attribute (the sfence/mfence matchers use
;; fence).  The condition and output template are not visible here.
8226 (define_insn "*sse2_lfence"
8227 [(set (match_operand:BLK 0 "" "")
8228 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8231 [(set_attr "type" "sse")
8232 (set_attr "length_address" "0")
8233 (set_attr "atom_sse_attr" "lfence")
8234 (set_attr "memory" "unknown")])
;; unspec_volatile insn with two SI register operands pinned by constraints
;; a and c; the encoded length is fixed at 3.  The embedded note explains why
;; SI operands suffice in 64-bit mode.  The condition and output template
;; are not visible in this listing.
8236 (define_insn "sse3_mwait"
8237 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8238 (match_operand:SI 1 "register_operand" "c")]
8241 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8242 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8243 ;; we only need to set up 32bit registers.
8245 [(set_attr "length" "3")])
;; 32-bit monitor: unspec_volatile with three SI register operands pinned by
;; constraints a, c and d, printed explicitly in the template.  Enabled with
;; TARGET_SSE3 when not TARGET_64BIT; the encoded length is fixed at 3.
8247 (define_insn "sse3_monitor"
8248 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8249 (match_operand:SI 1 "register_operand" "c")
8250 (match_operand:SI 2 "register_operand" "d")]
8252 "TARGET_SSE3 && !TARGET_64BIT"
8253 "monitor\t%0, %1, %2"
8254 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 (constraint a) is DI while operands 1 and 2
;; (constraints c and d) stay SI, for the reason given in the embedded note.
;; Enabled with TARGET_SSE3 and TARGET_64BIT; the encoded length is fixed at
;; 3.  The output template is not visible in this listing.
8256 (define_insn "sse3_monitor64"
8257 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8258 (match_operand:SI 1 "register_operand" "c")
8259 (match_operand:SI 2 "register_operand" "d")]
8261 "TARGET_SSE3 && TARGET_64BIT"
8262 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8263 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8264 ;; zero extended to 64bit, we only need to set up 32bit registers.
8266 [(set_attr "length" "3")])
8268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8270 ;; SSSE3 instructions
8272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX vphaddw on V8HI.  The RTL enumerates adjacent HI element pairs
;; (0,1), (2,3), (4,5), (6,7) of operand 1 and then of operand 2.
;; NOTE(review): the operators combining each pair are not visible in this
;; listing; presumably a plain add, per the mnemonic -- confirm.
8274 (define_insn "*avx_phaddwv8hi3"
8275 [(set (match_operand:V8HI 0 "register_operand" "=x")
8281 (match_operand:V8HI 1 "register_operand" "x")
8282 (parallel [(const_int 0)]))
8283 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8285 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8286 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8289 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8290 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8292 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8293 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8298 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8299 (parallel [(const_int 0)]))
8300 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8302 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8303 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8306 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8307 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8309 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8310 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8312 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8313 [(set_attr "type" "sseiadd")
8314 (set_attr "prefix_extra" "1")
8315 (set_attr "prefix" "vex")
8316 (set_attr "mode" "TI")])
;; Legacy-encoded phaddw on V8HI; operand 1 is tied to the destination.
;; The RTL enumerates adjacent HI element pairs (0,1) .. (6,7) of operand 1
;; and then of operand 2.
;; NOTE(review): the operators combining each pair are not visible in this
;; listing; presumably a plain add, per the mnemonic -- confirm.
8318 (define_insn "ssse3_phaddwv8hi3"
8319 [(set (match_operand:V8HI 0 "register_operand" "=x")
8325 (match_operand:V8HI 1 "register_operand" "0")
8326 (parallel [(const_int 0)]))
8327 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8329 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8330 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8333 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8334 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8336 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8337 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8342 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8343 (parallel [(const_int 0)]))
8344 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8346 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8347 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8350 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8351 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8353 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8354 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8356 "phaddw\t{%2, %0|%0, %2}"
8357 [(set_attr "type" "sseiadd")
8358 (set_attr "atom_unit" "complex")
8359 (set_attr "prefix_data16" "1")
8360 (set_attr "prefix_extra" "1")
8361 (set_attr "mode" "TI")])
;; MMX-register (constraint y) phaddw on V4HI; operand 1 is tied to the
;; destination.  The RTL enumerates HI element pairs (0,1), (2,3) of operand
;; 1 and then of operand 2.  prefix_rex is computed from whether the insn
;; mentions an extended register.
;; NOTE(review): the operators combining each pair are not visible in this
;; listing; presumably a plain add, per the mnemonic -- confirm.
8363 (define_insn "ssse3_phaddwv4hi3"
8364 [(set (match_operand:V4HI 0 "register_operand" "=y")
8369 (match_operand:V4HI 1 "register_operand" "0")
8370 (parallel [(const_int 0)]))
8371 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8373 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8374 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8378 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8379 (parallel [(const_int 0)]))
8380 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8382 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8383 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8385 "phaddw\t{%2, %0|%0, %2}"
8386 [(set_attr "type" "sseiadd")
8387 (set_attr "atom_unit" "complex")
8388 (set_attr "prefix_extra" "1")
8389 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8390 (set_attr "mode" "DI")])
;; VEX vphaddd on V4SI.  The RTL enumerates adjacent SI element pairs
;; (0,1), (2,3) of operand 1 and then of operand 2.
;; NOTE(review): the operators combining each pair are not visible in this
;; listing; presumably a plain add, per the mnemonic -- confirm.
8392 (define_insn "*avx_phadddv4si3"
8393 [(set (match_operand:V4SI 0 "register_operand" "=x")
8398 (match_operand:V4SI 1 "register_operand" "x")
8399 (parallel [(const_int 0)]))
8400 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8402 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8403 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8407 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8408 (parallel [(const_int 0)]))
8409 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8411 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8412 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8414 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8415 [(set_attr "type" "sseiadd")
8416 (set_attr "prefix_extra" "1")
8417 (set_attr "prefix" "vex")
8418 (set_attr "mode" "TI")])
;; Legacy-encoded phaddd on V4SI; operand 1 is tied to the destination.
;; The RTL enumerates adjacent SI element pairs (0,1), (2,3) of operand 1
;; and then of operand 2.
;; NOTE(review): the operators combining each pair are not visible in this
;; listing; presumably a plain add, per the mnemonic -- confirm.
8420 (define_insn "ssse3_phadddv4si3"
8421 [(set (match_operand:V4SI 0 "register_operand" "=x")
8426 (match_operand:V4SI 1 "register_operand" "0")
8427 (parallel [(const_int 0)]))
8428 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8430 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8431 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8435 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8436 (parallel [(const_int 0)]))
8437 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8439 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8440 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8442 "phaddd\t{%2, %0|%0, %2}"
8443 [(set_attr "type" "sseiadd")
8444 (set_attr "atom_unit" "complex")
8445 (set_attr "prefix_data16" "1")
8446 (set_attr "prefix_extra" "1")
8447 (set_attr "mode" "TI")])
;; MMX-register (constraint y) phaddd on V2SI; operand 1 is tied to the
;; destination.  The RTL pairs SI elements 0 and 1 of operand 1, then of
;; operand 2.  prefix_rex is computed from whether the insn mentions an
;; extended register.
;; NOTE(review): the operators combining each pair are not visible in this
;; listing; presumably a plain add, per the mnemonic -- confirm.
8449 (define_insn "ssse3_phadddv2si3"
8450 [(set (match_operand:V2SI 0 "register_operand" "=y")
8454 (match_operand:V2SI 1 "register_operand" "0")
8455 (parallel [(const_int 0)]))
8456 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8459 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8460 (parallel [(const_int 0)]))
8461 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8463 "phaddd\t{%2, %0|%0, %2}"
8464 [(set_attr "type" "sseiadd")
8465 (set_attr "atom_unit" "complex")
8466 (set_attr "prefix_extra" "1")
8467 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8468 (set_attr "mode" "DI")])
;; AVX form of the V8HI `phaddsw` pattern: emits the three-operand
;; `vphaddsw` with the "vex" prefix attribute.  Operand 1 must be a register;
;; operand 2 may be a register or memory.  The visible RTL selects HImode
;; elements 0-7 from each of the two inputs.
8470 (define_insn "*avx_phaddswv8hi3"
8471 [(set (match_operand:V8HI 0 "register_operand" "=x")
8477 (match_operand:V8HI 1 "register_operand" "x")
8478 (parallel [(const_int 0)]))
8479 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8481 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8482 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8485 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8486 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8488 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8489 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8494 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8495 (parallel [(const_int 0)]))
8496 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8498 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8499 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8502 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8503 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8505 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8506 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8508 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8509 [(set_attr "type" "sseiadd")
8510 (set_attr "prefix_extra" "1")
8511 (set_attr "prefix" "vex")
8512 (set_attr "mode" "TI")])
;; Two-operand V8HI `phaddsw` pattern.  Operand 1 is tied to the destination
;; register by the "0" constraint, so only %2 and %0 appear in the template.
;; Operand 2 may be a register or memory.  The visible RTL selects HImode
;; elements 0-7 from each input.
8514 (define_insn "ssse3_phaddswv8hi3"
8515 [(set (match_operand:V8HI 0 "register_operand" "=x")
8521 (match_operand:V8HI 1 "register_operand" "0")
8522 (parallel [(const_int 0)]))
8523 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8525 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8526 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8529 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8530 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8532 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8533 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8538 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8539 (parallel [(const_int 0)]))
8540 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8542 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8543 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8546 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8547 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8549 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8550 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8552 "phaddsw\t{%2, %0|%0, %2}"
8553 [(set_attr "type" "sseiadd")
8554 (set_attr "atom_unit" "complex")
8555 (set_attr "prefix_data16" "1")
8556 (set_attr "prefix_extra" "1")
8557 (set_attr "mode" "TI")])
;; 64-bit (V4HI, mode attribute "DI") `phaddsw` pattern using the "y"
;; register constraint (the MMX register class in the i386 constraint set).
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register or
;; memory.  The visible RTL selects HImode elements 0-3 from each input.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8559 (define_insn "ssse3_phaddswv4hi3"
8560 [(set (match_operand:V4HI 0 "register_operand" "=y")
8565 (match_operand:V4HI 1 "register_operand" "0")
8566 (parallel [(const_int 0)]))
8567 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8569 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8570 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8574 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8575 (parallel [(const_int 0)]))
8576 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8578 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8579 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8581 "phaddsw\t{%2, %0|%0, %2}"
8582 [(set_attr "type" "sseiadd")
8583 (set_attr "atom_unit" "complex")
8584 (set_attr "prefix_extra" "1")
8585 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8586 (set_attr "mode" "DI")])
;; AVX form of the V8HI `phsubw` pattern: emits the three-operand `vphsubw`
;; with the "vex" prefix attribute.  Operand 1 must be a register; operand 2
;; may be a register or memory.  The visible RTL selects HImode elements 0-7
;; from each of the two inputs.
8588 (define_insn "*avx_phsubwv8hi3"
8589 [(set (match_operand:V8HI 0 "register_operand" "=x")
8595 (match_operand:V8HI 1 "register_operand" "x")
8596 (parallel [(const_int 0)]))
8597 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8599 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8600 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8603 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8604 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8606 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8607 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8612 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8613 (parallel [(const_int 0)]))
8614 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8616 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8617 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8620 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8621 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8623 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8624 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8626 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8627 [(set_attr "type" "sseiadd")
8628 (set_attr "prefix_extra" "1")
8629 (set_attr "prefix" "vex")
8630 (set_attr "mode" "TI")])
;; Two-operand V8HI `phsubw` pattern.  Operand 1 is tied to the destination
;; register by the "0" constraint, so only %2 and %0 appear in the template.
;; Operand 2 may be a register or memory.  The visible RTL selects HImode
;; elements 0-7 from each input.
8632 (define_insn "ssse3_phsubwv8hi3"
8633 [(set (match_operand:V8HI 0 "register_operand" "=x")
8639 (match_operand:V8HI 1 "register_operand" "0")
8640 (parallel [(const_int 0)]))
8641 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8643 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8644 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8647 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8648 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8650 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8651 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8656 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8657 (parallel [(const_int 0)]))
8658 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8660 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8661 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8664 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8665 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8667 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8668 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8670 "phsubw\t{%2, %0|%0, %2}"
8671 [(set_attr "type" "sseiadd")
8672 (set_attr "atom_unit" "complex")
8673 (set_attr "prefix_data16" "1")
8674 (set_attr "prefix_extra" "1")
8675 (set_attr "mode" "TI")])
;; 64-bit (V4HI, mode attribute "DI") `phsubw` pattern using the "y"
;; register constraint (the MMX register class in the i386 constraint set).
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register or
;; memory.  The visible RTL selects HImode elements 0-3 from each input.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8677 (define_insn "ssse3_phsubwv4hi3"
8678 [(set (match_operand:V4HI 0 "register_operand" "=y")
8683 (match_operand:V4HI 1 "register_operand" "0")
8684 (parallel [(const_int 0)]))
8685 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8687 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8688 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8692 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8693 (parallel [(const_int 0)]))
8694 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8696 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8697 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8699 "phsubw\t{%2, %0|%0, %2}"
8700 [(set_attr "type" "sseiadd")
8701 (set_attr "atom_unit" "complex")
8702 (set_attr "prefix_extra" "1")
8703 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8704 (set_attr "mode" "DI")])
;; AVX form of the V4SI `phsubd` pattern: emits the three-operand `vphsubd`
;; with the "vex" prefix attribute.  Operand 1 must be a register; operand 2
;; may be a register or memory.  The visible RTL selects SImode elements 0-3
;; from each of the two inputs.
8706 (define_insn "*avx_phsubdv4si3"
8707 [(set (match_operand:V4SI 0 "register_operand" "=x")
8712 (match_operand:V4SI 1 "register_operand" "x")
8713 (parallel [(const_int 0)]))
8714 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8716 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8717 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8721 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8722 (parallel [(const_int 0)]))
8723 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8725 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8726 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8728 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8729 [(set_attr "type" "sseiadd")
8730 (set_attr "prefix_extra" "1")
8731 (set_attr "prefix" "vex")
8732 (set_attr "mode" "TI")])
;; Two-operand V4SI `phsubd` pattern.  Operand 1 is tied to the destination
;; register by the "0" constraint, so only %2 and %0 appear in the template.
;; Operand 2 may be a register or memory.  The visible RTL selects SImode
;; elements 0-3 from each input.
8734 (define_insn "ssse3_phsubdv4si3"
8735 [(set (match_operand:V4SI 0 "register_operand" "=x")
8740 (match_operand:V4SI 1 "register_operand" "0")
8741 (parallel [(const_int 0)]))
8742 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8744 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8745 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8749 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8750 (parallel [(const_int 0)]))
8751 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8753 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8754 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8756 "phsubd\t{%2, %0|%0, %2}"
8757 [(set_attr "type" "sseiadd")
8758 (set_attr "atom_unit" "complex")
8759 (set_attr "prefix_data16" "1")
8760 (set_attr "prefix_extra" "1")
8761 (set_attr "mode" "TI")])
;; 64-bit (V2SI, mode attribute "DI") `phsubd` pattern using the "y"
;; register constraint (the MMX register class in the i386 constraint set).
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register or
;; memory.  The visible RTL selects SImode elements 0-1 from each input.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8763 (define_insn "ssse3_phsubdv2si3"
8764 [(set (match_operand:V2SI 0 "register_operand" "=y")
8768 (match_operand:V2SI 1 "register_operand" "0")
8769 (parallel [(const_int 0)]))
8770 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8773 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8774 (parallel [(const_int 0)]))
8775 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8777 "phsubd\t{%2, %0|%0, %2}"
8778 [(set_attr "type" "sseiadd")
8779 (set_attr "atom_unit" "complex")
8780 (set_attr "prefix_extra" "1")
8781 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8782 (set_attr "mode" "DI")])
;; AVX form of the V8HI `phsubsw` pattern: emits the three-operand
;; `vphsubsw` with the "vex" prefix attribute.  Operand 1 must be a register;
;; operand 2 may be a register or memory.  The visible RTL selects HImode
;; elements 0-7 from each of the two inputs.
8784 (define_insn "*avx_phsubswv8hi3"
8785 [(set (match_operand:V8HI 0 "register_operand" "=x")
8791 (match_operand:V8HI 1 "register_operand" "x")
8792 (parallel [(const_int 0)]))
8793 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8795 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8796 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8799 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8800 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8802 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8803 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8808 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8809 (parallel [(const_int 0)]))
8810 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8812 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8813 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8816 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8817 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8819 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8820 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8822 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8823 [(set_attr "type" "sseiadd")
8824 (set_attr "prefix_extra" "1")
8825 (set_attr "prefix" "vex")
8826 (set_attr "mode" "TI")])
;; Two-operand V8HI `phsubsw` pattern.  Operand 1 is tied to the destination
;; register by the "0" constraint, so only %2 and %0 appear in the template.
;; Operand 2 may be a register or memory.  The visible RTL selects HImode
;; elements 0-7 from each input.
8828 (define_insn "ssse3_phsubswv8hi3"
8829 [(set (match_operand:V8HI 0 "register_operand" "=x")
8835 (match_operand:V8HI 1 "register_operand" "0")
8836 (parallel [(const_int 0)]))
8837 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8839 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8840 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8843 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8844 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8846 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8847 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8852 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8853 (parallel [(const_int 0)]))
8854 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8856 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8857 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8860 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8861 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8863 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8864 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8866 "phsubsw\t{%2, %0|%0, %2}"
8867 [(set_attr "type" "sseiadd")
8868 (set_attr "atom_unit" "complex")
8869 (set_attr "prefix_data16" "1")
8870 (set_attr "prefix_extra" "1")
8871 (set_attr "mode" "TI")])
;; 64-bit (V4HI, mode attribute "DI") `phsubsw` pattern using the "y"
;; register constraint (the MMX register class in the i386 constraint set).
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register or
;; memory.  The visible RTL selects HImode elements 0-3 from each input.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8873 (define_insn "ssse3_phsubswv4hi3"
8874 [(set (match_operand:V4HI 0 "register_operand" "=y")
8879 (match_operand:V4HI 1 "register_operand" "0")
8880 (parallel [(const_int 0)]))
8881 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8883 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8884 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8888 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8889 (parallel [(const_int 0)]))
8890 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8892 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8893 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8895 "phsubsw\t{%2, %0|%0, %2}"
8896 [(set_attr "type" "sseiadd")
8897 (set_attr "atom_unit" "complex")
8898 (set_attr "prefix_extra" "1")
8899 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8900 (set_attr "mode" "DI")])
;; AVX form of the 128-bit `pmaddubsw` pattern: emits the three-operand
;; `vpmaddubsw` with the "vex" prefix attribute.  Two V16QI inputs (operand 1
;; a register, operand 2 register or memory) produce a V8HI result.  The
;; visible RTL takes two V8QI selections from each input, one index list
;; starting at element 0 and the other starting at element 1 and ending at 15.
8902 (define_insn "*avx_pmaddubsw128"
8903 [(set (match_operand:V8HI 0 "register_operand" "=x")
8908 (match_operand:V16QI 1 "register_operand" "x")
8909 (parallel [(const_int 0)
8919 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8920 (parallel [(const_int 0)
8930 (vec_select:V8QI (match_dup 1)
8931 (parallel [(const_int 1)
8940 (vec_select:V8QI (match_dup 2)
8941 (parallel [(const_int 1)
8948 (const_int 15)]))))))]
8950 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8951 [(set_attr "type" "sseiadd")
8952 (set_attr "prefix_extra" "1")
8953 (set_attr "prefix" "vex")
8954 (set_attr "mode" "TI")])
;; Two-operand 128-bit `pmaddubsw` pattern: V16QI inputs, V8HI result.
;; Operand 1 is tied to the destination register ("0"); operand 2 may be a
;; register or memory.  The visible RTL takes two V8QI selections from each
;; input, one index list starting at element 0 and the other starting at
;; element 1 and ending at 15.
8956 (define_insn "ssse3_pmaddubsw128"
8957 [(set (match_operand:V8HI 0 "register_operand" "=x")
8962 (match_operand:V16QI 1 "register_operand" "0")
8963 (parallel [(const_int 0)
8973 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8974 (parallel [(const_int 0)
8984 (vec_select:V8QI (match_dup 1)
8985 (parallel [(const_int 1)
8994 (vec_select:V8QI (match_dup 2)
8995 (parallel [(const_int 1)
9002 (const_int 15)]))))))]
9004 "pmaddubsw\t{%2, %0|%0, %2}"
9005 [(set_attr "type" "sseiadd")
9006 (set_attr "atom_unit" "simul")
9007 (set_attr "prefix_data16" "1")
9008 (set_attr "prefix_extra" "1")
9009 (set_attr "mode" "TI")])
;; 64-bit `pmaddubsw` pattern (mode attribute "DI"): V8QI inputs, V4HI
;; result, using the "y" register constraint (the MMX register class in the
;; i386 constraint set).  Operand 1 is tied to the destination ("0").  The
;; visible RTL takes two V4QI selections from each input, one index list
;; starting at element 0 and the other starting at element 1 and ending at 7.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9011 (define_insn "ssse3_pmaddubsw"
9012 [(set (match_operand:V4HI 0 "register_operand" "=y")
9017 (match_operand:V8QI 1 "register_operand" "0")
9018 (parallel [(const_int 0)
9024 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9025 (parallel [(const_int 0)
9031 (vec_select:V4QI (match_dup 1)
9032 (parallel [(const_int 1)
9037 (vec_select:V4QI (match_dup 2)
9038 (parallel [(const_int 1)
9041 (const_int 7)]))))))]
9043 "pmaddubsw\t{%2, %0|%0, %2}"
9044 [(set_attr "type" "sseiadd")
9045 (set_attr "atom_unit" "simul")
9046 (set_attr "prefix_extra" "1")
9047 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9048 (set_attr "mode" "DI")])
;; Named expander for the V8HI `pmulhrsw` operation.  Both inputs are
;; accepted as nonimmediate operands, and the RTL includes an all-ones V8HI
;; constant vector.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT in V8HImode before the RTL is
;; generated (presumably to make the operand combination acceptable to the
;; matching insns; the helper is defined elsewhere).
9050 (define_expand "ssse3_pmulhrswv8hi3"
9051 [(set (match_operand:V8HI 0 "register_operand" "")
9058 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9060 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9062 (const_vector:V8HI [(const_int 1) (const_int 1)
9063 (const_int 1) (const_int 1)
9064 (const_int 1) (const_int 1)
9065 (const_int 1) (const_int 1)]))
9068 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX insn for V8HI `pmulhrsw`: emits the three-operand `vpmulhrsw` with the
;; "vex" prefix attribute.  Enabled only when TARGET_AVX holds and
;; ix86_binary_operator_ok accepts the operands for MULT.  The "%" in operand
;; 1's constraint marks operands 1 and 2 as commutative.  The RTL includes an
;; all-ones V8HI constant vector.
9070 (define_insn "*avx_pmulhrswv8hi3"
9071 [(set (match_operand:V8HI 0 "register_operand" "=x")
9078 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9080 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9082 (const_vector:V8HI [(const_int 1) (const_int 1)
9083 (const_int 1) (const_int 1)
9084 (const_int 1) (const_int 1)
9085 (const_int 1) (const_int 1)]))
9087 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9088 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9089 [(set_attr "type" "sseimul")
9090 (set_attr "prefix_extra" "1")
9091 (set_attr "prefix" "vex")
9092 (set_attr "mode" "TI")])
;; Two-operand insn for V8HI `pmulhrsw`.  Enabled only when TARGET_SSSE3
;; holds and ix86_binary_operator_ok accepts the operands for MULT.  Operand 1
;; is tied to the destination ("0") and, via the "%" modifier, is commutative
;; with operand 2.  The RTL includes an all-ones V8HI constant vector.
9094 (define_insn "*ssse3_pmulhrswv8hi3"
9095 [(set (match_operand:V8HI 0 "register_operand" "=x")
9102 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9104 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9106 (const_vector:V8HI [(const_int 1) (const_int 1)
9107 (const_int 1) (const_int 1)
9108 (const_int 1) (const_int 1)
9109 (const_int 1) (const_int 1)]))
9111 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9112 "pmulhrsw\t{%2, %0|%0, %2}"
9113 [(set_attr "type" "sseimul")
9114 (set_attr "prefix_data16" "1")
9115 (set_attr "prefix_extra" "1")
9116 (set_attr "mode" "TI")])
;; Named expander for the V4HI `pmulhrsw` operation.  Both inputs are
;; accepted as nonimmediate operands, and the RTL includes an all-ones V4HI
;; constant vector.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT in V4HImode before the RTL is
;; generated (presumably to make the operand combination acceptable to the
;; matching insn; the helper is defined elsewhere).
9118 (define_expand "ssse3_pmulhrswv4hi3"
9119 [(set (match_operand:V4HI 0 "register_operand" "")
9126 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9128 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9130 (const_vector:V4HI [(const_int 1) (const_int 1)
9131 (const_int 1) (const_int 1)]))
9134 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; 64-bit (V4HI, mode attribute "DI") insn for `pmulhrsw` using the "y"
;; register constraint (the MMX register class in the i386 constraint set).
;; Enabled only when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts
;; the operands for MULT.  Operand 1 is tied to the destination ("0") and is
;; commutative with operand 2 ("%").  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
9136 (define_insn "*ssse3_pmulhrswv4hi3"
9137 [(set (match_operand:V4HI 0 "register_operand" "=y")
9144 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9146 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9148 (const_vector:V4HI [(const_int 1) (const_int 1)
9149 (const_int 1) (const_int 1)]))
9151 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9152 "pmulhrsw\t{%2, %0|%0, %2}"
9153 [(set_attr "type" "sseimul")
9154 (set_attr "prefix_extra" "1")
9155 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9156 (set_attr "mode" "DI")])
;; AVX form of the V16QI `pshufb` pattern, modelled as an unspec of operands
;; 1 and 2: emits the three-operand `vpshufb` with the "vex" prefix
;; attribute.  Operand 1 must be a register; operand 2 may be a register or
;; memory.  The `;` after the output template only begins an empty comment.
9158 (define_insn "*avx_pshufbv16qi3"
9159 [(set (match_operand:V16QI 0 "register_operand" "=x")
9160 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9161 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9164 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9165 [(set_attr "type" "sselog1")
9166 (set_attr "prefix_extra" "1")
9167 (set_attr "prefix" "vex")
9168 (set_attr "mode" "TI")])
;; Two-operand V16QI `pshufb` pattern, modelled as an unspec of operands 1
;; and 2.  Operand 1 is tied to the destination register ("0"); operand 2 may
;; be a register or memory.  The `;` after the output template only begins an
;; empty comment.
9170 (define_insn "ssse3_pshufbv16qi3"
9171 [(set (match_operand:V16QI 0 "register_operand" "=x")
9172 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9173 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9176 "pshufb\t{%2, %0|%0, %2}";
9177 [(set_attr "type" "sselog1")
9178 (set_attr "prefix_data16" "1")
9179 (set_attr "prefix_extra" "1")
9180 (set_attr "mode" "TI")])
;; 64-bit (V8QI, mode attribute "DI") `pshufb` pattern using the "y" register
;; constraint (the MMX register class in the i386 constraint set), modelled
;; as an unspec of operands 1 and 2.  Operand 1 is tied to the destination
;; ("0").  prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9182 (define_insn "ssse3_pshufbv8qi3"
9183 [(set (match_operand:V8QI 0 "register_operand" "=y")
9184 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9185 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9188 "pshufb\t{%2, %0|%0, %2}";
9189 [(set_attr "type" "sselog1")
9190 (set_attr "prefix_extra" "1")
9191 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9192 (set_attr "mode" "DI")])
;; AVX form of `psign`, instantiated once per mode of the SSEMODE124
;; iterator; <ssevecsize> supplies the mnemonic's element-size suffix.
;; Emits the three-operand `vpsign*` with the "vex" prefix attribute.
;; Operand 1 must be a register; operand 2 may be a register or memory.
9194 (define_insn "*avx_psign<mode>3"
9195 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9197 [(match_operand:SSEMODE124 1 "register_operand" "x")
9198 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9201 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9202 [(set_attr "type" "sselog1")
9203 (set_attr "prefix_extra" "1")
9204 (set_attr "prefix" "vex")
9205 (set_attr "mode" "TI")])
;; Two-operand `psign`, instantiated once per mode of the SSEMODE124
;; iterator; <ssevecsize> supplies the mnemonic's element-size suffix.
;; Operand 1 is tied to the destination register ("0"); operand 2 may be a
;; register or memory.
9207 (define_insn "ssse3_psign<mode>3"
9208 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9210 [(match_operand:SSEMODE124 1 "register_operand" "0")
9211 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9214 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9215 [(set_attr "type" "sselog1")
9216 (set_attr "prefix_data16" "1")
9217 (set_attr "prefix_extra" "1")
9218 (set_attr "mode" "TI")])
;; 64-bit `psign` (mode attribute "DI"), instantiated once per mode of the
;; MMXMODEI iterator; <mmxvecsize> supplies the mnemonic's element-size
;; suffix.  It shares its name template with the SSEMODE124 variant, but the
;; <mode> substitution yields distinct names for the two iterators' modes.
;; Uses the "y" register constraint, with operand 1 tied to the destination.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9220 (define_insn "ssse3_psign<mode>3"
9221 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9223 [(match_operand:MMXMODEI 1 "register_operand" "0")
9224 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9227 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9228 [(set_attr "type" "sselog1")
9229 (set_attr "prefix_extra" "1")
9230 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9231 (set_attr "mode" "DI")])
;; AVX form of the TImode `palignr` pattern, modelled as an unspec of two
;; TImode inputs plus an immediate: emits the four-operand `vpalignr`.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the C output code
;; divides it by 8 before printing it as the immediate (presumably converting
;; a bit count into the byte count the instruction encodes).
9233 (define_insn "*avx_palignrti"
9234 [(set (match_operand:TI 0 "register_operand" "=x")
9235 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9236 (match_operand:TI 2 "nonimmediate_operand" "xm")
9237 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9241 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9242 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9244 [(set_attr "type" "sseishft")
9245 (set_attr "prefix_extra" "1")
9246 (set_attr "length_immediate" "1")
9247 (set_attr "prefix" "vex")
9248 (set_attr "mode" "TI")])
;; TImode `palignr` pattern, modelled as an unspec of two TImode inputs plus
;; an immediate.  Operand 1 is tied to the destination register ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the C output code
;; divides it by 8 before printing it as the immediate (presumably converting
;; a bit count into the byte count the instruction encodes).
9250 (define_insn "ssse3_palignrti"
9251 [(set (match_operand:TI 0 "register_operand" "=x")
9252 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9253 (match_operand:TI 2 "nonimmediate_operand" "xm")
9254 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9258 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9259 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9261 [(set_attr "type" "sseishft")
9262 (set_attr "atom_unit" "sishuf")
9263 (set_attr "prefix_data16" "1")
9264 (set_attr "prefix_extra" "1")
9265 (set_attr "length_immediate" "1")
9266 (set_attr "mode" "TI")])
;; DImode `palignr` pattern using the "y" register constraint (the MMX
;; register class in the i386 constraint set).  Operand 1 is tied to the
;; destination ("0").  Operand 3 must satisfy const_0_to_255_mul_8_operand;
;; the C output code divides it by 8 before printing it as the immediate
;; (presumably converting a bit count into a byte count).  prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.
9268 (define_insn "ssse3_palignrdi"
9269 [(set (match_operand:DI 0 "register_operand" "=y")
9270 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9271 (match_operand:DI 2 "nonimmediate_operand" "ym")
9272 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9276 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9277 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9279 [(set_attr "type" "sseishft")
9280 (set_attr "atom_unit" "sishuf")
9281 (set_attr "prefix_extra" "1")
9282 (set_attr "length_immediate" "1")
9283 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9284 (set_attr "mode" "DI")])
;; Vector absolute value (RTL `abs`) for each mode of the SSEMODE124
;; iterator; emits `pabs` with the <ssevecsize> suffix.  The source may be a
;; register or memory.  The template's "%v" prefix pairs with the "maybe_vex"
;; prefix attribute (presumably printing the VEX `v` mnemonic prefix when AVX
;; is in use; confirm against the operand printer).
9286 (define_insn "abs<mode>2"
9287 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9288 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9290 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9291 [(set_attr "type" "sselog1")
9292 (set_attr "prefix_data16" "1")
9293 (set_attr "prefix_extra" "1")
9294 (set_attr "prefix" "maybe_vex")
9295 (set_attr "mode" "TI")])
;; Vector absolute value (RTL `abs`) for each mode of the MMXMODEI iterator
;; (mode attribute "DI"); emits `pabs` with the <mmxvecsize> suffix.  Uses the
;; "y" register constraint; the source may be a register or memory.
;; prefix_rep is forced to 0 and prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
9297 (define_insn "abs<mode>2"
9298 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9299 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9301 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9302 [(set_attr "type" "sselog1")
9303 (set_attr "prefix_rep" "0")
9304 (set_attr "prefix_extra" "1")
9305 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9306 (set_attr "mode" "DI")])
9308 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9310 ;; AMD SSE4A instructions
9312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store for each mode of the MODEF iterator: writes register
;; operand 1 ("x") to memory operand 0 with `movnts` plus the
;; <ssemodefsuffix> suffix.  Modelled as an unspec, so the optimizers treat
;; it as opaque rather than as an ordinary move.
9314 (define_insn "sse4a_movnt<mode>"
9315 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9317 [(match_operand:MODEF 1 "register_operand" "x")]
9320 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9321 [(set_attr "type" "ssemov")
9322 (set_attr "mode" "<MODE>")])
;; SSE4A store of element 0 of a vector register: for each mode of the
;; SSEMODEF2P iterator, selects element 0 of operand 1 (as <ssescalarmode>)
;; and writes it to memory operand 0 with `movnt` plus the
;; <ssescalarmodesuffix> suffix.  The store is wrapped in an unspec.
9324 (define_insn "sse4a_vmmovnt<mode>"
9325 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9326 (unspec:<ssescalarmode>
9327 [(vec_select:<ssescalarmode>
9328 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9329 (parallel [(const_int 0)]))]
9332 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9333 [(set_attr "type" "ssemov")
9334 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A `extrq`, immediate form: operand 1 is tied to the V2DI destination
;; ("0"), and operands 2 and 3 are integer constants printed as the two
;; immediates, which is why length_immediate is "2".
9336 (define_insn "sse4a_extrqi"
9337 [(set (match_operand:V2DI 0 "register_operand" "=x")
9338 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9339 (match_operand 2 "const_int_operand" "")
9340 (match_operand 3 "const_int_operand" "")]
9343 "extrq\t{%3, %2, %0|%0, %2, %3}"
9344 [(set_attr "type" "sse")
9345 (set_attr "prefix_data16" "1")
9346 (set_attr "length_immediate" "2")
9347 (set_attr "mode" "TI")])
;; SSE4A `extrq`, register form: operand 1 is tied to the V2DI destination
;; ("0") and operand 2 is a V16QI register holding the control value.
9349 (define_insn "sse4a_extrq"
9350 [(set (match_operand:V2DI 0 "register_operand" "=x")
9351 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9352 (match_operand:V16QI 2 "register_operand" "x")]
9355 "extrq\t{%2, %0|%0, %2}"
9356 [(set_attr "type" "sse")
9357 (set_attr "prefix_data16" "1")
9358 (set_attr "mode" "TI")])
;; SSE4A `insertq`, immediate form: operand 1 is tied to the V2DI
;; destination ("0"), operand 2 is a V2DI register, and operands 3 and 4 are
;; integer constants printed as the two immediates (length_immediate "2").
;; The attributes force prefix_data16 to 0 and prefix_rep to 1 for length
;; computation.
9360 (define_insn "sse4a_insertqi"
9361 [(set (match_operand:V2DI 0 "register_operand" "=x")
9362 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9363 (match_operand:V2DI 2 "register_operand" "x")
9364 (match_operand 3 "const_int_operand" "")
9365 (match_operand 4 "const_int_operand" "")]
9368 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9369 [(set_attr "type" "sseins")
9370 (set_attr "prefix_data16" "0")
9371 (set_attr "prefix_rep" "1")
9372 (set_attr "length_immediate" "2")
9373 (set_attr "mode" "TI")])
;; SSE4A `insertq`, register form: operand 1 is tied to the V2DI destination
;; ("0") and operand 2 is a V2DI register.  The attributes force
;; prefix_data16 to 0 and prefix_rep to 1 for length computation.
9375 (define_insn "sse4a_insertq"
9376 [(set (match_operand:V2DI 0 "register_operand" "=x")
9377 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9378 (match_operand:V2DI 2 "register_operand" "x")]
9381 "insertq\t{%2, %0|%0, %2}"
9382 [(set_attr "type" "sseins")
9383 (set_attr "prefix_data16" "0")
9384 (set_attr "prefix_rep" "1")
9385 (set_attr "mode" "TI")])
9387 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9389 ;; Intel SSE4.1 instructions
9391 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX immediate blend for each mode of the AVXMODEF2P iterator, expressed
;; as a vec_merge.  Note the operand order inside vec_merge: operand 2
;; (register or memory) comes first and operand 1 (register) second, with
;; operand 3 as the constant mask restricted to 0..<blendbits>.  Emits the
;; four-operand `vblend` plus the <ssemodesuffix> suffix.
9393 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9394 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9395 (vec_merge:AVXMODEF2P
9396 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9397 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9398 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9400 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9401 [(set_attr "type" "ssemov")
9402 (set_attr "prefix_extra" "1")
9403 (set_attr "length_immediate" "1")
9404 (set_attr "prefix" "vex")
9405 (set_attr "mode" "<avxvecmode>")])
;; AVX variable blend for each mode of the AVXMODEF2P iterator.  Unlike the
;; immediate blend, operand 3 is a register, printed as an explicit fourth
;; operand of `vblendv`.  length_immediate is still "1" (presumably because
;; the selector register is encoded in a trailing immediate byte; confirm
;; against the instruction encoding).
9407 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9408 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9410 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9411 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9412 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9415 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9416 [(set_attr "type" "ssemov")
9417 (set_attr "prefix_extra" "1")
9418 (set_attr "length_immediate" "1")
9419 (set_attr "prefix" "vex")
9420 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 immediate blend for each mode of the SSEMODEF2P iterator,
;; expressed as a vec_merge.  Operand 2 (register or memory) is the first
;; vec_merge input and operand 1, tied to the destination ("0"), the second;
;; operand 3 is the constant mask restricted to 0..<blendbits>.
9422 (define_insn "sse4_1_blend<ssemodesuffix>"
9423 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9424 (vec_merge:SSEMODEF2P
9425 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9426 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9427 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9429 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9430 [(set_attr "type" "ssemov")
9431 (set_attr "prefix_data16" "1")
9432 (set_attr "prefix_extra" "1")
9433 (set_attr "length_immediate" "1")
9434 (set_attr "mode" "<MODE>")])
;; SSE4.1 variable blend for each mode of the SSEMODEF2P iterator.  Operand 3
;; uses the "Yz" constraint while operands 0-2 use *_not_xmm0 predicates
;; (presumably pinning the selector to xmm0 and keeping the other operands
;; out of it; confirm against the constraint and predicate definitions).
;; Operand 1 is tied to the destination ("0").
9436 (define_insn "sse4_1_blendv<ssemodesuffix>"
9437 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9439 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9440 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9441 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9444 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9445 [(set_attr "type" "ssemov")
9446 (set_attr "prefix_data16" "1")
9447 (set_attr "prefix_extra" "1")
9448 (set_attr "mode" "<MODE>")])
;; AVX `vdp` pattern for each mode of the AVXMODEF2P iterator: two vector
;; inputs plus an 8-bit immediate (operand 3, 0..255).  The "%" in operand
;; 1's constraint marks operands 1 and 2 as commutative; operand 2 may be a
;; register or memory.
9450 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9451 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9453 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9454 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9455 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9458 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9459 [(set_attr "type" "ssemul")
9460 (set_attr "prefix" "vex")
9461 (set_attr "prefix_extra" "1")
9462 (set_attr "length_immediate" "1")
9463 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 `dp` pattern for each mode of the SSEMODEF2P iterator: two vector
;; inputs plus an 8-bit immediate (operand 3, 0..255).  Operand 1 is tied to
;; the destination ("0") and is commutative with operand 2 ("%"); operand 2
;; may be a register or memory.
9465 (define_insn "sse4_1_dp<ssemodesuffix>"
9466 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9468 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9469 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9470 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9473 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9474 [(set_attr "type" "ssemul")
9475 (set_attr "prefix_data16" "1")
9476 (set_attr "prefix_extra" "1")
9477 (set_attr "length_immediate" "1")
9478 (set_attr "mode" "<MODE>")])
;; `movntdqa` load: reads V2DI memory operand 1 into register operand 0,
;; wrapped in an unspec.  The template's "%v" prefix pairs with the
;; "maybe_vex" prefix attribute (presumably printing the VEX `v` mnemonic
;; prefix when AVX is in use; confirm against the operand printer).
9480 (define_insn "sse4_1_movntdqa"
9481 [(set (match_operand:V2DI 0 "register_operand" "=x")
9482 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9485 "%vmovntdqa\t{%1, %0|%0, %1}"
9486 [(set_attr "type" "ssemov")
9487 (set_attr "prefix_extra" "1")
9488 (set_attr "prefix" "maybe_vex")
9489 (set_attr "mode" "TI")])
;; AVX form of the `mpsadbw` pattern, modelled as an unspec of two V16QI
;; inputs plus an 8-bit immediate (operand 3, 0..255): emits the
;; four-operand `vmpsadbw` with the "vex" prefix attribute.
9491 (define_insn "*avx_mpsadbw"
9492 [(set (match_operand:V16QI 0 "register_operand" "=x")
9493 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9494 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9495 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9498 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9499 [(set_attr "type" "sselog1")
9500 (set_attr "prefix" "vex")
9501 (set_attr "prefix_extra" "1")
9502 (set_attr "length_immediate" "1")
9503 (set_attr "mode" "TI")])
;; SSE4.1 `mpsadbw` pattern, modelled as an unspec of two V16QI inputs plus
;; an 8-bit immediate (operand 3, 0..255).  Operand 1 is tied to the
;; destination register ("0"); operand 2 may be a register or memory.
9505 (define_insn "sse4_1_mpsadbw"
9506 [(set (match_operand:V16QI 0 "register_operand" "=x")
9507 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9508 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9509 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9512 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9513 [(set_attr "type" "sselog1")
9514 (set_attr "prefix_extra" "1")
9515 (set_attr "length_immediate" "1")
9516 (set_attr "mode" "TI")])
;; AVX form of the `packusdw` pattern: two V4SI inputs (operand 1 a
;; register, operand 2 register or memory) produce a V8HI result.  Emits the
;; three-operand `vpackusdw` with the "vex" prefix attribute.
9518 (define_insn "*avx_packusdw"
9519 [(set (match_operand:V8HI 0 "register_operand" "=x")
9522 (match_operand:V4SI 1 "register_operand" "x"))
9524 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9526 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9527 [(set_attr "type" "sselog")
9528 (set_attr "prefix_extra" "1")
9529 (set_attr "prefix" "vex")
9530 (set_attr "mode" "TI")])
;; SSE4.1 `packusdw` pattern: two V4SI inputs produce a V8HI result.
;; Operand 1 is tied to the destination register ("0"); operand 2 may be a
;; register or memory.
9532 (define_insn "sse4_1_packusdw"
9533 [(set (match_operand:V8HI 0 "register_operand" "=x")
9536 (match_operand:V4SI 1 "register_operand" "0"))
9538 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9540 "packusdw\t{%2, %0|%0, %2}"
9541 [(set_attr "type" "sselog")
9542 (set_attr "prefix_extra" "1")
9543 (set_attr "mode" "TI")])
;; AVX form of the `pblendvb` pattern, modelled as an unspec of three V16QI
;; operands; operand 3 is a register printed as an explicit fourth operand of
;; `vpblendvb`.  length_immediate is "1" (presumably because the selector
;; register is encoded in a trailing immediate byte; confirm against the
;; instruction encoding).
9545 (define_insn "*avx_pblendvb"
9546 [(set (match_operand:V16QI 0 "register_operand" "=x")
9547 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9548 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9549 (match_operand:V16QI 3 "register_operand" "x")]
9552 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9553 [(set_attr "type" "ssemov")
9554 (set_attr "prefix_extra" "1")
9555 (set_attr "length_immediate" "1")
9556 (set_attr "prefix" "vex")
9557 (set_attr "mode" "TI")])
;; SSE4.1 `pblendvb` pattern, modelled as an unspec of three V16QI operands.
;; Operand 3 uses the "Yz" constraint while operands 0-2 use *_not_xmm0
;; predicates (presumably pinning the selector to xmm0 and keeping the other
;; operands out of it; confirm against the constraint and predicate
;; definitions).  Operand 1 is tied to the destination ("0").
9559 (define_insn "sse4_1_pblendvb"
9560 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9561 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9562 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9563 (match_operand:V16QI 3 "register_operand" "Yz")]
9566 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9567 [(set_attr "type" "ssemov")
9568 (set_attr "prefix_extra" "1")
9569 (set_attr "mode" "TI")])
;; AVX form of the V8HI `pblendw` pattern.  Operand 2 (register or memory)
;; is listed before operand 1 (register) in the RTL, followed by the 8-bit
;; immediate mask (operand 3, 0..255).  Emits the four-operand `vpblendw`
;; with the "vex" prefix attribute.
9571 (define_insn "*avx_pblendw"
9572 [(set (match_operand:V8HI 0 "register_operand" "=x")
9574 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9575 (match_operand:V8HI 1 "register_operand" "x")
9576 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9578 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9579 [(set_attr "type" "ssemov")
9580 (set_attr "prefix" "vex")
9581 (set_attr "prefix_extra" "1")
9582 (set_attr "length_immediate" "1")
9583 (set_attr "mode" "TI")])
;; SSE4.1 form of PBLENDW: emits the two-operand pblendw with an 8-bit
;; immediate (operand 3, range 0..255).  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
9585 (define_insn "sse4_1_pblendw"
9586 [(set (match_operand:V8HI 0 "register_operand" "=x")
9588 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9589 (match_operand:V8HI 1 "register_operand" "0")
9590 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9592 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9593 [(set_attr "type" "ssemov")
9594 (set_attr "prefix_extra" "1")
9595 (set_attr "length_immediate" "1")
9596 (set_attr "mode" "TI")])
;; PHMINPOSUW, modelled as UNSPEC_PHMINPOSUW of a single V8HI source
;; (register or memory).  The "%v" template prefix together with the
;; "maybe_vex" prefix attribute lets this one pattern serve both the
;; legacy and the VEX encoding.
9598 (define_insn "sse4_1_phminposuw"
9599 [(set (match_operand:V8HI 0 "register_operand" "=x")
9600 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9601 UNSPEC_PHMINPOSUW))]
9603 "%vphminposuw\t{%1, %0|%0, %1}"
9604 [(set_attr "type" "sselog1")
9605 (set_attr "prefix_extra" "1")
9606 (set_attr "prefix" "maybe_vex")
9607 (set_attr "mode" "TI")])
;; Byte -> word extension, iterated over <code>; <extsuffix> turns the
;; mnemonic into pmovsxbw or pmovzxbw.  Source is V16QI operand 1 (register
;; or memory, element list starting at index 0), result is V8HI.
9609 (define_insn "sse4_1_<code>v8qiv8hi2"
9610 [(set (match_operand:V8HI 0 "register_operand" "=x")
9613 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9614 (parallel [(const_int 0)
9623 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9624 [(set_attr "type" "ssemov")
9625 (set_attr "prefix_extra" "1")
9626 (set_attr "prefix" "maybe_vex")
9627 (set_attr "mode" "TI")])
;; Byte -> doubleword extension, iterated over <code>; emits pmovsxbd or
;; pmovzxbd.  Source is V16QI operand 1 (register or memory, element list
;; starting at index 0), result is V4SI.
9629 (define_insn "sse4_1_<code>v4qiv4si2"
9630 [(set (match_operand:V4SI 0 "register_operand" "=x")
9633 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9634 (parallel [(const_int 0)
9639 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9640 [(set_attr "type" "ssemov")
9641 (set_attr "prefix_extra" "1")
9642 (set_attr "prefix" "maybe_vex")
9643 (set_attr "mode" "TI")])
;; Word -> doubleword extension, iterated over <code>; emits pmovsxwd or
;; pmovzxwd.  Source is V8HI operand 1 (register or memory, element list
;; starting at index 0), result is V4SI.
9645 (define_insn "sse4_1_<code>v4hiv4si2"
9646 [(set (match_operand:V4SI 0 "register_operand" "=x")
9649 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9650 (parallel [(const_int 0)
9655 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9656 [(set_attr "type" "ssemov")
9657 (set_attr "prefix_extra" "1")
9658 (set_attr "prefix" "maybe_vex")
9659 (set_attr "mode" "TI")])
;; Byte -> quadword extension, iterated over <code>; emits pmovsxbq or
;; pmovzxbq.  Source is V16QI operand 1 (register or memory, element list
;; starting at index 0), result is V2DI.
9661 (define_insn "sse4_1_<code>v2qiv2di2"
9662 [(set (match_operand:V2DI 0 "register_operand" "=x")
9665 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9666 (parallel [(const_int 0)
9669 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9670 [(set_attr "type" "ssemov")
9671 (set_attr "prefix_extra" "1")
9672 (set_attr "prefix" "maybe_vex")
9673 (set_attr "mode" "TI")])
;; Word -> quadword extension, iterated over <code>; emits pmovsxwq or
;; pmovzxwq.  Source is V8HI operand 1 (register or memory, element list
;; starting at index 0), result is V2DI.
9675 (define_insn "sse4_1_<code>v2hiv2di2"
9676 [(set (match_operand:V2DI 0 "register_operand" "=x")
9679 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9680 (parallel [(const_int 0)
9683 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9684 [(set_attr "type" "ssemov")
9685 (set_attr "prefix_extra" "1")
9686 (set_attr "prefix" "maybe_vex")
9687 (set_attr "mode" "TI")])
;; Doubleword -> quadword extension, iterated over <code>; emits pmovsxdq
;; or pmovzxdq.  Source is V4SI operand 1 (register or memory, element list
;; starting at index 0), result is V2DI.
9689 (define_insn "sse4_1_<code>v2siv2di2"
9690 [(set (match_operand:V2DI 0 "register_operand" "=x")
9693 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9694 (parallel [(const_int 0)
9697 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9698 [(set_attr "type" "ssemov")
9699 (set_attr "prefix_extra" "1")
9700 (set_attr "prefix" "maybe_vex")
9701 (set_attr "mode" "TI")])
9703 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9704 ;; setting FLAGS_REG. But they are not really compare instructions.
;; Emits vtest<ssemodesuffix>.  The only result is FLAGS_REG (CC mode), set
;; from an unspec of register operand 0 and register-or-memory operand 1;
;; no vector register is written.
9705 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9706 [(set (reg:CC FLAGS_REG)
9707 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9708 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9711 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9712 [(set_attr "type" "ssecomi")
9713 (set_attr "prefix_extra" "1")
9714 (set_attr "prefix" "vex")
9715 (set_attr "mode" "<MODE>")])
9717 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9718 ;; But it is not really a compare instruction.
;; 256-bit vptest on V4DI operands (insn mode OI).  The only result is
;; FLAGS_REG (CC mode); operand 1 may be a register or memory.
9719 (define_insn "avx_ptest256"
9720 [(set (reg:CC FLAGS_REG)
9721 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9722 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9725 "vptest\t{%1, %0|%0, %1}"
9726 [(set_attr "type" "ssecomi")
9727 (set_attr "prefix_extra" "1")
9728 (set_attr "prefix" "vex")
9729 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI operands (insn mode TI).  The only result is
;; FLAGS_REG (CC mode).  "%v" plus prefix "maybe_vex" covers both the
;; legacy and the VEX encoding.
9731 (define_insn "sse4_1_ptest"
9732 [(set (reg:CC FLAGS_REG)
9733 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9734 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9737 "%vptest\t{%1, %0|%0, %1}"
9738 [(set_attr "type" "ssecomi")
9739 (set_attr "prefix_extra" "1")
9740 (set_attr "prefix" "maybe_vex")
9741 (set_attr "mode" "TI")])
;; 256-bit packed round: emits vround<ssemodesuffix> as an unspec of source
;; operand 1 (register or memory) and a 4-bit immediate, operand 2
;; (const_0_to_15_operand).  VEX-only.
9743 (define_insn "avx_round<ssemodesuffix>256"
9744 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9745 (unspec:AVX256MODEF2P
9746 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9747 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9750 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9751 [(set_attr "type" "ssecvt")
9752 (set_attr "prefix_extra" "1")
9753 (set_attr "length_immediate" "1")
9754 (set_attr "prefix" "vex")
9755 (set_attr "mode" "<MODE>")])
;; 128-bit packed round: emits round<ssemodesuffix> (or its VEX form via
;; "%v"/"maybe_vex") from source operand 1 (register or memory) and a
;; 4-bit immediate, operand 2 (const_0_to_15_operand).
9757 (define_insn "sse4_1_round<ssemodesuffix>"
9758 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9760 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9761 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9764 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9765 [(set_attr "type" "ssecvt")
9766 (set_attr "prefix_data16" "1")
9767 (set_attr "prefix_extra" "1")
9768 (set_attr "length_immediate" "1")
9769 (set_attr "prefix" "maybe_vex")
9770 (set_attr "mode" "<MODE>")])
;; AVX scalar round: a vec_merge of a rounding expression on register
;; operand 2 (with 4-bit immediate operand 3) and register operand 1.
;; Emitted as the VEX-prefixed vround<ssescalarmodesuffix> with separate
;; destination and source registers.
9772 (define_insn "*avx_round<ssescalarmodesuffix>"
9773 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9774 (vec_merge:SSEMODEF2P
9776 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9777 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9779 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9782 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9783 [(set_attr "type" "ssecvt")
9784 (set_attr "prefix_extra" "1")
9785 (set_attr "length_immediate" "1")
9786 (set_attr "prefix" "vex")
9787 (set_attr "mode" "<MODE>")])
;; SSE4.1 scalar round: same vec_merge shape as the AVX pattern, but
;; operand 1 is tied to the destination (constraint "0"), so the
;; two-operand round<ssescalarmodesuffix> form is emitted.
9789 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9790 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9791 (vec_merge:SSEMODEF2P
9793 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9794 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9796 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9799 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9800 [(set_attr "type" "ssecvt")
9801 (set_attr "prefix_data16" "1")
9802 (set_attr "prefix_extra" "1")
9803 (set_attr "length_immediate" "1")
9804 (set_attr "mode" "<MODE>")])
9806 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9808 ;; Intel SSE4.2 string/text processing instructions
9810 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined PCMPESTR pattern with three results: an SI value in operand 0
;; (constraint "c"), a V16QI value in operand 1 (constraint "Yz") and
;; FLAGS_REG.  It is split while pseudos can still be created.  The
;; splitter looks at the REG_UNUSED notes on curr_insn to see which of the
;; three results are live, then emits the index form (pcmpestri), the mask
;; form (pcmpestrm), and -- when the flags are used but neither register
;; result is -- the flags-only form (pcmpestr_cconly).
9812 (define_insn_and_split "sse4_2_pcmpestr"
9813 [(set (match_operand:SI 0 "register_operand" "=c,c")
9815 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9816 (match_operand:SI 3 "register_operand" "a,a")
9817 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9818 (match_operand:SI 5 "register_operand" "d,d")
9819 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9821 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9829 (set (reg:CC FLAGS_REG)
9838 && can_create_pseudo_p ()"
9843 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9844 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9845 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9848 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9849 operands[3], operands[4],
9850 operands[5], operands[6]));
9852 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9853 operands[3], operands[4],
9854 operands[5], operands[6]));
9855 if (flags && !(ecx || xmm0))
9856 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9857 operands[2], operands[3],
9858 operands[4], operands[5],
9862 [(set_attr "type" "sselog")
9863 (set_attr "prefix_data16" "1")
9864 (set_attr "prefix_extra" "1")
9865 (set_attr "length_immediate" "1")
9866 (set_attr "memory" "none,load")
9867 (set_attr "mode" "TI")])
;; PCMPESTRI: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; SI operands 2 and 4 are pinned to the "a" and "d" registers; operand 5
;; is an 8-bit immediate.  Two alternatives: V16QI operand 3 in a register
;; or in memory (matching the "memory" attribute none,load).
9869 (define_insn "sse4_2_pcmpestri"
9870 [(set (match_operand:SI 0 "register_operand" "=c,c")
9872 [(match_operand:V16QI 1 "register_operand" "x,x")
9873 (match_operand:SI 2 "register_operand" "a,a")
9874 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9875 (match_operand:SI 4 "register_operand" "d,d")
9876 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9878 (set (reg:CC FLAGS_REG)
9887 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9888 [(set_attr "type" "sselog")
9889 (set_attr "prefix_data16" "1")
9890 (set_attr "prefix_extra" "1")
9891 (set_attr "prefix" "maybe_vex")
9892 (set_attr "length_immediate" "1")
9893 (set_attr "memory" "none,load")
9894 (set_attr "mode" "TI")])
;; PCMPESTRM: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; SI operands 2 and 4 are pinned to the "a" and "d" registers; operand 5
;; is an 8-bit immediate.  Two alternatives: V16QI operand 3 in a register
;; or in memory.
9896 (define_insn "sse4_2_pcmpestrm"
9897 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9899 [(match_operand:V16QI 1 "register_operand" "x,x")
9900 (match_operand:SI 2 "register_operand" "a,a")
9901 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9902 (match_operand:SI 4 "register_operand" "d,d")
9903 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9905 (set (reg:CC FLAGS_REG)
9914 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9915 [(set_attr "type" "sselog")
9916 (set_attr "prefix_data16" "1")
9917 (set_attr "prefix_extra" "1")
9918 (set_attr "length_immediate" "1")
9919 (set_attr "prefix" "maybe_vex")
9920 (set_attr "memory" "none,load")
9921 (set_attr "mode" "TI")])
;; Flags-only PCMPESTR: only FLAGS_REG is set; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0/1 clobber a "Yz" vector scratch and
;; emit pcmpestrm; alternatives 2/3 clobber a "c" SI scratch and emit
;; pcmpestri.  Within each pair, operand 4 is a register, then memory.
9923 (define_insn "sse4_2_pcmpestr_cconly"
9924 [(set (reg:CC FLAGS_REG)
9926 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9927 (match_operand:SI 3 "register_operand" "a,a,a,a")
9928 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9929 (match_operand:SI 5 "register_operand" "d,d,d,d")
9930 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9932 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9933 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9936 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9937 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9938 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9939 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9940 [(set_attr "type" "sselog")
9941 (set_attr "prefix_data16" "1")
9942 (set_attr "prefix_extra" "1")
9943 (set_attr "length_immediate" "1")
9944 (set_attr "memory" "none,load,none,load")
9945 (set_attr "prefix" "maybe_vex")
9946 (set_attr "mode" "TI")])
;; Combined PCMPISTR pattern with three results: an SI value in operand 0
;; (constraint "c"), a V16QI value in operand 1 (constraint "Yz") and
;; FLAGS_REG.  It is split while pseudos can still be created.  The
;; splitter looks at the REG_UNUSED notes on curr_insn to see which results
;; are live, then emits the index form (pcmpistri), the mask form
;; (pcmpistrm), and -- when the flags are used but neither register result
;; is -- the flags-only form (pcmpistr_cconly).
9948 (define_insn_and_split "sse4_2_pcmpistr"
9949 [(set (match_operand:SI 0 "register_operand" "=c,c")
9951 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9952 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9953 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9955 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9961 (set (reg:CC FLAGS_REG)
9968 && can_create_pseudo_p ()"
9973 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9974 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9975 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9978 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9979 operands[3], operands[4]));
9981 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9982 operands[3], operands[4]));
9983 if (flags && !(ecx || xmm0))
9984 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9985 operands[2], operands[3],
9989 [(set_attr "type" "sselog")
9990 (set_attr "prefix_data16" "1")
9991 (set_attr "prefix_extra" "1")
9992 (set_attr "length_immediate" "1")
9993 (set_attr "memory" "none,load")
9994 (set_attr "mode" "TI")])
;; PCMPISTRI: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; Operand 3 is an 8-bit immediate.  Two alternatives: V16QI operand 2 in
;; a register or in memory.
9996 (define_insn "sse4_2_pcmpistri"
9997 [(set (match_operand:SI 0 "register_operand" "=c,c")
9999 [(match_operand:V16QI 1 "register_operand" "x,x")
10000 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10001 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10003 (set (reg:CC FLAGS_REG)
10010 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10011 [(set_attr "type" "sselog")
10012 (set_attr "prefix_data16" "1")
10013 (set_attr "prefix_extra" "1")
10014 (set_attr "length_immediate" "1")
10015 (set_attr "prefix" "maybe_vex")
10016 (set_attr "memory" "none,load")
10017 (set_attr "mode" "TI")])
;; PCMPISTRM: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; Operand 3 is an 8-bit immediate.  Two alternatives: V16QI operand 2 in
;; a register or in memory.
10019 (define_insn "sse4_2_pcmpistrm"
10020 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10022 [(match_operand:V16QI 1 "register_operand" "x,x")
10023 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10024 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10026 (set (reg:CC FLAGS_REG)
10033 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10034 [(set_attr "type" "sselog")
10035 (set_attr "prefix_data16" "1")
10036 (set_attr "prefix_extra" "1")
10037 (set_attr "length_immediate" "1")
10038 (set_attr "prefix" "maybe_vex")
10039 (set_attr "memory" "none,load")
10040 (set_attr "mode" "TI")])
;; Flags-only PCMPISTR: only FLAGS_REG is set; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0/1 clobber a "Yz" vector scratch and
;; emit pcmpistrm; alternatives 2/3 clobber a "c" SI scratch and emit
;; pcmpistri.  Within each pair, operand 3 is a register, then memory.
10042 (define_insn "sse4_2_pcmpistr_cconly"
10043 [(set (reg:CC FLAGS_REG)
10045 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10046 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10047 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10049 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10050 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10053 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10054 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10055 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10056 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10057 [(set_attr "type" "sselog")
10058 (set_attr "prefix_data16" "1")
10059 (set_attr "prefix_extra" "1")
10060 (set_attr "length_immediate" "1")
10061 (set_attr "memory" "none,load,none,load")
10062 (set_attr "prefix" "maybe_vex")
10063 (set_attr "mode" "TI")])
10065 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10067 ;; XOP instructions
10069 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10071 ;; XOP parallel integer multiply/add instructions.
10072 ;; Note the XOP multiply/add instructions
10073 ;; a[i] = b[i] * c[i] + d[i];
10074 ;; do not allow the value being added to be a memory operation.
;; Emits vpmacsww on V8HI operands.  Operand 1 is marked commutative with
;; operand 2 ("%"); operand 2 may be memory.  Operand 3 has predicate
;; nonimmediate_operand but constraint "x", so it is always allocated to a
;; register.
10075 (define_insn "xop_pmacsww"
10076 [(set (match_operand:V8HI 0 "register_operand" "=x")
10079 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10080 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10081 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10083 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10084 [(set_attr "type" "ssemuladd")
10085 (set_attr "mode" "TI")])
;; Emits vpmacssww: a V8HI multiply of operands 1 and 2 combined with
;; operand 3.  Operand 2 may be memory; operand 3 is constrained to a
;; register ("x") despite its nonimmediate_operand predicate.
10087 (define_insn "xop_pmacssww"
10088 [(set (match_operand:V8HI 0 "register_operand" "=x")
10090 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10091 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10092 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10094 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10095 [(set_attr "type" "ssemuladd")
10096 (set_attr "mode" "TI")])
;; Emits vpmacsdd on V4SI operands.  Operand 1 is marked commutative with
;; operand 2 ("%"); operand 2 may be memory; operand 3 is constrained to a
;; register ("x") despite its nonimmediate_operand predicate.
10098 (define_insn "xop_pmacsdd"
10099 [(set (match_operand:V4SI 0 "register_operand" "=x")
10102 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10103 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10104 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10106 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10107 [(set_attr "type" "ssemuladd")
10108 (set_attr "mode" "TI")])
;; Emits vpmacssdd: a V4SI multiply of operands 1 and 2 combined with
;; operand 3.  Operand 2 may be memory; operand 3 is constrained to a
;; register ("x") despite its nonimmediate_operand predicate.
10110 (define_insn "xop_pmacssdd"
10111 [(set (match_operand:V4SI 0 "register_operand" "=x")
10113 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10114 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10115 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10117 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10118 [(set_attr "type" "ssemuladd")
10119 (set_attr "mode" "TI")])
;; Emits vpmacssdql: V4SI operands 1 and 2 (element lists starting at
;; index 1) combined with V2DI operand 3 into a V2DI result.  Operand 2
;; may be memory; operand 3 is constrained to a register.
10121 (define_insn "xop_pmacssdql"
10122 [(set (match_operand:V2DI 0 "register_operand" "=x")
10127 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10128 (parallel [(const_int 1)
10131 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10132 (parallel [(const_int 1)
10134 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10136 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10137 [(set_attr "type" "ssemuladd")
10138 (set_attr "mode" "TI")])
;; Emits vpmacssdqh: V4SI operands 1 and 2 (element lists starting at
;; index 0) combined with V2DI operand 3 into a V2DI result.  Operand 2
;; may be memory; operand 3 is constrained to a register.
10140 (define_insn "xop_pmacssdqh"
10141 [(set (match_operand:V2DI 0 "register_operand" "=x")
10146 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10147 (parallel [(const_int 0)
10151 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10152 (parallel [(const_int 0)
10154 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10156 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10157 [(set_attr "type" "ssemuladd")
10158 (set_attr "mode" "TI")])
;; Emits vpmacsdql: V4SI operands 1 and 2 (element lists starting at
;; index 1) combined with V2DI operand 3 into a V2DI result.  Operand 2
;; may be memory; operand 3 is constrained to a register.
10160 (define_insn "xop_pmacsdql"
10161 [(set (match_operand:V2DI 0 "register_operand" "=x")
10166 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10167 (parallel [(const_int 1)
10171 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10172 (parallel [(const_int 1)
10174 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10176 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10177 [(set_attr "type" "ssemuladd")
10178 (set_attr "mode" "TI")])
10180 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10181 ;; fake it with a multiply/add. In general, we expect the define_split to
10182 ;; occur before register allocation, so we have to handle the corner case where
10183 ;; the target is the same as operands 1/2
;;
;; The destination is early-clobber ("=&x").  The sources are V4SI operands
;; 1 and 2 with element lists (1 3).  The split replacement uses operand 3,
;; which the preparation code sets to the V2DI zero vector.
;; NOTE(review): the split condition is "&& reload_completed", whereas the
;; comment above speaks of splitting before register allocation -- confirm
;; which is intended.
10184 (define_insn_and_split "xop_mulv2div2di3_low"
10185 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10189 (match_operand:V4SI 1 "register_operand" "%x")
10190 (parallel [(const_int 1)
10194 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10195 (parallel [(const_int 1)
10196 (const_int 3)])))))]
10199 "&& reload_completed"
10200 [(set (match_dup 0)
10208 (parallel [(const_int 1)
10213 (parallel [(const_int 1)
10217 operands[3] = CONST0_RTX (V2DImode);
10219 [(set_attr "type" "ssemul")
10220 (set_attr "mode" "TI")])
;; Emits vpmacsdqh: V4SI operands 1 and 2 (element lists starting at
;; index 0) combined with V2DI operand 3 into a V2DI result.  Operand 2
;; may be memory; operand 3 is constrained to a register.
10222 (define_insn "xop_pmacsdqh"
10223 [(set (match_operand:V2DI 0 "register_operand" "=x")
10228 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10229 (parallel [(const_int 0)
10233 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10234 (parallel [(const_int 0)
10236 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10238 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10239 [(set_attr "type" "ssemuladd")
10240 (set_attr "mode" "TI")])
10242 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10243 ;; fake it with a multiply/add. In general, we expect the define_split to
10244 ;; occur before register allocation, so we have to handle the corner case where
10245 ;; the target is the same as either operands[1] or operands[2]
;;
;; The destination is early-clobber ("=&x").  The sources are V4SI operands
;; 1 and 2 with element lists (0 2).  The split replacement uses operand 3,
;; which the preparation code sets to the V2DI zero vector.
;; NOTE(review): the split condition is "&& reload_completed", whereas the
;; comment above speaks of splitting before register allocation -- confirm
;; which is intended.
10246 (define_insn_and_split "xop_mulv2div2di3_high"
10247 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10251 (match_operand:V4SI 1 "register_operand" "%x")
10252 (parallel [(const_int 0)
10256 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10257 (parallel [(const_int 0)
10258 (const_int 2)])))))]
10261 "&& reload_completed"
10262 [(set (match_dup 0)
10270 (parallel [(const_int 0)
10275 (parallel [(const_int 0)
10279 operands[3] = CONST0_RTX (V2DImode);
10281 [(set_attr "type" "ssemul")
10282 (set_attr "mode" "TI")])
10284 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Emits vpmacsswd: V8HI operands 1 and 2 (element lists starting at
;; index 1) combined with V4SI operand 3 into a V4SI result.  Operand 2
;; may be memory; operand 3 is constrained to a register.
10285 (define_insn "xop_pmacsswd"
10286 [(set (match_operand:V4SI 0 "register_operand" "=x")
10291 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10292 (parallel [(const_int 1)
10298 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10299 (parallel [(const_int 1)
10303 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10305 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10306 [(set_attr "type" "ssemuladd")
10307 (set_attr "mode" "TI")])
;; Emits vpmacswd: V8HI operands 1 and 2 (element lists starting at
;; index 1) combined with V4SI operand 3 into a V4SI result.  Operand 2
;; may be memory; operand 3 is constrained to a register.
10309 (define_insn "xop_pmacswd"
10310 [(set (match_operand:V4SI 0 "register_operand" "=x")
10315 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10316 (parallel [(const_int 1)
10322 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10323 (parallel [(const_int 1)
10327 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10329 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10330 [(set_attr "type" "ssemuladd")
10331 (set_attr "mode" "TI")])
;; Emits vpmadcsswd: V8HI operands 1 and 2 are used through two groups of
;; element lists (one starting at index 0, one at index 1, the latter
;; ending at 7) and combined with V4SI operand 3 into a V4SI result.
;; Operand 2 may be memory; operand 3 is constrained to a register.
10333 (define_insn "xop_pmadcsswd"
10334 [(set (match_operand:V4SI 0 "register_operand" "=x")
10340 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10341 (parallel [(const_int 0)
10347 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10348 (parallel [(const_int 0)
10356 (parallel [(const_int 1)
10363 (parallel [(const_int 1)
10366 (const_int 7)])))))
10367 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10369 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10370 [(set_attr "type" "ssemuladd")
10371 (set_attr "mode" "TI")])
;; Emits vpmadcswd: V8HI operands 1 and 2 are used through two groups of
;; element lists (one starting at index 0, one at index 1, the latter
;; ending at 7) and combined with V4SI operand 3 into a V4SI result.
;; Operand 2 may be memory; operand 3 is constrained to a register.
10373 (define_insn "xop_pmadcswd"
10374 [(set (match_operand:V4SI 0 "register_operand" "=x")
10380 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10381 (parallel [(const_int 0)
10387 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10388 (parallel [(const_int 0)
10396 (parallel [(const_int 1)
10403 (parallel [(const_int 1)
10406 (const_int 7)])))))
10407 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10409 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10410 [(set_attr "type" "ssemuladd")
10411 (set_attr "mode" "TI")])
10413 ;; XOP parallel XMM conditional moves
;; 128-bit vpcmov for every SSEMODE vector mode: an if_then_else that picks
;; between operands 1 and 2 under control of operand 3.  Two alternatives
;; allow memory for operand 2 or for operand 3, but never both.
10414 (define_insn "xop_pcmov_<mode>"
10415 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10416 (if_then_else:SSEMODE
10417 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10418 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10419 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10421 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10422 [(set_attr "type" "sse4arg")])
;; 256-bit vpcmov for every AVX256MODE vector mode: an if_then_else that
;; picks between operands 1 and 2 under control of operand 3.  Two
;; alternatives allow memory for operand 2 or for operand 3, but never both.
10424 (define_insn "xop_pcmov_<mode>256"
10425 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10426 (if_then_else:AVX256MODE
10427 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10428 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10429 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10431 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10432 [(set_attr "type" "sse4arg")])
10434 ;; XOP horizontal add/subtract instructions
;; Emits vphaddbw: one V16QI source (register or memory), V8HI register
;; destination.  The RTL uses two element lists, starting at index 0 and
;; at index 1.
10435 (define_insn "xop_phaddbw"
10436 [(set (match_operand:V8HI 0 "register_operand" "=x")
10440 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10441 (parallel [(const_int 0)
10452 (parallel [(const_int 1)
10459 (const_int 15)])))))]
10461 "vphaddbw\t{%1, %0|%0, %1}"
10462 [(set_attr "type" "sseiadd1")])
;; Emits vphaddbd: one V16QI source (register or memory), V4SI register
;; destination.  The RTL uses four element lists, starting at indices
;; 0, 1, 2 and 3.
10464 (define_insn "xop_phaddbd"
10465 [(set (match_operand:V4SI 0 "register_operand" "=x")
10470 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10471 (parallel [(const_int 0)
10478 (parallel [(const_int 1)
10481 (const_int 13)]))))
10486 (parallel [(const_int 2)
10493 (parallel [(const_int 3)
10496 (const_int 15)]))))))]
10498 "vphaddbd\t{%1, %0|%0, %1}"
10499 [(set_attr "type" "sseiadd1")])
;; Emits vphaddbq: one V16QI source (register or memory), V2DI register
;; destination.  The RTL uses eight element lists, starting at indices
;; 0-3 and 8-11.
10501 (define_insn "xop_phaddbq"
10502 [(set (match_operand:V2DI 0 "register_operand" "=x")
10508 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10509 (parallel [(const_int 0)
10514 (parallel [(const_int 1)
10520 (parallel [(const_int 2)
10525 (parallel [(const_int 3)
10526 (const_int 7)])))))
10532 (parallel [(const_int 8)
10537 (parallel [(const_int 9)
10538 (const_int 13)]))))
10543 (parallel [(const_int 10)
10548 (parallel [(const_int 11)
10549 (const_int 15)])))))))]
10551 "vphaddbq\t{%1, %0|%0, %1}"
10552 [(set_attr "type" "sseiadd1")])
;; Emits vphaddwd: one V8HI source (register or memory), V4SI register
;; destination.  The RTL uses two element lists, starting at index 0 and
;; at index 1.
10554 (define_insn "xop_phaddwd"
10555 [(set (match_operand:V4SI 0 "register_operand" "=x")
10559 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10560 (parallel [(const_int 0)
10567 (parallel [(const_int 1)
10570 (const_int 7)])))))]
10572 "vphaddwd\t{%1, %0|%0, %1}"
10573 [(set_attr "type" "sseiadd1")])
;; Emits vphaddwq: one V8HI source (register or memory), V2DI register
;; destination.  The RTL uses four element lists, starting at indices
;; 0, 1, 2 and 3.
10575 (define_insn "xop_phaddwq"
10576 [(set (match_operand:V2DI 0 "register_operand" "=x")
10581 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10582 (parallel [(const_int 0)
10587 (parallel [(const_int 1)
10593 (parallel [(const_int 2)
10598 (parallel [(const_int 3)
10599 (const_int 7)]))))))]
10601 "vphaddwq\t{%1, %0|%0, %1}"
10602 [(set_attr "type" "sseiadd1")])
;; Emits vphadddq: one V4SI source (register or memory), V2DI register
;; destination.  The RTL uses two element lists, starting at index 0 and
;; at index 1.
10604 (define_insn "xop_phadddq"
10605 [(set (match_operand:V2DI 0 "register_operand" "=x")
10609 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10610 (parallel [(const_int 0)
10615 (parallel [(const_int 1)
10616 (const_int 3)])))))]
10618 "vphadddq\t{%1, %0|%0, %1}"
10619 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubw: one V16QI source (register or memory), V8HI register
;; destination.  The RTL uses two element lists, starting at index 0 and
;; at index 1.
10621 (define_insn "xop_phaddubw"
10622 [(set (match_operand:V8HI 0 "register_operand" "=x")
10626 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10627 (parallel [(const_int 0)
10638 (parallel [(const_int 1)
10645 (const_int 15)])))))]
10647 "vphaddubw\t{%1, %0|%0, %1}"
10648 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubd: one V16QI source (register or memory), V4SI register
;; destination.  The RTL uses four element lists, starting at indices
;; 0, 1, 2 and 3.
10650 (define_insn "xop_phaddubd"
10651 [(set (match_operand:V4SI 0 "register_operand" "=x")
10656 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10657 (parallel [(const_int 0)
10664 (parallel [(const_int 1)
10667 (const_int 13)]))))
10672 (parallel [(const_int 2)
10679 (parallel [(const_int 3)
10682 (const_int 15)]))))))]
10684 "vphaddubd\t{%1, %0|%0, %1}"
10685 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubq: one V16QI source (register or memory), V2DI register
;; destination.  The RTL uses eight element lists, starting at indices
;; 0-3 and 8-11.
10687 (define_insn "xop_phaddubq"
10688 [(set (match_operand:V2DI 0 "register_operand" "=x")
10694 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10695 (parallel [(const_int 0)
10700 (parallel [(const_int 1)
10706 (parallel [(const_int 2)
10711 (parallel [(const_int 3)
10712 (const_int 7)])))))
10718 (parallel [(const_int 8)
10723 (parallel [(const_int 9)
10724 (const_int 13)]))))
10729 (parallel [(const_int 10)
10734 (parallel [(const_int 11)
10735 (const_int 15)])))))))]
10737 "vphaddubq\t{%1, %0|%0, %1}"
10738 [(set_attr "type" "sseiadd1")])
;; Emits vphadduwd: one V8HI source (register or memory), V4SI register
;; destination.  The RTL uses two element lists, starting at index 0 and
;; at index 1.
10740 (define_insn "xop_phadduwd"
10741 [(set (match_operand:V4SI 0 "register_operand" "=x")
10745 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10746 (parallel [(const_int 0)
10753 (parallel [(const_int 1)
10756 (const_int 7)])))))]
10758 "vphadduwd\t{%1, %0|%0, %1}"
10759 [(set_attr "type" "sseiadd1")])
;; Emits vphadduwq: one V8HI source (register or memory), V2DI register
;; destination.  The RTL uses four element lists, starting at indices
;; 0, 1, 2 and 3.
10761 (define_insn "xop_phadduwq"
10762 [(set (match_operand:V2DI 0 "register_operand" "=x")
10767 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10768 (parallel [(const_int 0)
10773 (parallel [(const_int 1)
10779 (parallel [(const_int 2)
10784 (parallel [(const_int 3)
10785 (const_int 7)]))))))]
10787 "vphadduwq\t{%1, %0|%0, %1}"
10788 [(set_attr "type" "sseiadd1")])
;; Emits vphaddudq: one V4SI source (register or memory), V2DI register
;; destination.  The RTL uses two element lists, starting at index 0 and
;; at index 1.
10790 (define_insn "xop_phaddudq"
10791 [(set (match_operand:V2DI 0 "register_operand" "=x")
10795 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10796 (parallel [(const_int 0)
10801 (parallel [(const_int 1)
10802 (const_int 3)])))))]
10804 "vphaddudq\t{%1, %0|%0, %1}"
10805 [(set_attr "type" "sseiadd1")])
;; Emits vphsubbw: one V16QI source (register or memory), V8HI register
;; destination.  The RTL uses two element lists, starting at index 0 and
;; at index 1.
10807 (define_insn "xop_phsubbw"
10808 [(set (match_operand:V8HI 0 "register_operand" "=x")
10812 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10813 (parallel [(const_int 0)
10824 (parallel [(const_int 1)
10831 (const_int 15)])))))]
10833 "vphsubbw\t{%1, %0|%0, %1}"
10834 [(set_attr "type" "sseiadd1")])
;; Emits vphsubwd: one V8HI source (register or memory), V4SI register
;; destination.  The RTL uses two element lists, starting at index 0 and
;; at index 1.
10836 (define_insn "xop_phsubwd"
10837 [(set (match_operand:V4SI 0 "register_operand" "=x")
10841 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10842 (parallel [(const_int 0)
10849 (parallel [(const_int 1)
10852 (const_int 7)])))))]
10854 "vphsubwd\t{%1, %0|%0, %1}"
10855 [(set_attr "type" "sseiadd1")])
;; Emits vphsubdq: one V4SI source (register or memory), V2DI register
;; destination.  The RTL uses two element lists, starting at index 0 and
;; at index 1.
10857 (define_insn "xop_phsubdq"
10858 [(set (match_operand:V2DI 0 "register_operand" "=x")
10862 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10863 (parallel [(const_int 0)
10868 (parallel [(const_int 1)
10869 (const_int 3)])))))]
10871 "vphsubdq\t{%1, %0|%0, %1}"
10872 [(set_attr "type" "sseiadd1")])
10874 ;; XOP permute instructions
;; vpperm as UNSPEC_XOP_PERMUTE of three V16QI operands.  The two
;; alternatives allow memory for operand 2 or for operand 3; the insn
;; condition additionally rejects the case where both are memory.
10875 (define_insn "xop_pperm"
10876 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10878 [(match_operand:V16QI 1 "register_operand" "x,x")
10879 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10880 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10881 UNSPEC_XOP_PERMUTE))]
10882 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10883 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10884 [(set_attr "type" "sse4arg")
10885 (set_attr "mode" "TI")])
10887 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs V2DI operands 1 and 2 into a V4SI result using vpperm.  V16QI
;; operand 3 is wrapped in a `use' and passed as the last vpperm source.
;; At most one of operands 2 and 3 may be memory.
10888 (define_insn "xop_pperm_pack_v2di_v4si"
10889 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10892 (match_operand:V2DI 1 "register_operand" "x,x"))
10894 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10895 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10896 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10897 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10898 [(set_attr "type" "sse4arg")
10899 (set_attr "mode" "TI")])
;; Packs V4SI operands 1 and 2 into a V8HI result using vpperm.  V16QI
;; operand 3 is wrapped in a `use' and passed as the last vpperm source.
;; At most one of operands 2 and 3 may be memory.
10901 (define_insn "xop_pperm_pack_v4si_v8hi"
10902 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10905 (match_operand:V4SI 1 "register_operand" "x,x"))
10907 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10908 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10909 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10910 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10911 [(set_attr "type" "sse4arg")
10912 (set_attr "mode" "TI")])
;; Packs V8HI operands 1 and 2 into a V16QI result using vpperm.  V16QI
;; operand 3 is wrapped in a `use' and passed as the last vpperm source.
;; At most one of operands 2 and 3 may be memory.
10914 (define_insn "xop_pperm_pack_v8hi_v16qi"
10915 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10918 (match_operand:V8HI 1 "register_operand" "x,x"))
10920 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10921 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10922 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10923 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10924 [(set_attr "type" "sse4arg")
10925 (set_attr "mode" "TI")])
10927 ;; XOP packed rotate instructions
;; Expander for vector rotate-left by an SI count (operand 2).  When the
;; count is not a constant accepted by const_0_to_<sserotatemax>_operand,
;; it is converted to <ssescalarmode> if necessary, replicated into all
;; <ssescalarnum> elements of a fresh vector register via vec_init<mode>,
;; and the per-element rotate xop_vrotl<mode>3 is emitted instead.
10928 (define_expand "rotl<mode>3"
10929 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10930 (rotate:SSEMODE1248
10931 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10932 (match_operand:SI 2 "general_operand")))]
10935 /* If we were given a scalar, convert it to parallel */
10936 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10938 rtvec vs = rtvec_alloc (<ssescalarnum>);
10939 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10940 rtx reg = gen_reg_rtx (<MODE>mode);
10941 rtx op2 = operands[2];
10944 if (GET_MODE (op2) != <ssescalarmode>mode)
10946 op2 = gen_reg_rtx (<ssescalarmode>mode);
10947 convert_move (op2, operands[2], false);
10950 for (i = 0; i < <ssescalarnum>; i++)
10951 RTVEC_ELT (vs, i) = op2;
10953 emit_insn (gen_vec_init<mode> (reg, par));
10954 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for vector rotate-right by an SI count (operand 2).  When the
;; count is not a constant accepted by const_0_to_<sserotatemax>_operand,
;; it is converted to <ssescalarmode> if necessary and replicated into all
;; <ssescalarnum> elements of a vector.  That vector is then negated and
;; passed to xop_vrotl<mode>3.
10959 (define_expand "rotr<mode>3"
10960 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10961 (rotatert:SSEMODE1248
10962 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10963 (match_operand:SI 2 "general_operand")))]
10966 /* If we were given a scalar, convert it to parallel */
10967 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10969 rtvec vs = rtvec_alloc (<ssescalarnum>);
10970 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10971 rtx neg = gen_reg_rtx (<MODE>mode);
10972 rtx reg = gen_reg_rtx (<MODE>mode);
10973 rtx op2 = operands[2];
10976 if (GET_MODE (op2) != <ssescalarmode>mode)
10978 op2 = gen_reg_rtx (<ssescalarmode>mode);
10979 convert_move (op2, operands[2], false);
10982 for (i = 0; i < <ssescalarnum>; i++)
10983 RTVEC_ELT (vs, i) = op2;
10985 emit_insn (gen_vec_init<mode> (reg, par));
10986 emit_insn (gen_neg<mode>2 (neg, reg));
10987 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left of every element by the same immediate count: emits
;; vprot<ssevecsize> with operand 2 as the immediate.  Source operand 1
;; may be a register or memory.
10992 (define_insn "xop_rotl<mode>3"
10993 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10994 (rotate:SSEMODE1248
10995 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10996 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10998 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10999 [(set_attr "type" "sseishft")
11000 (set_attr "length_immediate" "1")
11001 (set_attr "mode" "TI")])
;; Rotate-right of every element by the same immediate count (operand 2).
;; It is emitted as vprot<ssevecsize> with the complementary left-rotate
;; count, stored in operands[3]:
;;     element width in bits - operand 2.
;; The element width is taken from the scalar mode.  <ssescalarnum> is the
;; number of elements in the vector, not the element size, so deriving the
;; width from it yields the wrong count for 64-bit elements.
11003 (define_insn "xop_rotr<mode>3"
11004 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11005 (rotatert:SSEMODE1248
11006 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11007 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11010 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11011 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11013 [(set_attr "type" "sseishft")
11014 (set_attr "length_immediate" "1")
11015 (set_attr "mode" "TI")])
;; Vector-by-vector right rotate: the per-element counts in operand 2 are
;; negated into a fresh register with neg<mode>2 and the result is used as
;; the count operand of xop_vrotl<mode>3.
11017 (define_expand "vrotr<mode>3"
11018 [(match_operand:SSEMODE1248 0 "register_operand" "")
11019 (match_operand:SSEMODE1248 1 "register_operand" "")
11020 (match_operand:SSEMODE1248 2 "register_operand" "")]
11023 rtx reg = gen_reg_rtx (<MODE>mode);
11024 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11025 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Vector-by-vector left rotate: forwards all three operands unchanged to
;; xop_vrotl<mode>3.
11029 (define_expand "vrotl<mode>3"
11030 [(match_operand:SSEMODE1248 0 "register_operand" "")
11031 (match_operand:SSEMODE1248 1 "register_operand" "")
11032 (match_operand:SSEMODE1248 2 "register_operand" "")]
11035 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element rotate, emitted as vprot<ssevecsize>.  The RTL is an
;; if_then_else whose arms are a rotate of operand 1 by operand 2 and a
;; rotatert of operand 1 by the negation of operand 2.  The insn condition
;; requires TARGET_XOP and rejects the case where operands 1 and 2 are both
;; in memory.
11039 (define_insn "xop_vrotl<mode>3"
11040 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11041 (if_then_else:SSEMODE1248
11043 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11045 (rotate:SSEMODE1248
11046 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11048 (rotatert:SSEMODE1248
11050 (neg:SSEMODE1248 (match_dup 2)))))]
11051 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11052 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11053 [(set_attr "type" "sseishft")
11054 (set_attr "prefix_data16" "0")
11055 (set_attr "prefix_extra" "2")
11056 (set_attr "mode" "TI")])
11058 ;; XOP packed shift instructions.
11059 ;; FIXME: add V2DI back in
;; Vector-by-vector logical right shift for SSEMODE124: negates the counts
;; in operand 2 with neg<mode>2 and emits xop_lshl<mode>3 with the negated
;; counts.
11060 (define_expand "vlshr<mode>3"
11061 [(match_operand:SSEMODE124 0 "register_operand" "")
11062 (match_operand:SSEMODE124 1 "register_operand" "")
11063 (match_operand:SSEMODE124 2 "register_operand" "")]
11066 rtx neg = gen_reg_rtx (<MODE>mode);
11067 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11068 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Vector-by-vector arithmetic right shift for SSEMODE124: negates the
;; counts in operand 2 with neg<mode>2 and emits xop_ashl<mode>3 with the
;; negated counts.
11072 (define_expand "vashr<mode>3"
11073 [(match_operand:SSEMODE124 0 "register_operand" "")
11074 (match_operand:SSEMODE124 1 "register_operand" "")
11075 (match_operand:SSEMODE124 2 "register_operand" "")]
11078 rtx neg = gen_reg_rtx (<MODE>mode);
11079 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11080 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Vector-by-vector left shift for SSEMODE124: forwards all three operands
;; unchanged to xop_ashl<mode>3.
11084 (define_expand "vashl<mode>3"
11085 [(match_operand:SSEMODE124 0 "register_operand" "")
11086 (match_operand:SSEMODE124 1 "register_operand" "")
11087 (match_operand:SSEMODE124 2 "register_operand" "")]
11090 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element arithmetic shift, emitted as vpsha<ssevecsize>.  The
;; RTL is an if_then_else whose arms are an ashift of operand 1 by operand 2
;; and an ashiftrt of operand 1 by the negation of operand 2.  Requires
;; TARGET_XOP; operands 1 and 2 may not both be in memory.
11094 (define_insn "xop_ashl<mode>3"
11095 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11096 (if_then_else:SSEMODE1248
11098 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11100 (ashift:SSEMODE1248
11101 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11103 (ashiftrt:SSEMODE1248
11105 (neg:SSEMODE1248 (match_dup 2)))))]
11106 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11107 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11108 [(set_attr "type" "sseishft")
11109 (set_attr "prefix_data16" "0")
11110 (set_attr "prefix_extra" "2")
11111 (set_attr "mode" "TI")])
;; Variable per-element logical shift, emitted as vpshl<ssevecsize>.  The
;; RTL is an if_then_else whose arms are an ashift of operand 1 by operand 2
;; and an lshiftrt of operand 1 by the negation of operand 2.  Requires
;; TARGET_XOP; operands 1 and 2 may not both be in memory.
11113 (define_insn "xop_lshl<mode>3"
11114 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11115 (if_then_else:SSEMODE1248
11117 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11119 (ashift:SSEMODE1248
11120 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11122 (lshiftrt:SSEMODE1248
11124 (neg:SSEMODE1248 (match_dup 2)))))]
11125 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11126 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11127 [(set_attr "type" "sseishft")
11128 (set_attr "prefix_data16" "0")
11129 (set_attr "prefix_extra" "2")
11130 (set_attr "mode" "TI")])
11132 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI left shift by a scalar SImode count: operand 2 is placed in all 16
;; slots of a PARALLEL, loaded into a register with vec_initv16qi, and used
;; as the count vector of xop_ashlv16qi3.
11133 (define_expand "ashlv16qi3"
11134 [(match_operand:V16QI 0 "register_operand" "")
11135 (match_operand:V16QI 1 "register_operand" "")
11136 (match_operand:SI 2 "nonmemory_operand" "")]
11139 rtvec vs = rtvec_alloc (16);
11140 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11141 rtx reg = gen_reg_rtx (V16QImode);
11143 for (i = 0; i < 16; i++)
11144 RTVEC_ELT (vs, i) = operands[2];
11146 emit_insn (gen_vec_initv16qi (reg, par));
11147 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI shift by a scalar SImode count through xop_lshlv16qi3: operand 2 is
;; placed in all 16 slots of a PARALLEL, loaded into a register with
;; vec_initv16qi, and used as the count vector.
11151 (define_expand "lshlv16qi3"
11152 [(match_operand:V16QI 0 "register_operand" "")
11153 (match_operand:V16QI 1 "register_operand" "")
11154 (match_operand:SI 2 "nonmemory_operand" "")]
11157 rtvec vs = rtvec_alloc (16);
11158 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11159 rtx reg = gen_reg_rtx (V16QImode);
11161 for (i = 0; i < 16; i++)
11162 RTVEC_ELT (vs, i) = operands[2];
11164 emit_insn (gen_vec_initv16qi (reg, par));
11165 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic right shift by a scalar SImode count, implemented as
;; xop_ashlv16qi3 with a negated count vector.  A CONST_INT count is negated
;; up front with GEN_INT before being replicated; for any other count the
;; replicated vector is negated with negv16qi2 before the shift is emitted.
11169 (define_expand "ashrv16qi3"
11170 [(match_operand:V16QI 0 "register_operand" "")
11171 (match_operand:V16QI 1 "register_operand" "")
11172 (match_operand:SI 2 "nonmemory_operand" "")]
11175 rtvec vs = rtvec_alloc (16);
11176 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11177 rtx reg = gen_reg_rtx (V16QImode);
11179 rtx ele = ((CONST_INT_P (operands[2]))
11180 ? GEN_INT (- INTVAL (operands[2]))
11183 for (i = 0; i < 16; i++)
11184 RTVEC_ELT (vs, i) = ele;
11186 emit_insn (gen_vec_initv16qi (reg, par));
11188 if (!CONST_INT_P (operands[2]))
11190 rtx neg = gen_reg_rtx (V16QImode);
11191 emit_insn (gen_negv16qi2 (neg, reg));
11192 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11195 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic right shift by a scalar DImode count, implemented as
;; xop_ashlv2di3 with a negated count.  The count is negated either as a
;; constant (GEN_INT) or with negdi2, after a convert_move to DImode when
;; its mode differs; the negated value fills both elements of the vector
;; built by vec_initv2di.
11200 (define_expand "ashrv2di3"
11201 [(match_operand:V2DI 0 "register_operand" "")
11202 (match_operand:V2DI 1 "register_operand" "")
11203 (match_operand:DI 2 "nonmemory_operand" "")]
11206 rtvec vs = rtvec_alloc (2);
11207 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11208 rtx reg = gen_reg_rtx (V2DImode);
11211 if (CONST_INT_P (operands[2]))
11212 ele = GEN_INT (- INTVAL (operands[2]));
11213 else if (GET_MODE (operands[2]) != DImode)
11215 rtx move = gen_reg_rtx (DImode);
11216 ele = gen_reg_rtx (DImode);
11217 convert_move (move, operands[2], false);
11218 emit_insn (gen_negdi2 (ele, move));
11222 ele = gen_reg_rtx (DImode);
11223 emit_insn (gen_negdi2 (ele, operands[2]));
11226 RTVEC_ELT (vs, 0) = ele;
11227 RTVEC_ELT (vs, 1) = ele;
11228 emit_insn (gen_vec_initv2di (reg, par));
11229 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11233 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> on FMAMODE operand 1 (register or memory) into
;; register operand 0.
11234 (define_insn "xop_frcz<mode>2"
11235 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11237 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11240 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11241 [(set_attr "type" "ssecvt1")
11242 (set_attr "mode" "<MODE>")])
;; Expander for the vec_merge form over SSEMODEF2P; its preparation code
;; sets operands[3] to the zero constant of <MODE>mode.
11245 (define_expand "xop_vmfrcz<mode>2"
11246 [(set (match_operand:SSEMODEF2P 0 "register_operand")
11247 (vec_merge:SSEMODEF2P
11249 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")]
11255 operands[3] = CONST0_RTX (<MODE>mode);
;; vec_merge form whose operand 2 must satisfy const0_operand; emitted as
;; vfrcz<ssescalarmodesuffix> from operand 1 into operand 0.
11258 (define_insn "*xop_vmfrcz_<mode>"
11259 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11260 (vec_merge:SSEMODEF2P
11262 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11264 (match_operand:SSEMODEF2P 2 "const0_operand")
11267 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11268 [(set_attr "type" "ssecvt1")
11269 (set_attr "mode" "<MODE>")])
;; Integer vector comparison: operator 1 must satisfy
;; ix86_comparison_int_operator and is printed through %Y1 in the
;; vpcom...<ssevecsize> mnemonic.  Operand 2 is a register, operand 3 may be
;; a register or memory.
;; NOTE(review): "type" is "sse4arg" here while the unsigned variants of
;; this pattern use "ssecmp" -- confirm that the difference is intended.
11271 (define_insn "xop_maskcmp<mode>3"
11272 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11273 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11274 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11275 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11277 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11278 [(set_attr "type" "sse4arg")
11279 (set_attr "prefix_data16" "0")
11280 (set_attr "prefix_rep" "0")
11281 (set_attr "prefix_extra" "2")
11282 (set_attr "length_immediate" "1")
11283 (set_attr "mode" "TI")])
;; Unsigned integer vector comparison: operator 1 must satisfy
;; ix86_comparison_uns_operator and is printed through %Y1, followed by a
;; "u" and <ssevecsize>, in the vpcom mnemonic.
11285 (define_insn "xop_maskcmp_uns<mode>3"
11286 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11287 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11288 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11289 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11291 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11292 [(set_attr "type" "ssecmp")
11293 (set_attr "prefix_data16" "0")
11294 (set_attr "prefix_rep" "0")
11295 (set_attr "prefix_extra" "2")
11296 (set_attr "length_immediate" "1")
11297 (set_attr "mode" "TI")])
11299 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11300 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11301 ;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in an UNSPEC_XOP_UNSIGNED_CMP unspec.
11302 (define_insn "xop_maskcmp_uns2<mode>3"
11303 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11304 (unspec:SSEMODE1248
11305 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11306 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11307 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11308 UNSPEC_XOP_UNSIGNED_CMP))]
11310 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11311 [(set_attr "type" "ssecmp")
11312 (set_attr "prefix_data16" "0")
11313 (set_attr "prefix_extra" "2")
11314 (set_attr "length_immediate" "1")
11315 (set_attr "mode" "TI")])
11317 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11318 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE insn: the constant in operand 3 selects the
;; mnemonic, vpcomtrue<ssevecsize> when it is nonzero and
;; vpcomfalse<ssevecsize> when it is zero.
11319 (define_insn "xop_pcom_tf<mode>3"
11320 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11321 (unspec:SSEMODE1248
11322 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11323 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11324 (match_operand:SI 3 "const_int_operand" "n")]
11325 UNSPEC_XOP_TRUEFALSE))]
11328 return ((INTVAL (operands[3]) != 0)
11329 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11330 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11332 [(set_attr "type" "ssecmp")
11333 (set_attr "prefix_data16" "0")
11334 (set_attr "prefix_extra" "2")
11335 (set_attr "length_immediate" "1")
11336 (set_attr "mode" "TI")])
;; vpermil2<ssemodesuffix>: two AVXMODEF2P sources (operands 1 and 2), an
;; <avxpermvecmode> selector in operand 3, and an immediate in operand 4
;; that must satisfy const_0_to_3_operand.
11338 (define_insn "xop_vpermil2<mode>3"
11339 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11341 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11342 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11343 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11344 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11347 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11348 [(set_attr "type" "sse4arg")
11349 (set_attr "length_immediate" "1")
11350 (set_attr "mode" "<MODE>")])
11352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded three-operand vaesenc; enabled when both TARGET_AES and
;; TARGET_AVX hold.  Operand 1 is a register, operand 2 a register or
;; memory.
11353 (define_insn "*avx_aesenc"
11354 [(set (match_operand:V2DI 0 "register_operand" "=x")
11355 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11356 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11358 "TARGET_AES && TARGET_AVX"
11359 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11360 [(set_attr "type" "sselog1")
11361 (set_attr "prefix_extra" "1")
11362 (set_attr "prefix" "vex")
11363 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination (constraint
;; "0"), so only operands 0 and 2 appear in the output template.
11365 (define_insn "aesenc"
11366 [(set (match_operand:V2DI 0 "register_operand" "=x")
11367 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11368 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11371 "aesenc\t{%2, %0|%0, %2}"
11372 [(set_attr "type" "sselog1")
11373 (set_attr "prefix_extra" "1")
11374 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesenclast (UNSPEC_AESENCLAST); enabled when
;; both TARGET_AES and TARGET_AVX hold.
11376 (define_insn "*avx_aesenclast"
11377 [(set (match_operand:V2DI 0 "register_operand" "=x")
11378 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11379 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11380 UNSPEC_AESENCLAST))]
11381 "TARGET_AES && TARGET_AVX"
11382 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11383 [(set_attr "type" "sselog1")
11384 (set_attr "prefix_extra" "1")
11385 (set_attr "prefix" "vex")
11386 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination (constraint "0").
11388 (define_insn "aesenclast"
11389 [(set (match_operand:V2DI 0 "register_operand" "=x")
11390 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11391 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11392 UNSPEC_AESENCLAST))]
11394 "aesenclast\t{%2, %0|%0, %2}"
11395 [(set_attr "type" "sselog1")
11396 (set_attr "prefix_extra" "1")
11397 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesdec; enabled when both TARGET_AES and
;; TARGET_AVX hold.
11399 (define_insn "*avx_aesdec"
11400 [(set (match_operand:V2DI 0 "register_operand" "=x")
11401 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11402 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11404 "TARGET_AES && TARGET_AVX"
11405 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11406 [(set_attr "type" "sselog1")
11407 (set_attr "prefix_extra" "1")
11408 (set_attr "prefix" "vex")
11409 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination (constraint
;; "0"), so only operands 0 and 2 appear in the output template.
11411 (define_insn "aesdec"
11412 [(set (match_operand:V2DI 0 "register_operand" "=x")
11413 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11414 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11417 "aesdec\t{%2, %0|%0, %2}"
11418 [(set_attr "type" "sselog1")
11419 (set_attr "prefix_extra" "1")
11420 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesdeclast (UNSPEC_AESDECLAST); enabled when
;; both TARGET_AES and TARGET_AVX hold.
11422 (define_insn "*avx_aesdeclast"
11423 [(set (match_operand:V2DI 0 "register_operand" "=x")
11424 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11425 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11426 UNSPEC_AESDECLAST))]
11427 "TARGET_AES && TARGET_AVX"
11428 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11429 [(set_attr "type" "sselog1")
11430 (set_attr "prefix_extra" "1")
11431 (set_attr "prefix" "vex")
11432 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination (constraint "0").
11434 (define_insn "aesdeclast"
11435 [(set (match_operand:V2DI 0 "register_operand" "=x")
11436 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11437 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11438 UNSPEC_AESDECLAST))]
11440 "aesdeclast\t{%2, %0|%0, %2}"
11441 [(set_attr "type" "sselog1")
11442 (set_attr "prefix_extra" "1")
11443 (set_attr "mode" "TI")])
;; Single-source aesimc from register or memory operand 1.  The template
;; starts with %v and the "prefix" attribute is maybe_vex, so one pattern
;; serves both encodings.
11445 (define_insn "aesimc"
11446 [(set (match_operand:V2DI 0 "register_operand" "=x")
11447 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11450 "%vaesimc\t{%1, %0|%0, %1}"
11451 [(set_attr "type" "sselog1")
11452 (set_attr "prefix_extra" "1")
11453 (set_attr "prefix" "maybe_vex")
11454 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST) with a source in operand 1 and
;; an immediate in operand 2 that must satisfy const_0_to_255_operand.  The
;; template starts with %v and the "prefix" attribute is maybe_vex.
11456 (define_insn "aeskeygenassist"
11457 [(set (match_operand:V2DI 0 "register_operand" "=x")
11458 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11459 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11460 UNSPEC_AESKEYGENASSIST))]
11462 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11463 [(set_attr "type" "sselog1")
11464 (set_attr "prefix_extra" "1")
11465 (set_attr "length_immediate" "1")
11466 (set_attr "prefix" "maybe_vex")
11467 (set_attr "mode" "TI")])
;; VEX-encoded vpclmulqdq with separate destination; enabled when both
;; TARGET_PCLMUL and TARGET_AVX hold.  Operand 3 is an immediate that must
;; satisfy const_0_to_255_operand.
11469 (define_insn "*vpclmulqdq"
11470 [(set (match_operand:V2DI 0 "register_operand" "=x")
11471 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11472 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11473 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11475 "TARGET_PCLMUL && TARGET_AVX"
11476 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11477 [(set_attr "type" "sselog1")
11478 (set_attr "prefix_extra" "1")
11479 (set_attr "length_immediate" "1")
11480 (set_attr "prefix" "vex")
11481 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination (constraint
;; "0"); operand 3 is an immediate that must satisfy
;; const_0_to_255_operand.
11483 (define_insn "pclmulqdq"
11484 [(set (match_operand:V2DI 0 "register_operand" "=x")
11485 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11486 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11487 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11490 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11491 [(set_attr "type" "sselog1")
11492 (set_attr "prefix_extra" "1")
11493 (set_attr "length_immediate" "1")
11494 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall in operands[0].  It has
;; nregs + 1 elements, where nregs is 16 for TARGET_64BIT and 8 otherwise:
;; element 0 is an UNSPEC_VOLATILE and each following element is a SET of
;; one SSE register, in V8SImode, to the zero vector.
11496 (define_expand "avx_vzeroall"
11497 [(match_par_dup 0 [(const_int 0)])]
11500 int nregs = TARGET_64BIT ? 16 : 8;
11503 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11505 XVECEXP (operands[0], 0, 0)
11506 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11509 for (regno = 0; regno < nregs; regno++)
11510 XVECEXP (operands[0], 0, regno + 1)
11511 = gen_rtx_SET (VOIDmode,
11512 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11513 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by vzeroall_operation whose first element is
;; the UNSPECV_VZEROALL unspec_volatile.  Attributes mark it as having no
;; modrm byte and no memory access.
11516 (define_insn "*avx_vzeroall"
11517 [(match_parallel 0 "vzeroall_operation"
11518 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11521 [(set_attr "type" "sse")
11522 (set_attr "modrm" "0")
11523 (set_attr "memory" "none")
11524 (set_attr "prefix" "vex")
11525 (set_attr "mode" "OI")])
11527 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11528 ;; if the upper 128bits are unused.
;; UNSPECV_VZEROUPPER unspec_volatile carrying a const_int operand 0.
;; Attributes mark it as having no modrm byte and no memory access.
11529 (define_insn "avx_vzeroupper"
11530 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11531 UNSPECV_VZEROUPPER)]
11534 [(set_attr "type" "sse")
11535 (set_attr "modrm" "0")
11536 (set_attr "memory" "none")
11537 (set_attr "prefix" "vex")
11538 (set_attr "mode" "OI")])
;; Broadcast a scalar into every element of an AVX256MODE24P vector.  The
;; memory alternative is output as vbroadcast<ssescalarmodesuffix>.  When
;; operand 1 is a register, the insn is split after reload into a
;; half-width vec_duplicate written to operand 0's register viewed in
;; <avxhalfvecmode>, followed by a vec_concat of that half with itself.
11540 (define_insn_and_split "vec_dup<mode>"
11541 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11542 (vec_duplicate:AVX256MODE24P
11543 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11546 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11548 "&& reload_completed && REG_P (operands[1])"
11549 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11550 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11551 "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
11552 [(set_attr "type" "ssemov")
11553 (set_attr "prefix_extra" "1")
11554 (set_attr "prefix" "vex")
11555 (set_attr "mode" "V8SF")])
;; Broadcast a half-width vector into an AVX256MODE register.  Three
;; alternatives, in constraint order: memory source -> vbroadcastf128;
;; source tied to the destination -> vinsertf128 with immediate 1; any
;; other register source -> vperm2f128 with immediate 0.
11557 (define_insn "avx_vbroadcastf128_<mode>"
11558 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11559 (vec_concat:AVX256MODE
11560 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11564 vbroadcastf128\t{%1, %0|%0, %1}
11565 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11566 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11567 [(set_attr "type" "ssemov,sselog1,sselog1")
11568 (set_attr "prefix_extra" "1")
11569 (set_attr "length_immediate" "0,1,1")
11570 (set_attr "prefix" "vex")
11571 (set_attr "mode" "V4SF,V8SF,V8SF")])
11573 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11574 ;; If it so happens that the input is in memory, use vbroadcast.
11575 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of the element whose index is the constant in operand 3.
;; The output code switches on which_alternative: one path re-addresses
;; operand 1 to the selected SFmode element (byte offset elt * 4) and emits
;; vbroadcastss; the other emits vpermilps with immediate elt * 0x55.
11576 (define_insn "*avx_vperm_broadcast_v4sf"
11577 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11579 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11580 (match_parallel 2 "avx_vbroadcast_operand"
11581 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11584 int elt = INTVAL (operands[3]);
11585 switch (which_alternative)
11589 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11590 return "vbroadcastss\t{%1, %0|%0, %1}";
11592 operands[2] = GEN_INT (elt * 0x55);
11593 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11595 gcc_unreachable ();
11598 [(set_attr "type" "ssemov,ssemov,sselog1")
11599 (set_attr "prefix_extra" "1")
11600 (set_attr "length_immediate" "0,0,1")
11601 (set_attr "prefix" "vex")
11602 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast of the element whose index is the constant in operand
;; 3, split after reload.  The split code has two paths.  One emits
;; avx_vpermil<mode> to replicate the element within each 128-bit lane and
;; then avx_vperm2f128<mode>3 on operand 0 to copy the wanted lane into both
;; halves.  The other re-addresses operand 1 to the selected
;; <avxscalarmode> element so the vec_duplicate replacement pattern applies.
11604 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11605 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11606 (vec_select:AVX256MODEF2P
11607 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11608 (match_parallel 2 "avx_vbroadcast_operand"
11609 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11612 "&& reload_completed"
11613 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11615 rtx op0 = operands[0], op1 = operands[1];
11616 int elt = INTVAL (operands[3]);
11622 /* Shuffle element we care about into all elements of the 128-bit lane.
11623 The other lane gets shuffled too, but we don't care. */
11624 if (<MODE>mode == V4DFmode)
11625 mask = (elt & 1 ? 15 : 0);
11627 mask = (elt & 3) * 0x55;
11628 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11630 /* Shuffle the lane we care about into both lanes of the dest. */
11631 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11632 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11636 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11637 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; Expander for the AVXMODEFDP modes: turns the immediate in operand 2 into
;; a PARALLEL of element indices for the vec_select.  Each element uses one
;; mask bit; for V4DFmode, elements 2 and 3 use bits 2 and 3 with 2 added.
11640 (define_expand "avx_vpermil<mode>"
11641 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11642 (vec_select:AVXMODEFDP
11643 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11644 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11647 int mask = INTVAL (operands[2]);
11648 rtx perm[<ssescalarnum>];
11650 perm[0] = GEN_INT (mask & 1);
11651 perm[1] = GEN_INT ((mask >> 1) & 1);
11652 if (<MODE>mode == V4DFmode)
11654 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11655 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11659 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the AVXMODEFSP modes: turns the immediate in operand 2 into
;; a PARALLEL of element indices for the vec_select.  Each of the first four
;; elements uses a two-bit mask field; for V8SFmode, elements 4..7 reuse the
;; same four fields with 4 added.
11662 (define_expand "avx_vpermil<mode>"
11663 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11664 (vec_select:AVXMODEFSP
11665 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11666 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11669 int mask = INTVAL (operands[2]);
11670 rtx perm[<ssescalarnum>];
11672 perm[0] = GEN_INT (mask & 3);
11673 perm[1] = GEN_INT ((mask >> 2) & 3);
11674 perm[2] = GEN_INT ((mask >> 4) & 3);
11675 perm[3] = GEN_INT ((mask >> 6) & 3);
11676 if (<MODE>mode == V8SFmode)
11678 perm[4] = GEN_INT ((mask & 3) + 4);
11679 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11680 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11681 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11685 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a vec_select whose PARALLEL satisfies
;; avx_vpermilp_<mode>_operand.  At output time the immediate is recovered
;; as avx_vpermilp_parallel (operands[2], <MODE>mode) - 1 and stored back in
;; operands[2] for the vpermil<ssemodesuffix> template.
11688 (define_insn "*avx_vpermilp<mode>"
11689 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11690 (vec_select:AVXMODEF2P
11691 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11692 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11693 [(match_operand 3 "const_int_operand" "")])))]
11696 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11697 operands[2] = GEN_INT (mask);
11698 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11700 [(set_attr "type" "sselog")
11701 (set_attr "prefix_extra" "1")
11702 (set_attr "length_immediate" "1")
11703 (set_attr "prefix" "vex")
11704 (set_attr "mode" "<MODE>")])
;; Variable vpermil<ssemodesuffix>: operand 1 is the data register and
;; operand 2 an <avxpermvecmode> control vector in a register or memory.
11706 (define_insn "avx_vpermilvar<mode>3"
11707 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11709 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11710 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11713 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11714 [(set_attr "type" "sselog")
11715 (set_attr "prefix_extra" "1")
11716 (set_attr "prefix" "vex")
11717 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  When the immediate has neither bit 3 nor bit 7
;; set ((mask & 0x88) == 0), the operation is rebuilt as a SET of operand 0
;; to a vec_select from the vec_concat of operands 1 and 2.  The low half of
;; the selector starts at (mask & 3) * nelt2 and the high half at
;; ((mask >> 4) & 3) * nelt2, where nelt2 is half the element count.
11719 (define_expand "avx_vperm2f128<mode>3"
11720 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11721 (unspec:AVX256MODE2P
11722 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11723 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11724 (match_operand:SI 3 "const_0_to_255_operand" "")]
11725 UNSPEC_VPERMIL2F128))]
11728 int mask = INTVAL (operands[3]);
11729 if ((mask & 0x88) == 0)
11731 rtx perm[<ssescalarnum>], t1, t2;
11732 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11734 base = (mask & 3) * nelt2;
11735 for (i = 0; i < nelt2; ++i)
11736 perm[i] = GEN_INT (base + i);
11738 base = ((mask >> 4) & 3) * nelt2;
11739 for (i = 0; i < nelt2; ++i)
11740 perm[i + nelt2] = GEN_INT (base + i);
11742 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11743 operands[1], operands[2]);
11744 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11745 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11746 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11752 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11753 ;; means that in order to represent this properly in rtl we'd have to
11754 ;; nest *another* vec_concat with a zero operand and do the select from
11755 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC_VPERMIL2F128 form of vperm2f128: the immediate in operand 3 is
;; passed through to the instruction unchanged.
11756 (define_insn "*avx_vperm2f128<mode>_full"
11757 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11758 (unspec:AVX256MODE2P
11759 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11760 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11761 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11762 UNSPEC_VPERMIL2F128))]
11764 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11765 [(set_attr "type" "sselog")
11766 (set_attr "prefix_extra" "1")
11767 (set_attr "length_immediate" "1")
11768 (set_attr "prefix" "vex")
11769 (set_attr "mode" "V8SF")])
;; vec_select-of-vec_concat form of vperm2f128; the PARALLEL must satisfy
;; avx_vperm2f128_<mode>_operand.  At output time the immediate is
;; recovered as avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1 and
;; stored back in operands[3].
11771 (define_insn "*avx_vperm2f128<mode>_nozero"
11772 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11773 (vec_select:AVX256MODE2P
11774 (vec_concat:<ssedoublesizemode>
11775 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11776 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11777 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
11778 [(match_operand 4 "const_int_operand" "")])))]
11781 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11782 operands[3] = GEN_INT (mask);
11783 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11785 [(set_attr "type" "sselog")
11786 (set_attr "prefix_extra" "1")
11787 (set_attr "length_immediate" "1")
11788 (set_attr "prefix" "vex")
11789 (set_attr "mode" "V8SF")])
;; Expander that dispatches on the immediate in operand 3 (which must
;; satisfy const_0_to_1_operand) to either gen_vec_set_lo_<mode> or
;; gen_vec_set_hi_<mode>, then emits the chosen insn with operands 0..2.
11791 (define_expand "avx_vinsertf128<mode>"
11792 [(match_operand:AVX256MODE 0 "register_operand" "")
11793 (match_operand:AVX256MODE 1 "register_operand" "")
11794 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11795 (match_operand:SI 3 "const_0_to_1_operand" "")]
11798 rtx (*insn)(rtx, rtx, rtx);
11800 switch (INTVAL (operands[3]))
11803 insn = gen_vec_set_lo_<mode>;
11806 insn = gen_vec_set_hi_<mode>;
11809 gcc_unreachable ();
11812 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Four-element 256-bit modes: the result is the vec_concat of half-width
;; operand 2 with elements 2 and 3 of operand 1.  Emitted as vinsertf128
;; with immediate 0.
11816 (define_insn "vec_set_lo_<mode>"
11817 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11818 (vec_concat:AVX256MODE4P
11819 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11820 (vec_select:<avxhalfvecmode>
11821 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11822 (parallel [(const_int 2) (const_int 3)]))))]
11824 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11825 [(set_attr "type" "sselog")
11826 (set_attr "prefix_extra" "1")
11827 (set_attr "length_immediate" "1")
11828 (set_attr "prefix" "vex")
11829 (set_attr "mode" "V8SF")])
;; Four-element 256-bit modes: the result is the vec_concat of elements 0
;; and 1 of operand 1 with half-width operand 2.  Emitted as vinsertf128
;; with immediate 1.
11831 (define_insn "vec_set_hi_<mode>"
11832 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11833 (vec_concat:AVX256MODE4P
11834 (vec_select:<avxhalfvecmode>
11835 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11836 (parallel [(const_int 0) (const_int 1)]))
11837 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11839 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11840 [(set_attr "type" "sselog")
11841 (set_attr "prefix_extra" "1")
11842 (set_attr "length_immediate" "1")
11843 (set_attr "prefix" "vex")
11844 (set_attr "mode" "V8SF")])
;; Eight-element 256-bit modes: the result is the vec_concat of half-width
;; operand 2 with elements 4..7 of operand 1.  Emitted as vinsertf128 with
;; immediate 0.
11846 (define_insn "vec_set_lo_<mode>"
11847 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11848 (vec_concat:AVX256MODE8P
11849 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11850 (vec_select:<avxhalfvecmode>
11851 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11852 (parallel [(const_int 4) (const_int 5)
11853 (const_int 6) (const_int 7)]))))]
11855 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11856 [(set_attr "type" "sselog")
11857 (set_attr "prefix_extra" "1")
11858 (set_attr "length_immediate" "1")
11859 (set_attr "prefix" "vex")
11860 (set_attr "mode" "V8SF")])
;; Eight-element 256-bit modes: the result is the vec_concat of elements
;; 0..3 of operand 1 with half-width operand 2.  Emitted as vinsertf128 with
;; immediate 1.
11862 (define_insn "vec_set_hi_<mode>"
11863 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11864 (vec_concat:AVX256MODE8P
11865 (vec_select:<avxhalfvecmode>
11866 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11867 (parallel [(const_int 0) (const_int 1)
11868 (const_int 2) (const_int 3)]))
11869 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11871 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11872 [(set_attr "type" "sselog")
11873 (set_attr "prefix_extra" "1")
11874 (set_attr "length_immediate" "1")
11875 (set_attr "prefix" "vex")
11876 (set_attr "mode" "V8SF")])
;; V16HI variant: combines V8HI operand 2 with elements 8..15 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
11878 (define_insn "vec_set_lo_v16hi"
11879 [(set (match_operand:V16HI 0 "register_operand" "=x")
11881 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11883 (match_operand:V16HI 1 "register_operand" "x")
11884 (parallel [(const_int 8) (const_int 9)
11885 (const_int 10) (const_int 11)
11886 (const_int 12) (const_int 13)
11887 (const_int 14) (const_int 15)]))))]
11889 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11890 [(set_attr "type" "sselog")
11891 (set_attr "prefix_extra" "1")
11892 (set_attr "length_immediate" "1")
11893 (set_attr "prefix" "vex")
11894 (set_attr "mode" "V8SF")])
;; V16HI variant: combines elements 0..7 of operand 1 with V8HI operand 2.
;; Emitted as vinsertf128 with immediate 1.
11896 (define_insn "vec_set_hi_v16hi"
11897 [(set (match_operand:V16HI 0 "register_operand" "=x")
11900 (match_operand:V16HI 1 "register_operand" "x")
11901 (parallel [(const_int 0) (const_int 1)
11902 (const_int 2) (const_int 3)
11903 (const_int 4) (const_int 5)
11904 (const_int 6) (const_int 7)]))
11905 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11907 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11908 [(set_attr "type" "sselog")
11909 (set_attr "prefix_extra" "1")
11910 (set_attr "length_immediate" "1")
11911 (set_attr "prefix" "vex")
11912 (set_attr "mode" "V8SF")])
;; V32QI variant: combines V16QI operand 2 with elements 16..31 of operand
;; 1.  Emitted as vinsertf128 with immediate 0.
11914 (define_insn "vec_set_lo_v32qi"
11915 [(set (match_operand:V32QI 0 "register_operand" "=x")
11917 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11919 (match_operand:V32QI 1 "register_operand" "x")
11920 (parallel [(const_int 16) (const_int 17)
11921 (const_int 18) (const_int 19)
11922 (const_int 20) (const_int 21)
11923 (const_int 22) (const_int 23)
11924 (const_int 24) (const_int 25)
11925 (const_int 26) (const_int 27)
11926 (const_int 28) (const_int 29)
11927 (const_int 30) (const_int 31)]))))]
11929 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11930 [(set_attr "type" "sselog")
11931 (set_attr "prefix_extra" "1")
11932 (set_attr "length_immediate" "1")
11933 (set_attr "prefix" "vex")
11934 (set_attr "mode" "V8SF")])
;; V32QI variant: combines elements 0..15 of operand 1 with V16QI operand 2.
;; Emitted as vinsertf128 with immediate 1.
11936 (define_insn "vec_set_hi_v32qi"
11937 [(set (match_operand:V32QI 0 "register_operand" "=x")
11940 (match_operand:V32QI 1 "register_operand" "x")
11941 (parallel [(const_int 0) (const_int 1)
11942 (const_int 2) (const_int 3)
11943 (const_int 4) (const_int 5)
11944 (const_int 6) (const_int 7)
11945 (const_int 8) (const_int 9)
11946 (const_int 10) (const_int 11)
11947 (const_int 12) (const_int 13)
11948 (const_int 14) (const_int 15)]))
11949 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11951 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11952 [(set_attr "type" "sselog")
11953 (set_attr "prefix_extra" "1")
11954 (set_attr "length_immediate" "1")
11955 (set_attr "prefix" "vex")
11956 (set_attr "mode" "<MODE>")])
;; Masked load: vmaskmov<ssemodesuffix> from memory operand 1 into register
;; operand 0, with the <avxpermvecmode> register in operand 2 as the second
;; instruction operand.
11958 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
11959 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11961 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11962 (match_operand:<avxpermvecmode> 2 "register_operand" "x")
11966 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11967 [(set_attr "type" "sselog1")
11968 (set_attr "prefix_extra" "1")
11969 (set_attr "prefix" "vex")
11970 (set_attr "mode" "<MODE>")])
;; Masked store (UNSPEC_MASKSTORE): vmaskmov<ssemodesuffix> into memory
;; operand 0, taking the <avxpermvecmode> register in operand 1 and the data
;; register in operand 2.
11972 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
11973 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11975 [(match_operand:<avxpermvecmode> 1 "register_operand" "x")
11976 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11978 UNSPEC_MASKSTORE))]
11980 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11981 [(set_attr "type" "sselog1")
11982 (set_attr "prefix_extra" "1")
11983 (set_attr "prefix" "vex")
11984 (set_attr "mode" "<MODE>")])
;; avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>: an unspec that
;; takes the half-width operand 1 (<avxhalfvecmode>) and produces a
;; 256-bit AVX256MODE2P value in operand 0.  Alternatives: register
;; destination with register-or-memory source, or memory destination
;; with register source.
;; Once reload has completed the insn is split: operand 1 is re-expressed
;; in <MODE>mode (via gen_rtx_REG on the same register number, or via
;; gen_lowpart) and a plain move into operand 0 is emitted.
;; NOTE(review): the unspec code, the insn condition and the test that
;; chooses between gen_rtx_REG and gen_lowpart are not visible in this
;; excerpt -- confirm against the full pattern.
11986 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11987 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11988 (unspec:AVX256MODE2P
11989 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11993 "&& reload_completed"
11996 rtx op1 = operands[1];
11998 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12000 op1 = gen_lowpart (<MODE>mode, op1);
12001 emit_move_insn (operands[0], op1);
;; vec_init<mode>: expander for every mode in AVX256MODE.  Operand 0 is
;; the destination register; operand 1 has no predicate or mode and is
;; passed through unchanged.  All the work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
;; NOTE(review): the expander condition and the closing lines are not
;; visible in this excerpt.
12005 (define_expand "vec_init<mode>"
12006 [(match_operand:AVX256MODE 0 "register_operand" "")
12007 (match_operand 1 "" "")]
12010 ix86_expand_vector_init (false, operands[0], operands[1]);
;; *vec_concat<mode>_avx: concatenate two half-width vectors
;; (<avxhalfvecmode>) into a 256-bit AVX256MODE register.
;; Operand 1 is always a register.  Operand 2 has two alternatives:
;;   "xm" - register or memory;
;;   "C"  - a constant accepted by vector_move_operand.
;; The output code switches on which_alternative and returns either a
;; vinsertf128 with immediate 0x1 (operand 2 inserted next to %t1) or,
;; depending on get_attr_mode, a vmovaps / vmovapd / vmovdqa of
;; operand 1 into %x0.
;; NOTE(review): the case labels, braces and insn condition are not
;; visible in this excerpt; presumably the vinsertf128 form belongs to
;; the "xm" alternative and the move forms to the "C" alternative, as
;; the per-alternative attributes below ("sselog,ssemov") suggest.
12014 (define_insn "*vec_concat<mode>_avx"
12015 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12016 (vec_concat:AVX256MODE
12017 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12018 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12021 switch (which_alternative)
12024 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12026 switch (get_attr_mode (insn))
12029 return "vmovaps\t{%1, %x0|%x0, %1}";
12031 return "vmovapd\t{%1, %x0|%x0, %1}";
12033 return "vmovdqa\t{%1, %x0|%x0, %1}";
12036 gcc_unreachable ();
;; prefix_extra and length_immediate apply to the first alternative only.
12039 [(set_attr "type" "sselog,ssemov")
12040 (set_attr "prefix_extra" "1,*")
12041 (set_attr "length_immediate" "1,*")
12042 (set_attr "prefix" "vex")
12043 (set_attr "mode" "<avxvecmode>")])
;; vcvtph2ps (register form): convert half-precision values held in the
;; V8HI register operand 1 into the V4SF register operand 0.
;; The conversion is modelled as a V8SF unspec of which four elements are
;; selected for the V4SF result.  The selector must name elements
;; 0, 1, 2, 3 so that result element i is conversion element i; a
;; selector of 0, 1, 1, 2 would duplicate element 1 and never use
;; element 3, which does not describe the instruction.
;; NOTE(review): the enclosing selection operator, the unspec code and
;; the insn condition are not visible in this excerpt -- confirm against
;; the full pattern.
12045 (define_insn "vcvtph2ps"
12046 [(set (match_operand:V4SF 0 "register_operand" "=x")
12048 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12050 (parallel [(const_int 0) (const_int 1)
12051 (const_int 2) (const_int 3)])))]
12053 "vcvtph2ps\t{%1, %0|%0, %1}"
12054 [(set_attr "type" "ssecvt")
12055 (set_attr "prefix" "vex")
12056 (set_attr "mode" "V4SF")])
;; *vcvtph2ps_load: memory-source form of vcvtph2ps.  Operand 1 is a V4HI
;; memory operand and the UNSPEC_VCVTPH2PS result is the V4SF register
;; operand 0.
;; NOTE(review): the "mode" attribute here is V8SF although the result
;; is V4SF and the register form "vcvtph2ps" uses V4SF -- confirm that
;; this difference is intended.  The insn condition is not visible in
;; this excerpt.
12058 (define_insn "*vcvtph2ps_load"
12059 [(set (match_operand:V4SF 0 "register_operand" "=x")
12060 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12061 UNSPEC_VCVTPH2PS))]
12063 "vcvtph2ps\t{%1, %0|%0, %1}"
12064 [(set_attr "type" "ssecvt")
12065 (set_attr "prefix" "vex")
12066 (set_attr "mode" "V8SF")])
;; vcvtph2ps256: 256-bit form of vcvtph2ps.  Operand 1 is a V8HI register
;; or memory operand; the UNSPEC_VCVTPH2PS result is the V8SF register
;; operand 0.
;; NOTE(review): the insn condition is not visible in this excerpt.
12068 (define_insn "vcvtph2ps256"
12069 [(set (match_operand:V8SF 0 "register_operand" "=x")
12070 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12071 UNSPEC_VCVTPH2PS))]
12073 "vcvtph2ps\t{%1, %0|%0, %1}"
12074 [(set_attr "type" "ssecvt")
12075 (set_attr "prefix" "vex")
12076 (set_attr "mode" "V8SF")])
;; vcvtps2ph (expander): operand 0 is a V8HI register; the visible part
;; of the pattern is a V4HI unspec of the V4SF register operand 1 and the
;; SI immediate operand 2.
;; The preparation statement sets operands[3] to the all-zero V4HI
;; constant, so callers supply only operands 0..2.
;; NOTE(review): the RTL that combines the V4HI unspec with operand 3
;; into the V8HI destination, the unspec code and the expander condition
;; are not visible in this excerpt -- confirm against the full pattern.
12078 (define_expand "vcvtps2ph"
12079 [(set (match_operand:V8HI 0 "register_operand" "")
12081 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12082 (match_operand:SI 2 "immediate_operand" "")]
12086 "operands[3] = CONST0_RTX (V4HImode);")
;; *vcvtps2ph: register-destination form of vcvtps2ph.  Operand 0 is a
;; V8HI register; the visible part of the pattern is a V4HI unspec of the
;; V4SF register operand 1 and the SI immediate operand 2 (constraint
;; "N"), followed by operand 3, which must satisfy const0_operand in
;; V4HI mode.  The template prints the immediate, source and destination.
;; NOTE(review): the RTL that combines the unspec with operand 3, the
;; unspec code and the insn condition are not visible in this excerpt.
12088 (define_insn "*vcvtps2ph"
12089 [(set (match_operand:V8HI 0 "register_operand" "=x")
12091 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12092 (match_operand:SI 2 "immediate_operand" "N")]
12094 (match_operand:V4HI 3 "const0_operand" "")))]
12096 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12097 [(set_attr "type" "ssecvt")
12098 (set_attr "prefix" "vex")
12099 (set_attr "mode" "V4SF")])
;; *vcvtps2ph_store: memory-destination form of vcvtps2ph.  The
;; UNSPEC_VCVTPS2PH of the V4SF register operand 1 and the SI immediate
;; operand 2 (constraint "N") is stored to the V4HI memory operand 0.
;; NOTE(review): the insn condition is not visible in this excerpt.
12101 (define_insn "*vcvtps2ph_store"
12102 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12103 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12104 (match_operand:SI 2 "immediate_operand" "N")]
12105 UNSPEC_VCVTPS2PH))]
12107 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12108 [(set_attr "type" "ssecvt")
12109 (set_attr "prefix" "vex")
12110 (set_attr "mode" "V4SF")])
;; vcvtps2ph256: 256-bit source form of vcvtps2ph.  The UNSPEC_VCVTPS2PH
;; of the V8SF register operand 1 and the SI immediate operand 2
;; (constraint "N") is written to the V8HI operand 0, which may be a
;; register or memory.
;; NOTE(review): the insn condition is not visible in this excerpt.
12112 (define_insn "vcvtps2ph256"
12113 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12114 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12115 (match_operand:SI 2 "immediate_operand" "N")]
12116 UNSPEC_VCVTPS2PH))]
12118 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12119 [(set_attr "type" "ssecvt")
12120 (set_attr "prefix" "vex")
12121 (set_attr "mode" "V8SF")])