1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Instruction suffix for sign and zero extensions.
;; Maps the sign_extend code to "sx" and the zero_extend code to "zx".
23 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
25 ;; 16 byte integral modes handled by SSE
26 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
28 ;; All 16-byte vector modes handled by SSE (integer and floating point)
29 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Same list as SSEMODE with V1TI added.
30 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
32 ;; 32 byte integral vector modes handled by AVX
33 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
35 ;; All 32-byte vector modes handled by AVX (integer and floating point)
36 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
38 ;; All QI vector modes handled by AVX
39 (define_mode_iterator AVXMODEQI [V32QI V16QI])
41 ;; All DI vector modes handled by AVX
42 (define_mode_iterator AVXMODEDI [V4DI V2DI])
44 ;; All vector modes handled by AVX (the SSEMODE list followed by the
;; AVX256MODE list)
45 (define_mode_iterator AVXMODE
46 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Same list as AVXMODE with V1TI added.
47 (define_mode_iterator AVXMODE16
48 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte integer vector modes.  The digits in each name
;; list the element sizes in bytes that the iterator covers
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
51 (define_mode_iterator SSEMODE12 [V16QI V8HI])
52 (define_mode_iterator SSEMODE24 [V8HI V4SI])
53 (define_mode_iterator SSEMODE14 [V16QI V4SI])
54 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
55 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
56 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar and 16-byte vector float modes.
57 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; 32-byte float vector modes.
58 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
;; 16-byte float vector modes; used by most SSE arithmetic patterns below.
59 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; 32-byte float vector modes; used by the AVX expanders below.
61 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
;; Further subsets of the 32-byte modes, mixing integer and float modes
;; of the same element width.
62 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
63 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
64 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
65 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; All 16-byte and 32-byte float vector modes; used by the AVX insns below.
66 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
;; Four-element float vector modes.
67 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
;; DF-element vector modes.
68 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
;; SF-element vector modes.
69 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; SF-element and SI-element vector modes, named after the cvtdq2ps and
;; cvtps2dq conversions.
70 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
71 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
73 ;; Int-float size matches
;; SSEMODE4S pairs the four-element SF and SI vectors; SSEMODE2D pairs the
;; two-element DF and DI vectors.
74 (define_mode_iterator SSEMODE4S [V4SF V4SI])
75 (define_mode_iterator SSEMODE2D [V2DF V2DI])
77 ;; Modes handled by integer vcond pattern
;; V16QI, V8HI and V4SI are always included; V2DI only when TARGET_SSE4_2
;; holds.
78 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
79 (V2DI "TARGET_SSE4_2")])
81 ;; Modes handled by vec_extract_even/odd pattern.
;; Each entry shown here is conditional: the 16-byte integer modes need
;; TARGET_SSE2, the 32-byte float modes need TARGET_AVX.
82 (define_mode_iterator SSEMODE_EO
85 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
86 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
87 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
89 ;; Mapping from float mode to required SSE level
;; SF-based modes map to "sse", DF-based modes to "sse2".  The value is
;; used as a pattern-name prefix below, e.g. "<sse>_movu<ssemodesuffix>".
90 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
92 ;; Mapping from integer vector mode to mnemonic suffix
;; One letter per element width: "b" (QI), "w" (HI), "d" (SI), "q" (DI).
93 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
95 ;; Mapping of the insn mnemonic suffix
;; "ss"/"sd" for the scalar modes, "ps"/"pd" for the vector modes.  The
;; V8SI and V4DI integer modes reuse the "ps" and "pd" suffixes.
96 (define_mode_attr ssemodesuffix
97 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
98 (V8SI "ps") (V4DI "pd")])
;; Scalar-instruction suffix for a mode: every SF-based mode gives "ss"
;; and every DF-based mode gives "sd", including the vector modes.
99 (define_mode_attr ssescalarmodesuffix
100 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V4DF "sd")
103 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
104 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
106 ;; Mapping of vector modes back to the scalar modes
;; Only the 16-byte vector modes are listed.  The value is also used for
;; the "mode" attribute of the scalar (vm*) insns below.
107 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
108 (V16QI "QI") (V8HI "HI")
109 (V4SI "SI") (V2DI "DI")])
111 ;; Mapping of vector modes to a vector mode of double size
;; The result keeps the element mode and doubles the element count;
;; entries exist for both the 16-byte and the 32-byte modes.
112 (define_mode_attr ssedoublesizemode
113 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
114 (V8HI "V16HI") (V16QI "V32QI")
115 (V4DF "V8DF") (V8SF "V16SF")
116 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
118 ;; Number of scalar elements in each vector type
;; The value is the element count that appears in the mode name, given as
;; a decimal string, for 16-byte and 32-byte modes.
119 (define_mode_attr ssescalarnum
120 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
121 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value placed in the "mode" insn attribute by the AVX patterns below:
;; 16-byte integer vectors map to TI, 32-byte integer vectors map to OI,
;; and float vectors map to themselves.
124 (define_mode_attr avxvecmode
125 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
126 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
127 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Maps an integer vector mode to the SF vector mode of the same total
;; size: V4SF for the 16-byte modes, V8SF for the 32-byte modes.
128 (define_mode_attr avxvecpsmode
129 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
130 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Maps a vector mode to the mode with the same element type and half as
;; many elements.  There is no entry for V2DI or V2DF.
131 (define_mode_attr avxhalfvecmode
132 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
133 (V8SF "V4SF") (V4DF "V2DF")
134 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Maps each 16-byte and 32-byte vector mode to its element (scalar) mode.
135 (define_mode_attr avxscalarmode
136 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
137 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; Swaps between the SF and SI vector modes with the same element count.
138 (define_mode_attr avxcvtvecmode
139 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Maps a float vector mode to the integer vector mode with the same
;; element count and element width.
140 (define_mode_attr avxpermvecmode
141 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Name suffix per mode: "pd" for DF vectors, "ps" for SF vectors and
;; "si" for SI vectors, for the entries shown here.
142 (define_mode_attr avxmodesuffixp
143 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Pattern-name suffix: empty for the 16-byte modes, "256" for the
;; 32-byte modes.  Used in names such as "avx_movdqu<avxmodesuffix>".
145 (define_mode_attr avxmodesuffix
146 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
147 (V8SI "256") (V8SF "256") (V4DF "256")])
149 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1 for a vector of N elements (one bit per
;; element, all set).
150 (define_mode_attr blendbits
151 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
153 ;; Mapping of immediate bits for pinsr instructions
;; Each value equals 2^(N-1) for a vector of N elements.
154 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
156 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for the 32-byte AVX256MODE vector modes.  The expansion
;; is delegated to ix86_expand_vector_move.
164 (define_expand "mov<mode>"
165 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
166 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
169 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every mode in AVXMODE16.  Three alternatives are given:
;; SSE constant -> register ("C" to "x"), register or memory -> register
;; ("xm" to "x"), and register -> memory ("x" to "m").  The condition
;; requires at least one of the two operands to be a register.
;; The output code either takes the template from
;; standard_sse_constant_opcode or picks vmovaps, vmovapd or vmovdqa
;; according to get_attr_mode, using vmovaps in place of the other two
;; when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
173 (define_insn "*avx_mov<mode>_internal"
174 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
175 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
177 && (register_operand (operands[0], <MODE>mode)
178 || register_operand (operands[1], <MODE>mode))"
180 switch (which_alternative)
183 return standard_sse_constant_opcode (insn, operands[1]);
186 switch (get_attr_mode (insn))
190 return "vmovaps\t{%1, %0|%0, %1}";
193 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
194 return "vmovaps\t{%1, %0|%0, %1}";
196 return "vmovapd\t{%1, %0|%0, %1}";
198 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
199 return "vmovaps\t{%1, %0|%0, %1}";
201 return "vmovdqa\t{%1, %0|%0, %1}";
207 [(set_attr "type" "sselog1,ssemov,ssemov")
208 (set_attr "prefix" "vex")
209 (set_attr "mode" "<avxvecmode>")])
211 ;; All of these patterns are enabled for SSE1 as well as SSE2.
212 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte SSEMODE16 vector modes.  The expansion
;; is delegated to ix86_expand_vector_move.
214 (define_expand "mov<mode>"
215 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
216 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
219 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every mode in SSEMODE16, with the same three
;; alternatives as the AVX variant: SSE constant -> register, register or
;; memory -> register, and register -> memory.  The condition requires at
;; least one operand to be a register.
;; The output code either takes the template from
;; standard_sse_constant_opcode or picks movaps, movapd or movdqa
;; according to get_attr_mode, using movaps in place of the other two
;; when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;; The "mode" attribute is V4SF when optimizing the function for size,
;; when TARGET_SSE2 is off, or for alternative 2 (the store) when
;; TARGET_SSE_TYPELESS_STORES is set.  Otherwise it is V4SF for V4SFmode,
;; V2DF for V2DFmode and TI for everything else.
223 (define_insn "*mov<mode>_internal"
224 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
225 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
227 && (register_operand (operands[0], <MODE>mode)
228 || register_operand (operands[1], <MODE>mode))"
230 switch (which_alternative)
233 return standard_sse_constant_opcode (insn, operands[1]);
236 switch (get_attr_mode (insn))
239 return "movaps\t{%1, %0|%0, %1}";
241 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
242 return "movaps\t{%1, %0|%0, %1}";
244 return "movapd\t{%1, %0|%0, %1}";
246 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
247 return "movaps\t{%1, %0|%0, %1}";
249 return "movdqa\t{%1, %0|%0, %1}";
255 [(set_attr "type" "sselog1,ssemov,ssemov")
257 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
258 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
259 (and (eq_attr "alternative" "2")
260 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
262 (const_string "V4SF")
263 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
264 (const_string "V4SF")
265 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
266 (const_string "V2DF")
268 (const_string "TI")))])
270 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
271 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
272 ;; from memory, we'd prefer to load the memory directly into the %xmm
273 ;; register. To facilitate this happy circumstance, this pattern won't
274 ;; split until after register allocation. If the 64-bit value didn't
275 ;; come from memory, this is the best we can do. This is much better
276 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from there.
;; Enabled only for !TARGET_64BIT with TARGET_SSE2 and
;; TARGET_INTER_UNIT_MOVES; the split condition adds reload_completed.
;; Alternative 0 takes the DImode source from general registers ("r") and
;; uses the V4SI scratch operand 2.  Alternative 1 takes it from memory
;; ("m") and leaves the scratch unused ("X").
;; Register source: each SImode half (subreg byte offsets 0 and 4) is
;; loaded with gen_sse2_loadld, the first into operand 0 and the second
;; into the scratch, followed by gen_vec_interleave_lowv4si.
;; Memory source: a single gen_vec_concatv2di on the V2DImode low part of
;; operand 0, with const0_rtx as the second element.
279 (define_insn_and_split "movdi_to_sse"
281 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
282 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
283 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
284 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
286 "&& reload_completed"
289 if (register_operand (operands[1], DImode))
291 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
292 Assemble the 64-bit DImode value in an xmm register. */
293 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
294 gen_rtx_SUBREG (SImode, operands[1], 0)));
295 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
296 gen_rtx_SUBREG (SImode, operands[1], 4)));
297 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
300 else if (memory_operand (operands[1], DImode))
301 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
302 operands[1], const0_rtx));
;; Applies after reload when TARGET_SSE is on and operand 1 satisfies
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to SFmode with simplify_gen_subreg and sets operand 2 to the
;; V4SF zero vector; the replacement pattern uses a vec_duplicate of
;; operand 1.
308 [(set (match_operand:V4SF 0 "register_operand" "")
309 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
310 "TARGET_SSE && reload_completed"
313 (vec_duplicate:V4SF (match_dup 1))
317 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
318 operands[2] = CONST0_RTX (V4SFmode);
;; Applies after reload when TARGET_SSE2 is on and operand 1 satisfies
;; zero_extended_scalar_load_operand.  The replacement is a vec_concat of
;; operand 1, narrowed to DFmode with simplify_gen_subreg, and a DFmode
;; zero placed in operand 2.
322 [(set (match_operand:V2DF 0 "register_operand" "")
323 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
324 "TARGET_SSE2 && reload_completed"
325 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
327 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
328 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register in one of the AVX256MODE modes; the
;; expansion is delegated to ix86_expand_push.
331 (define_expand "push<mode>1"
332 [(match_operand:AVX256MODE 0 "register_operand" "")]
335 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a register in one of the SSEMODE16 modes; the
;; expansion is delegated to ix86_expand_push.
339 (define_expand "push<mode>1"
340 [(match_operand:SSEMODE16 0 "register_operand" "")]
343 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the AVX256MODE modes; the expansion is
;; delegated to ix86_expand_vector_move_misalign.
347 (define_expand "movmisalign<mode>"
348 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
349 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
352 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the SSEMODE16 modes; the expansion is
;; delegated to ix86_expand_vector_move_misalign.
356 (define_expand "movmisalign<mode>"
357 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
358 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
361 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Emits vmovups or vmovupd ("vmovu" plus the mode's "ps"/"pd" suffix) for
;; the AVXMODEF2P modes.  Alternatives: register or memory -> register,
;; and register -> memory.  The condition rejects memory-to-memory.
365 (define_insn "avx_movu<ssemodesuffix><avxmodesuffix>"
366 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
368 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
370 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
371 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
372 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
373 [(set_attr "type" "ssemov")
374 (set_attr "movu" "1")
375 (set_attr "prefix" "vex")
376 (set_attr "mode" "<MODE>")])
;; Emits movq (template "%vmovq", prefix attribute "maybe_vex") from a
;; V2DI register or memory operand into a V2DI register.  The pattern
;; selects element 0 of operand 1.
378 (define_insn "sse2_movq128"
379 [(set (match_operand:V2DI 0 "register_operand" "=x")
382 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
383 (parallel [(const_int 0)]))
386 "%vmovq\t{%1, %0|%0, %1}"
387 [(set_attr "type" "ssemov")
388 (set_attr "prefix" "maybe_vex")
389 (set_attr "mode" "TI")])
;; Instantiates as sse_movups (V4SF) and sse2_movupd (V2DF), emitting
;; movups and movupd respectively.  Alternatives: register or memory ->
;; register, and register -> memory.  The condition rejects
;; memory-to-memory.
391 (define_insn "<sse>_movu<ssemodesuffix>"
392 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
394 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
396 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
397 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
398 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
399 [(set_attr "type" "ssemov")
400 (set_attr "movu" "1")
401 (set_attr "mode" "<MODE>")])
;; Instantiates as avx_movdqu (V16QI) and avx_movdqu256 (V32QI); both emit
;; vmovdqu.  Requires TARGET_AVX and rejects memory-to-memory.
403 (define_insn "avx_movdqu<avxmodesuffix>"
404 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
406 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
408 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
409 "vmovdqu\t{%1, %0|%0, %1}"
410 [(set_attr "type" "ssemov")
411 (set_attr "movu" "1")
412 (set_attr "prefix" "vex")
413 (set_attr "mode" "<avxvecmode>")])
;; Emits movdqu for V16QI, wrapped in an unspec.  Requires TARGET_SSE2
;; and rejects memory-to-memory.
415 (define_insn "sse2_movdqu"
416 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
417 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
419 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
420 "movdqu\t{%1, %0|%0, %1}"
421 [(set_attr "type" "ssemov")
422 (set_attr "movu" "1")
423 (set_attr "prefix_data16" "1")
424 (set_attr "mode" "TI")])
;; Store-only pattern: operand 0 must be memory and operand 1 a register.
;; Emits vmovntps or vmovntpd for the AVXMODEF2P modes.
426 (define_insn "avx_movnt<mode>"
427 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
429 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
431 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
432 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
433 [(set_attr "type" "ssemov")
434 (set_attr "prefix" "vex")
435 (set_attr "mode" "<MODE>")])
;; Store-only pattern: operand 0 must be memory and operand 1 a register.
;; Instantiates as sse_movntv4sf (movntps) and sse2_movntv2df (movntpd).
437 (define_insn "<sse>_movnt<mode>"
438 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
440 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
442 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
443 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
444 [(set_attr "type" "ssemov")
445 (set_attr "mode" "<MODE>")])
;; Store-only pattern emitting vmovntdq for V4DI and V2DI: operand 0 must
;; be memory and operand 1 a register.
;; NOTE(review): "type" is "ssecvt" here, while sse2_movntv2di and the
;; float movnt patterns use "ssemov" -- confirm this is intended.
447 (define_insn "avx_movnt<mode>"
448 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
450 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
453 "vmovntdq\t{%1, %0|%0, %1}"
454 [(set_attr "type" "ssecvt")
455 (set_attr "prefix" "vex")
456 (set_attr "mode" "<avxvecmode>")])
;; Store-only pattern emitting movntdq for V2DI, wrapped in an unspec:
;; operand 0 must be memory and operand 1 a register.
458 (define_insn "sse2_movntv2di"
459 [(set (match_operand:V2DI 0 "memory_operand" "=m")
460 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
463 "movntdq\t{%1, %0|%0, %1}"
464 [(set_attr "type" "ssemov")
465 (set_attr "prefix_data16" "1")
466 (set_attr "mode" "TI")])
;; Store-only pattern emitting movnti for an SImode value: the source uses
;; the "r" constraint rather than "x", and prefix_data16 is forced to "0".
;; NOTE(review): the "mode" attribute is "V2DF" although both operands
;; are SImode -- confirm this is intended.
468 (define_insn "sse2_movntsi"
469 [(set (match_operand:SI 0 "memory_operand" "=m")
470 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
473 "movnti\t{%1, %0|%0, %1}"
474 [(set_attr "type" "ssemov")
475 (set_attr "prefix_data16" "0")
476 (set_attr "mode" "V2DF")])
;; Load-only pattern: operand 1 must be memory and operand 0 a register.
;; Instantiates as avx_lddqu (V16QI) and avx_lddqu256 (V32QI); both emit
;; vlddqu.
;; NOTE(review): "type" is "ssecvt" here, while sse3_lddqu uses "ssemov"
;; -- confirm this is intended.
478 (define_insn "avx_lddqu<avxmodesuffix>"
479 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
481 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
484 "vlddqu\t{%1, %0|%0, %1}"
485 [(set_attr "type" "ssecvt")
486 (set_attr "movu" "1")
487 (set_attr "prefix" "vex")
488 (set_attr "mode" "<avxvecmode>")])
;; Load-only pattern emitting lddqu for V16QI: operand 1 must be memory
;; and operand 0 a register.  The attributes set prefix_data16 to "0" and
;; prefix_rep to "1".
490 (define_insn "sse3_lddqu"
491 [(set (match_operand:V16QI 0 "register_operand" "=x")
492 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
495 "lddqu\t{%1, %0|%0, %1}"
496 [(set_attr "type" "ssemov")
497 (set_attr "movu" "1")
498 (set_attr "prefix_data16" "0")
499 (set_attr "prefix_rep" "1")
500 (set_attr "mode" "TI")])
502 ; Expand patterns for non-temporal stores. At the moment, only those
503 ; that directly map to insns are defined; it would be possible to
504 ; define patterns for other modes that would expand to several insns.
;; Non-temporal store expander for V4SF and V2DF: memory destination,
;; register source, enabled when SSE_VEC_FLOAT_MODE_P holds for the mode.
;; There is no preparation code.
506 (define_expand "storent<mode>"
507 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
509 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
511 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)")
;; Non-temporal store expander for the MODEF modes (an iterator defined
;; outside this section): memory destination, register source.
513 (define_expand "storent<mode>"
514 [(set (match_operand:MODEF 0 "memory_operand" "")
516 [(match_operand:MODEF 1 "register_operand" "")]
;; Non-temporal store expander for V2DI: memory destination, register
;; source wrapped in an unspec, the same shape as sse2_movntv2di.
520 (define_expand "storentv2di"
521 [(set (match_operand:V2DI 0 "memory_operand" "")
522 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; Non-temporal store expander for SImode: memory destination, register
;; source wrapped in an unspec, the same shape as sse2_movntsi.
526 (define_expand "storentsi"
527 [(set (match_operand:SI 0 "memory_operand" "")
528 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
534 ;; Parallel floating point arithmetic
536 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander for V4SF and V2DF, register to register.  The whole
;; expansion is done by ix86_expand_fp_absneg_operator, after which the
;; preparation code finishes with DONE.
538 (define_expand "<code><mode>2"
539 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
541 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
542 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
543 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander for V8SF and V4DF.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
545 (define_expand "<plusminus_insn><mode>3"
546 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
547 (plusminus:AVX256MODEF2P
548 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
549 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
550 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
551 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Three-operand VEX add/subtract for all AVXMODEF2P modes: the
;; destination %0 is separate from the sources %1 (register) and %2
;; (register or memory).  Operands must also pass
;; ix86_binary_operator_ok.
553 (define_insn "*avx_<plusminus_insn><mode>3"
554 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
555 (plusminus:AVXMODEF2P
556 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
557 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
558 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
559 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
560 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
561 [(set_attr "type" "sseadd")
562 (set_attr "prefix" "vex")
563 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for V4SF and V2DF.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
565 (define_expand "<plusminus_insn><mode>3"
566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
567 (plusminus:SSEMODEF2P
568 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
569 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
570 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
571 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Two-operand SSE add/subtract for V4SF and V2DF.  Operand 1 is tied to
;; the destination by the "0" constraint, so the template names only %0
;; and %2.  Operands must also pass ix86_binary_operator_ok.
573 (define_insn "*<plusminus_insn><mode>3"
574 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
575 (plusminus:SSEMODEF2P
576 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
577 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
578 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
579 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
580 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
581 [(set_attr "type" "sseadd")
582 (set_attr "mode" "<MODE>")])
;; Scalar add/subtract inside a vec_merge, VEX three-operand form.  The
;; mnemonic takes the scalar suffix ("ss" for V4SF, "sd" for V2DF) and
;; the "mode" attribute is the element mode.
584 (define_insn "*avx_vm<plusminus_insn><mode>3"
585 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
586 (vec_merge:SSEMODEF2P
587 (plusminus:SSEMODEF2P
588 (match_operand:SSEMODEF2P 1 "register_operand" "x")
589 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
592 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
593 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
594 [(set_attr "type" "sseadd")
595 (set_attr "prefix" "vex")
596 (set_attr "mode" "<ssescalarmode>")])
;; Scalar add/subtract inside a vec_merge, two-operand SSE form with
;; operand 1 tied to the destination ("0").  The mnemonic takes the
;; scalar suffix ("ss" for V4SF, "sd" for V2DF).
598 (define_insn "<sse>_vm<plusminus_insn><mode>3"
599 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
600 (vec_merge:SSEMODEF2P
601 (plusminus:SSEMODEF2P
602 (match_operand:SSEMODEF2P 1 "register_operand" "0")
603 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
606 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
607 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
608 [(set_attr "type" "sseadd")
609 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for V8SF and V4DF.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
611 (define_expand "mul<mode>3"
612 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
614 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
615 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
616 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
617 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Three-operand VEX multiply (vmulps / vmulpd) for all AVXMODEF2P modes.
;; Operand 1 carries the "%" constraint modifier, and the operands must
;; pass ix86_binary_operator_ok for MULT.
619 (define_insn "*avx_mul<mode>3"
620 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
622 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
623 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
624 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
625 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
626 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
627 [(set_attr "type" "ssemul")
628 (set_attr "prefix" "vex")
629 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for V4SF and V2DF.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
631 (define_expand "mul<mode>3"
632 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
634 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
635 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
636 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
637 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Two-operand SSE multiply (mulps / mulpd) for V4SF and V2DF.  Operand 1
;; is tied to the destination ("%0"), and the operands must pass
;; ix86_binary_operator_ok for MULT.
639 (define_insn "*mul<mode>3"
640 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
642 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
643 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
644 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
645 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
646 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
647 [(set_attr "type" "ssemul")
648 (set_attr "mode" "<MODE>")])
;; Scalar multiply (vmulss / vmulsd) inside a vec_merge, VEX
;; three-operand form.
;; NOTE(review): the condition uses AVX_VEC_FLOAT_MODE_P, while the
;; sibling *avx_vm add/subtract and divide patterns use
;; AVX128_VEC_FLOAT_MODE_P -- confirm this is intended.
650 (define_insn "*avx_vmmul<mode>3"
651 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
652 (vec_merge:SSEMODEF2P
654 (match_operand:SSEMODEF2P 1 "register_operand" "x")
655 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
658 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
659 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
660 [(set_attr "type" "ssemul")
661 (set_attr "prefix" "vex")
662 (set_attr "mode" "<ssescalarmode>")])
;; Scalar multiply (mulss / mulsd) inside a vec_merge, two-operand SSE
;; form with operand 1 tied to the destination ("0").
664 (define_insn "<sse>_vmmul<mode>3"
665 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
666 (vec_merge:SSEMODEF2P
668 (match_operand:SSEMODEF2P 1 "register_operand" "0")
669 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
672 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
673 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
674 [(set_attr "type" "ssemul")
675 (set_attr "mode" "<ssescalarmode>")])
;; V8SF divide expander.  It first fixes up the operands with
;; ix86_fixup_binary_operands_no_copy.  It then calls ix86_emit_swdivsf
;; when all of these hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing
;; the insn for size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
677 (define_expand "divv8sf3"
678 [(set (match_operand:V8SF 0 "register_operand" "")
679 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
680 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
683 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
685 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
686 && flag_finite_math_only && !flag_trapping_math
687 && flag_unsafe_math_optimizations)
689 ix86_emit_swdivsf (operands[0], operands[1],
690 operands[2], V8SFmode);
;; V4DF divide expander.  It only fixes up the operands with
;; ix86_fixup_binary_operands_no_copy; ix86_emit_swdivsf is not called
;; for this mode.
695 (define_expand "divv4df3"
696 [(set (match_operand:V4DF 0 "register_operand" "")
697 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
698 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
700 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; Three-operand VEX divide (vdivps / vdivpd) for all AVXMODEF2P modes.
;; Operand 1 must be a register; operand 2 may be a register or memory.
702 (define_insn "avx_div<mode>3"
703 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
705 (match_operand:AVXMODEF2P 1 "register_operand" "x")
706 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
707 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
708 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
709 [(set_attr "type" "ssediv")
710 (set_attr "prefix" "vex")
711 (set_attr "mode" "<MODE>")])
;; V4SF divide expander.  It calls ix86_emit_swdivsf when all of these
;; hold: TARGET_SSE_MATH, TARGET_RECIP, optimizing the insn for speed,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
713 (define_expand "divv4sf3"
714 [(set (match_operand:V4SF 0 "register_operand" "")
715 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
716 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
719 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
720 && flag_finite_math_only && !flag_trapping_math
721 && flag_unsafe_math_optimizations)
723 ix86_emit_swdivsf (operands[0], operands[1],
724 operands[2], V4SFmode);
;; V2DF divide expander: register dividend, register or memory divisor.
729 (define_expand "divv2df3"
730 [(set (match_operand:V2DF 0 "register_operand" "")
731 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
732 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Three-operand VEX divide (vdivps / vdivpd) restricted to V4SF and V2DF
;; (SSEMODEF2P) under AVX128_VEC_FLOAT_MODE_P.
735 (define_insn "*avx_div<mode>3"
736 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
738 (match_operand:SSEMODEF2P 1 "register_operand" "x")
739 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
740 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
741 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
742 [(set_attr "type" "ssediv")
743 (set_attr "prefix" "vex")
744 (set_attr "mode" "<MODE>")])
;; Two-operand SSE divide (divps / divpd) for V4SF and V2DF.  Operand 1
;; is tied to the destination by the "0" constraint.
746 (define_insn "<sse>_div<mode>3"
747 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
749 (match_operand:SSEMODEF2P 1 "register_operand" "0")
750 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
751 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
752 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
753 [(set_attr "type" "ssediv")
754 (set_attr "mode" "<MODE>")])
;; Scalar divide (vdivss / vdivsd) inside a vec_merge, VEX three-operand
;; form.  The "mode" attribute is the element mode.
756 (define_insn "*avx_vmdiv<mode>3"
757 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
758 (vec_merge:SSEMODEF2P
760 (match_operand:SSEMODEF2P 1 "register_operand" "x")
761 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
764 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
765 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
766 [(set_attr "type" "ssediv")
767 (set_attr "prefix" "vex")
768 (set_attr "mode" "<ssescalarmode>")])
;; Scalar divide (divss / divsd) inside a vec_merge, two-operand SSE form
;; with operand 1 tied to the destination ("0").
770 (define_insn "<sse>_vmdiv<mode>3"
771 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
772 (vec_merge:SSEMODEF2P
774 (match_operand:SSEMODEF2P 1 "register_operand" "0")
775 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
778 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
779 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
780 [(set_attr "type" "ssediv")
781 (set_attr "mode" "<ssescalarmode>")])
;; Emits vrcpps on a V8SF register or memory operand.  The operation is
;; represented as UNSPEC_RCP.
783 (define_insn "avx_rcpv8sf2"
784 [(set (match_operand:V8SF 0 "register_operand" "=x")
786 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
788 "vrcpps\t{%1, %0|%0, %1}"
789 [(set_attr "type" "sse")
790 (set_attr "prefix" "vex")
791 (set_attr "mode" "V8SF")])
;; Emits rcpps (template "%vrcpps", prefix attribute "maybe_vex") on a
;; V4SF register or memory operand.  The operation is represented as
;; UNSPEC_RCP.
793 (define_insn "sse_rcpv4sf2"
794 [(set (match_operand:V4SF 0 "register_operand" "=x")
796 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
798 "%vrcpps\t{%1, %0|%0, %1}"
799 [(set_attr "type" "sse")
800 (set_attr "atom_sse_attr" "rcp")
801 (set_attr "prefix" "maybe_vex")
802 (set_attr "mode" "V4SF")])
;; Scalar reciprocal, VEX form.  vrcpss takes operand 1 (register or
;; memory, inside the unspec) and the separate register operand 2, and
;; writes operand 0.
804 (define_insn "*avx_vmrcpv4sf2"
805 [(set (match_operand:V4SF 0 "register_operand" "=x")
807 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
809 (match_operand:V4SF 2 "register_operand" "x")
812 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
813 [(set_attr "type" "sse")
814 (set_attr "prefix" "vex")
815 (set_attr "mode" "SF")])
;; Scalar reciprocal, SSE form.  Operand 2 is tied to the destination
;; ("0"), so the rcpss template names only %0 and %1.
817 (define_insn "sse_vmrcpv4sf2"
818 [(set (match_operand:V4SF 0 "register_operand" "=x")
820 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
822 (match_operand:V4SF 2 "register_operand" "0")
825 "rcpss\t{%1, %0|%0, %1}"
826 [(set_attr "type" "sse")
827 (set_attr "atom_sse_attr" "rcp")
828 (set_attr "mode" "SF")])
;; V8SF square-root expander.  It calls ix86_emit_swsqrtsf with a final
;; argument of 0 when all of these hold: TARGET_SSE_MATH, TARGET_RECIP,
;; not optimizing the insn for size, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
830 (define_expand "sqrtv8sf2"
831 [(set (match_operand:V8SF 0 "register_operand" "")
832 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
835 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
836 && flag_finite_math_only && !flag_trapping_math
837 && flag_unsafe_math_optimizations)
839 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; Emits vsqrtps for a V8SF register or memory source.
844 (define_insn "avx_sqrtv8sf2"
845 [(set (match_operand:V8SF 0 "register_operand" "=x")
846 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
848 "vsqrtps\t{%1, %0|%0, %1}"
849 [(set_attr "type" "sse")
850 (set_attr "prefix" "vex")
851 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  It calls ix86_emit_swsqrtsf with a final
;; argument of 0 when all of these hold: TARGET_SSE_MATH, TARGET_RECIP,
;; optimizing the insn for speed, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
853 (define_expand "sqrtv4sf2"
854 [(set (match_operand:V4SF 0 "register_operand" "")
855 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
858 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
859 && flag_finite_math_only && !flag_trapping_math
860 && flag_unsafe_math_optimizations)
862 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Emits sqrtps (template "%vsqrtps", prefix attribute "maybe_vex") for a
;; V4SF register or memory source.
867 (define_insn "sse_sqrtv4sf2"
868 [(set (match_operand:V4SF 0 "register_operand" "=x")
869 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
871 "%vsqrtps\t{%1, %0|%0, %1}"
872 [(set_attr "type" "sse")
873 (set_attr "atom_sse_attr" "sqrt")
874 (set_attr "prefix" "maybe_vex")
875 (set_attr "mode" "V4SF")])
;; Emits vsqrtpd for a V4DF register or memory source.  This is a named
;; insn, with no separate expander shown for this mode.
877 (define_insn "sqrtv4df2"
878 [(set (match_operand:V4DF 0 "register_operand" "=x")
879 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
881 "vsqrtpd\t{%1, %0|%0, %1}"
882 [(set_attr "type" "sse")
883 (set_attr "prefix" "vex")
884 (set_attr "mode" "V4DF")])
;; Emits sqrtpd (template "%vsqrtpd", prefix attribute "maybe_vex") for a
;; V2DF register or memory source.
886 (define_insn "sqrtv2df2"
887 [(set (match_operand:V2DF 0 "register_operand" "=x")
888 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
890 "%vsqrtpd\t{%1, %0|%0, %1}"
891 [(set_attr "type" "sse")
892 (set_attr "prefix" "maybe_vex")
893 (set_attr "mode" "V2DF")])
;; Scalar square root (vsqrtss / vsqrtsd) inside a vec_merge, VEX form.
;; Operand 1 (register or memory) is the source and operand 2 is a
;; separate register operand.
895 (define_insn "*avx_vmsqrt<mode>2"
896 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
897 (vec_merge:SSEMODEF2P
899 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
900 (match_operand:SSEMODEF2P 2 "register_operand" "x")
902 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
903 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
904 [(set_attr "type" "sse")
905 (set_attr "prefix" "vex")
906 (set_attr "mode" "<ssescalarmode>")])
;; Scalar square root (sqrtss / sqrtsd) inside a vec_merge, SSE form.
;; Operand 2 is tied to the destination ("0"), so the template names only
;; %0 and %1.
908 (define_insn "<sse>_vmsqrt<mode>2"
909 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
910 (vec_merge:SSEMODEF2P
912 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
913 (match_operand:SSEMODEF2P 2 "register_operand" "0")
915 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
916 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
917 [(set_attr "type" "sse")
918 (set_attr "atom_sse_attr" "sqrt")
919 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander (UNSPEC_RSQRT), enabled for
;; TARGET_AVX && TARGET_SSE_MATH.  The expansion calls ix86_emit_swsqrtsf
;; with a final argument of 1.
921 (define_expand "rsqrtv8sf2"
922 [(set (match_operand:V8SF 0 "register_operand" "")
924 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
925 "TARGET_AVX && TARGET_SSE_MATH"
927 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; Emits vrsqrtps on a V8SF register or memory operand.  The operation is
;; represented as UNSPEC_RSQRT.
931 (define_insn "avx_rsqrtv8sf2"
932 [(set (match_operand:V8SF 0 "register_operand" "=x")
934 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
936 "vrsqrtps\t{%1, %0|%0, %1}"
937 [(set_attr "type" "sse")
938 (set_attr "prefix" "vex")
939 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  The expansion
;; calls ix86_emit_swsqrtsf with a final argument of 1.
941 (define_expand "rsqrtv4sf2"
942 [(set (match_operand:V4SF 0 "register_operand" "")
944 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
947 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Emits rsqrtps (template "%vrsqrtps", prefix attribute "maybe_vex") on a
;; V4SF register or memory operand.  The operation is represented as
;; UNSPEC_RSQRT.
951 (define_insn "sse_rsqrtv4sf2"
952 [(set (match_operand:V4SF 0 "register_operand" "=x")
954 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
956 "%vrsqrtps\t{%1, %0|%0, %1}"
957 [(set_attr "type" "sse")
958 (set_attr "prefix" "maybe_vex")
959 (set_attr "mode" "V4SF")])
;; Scalar reciprocal square root, VEX form.  vrsqrtss takes operand 1
;; (register or memory, inside the unspec) and the separate register
;; operand 2, and writes operand 0.
961 (define_insn "*avx_vmrsqrtv4sf2"
962 [(set (match_operand:V4SF 0 "register_operand" "=x")
964 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
966 (match_operand:V4SF 2 "register_operand" "x")
969 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
970 [(set_attr "type" "sse")
971 (set_attr "prefix" "vex")
972 (set_attr "mode" "SF")])
;; Scalar reciprocal square root, SSE form.  Operand 2 is tied to the
;; destination ("0"), so the rsqrtss template names only %0 and %1.
974 (define_insn "sse_vmrsqrtv4sf2"
975 [(set (match_operand:V4SF 0 "register_operand" "=x")
977 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
979 (match_operand:V4SF 2 "register_operand" "0")
982 "rsqrtss\t{%1, %0|%0, %1}"
983 [(set_attr "type" "sse")
984 (set_attr "mode" "SF")])
986 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
987 ;; isn't really correct, as those rtl operators aren't defined when
988 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; smax/smin expander for V8SF and V4DF.  When flag_finite_math_only is
;; not set, operand 1 is forced into a register first.  The operands are
;; then passed through ix86_fixup_binary_operands_no_copy.
990 (define_expand "<code><mode>3"
991 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
992 (smaxmin:AVX256MODEF2P
993 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
994 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
995 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
997 if (!flag_finite_math_only)
998 operands[1] = force_reg (<MODE>mode, operands[1]);
999 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Binary expander over the SSEMODEF2P modes, enabled when
;; SSE_VEC_FLOAT_MODE_P holds.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register;
;; ix86_fixup_binary_operands_no_copy is then run on the operands.
1002 (define_expand "<code><mode>3"
1003 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1005 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1006 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1007 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1009 if (!flag_finite_math_only)
1010 operands[1] = force_reg (<MODE>mode, operands[1]);
1011 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; VEX-encoded packed max/min for the flag_finite_math_only case.
;; Operand 1 carries the "%" (commutative) marker; the insn additionally
;; requires ix86_binary_operator_ok to accept the operands.
1014 (define_insn "*avx_<code><mode>3_finite"
1015 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1017 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1018 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1019 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1020 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1021 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1022 [(set_attr "type" "sseadd")
1023 (set_attr "prefix" "vex")
1024 (set_attr "mode" "<MODE>")])
;; Two-operand packed max/min for the flag_finite_math_only case.
;; Operand 1 is tied to the destination and marked commutative ("%0");
;; the insn additionally requires ix86_binary_operator_ok.
1026 (define_insn "*<code><mode>3_finite"
1027 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1029 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1030 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1031 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1032 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1033 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1034 [(set_attr "type" "sseadd")
1035 (set_attr "mode" "<MODE>")])
;; VEX-encoded packed max/min that must also be valid when
;; flag_finite_math_only is NOT set.  The hardware result depends on
;; operand order for NaN and -0.0 inputs, so operand 1 must not be marked
;; commutative and must be a register, matching the two-operand
;; "*<code><mode>3" pattern and the expander, which forces operand 1 into
;; a register in the non-finite case.
1037 (define_insn "*avx_<code><mode>3"
1038 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1040 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1041 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1042 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1043 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1044 [(set_attr "type" "sseadd")
1045 (set_attr "prefix" "vex")
1046 (set_attr "mode" "<avxvecmode>")])
;; Two-operand packed max/min without a finite-math requirement.
;; Operand 1 must be a register tied to the destination ("0") and carries
;; no commutative marker; operand 2 may be a register or memory.
1048 (define_insn "*<code><mode>3"
1049 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1051 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1052 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1053 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1054 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1055 [(set_attr "type" "sseadd")
1056 (set_attr "mode" "<MODE>")])
;; VEX-encoded scalar max/min expressed as a vec_merge; emits the
;; three-operand v<maxmin_float><ssescalarmodesuffix> form.  Enabled for
;; AVX128_VEC_FLOAT_MODE_P modes.
1058 (define_insn "*avx_vm<code><mode>3"
1059 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1060 (vec_merge:SSEMODEF2P
1062 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1063 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1066 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1067 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1068 [(set_attr "type" "sse")
1069 (set_attr "prefix" "vex")
1070 (set_attr "mode" "<ssescalarmode>")])
;; Legacy-encoded scalar max/min expressed as a vec_merge.  Operand 1 is
;; tied to the destination ("0"); operand 2 may be a register or memory.
1072 (define_insn "<sse>_vm<code><mode>3"
1073 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1074 (vec_merge:SSEMODEF2P
1076 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1077 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1080 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1081 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1082 [(set_attr "type" "sseadd")
1083 (set_attr "mode" "<ssescalarmode>")])
1085 ;; These versions of the min/max patterns implement exactly the operations
1086 ;; min = (op1 < op2 ? op1 : op2)
1087 ;; max = (!(op1 < op2) ? op1 : op2)
1088 ;; Their operands are not commutative, and thus they may be used in the
1089 ;; presence of -0.0 and NaN.
;; VEX-encoded IEEE-exact packed minimum (vmin<ssemodesuffix>).
;; Operand 1 must be a register and has no commutative marker, so the
;; operand order is preserved; operand 2 may be a register or memory.
1091 (define_insn "*avx_ieee_smin<mode>3"
1092 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1094 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1095 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1097 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1098 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1099 [(set_attr "type" "sseadd")
1100 (set_attr "prefix" "vex")
1101 (set_attr "mode" "<avxvecmode>")])
;; VEX-encoded IEEE-exact packed maximum (vmax<ssemodesuffix>).
;; Operand 1 must be a register and has no commutative marker, so the
;; operand order is preserved; operand 2 may be a register or memory.
1103 (define_insn "*avx_ieee_smax<mode>3"
1104 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1106 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1107 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1109 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1110 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1111 [(set_attr "type" "sseadd")
1112 (set_attr "prefix" "vex")
1113 (set_attr "mode" "<avxvecmode>")])
;; Legacy-encoded IEEE-exact packed minimum (min<ssemodesuffix>).
;; Operand 1 is tied to the destination ("0") with no commutative marker.
1115 (define_insn "*ieee_smin<mode>3"
1116 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1118 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1119 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1121 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1122 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1123 [(set_attr "type" "sseadd")
1124 (set_attr "mode" "<MODE>")])
;; Legacy-encoded IEEE-exact packed maximum (max<ssemodesuffix>).
;; Operand 1 is tied to the destination ("0") with no commutative marker.
1126 (define_insn "*ieee_smax<mode>3"
1127 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1129 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1130 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1132 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1133 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1134 [(set_attr "type" "sseadd")
1135 (set_attr "mode" "<MODE>")])
;; VEX-encoded vaddsubps on V8SF.  The RTL pairs an operation on
;; operands 1 and 2 with (minus op1 op2) on the same operands (match_dup).
1137 (define_insn "avx_addsubv8sf3"
1138 [(set (match_operand:V8SF 0 "register_operand" "=x")
1141 (match_operand:V8SF 1 "register_operand" "x")
1142 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1143 (minus:V8SF (match_dup 1) (match_dup 2))
1146 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1147 [(set_attr "type" "sseadd")
1148 (set_attr "prefix" "vex")
1149 (set_attr "mode" "V8SF")])
;; VEX-encoded vaddsubpd on V4DF.  The RTL pairs an operation on
;; operands 1 and 2 with (minus op1 op2) on the same operands (match_dup).
1151 (define_insn "avx_addsubv4df3"
1152 [(set (match_operand:V4DF 0 "register_operand" "=x")
1155 (match_operand:V4DF 1 "register_operand" "x")
1156 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1157 (minus:V4DF (match_dup 1) (match_dup 2))
1160 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1161 [(set_attr "type" "sseadd")
1162 (set_attr "prefix" "vex")
1163 (set_attr "mode" "V4DF")])
;; VEX-encoded vaddsubps on V4SF (three-operand form).
1165 (define_insn "*avx_addsubv4sf3"
1166 [(set (match_operand:V4SF 0 "register_operand" "=x")
1169 (match_operand:V4SF 1 "register_operand" "x")
1170 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1171 (minus:V4SF (match_dup 1) (match_dup 2))
1174 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1175 [(set_attr "type" "sseadd")
1176 (set_attr "prefix" "vex")
1177 (set_attr "mode" "V4SF")])
;; Legacy-encoded addsubps on V4SF.  Operand 1 is tied to the destination
;; ("0"); the insn sets prefix_rep to 1.
1179 (define_insn "sse3_addsubv4sf3"
1180 [(set (match_operand:V4SF 0 "register_operand" "=x")
1183 (match_operand:V4SF 1 "register_operand" "0")
1184 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1185 (minus:V4SF (match_dup 1) (match_dup 2))
1188 "addsubps\t{%2, %0|%0, %2}"
1189 [(set_attr "type" "sseadd")
1190 (set_attr "prefix_rep" "1")
1191 (set_attr "mode" "V4SF")])
;; VEX-encoded vaddsubpd on V2DF (three-operand form).
1193 (define_insn "*avx_addsubv2df3"
1194 [(set (match_operand:V2DF 0 "register_operand" "=x")
1197 (match_operand:V2DF 1 "register_operand" "x")
1198 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1199 (minus:V2DF (match_dup 1) (match_dup 2))
1202 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1203 [(set_attr "type" "sseadd")
1204 (set_attr "prefix" "vex")
1205 (set_attr "mode" "V2DF")])
;; Legacy-encoded addsubpd on V2DF.  Operand 1 is tied to the destination
;; ("0"); the insn is tagged atom_unit "complex".
1207 (define_insn "sse3_addsubv2df3"
1208 [(set (match_operand:V2DF 0 "register_operand" "=x")
1211 (match_operand:V2DF 1 "register_operand" "0")
1212 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1213 (minus:V2DF (match_dup 1) (match_dup 2))
1216 "addsubpd\t{%2, %0|%0, %2}"
1217 [(set_attr "type" "sseadd")
1218 (set_attr "atom_unit" "complex")
1219 (set_attr "mode" "V2DF")])
;; VEX-encoded horizontal vh<plusminus_mnemonic>pd on V4DF.  The RTL
;; selects adjacent DF element pairs (0,1) and (2,3) from operand 1 and
;; from operand 2.
1221 (define_insn "avx_h<plusminus_insn>v4df3"
1222 [(set (match_operand:V4DF 0 "register_operand" "=x")
1227 (match_operand:V4DF 1 "register_operand" "x")
1228 (parallel [(const_int 0)]))
1229 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1231 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1232 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1236 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1237 (parallel [(const_int 0)]))
1238 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1240 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1241 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1243 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1244 [(set_attr "type" "sseadd")
1245 (set_attr "prefix" "vex")
1246 (set_attr "mode" "V4DF")])
;; VEX-encoded horizontal vh<plusminus_mnemonic>ps on V8SF.  The RTL
;; selects adjacent SF element pairs: elements 0-3 of operand 1, then 0-3
;; of operand 2, then 4-7 of operand 1, then 4-7 of operand 2.
1248 (define_insn "avx_h<plusminus_insn>v8sf3"
1249 [(set (match_operand:V8SF 0 "register_operand" "=x")
1255 (match_operand:V8SF 1 "register_operand" "x")
1256 (parallel [(const_int 0)]))
1257 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1259 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1260 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1264 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1265 (parallel [(const_int 0)]))
1266 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1268 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1269 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1273 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1274 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1276 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1277 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1283 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1284 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1286 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1287 [(set_attr "type" "sseadd")
1288 (set_attr "prefix" "vex")
1289 (set_attr "mode" "V8SF")])
;; VEX-encoded horizontal vh<plusminus_mnemonic>ps on V4SF.  The RTL
;; selects adjacent SF element pairs (0,1) and (2,3) from operand 1 and
;; from operand 2.
1291 (define_insn "*avx_h<plusminus_insn>v4sf3"
1292 [(set (match_operand:V4SF 0 "register_operand" "=x")
1297 (match_operand:V4SF 1 "register_operand" "x")
1298 (parallel [(const_int 0)]))
1299 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1301 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1302 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1306 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1307 (parallel [(const_int 0)]))
1308 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1310 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1311 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1313 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1314 [(set_attr "type" "sseadd")
1315 (set_attr "prefix" "vex")
1316 (set_attr "mode" "V4SF")])
;; Legacy-encoded horizontal h<plusminus_mnemonic>ps on V4SF.  Operand 1
;; is tied to the destination ("0"); the RTL selects adjacent SF element
;; pairs (0,1) and (2,3) from operand 1 and from operand 2.
1318 (define_insn "sse3_h<plusminus_insn>v4sf3"
1319 [(set (match_operand:V4SF 0 "register_operand" "=x")
1324 (match_operand:V4SF 1 "register_operand" "0")
1325 (parallel [(const_int 0)]))
1326 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1328 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1329 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1333 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1334 (parallel [(const_int 0)]))
1335 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1337 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1338 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1340 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1341 [(set_attr "type" "sseadd")
1342 (set_attr "atom_unit" "complex")
1343 (set_attr "prefix_rep" "1")
1344 (set_attr "mode" "V4SF")])
;; VEX-encoded horizontal vh<plusminus_mnemonic>pd on V2DF.  The RTL
;; selects DF elements 0 and 1 of operand 1 and of operand 2.
1346 (define_insn "*avx_h<plusminus_insn>v2df3"
1347 [(set (match_operand:V2DF 0 "register_operand" "=x")
1351 (match_operand:V2DF 1 "register_operand" "x")
1352 (parallel [(const_int 0)]))
1353 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1356 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1357 (parallel [(const_int 0)]))
1358 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1360 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1361 [(set_attr "type" "sseadd")
1362 (set_attr "prefix" "vex")
1363 (set_attr "mode" "V2DF")])
;; Legacy-encoded horizontal h<plusminus_mnemonic>pd on V2DF.  Operand 1
;; is tied to the destination ("0"); the RTL selects DF elements 0 and 1
;; of operand 1 and of operand 2.
1365 (define_insn "sse3_h<plusminus_insn>v2df3"
1366 [(set (match_operand:V2DF 0 "register_operand" "=x")
1370 (match_operand:V2DF 1 "register_operand" "0")
1371 (parallel [(const_int 0)]))
1372 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1375 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1376 (parallel [(const_int 0)]))
1377 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1379 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1380 [(set_attr "type" "sseadd")
1381 (set_attr "mode" "V2DF")])
;; Sum-reduction expander for V4SF.  Two code paths are visible: two
;; chained gen_sse3_haddv4sf3 calls through a fresh temporary, and a call
;; to ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the condition choosing between them is not visible here;
;; presumably SSE3 availability -- confirm.
1383 (define_expand "reduc_splus_v4sf"
1384 [(match_operand:V4SF 0 "register_operand" "")
1385 (match_operand:V4SF 1 "register_operand" "")]
1390 rtx tmp = gen_reg_rtx (V4SFmode);
1391 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1392 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1395 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum-reduction expander for V2DF: emits one gen_sse3_haddv2df3 with
;; operand 1 used as both sources.
1399 (define_expand "reduc_splus_v2df"
1400 [(match_operand:V2DF 0 "register_operand" "")
1401 (match_operand:V2DF 1 "register_operand" "")]
1404 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Max-reduction expander for V4SF: delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining generator.
1408 (define_expand "reduc_smax_v4sf"
1409 [(match_operand:V4SF 0 "register_operand" "")
1410 (match_operand:V4SF 1 "register_operand" "")]
1413 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Min-reduction expander for V4SF: delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining generator.
1417 (define_expand "reduc_smin_v4sf"
1418 [(match_operand:V4SF 0 "register_operand" "")
1419 (match_operand:V4SF 1 "register_operand" "")]
1422 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1426 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1428 ;; Parallel floating point comparisons
1430 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded packed compare with an explicit predicate immediate.
;; Operand 3 is an SImode constant accepted by const_0_to_31_operand and
;; is printed as the immediate of vcmp<ssemodesuffix>.
1432 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1433 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1435 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1436 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1437 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1440 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1441 [(set_attr "type" "ssecmp")
1442 (set_attr "length_immediate" "1")
1443 (set_attr "prefix" "vex")
1444 (set_attr "mode" "<MODE>")])
;; VEX-encoded scalar compare with an explicit predicate immediate,
;; expressed as a vec_merge.  Operand 3 is an SImode constant accepted by
;; const_0_to_31_operand and is printed as the immediate of
;; vcmp<ssescalarmodesuffix>.  The destination carries the "=x" output
;; constraint so that it is allocated to an SSE register, like every other
;; operand of this insn.
1446 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1447 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1448 (vec_merge:SSEMODEF2P
1450 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1451 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1452 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1457 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1458 [(set_attr "type" "ssecmp")
1459 (set_attr "length_immediate" "1")
1460 (set_attr "prefix" "vex")
1461 (set_attr "mode" "<ssescalarmode>")])
1463 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1464 ;; may generate 256bit vector compare instructions.
;; VEX-encoded packed compare whose predicate comes from the RTL
;; comparison operator (operand 3, avx_comparison_float_operator); the
;; template prints it with %D3.
1465 (define_insn "*avx_maskcmp<mode>3"
1466 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1467 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1468 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1469 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1470 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1471 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1472 [(set_attr "type" "ssecmp")
1473 (set_attr "prefix" "vex")
1474 (set_attr "length_immediate" "1")
1475 (set_attr "mode" "<avxvecmode>")])
;; Legacy-encoded compare over the SSEMODEF4 modes; the predicate comes
;; from operand 3 (sse_comparison_operator) and is printed with %D3.
;; Operand 1 is tied to the destination ("0").
;; NOTE(review): the first line of the insn condition is not visible in
;; this excerpt; only its trailing "&& (...)" clause is shown.
1477 (define_insn "<sse>_maskcmp<mode>3"
1478 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1479 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1480 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1481 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1483 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1484 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1485 [(set_attr "type" "ssecmp")
1486 (set_attr "length_immediate" "1")
1487 (set_attr "mode" "<MODE>")])
;; VEX-encoded scalar compare expressed as a vec_merge; the predicate
;; comes from operand 3 (sse_comparison_operator) and is printed with %D3.
;; NOTE(review): unlike the other compare insns nearby, no
;; length_immediate attribute is set here -- confirm whether intended.
1489 (define_insn "*avx_vmmaskcmp<mode>3"
1490 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1491 (vec_merge:SSEMODEF2P
1492 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1493 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1494 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1497 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1498 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1499 [(set_attr "type" "ssecmp")
1500 (set_attr "prefix" "vex")
1501 (set_attr "mode" "<ssescalarmode>")])
;; Legacy-encoded scalar compare expressed as a vec_merge; the predicate
;; comes from operand 3 (sse_comparison_operator) and is printed with %D3.
;; Operand 1 is tied to the destination ("0").
1503 (define_insn "<sse>_vmmaskcmp<mode>3"
1504 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1505 (vec_merge:SSEMODEF2P
1506 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1507 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1508 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1511 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1512 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1513 [(set_attr "type" "ssecmp")
1514 (set_attr "length_immediate" "1")
1515 (set_attr "mode" "<ssescalarmode>")])
;; comis<ssemodefsuffix>: sets FLAGS_REG in CCFP mode from element 0 of
;; operand 0 and element 0 of operand 1.  The prefix_data16 attribute is
;; selected by whether the mode attribute is DF; prefix_rep is 0.
1517 (define_insn "<sse>_comi"
1518 [(set (reg:CCFP FLAGS_REG)
1521 (match_operand:<ssevecmode> 0 "register_operand" "x")
1522 (parallel [(const_int 0)]))
1524 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1525 (parallel [(const_int 0)]))))]
1526 "SSE_FLOAT_MODE_P (<MODE>mode)"
1527 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1528 [(set_attr "type" "ssecomi")
1529 (set_attr "prefix" "maybe_vex")
1530 (set_attr "prefix_rep" "0")
1531 (set (attr "prefix_data16")
1532 (if_then_else (eq_attr "mode" "DF")
1534 (const_string "0")))
1535 (set_attr "mode" "<MODE>")])
;; ucomis<ssemodefsuffix>: sets FLAGS_REG in CCFPU mode from element 0 of
;; operand 0 and element 0 of operand 1.  The prefix_data16 attribute is
;; selected by whether the mode attribute is DF; prefix_rep is 0.
1537 (define_insn "<sse>_ucomi"
1538 [(set (reg:CCFPU FLAGS_REG)
1541 (match_operand:<ssevecmode> 0 "register_operand" "x")
1542 (parallel [(const_int 0)]))
1544 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1545 (parallel [(const_int 0)]))))]
1546 "SSE_FLOAT_MODE_P (<MODE>mode)"
1547 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1548 [(set_attr "type" "ssecomi")
1549 (set_attr "prefix" "maybe_vex")
1550 (set_attr "prefix_rep" "0")
1551 (set (attr "prefix_data16")
1552 (if_then_else (eq_attr "mode" "DF")
1554 (const_string "0")))
1555 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for float vector modes:
;; operand 0 = (operand 4 <op3> operand 5) ? operand 1 : operand 2.
;; The expansion is delegated to ix86_expand_fp_vcond, whose boolean
;; result is captured in "ok".
1557 (define_expand "vcond<mode>"
1558 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1559 (if_then_else:AVXMODEF2P
1560 (match_operator 3 ""
1561 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1562 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1563 (match_operand:AVXMODEF2P 1 "general_operand" "")
1564 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1565 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1566 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1568 bool ok = ix86_expand_fp_vcond (operands);
1573 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1575 ;; Parallel floating point logical operations
1577 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded packed and-not (vandn<ssemodesuffix>), three-operand form:
;; operand 1 must be a register, operand 2 may be a register or memory.
1579 (define_insn "avx_andnot<mode>3"
1580 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1583 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1584 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1585 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1586 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1587 [(set_attr "type" "sselog")
1588 (set_attr "prefix" "vex")
1589 (set_attr "mode" "<avxvecmode>")])
;; Legacy-encoded packed and-not (andn<ssemodesuffix>).  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory.
1591 (define_insn "<sse>_andnot<mode>3"
1592 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1595 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1596 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1597 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1598 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1599 [(set_attr "type" "sselog")
1600 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic operations on the AVX256MODEF2P modes; the
;; only preparation step is ix86_fixup_binary_operands_no_copy.
1602 (define_expand "<code><mode>3"
1603 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1604 (any_logic:AVX256MODEF2P
1605 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1606 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1607 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1608 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; VEX-encoded packed logic insn (commutative, "%x").  When
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" form is emitted
;; regardless of the element type; otherwise the mode's own suffix is used.
1610 (define_insn "*avx_<code><mode>3"
1611 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1612 (any_logic:AVXMODEF2P
1613 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1614 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1615 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1616 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1618 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1619 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1621 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1623 [(set_attr "type" "sselog")
1624 (set_attr "prefix" "vex")
1625 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic operations on the SSEMODEF2P modes; the
;; only preparation step is ix86_fixup_binary_operands_no_copy.
1627 (define_expand "<code><mode>3"
1628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1629 (any_logic:SSEMODEF2P
1630 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1633 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Legacy-encoded packed logic insn (commutative, operand 1 tied to the
;; destination via "%0").  When TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set the "ps" form is emitted; otherwise the mode's own suffix is used.
1635 (define_insn "*<code><mode>3"
1636 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1637 (any_logic:SSEMODEF2P
1638 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1639 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1640 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1641 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1643 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1644 return "<logic>ps\t{%2, %0|%0, %2}";
1646 return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
1648 [(set_attr "type" "sselog")
1649 (set_attr "mode" "<MODE>")])
;; copysign expander for the SSEMODEF2P modes.  Operand 3 is a mask built
;; by ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0).  The visible
;; RTL combines (not mask) with operand 1 and mask with operand 2, then
;; IORs the fresh temporaries 4 and 5 into operand 0.
;; NOTE(review): the set destinations for the two partial results are not
;; visible in this excerpt; presumably operands 4 and 5 -- confirm.
1651 (define_expand "copysign<mode>3"
1654 (not:SSEMODEF2P (match_dup 3))
1655 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1657 (and:SSEMODEF2P (match_dup 3)
1658 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1659 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1660 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1661 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1663 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1665 operands[4] = gen_reg_rtx (<MODE>mode);
1666 operands[5] = gen_reg_rtx (<MODE>mode);
1669 ;; Also define scalar versions. These are used for abs, neg, and
1670 ;; conditional move. Using subregs into vector modes causes register
1671 ;; allocation lossage. These patterns do not allow memory operands
1672 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) and-not implemented with the packed VEX insn
;; vandnp<ssemodefsuffix>.  All operands must be registers, and the mode
;; attribute is the containing vector mode.
1674 (define_insn "*avx_andnot<mode>3"
1675 [(set (match_operand:MODEF 0 "register_operand" "=x")
1678 (match_operand:MODEF 1 "register_operand" "x"))
1679 (match_operand:MODEF 2 "register_operand" "x")))]
1680 "AVX_FLOAT_MODE_P (<MODE>mode)"
1681 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1682 [(set_attr "type" "sselog")
1683 (set_attr "prefix" "vex")
1684 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) and-not implemented with the packed legacy insn
;; andnp<ssemodefsuffix>.  Operand 1 is tied to the destination ("0") and
;; all operands must be registers.
1686 (define_insn "*andnot<mode>3"
1687 [(set (match_operand:MODEF 0 "register_operand" "=x")
1690 (match_operand:MODEF 1 "register_operand" "0"))
1691 (match_operand:MODEF 2 "register_operand" "x")))]
1692 "SSE_FLOAT_MODE_P (<MODE>mode)"
1693 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1694 [(set_attr "type" "sselog")
1695 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logic operation implemented with a packed VEX insn;
;; all operands must be registers.  The "ps" form is emitted when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise
;; v<logic>p<ssemodefsuffix>.
1697 (define_insn "*avx_<code><mode>3"
1698 [(set (match_operand:MODEF 0 "register_operand" "=x")
1700 (match_operand:MODEF 1 "register_operand" "x")
1701 (match_operand:MODEF 2 "register_operand" "x")))]
1702 "AVX_FLOAT_MODE_P (<MODE>mode)"
1704 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1705 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1707 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1709 [(set_attr "type" "sselog")
1710 (set_attr "prefix" "vex")
1711 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logic operation implemented with a packed legacy
;; insn; operand 1 is tied to the destination ("0") and all operands must
;; be registers.  The "ps" form is emitted when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise
;; <logic>p<ssemodefsuffix>.
1713 (define_insn "*<code><mode>3"
1714 [(set (match_operand:MODEF 0 "register_operand" "=x")
1716 (match_operand:MODEF 1 "register_operand" "0")
1717 (match_operand:MODEF 2 "register_operand" "x")))]
1718 "SSE_FLOAT_MODE_P (<MODE>mode)"
1720 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1721 return "<logic>ps\t{%2, %0|%0, %2}";
1723 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1725 [(set_attr "type" "sselog")
1726 (set_attr "mode" "<ssevecmode>")])
1728 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1730 ;; FMA4 floating point multiply/accumulate instructions. This
1731 ;; includes both the scalar and the vector versions of the instructions.
1734 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1736 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1737 ;; combine to generate a multiply/add with two memory references. We then
1738 ;; split this insn, into loading up the destination register with one of the
1739 ;; memory operations. If we don't manage to split the insn, reload will
1740 ;; generate the appropriate moves. The reason this is needed, is that combine
1741 ;; has already folded one of the memory references into both the multiply and
1742 ;; add insns, and it can't generate a new pseudo. I.e.:
1743 ;; (set (reg1) (mem (addr1)))
1744 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1745 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; FMA4 multiply-add over the FMA4MODEF4 modes (vfmadd<ssemodesuffix>).
;; Two alternatives: memory is allowed either in operand 3 (the value
;; combined with the product) or in operand 2 (a multiplicand).
1747 (define_insn "fma4_fmadd<mode>4256"
1748 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1751 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1752 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1753 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1754 "TARGET_FMA4 && TARGET_FUSED_MADD"
1755 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1756 [(set_attr "type" "ssemuladd")
1757 (set_attr "mode" "<MODE>")])
1759 ;; Floating multiply and subtract.
;; Emits vfmsub<ssemodesuffix> over the FMA4MODEF4 modes.  Two
;; alternatives: memory is allowed either in operand 3 or in operand 2.
1760 (define_insn "fma4_fmsub<mode>4256"
1761 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1764 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1765 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1766 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1767 "TARGET_FMA4 && TARGET_FUSED_MADD"
1768 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1769 [(set_attr "type" "ssemuladd")
1770 (set_attr "mode" "<MODE>")])
1772 ;; Floating point negative multiply and add.
1773 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; Operand 3 therefore appears first in the RTL, ahead of the multiplicands
;; (operands 1 and 2); the template still prints operands in 1, 2, 3 order.
1774 (define_insn "fma4_fnmadd<mode>4256"
1775 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1777 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1779 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1780 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1781 "TARGET_FMA4 && TARGET_FUSED_MADD"
1782 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1783 [(set_attr "type" "ssemuladd")
1784 (set_attr "mode" "<MODE>")])
1786 ;; Floating point negative multiply and subtract.
;; Emits vfnmsub<ssemodesuffix> over the FMA4MODEF4 modes.  Two
;; alternatives: memory is allowed either in operand 3 or in operand 2.
1787 (define_insn "fma4_fnmsub<mode>4256"
1788 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1792 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1793 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1794 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1795 "TARGET_FMA4 && TARGET_FUSED_MADD"
1796 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1797 [(set_attr "type" "ssemuladd")
1798 (set_attr "mode" "<MODE>")])
;; FMA4 multiply-add over the SSEMODEF4 modes (vfmadd<ssemodesuffix>).
;; Two alternatives: memory is allowed either in operand 3 or in
;; operand 2.
1800 (define_insn "fma4_fmadd<mode>4"
1801 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1804 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1805 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1806 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1807 "TARGET_FMA4 && TARGET_FUSED_MADD"
1808 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1809 [(set_attr "type" "ssemuladd")
1810 (set_attr "mode" "<MODE>")])
1812 ;; For the scalar operations, use operand1 for the upper words that aren't
1813 ;; modified, so restrict the forms that are generated.
1814 ;; Scalar version of fmadd.
;; Expressed as a vec_merge over the SSEMODEF2P modes; emits
;; vfmadd<ssescalarmodesuffix>.
1815 (define_insn "fma4_vmfmadd<mode>4"
1816 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1817 (vec_merge:SSEMODEF2P
1820 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1821 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1822 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1825 "TARGET_FMA4 && TARGET_FUSED_MADD"
1826 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1827 [(set_attr "type" "ssemuladd")
1828 (set_attr "mode" "<MODE>")])
1830 ;; Floating multiply and subtract.
1831 ;; Allow two memory operands the same as fmadd.
;; Emits vfmsub<ssemodesuffix> over the SSEMODEF4 modes.
1832 (define_insn "fma4_fmsub<mode>4"
1833 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1836 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1837 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1838 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1839 "TARGET_FMA4 && TARGET_FUSED_MADD"
1840 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1841 [(set_attr "type" "ssemuladd")
1842 (set_attr "mode" "<MODE>")])
1844 ;; For the scalar operations, use operand1 for the upper words that aren't
1845 ;; modified, so restrict the forms that are generated.
1846 ;; Scalar version of fmsub.
;; Expressed as a vec_merge over the SSEMODEF2P modes; emits
;; vfmsub<ssescalarmodesuffix>.
1847 (define_insn "fma4_vmfmsub<mode>4"
1848 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1849 (vec_merge:SSEMODEF2P
1852 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1853 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1854 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1857 "TARGET_FMA4 && TARGET_FUSED_MADD"
1858 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1859 [(set_attr "type" "ssemuladd")
1860 (set_attr "mode" "<MODE>")])
1862 ;; Floating point negative multiply and add.
1863 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; Operand 3 therefore appears first in the RTL, ahead of the multiplicands
;; (operands 1 and 2); the template still prints operands in 1, 2, 3 order.
1864 (define_insn "fma4_fnmadd<mode>4"
1865 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1867 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1869 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1870 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1871 "TARGET_FMA4 && TARGET_FUSED_MADD"
1872 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1873 [(set_attr "type" "ssemuladd")
1874 (set_attr "mode" "<MODE>")])
1876 ;; For the scalar operations, use operand1 for the upper words that aren't
1877 ;; modified, so restrict the forms that are generated.
1878 ;; Scalar version of fnmadd.
;; Expressed as a vec_merge over the SSEMODEF2P modes, with operand 3
;; listed ahead of the multiplicands; emits vfnmadd<ssescalarmodesuffix>.
1879 (define_insn "fma4_vmfnmadd<mode>4"
1880 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1881 (vec_merge:SSEMODEF2P
1883 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1885 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1886 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1889 "TARGET_FMA4 && TARGET_FUSED_MADD"
1890 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1891 [(set_attr "type" "ssemuladd")
1892 (set_attr "mode" "<MODE>")])
1894 ;; Floating point negative multiply and subtract.
1895 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; Emits vfnmsub<ssemodesuffix> over the SSEMODEF4 modes.
1896 (define_insn "fma4_fnmsub<mode>4"
1897 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1901 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1902 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1903 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1904 "TARGET_FMA4 && TARGET_FUSED_MADD"
1905 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1906 [(set_attr "type" "ssemuladd")
1907 (set_attr "mode" "<MODE>")])
1909 ;; For the scalar operations, use operand1 for the upper words that aren't
1910 ;; modified, so restrict the forms that are generated.
1911 ;; Scalar version of fnmsub.
;; Expressed as a vec_merge over the SSEMODEF2P modes; emits
;; vfnmsub<ssescalarmodesuffix>.
1912 (define_insn "fma4_vmfnmsub<mode>4"
1913 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1914 (vec_merge:SSEMODEF2P
1918 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1919 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1920 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1923 "TARGET_FMA4 && TARGET_FUSED_MADD"
1924 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1925 [(set_attr "type" "ssemuladd")
1926 (set_attr "mode" "<MODE>")])
;; Intrinsic form of fmadd for the FMA4MODEF4 modes: the operation is wrapped
;; in UNSPEC_FMA4_INTRINSIC and emits vfmadd<ssemodesuffix>.  Alternative 0
;; allows memory in operand 3, alternative 1 in operand 2.
1928 (define_insn "fma4i_fmadd<mode>4256"
1929 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1933 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1934 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1935 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1936 UNSPEC_FMA4_INTRINSIC))]
1938 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1939 [(set_attr "type" "ssemuladd")
1940 (set_attr "mode" "<MODE>")])
;; Intrinsic form of fmsub for the FMA4MODEF4 modes: wrapped in
;; UNSPEC_FMA4_INTRINSIC, emits vfmsub<ssemodesuffix>.  Alternative 0 allows
;; memory in operand 3, alternative 1 in operand 2.
1942 (define_insn "fma4i_fmsub<mode>4256"
1943 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1947 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1948 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1949 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1950 UNSPEC_FMA4_INTRINSIC))]
1952 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1953 [(set_attr "type" "ssemuladd")
1954 (set_attr "mode" "<MODE>")])
;; Intrinsic form of fnmadd for the FMA4MODEF4 modes: wrapped in
;; UNSPEC_FMA4_INTRINSIC, emits vfnmadd<ssemodesuffix>.  Operand 3 appears
;; first in the RTL, ahead of the commutative operand 1 / operand 2 pair.
1956 (define_insn "fma4i_fnmadd<mode>4256"
1957 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1960 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1962 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1963 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1964 UNSPEC_FMA4_INTRINSIC))]
1966 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1967 [(set_attr "type" "ssemuladd")
1968 (set_attr "mode" "<MODE>")])
;; Intrinsic form of fnmsub for the FMA4MODEF4 modes: wrapped in
;; UNSPEC_FMA4_INTRINSIC, emits vfnmsub<ssemodesuffix>.  Alternative 0 allows
;; memory in operand 3, alternative 1 in operand 2.
1970 (define_insn "fma4i_fnmsub<mode>4256"
1971 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1976 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1977 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1978 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1979 UNSPEC_FMA4_INTRINSIC))]
1981 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1982 [(set_attr "type" "ssemuladd")
1983 (set_attr "mode" "<MODE>")])
;; Intrinsic form of fmadd for the SSEMODEF2P modes: wrapped in
;; UNSPEC_FMA4_INTRINSIC, emits vfmadd<ssemodesuffix>.  Alternative 0 allows
;; memory in operand 3, alternative 1 in operand 2.
1985 (define_insn "fma4i_fmadd<mode>4"
1986 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1990 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1991 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1992 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1993 UNSPEC_FMA4_INTRINSIC))]
1995 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1996 [(set_attr "type" "ssemuladd")
1997 (set_attr "mode" "<MODE>")])
;; Intrinsic form of fmsub for the SSEMODEF2P modes: wrapped in
;; UNSPEC_FMA4_INTRINSIC, emits vfmsub<ssemodesuffix>.  Alternative 0 allows
;; memory in operand 3, alternative 1 in operand 2.
1999 (define_insn "fma4i_fmsub<mode>4"
2000 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2004 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2005 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2006 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2007 UNSPEC_FMA4_INTRINSIC))]
2009 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2010 [(set_attr "type" "ssemuladd")
2011 (set_attr "mode" "<MODE>")])
;; Intrinsic form of fnmadd for the SSEMODEF2P modes: wrapped in
;; UNSPEC_FMA4_INTRINSIC, emits vfnmadd<ssemodesuffix>.  Operand 3 appears
;; first in the RTL, ahead of the commutative operand 1 / operand 2 pair.
2013 (define_insn "fma4i_fnmadd<mode>4"
2014 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2017 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2019 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2020 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
2021 UNSPEC_FMA4_INTRINSIC))]
2023 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2024 [(set_attr "type" "ssemuladd")
2025 (set_attr "mode" "<MODE>")])
;; Intrinsic form of fnmsub for the SSEMODEF2P modes: wrapped in
;; UNSPEC_FMA4_INTRINSIC, emits vfnmsub<ssemodesuffix>.  Alternative 0 allows
;; memory in operand 3, alternative 1 in operand 2.
2027 (define_insn "fma4i_fnmsub<mode>4"
2028 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2033 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2034 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2035 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2036 UNSPEC_FMA4_INTRINSIC))]
2038 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2039 [(set_attr "type" "ssemuladd")
2040 (set_attr "mode" "<MODE>")])
2042 ;; For the scalar operations, use operand1 for the upper words that aren't
2043 ;; modified, so restrict the forms that are accepted.
;; Scalar intrinsic fmadd: a vec_merge wrapped in UNSPEC_FMA4_INTRINSIC that
;; emits vfmadd<ssescalarmodesuffix>.  The "mode" attribute is the scalar
;; mode (<ssescalarmode>) rather than the vector mode.
2044 (define_insn "fma4i_vmfmadd<mode>4"
2045 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2047 [(vec_merge:SSEMODEF2P
2050 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2051 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2052 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2055 UNSPEC_FMA4_INTRINSIC))]
2057 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2058 [(set_attr "type" "ssemuladd")
2059 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic fmsub: a vec_merge wrapped in UNSPEC_FMA4_INTRINSIC that
;; emits vfmsub<ssescalarmodesuffix>.  The "mode" attribute is the scalar
;; mode (<ssescalarmode>).
2061 (define_insn "fma4i_vmfmsub<mode>4"
2062 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2064 [(vec_merge:SSEMODEF2P
2067 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2068 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2069 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2072 UNSPEC_FMA4_INTRINSIC))]
2074 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2075 [(set_attr "type" "ssemuladd")
2076 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic fnmadd: a vec_merge wrapped in UNSPEC_FMA4_INTRINSIC that
;; emits vfnmadd<ssescalarmodesuffix>.  Operand 3 comes first in the RTL;
;; the "mode" attribute is the scalar mode (<ssescalarmode>).
2078 (define_insn "fma4i_vmfnmadd<mode>4"
2079 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2081 [(vec_merge:SSEMODEF2P
2083 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2085 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2086 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2089 UNSPEC_FMA4_INTRINSIC))]
2091 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2092 [(set_attr "type" "ssemuladd")
2093 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic fnmsub: a vec_merge wrapped in UNSPEC_FMA4_INTRINSIC that
;; emits vfnmsub<ssescalarmodesuffix>.  The "mode" attribute is the scalar
;; mode (<ssescalarmode>).
2095 (define_insn "fma4i_vmfnmsub<mode>4"
2096 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2098 [(vec_merge:SSEMODEF2P
2102 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2103 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2104 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2107 UNSPEC_FMA4_INTRINSIC))]
2109 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2110 [(set_attr "type" "ssemuladd")
2111 (set_attr "mode" "<ssescalarmode>")])
2113 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2115 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2117 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V8SF fmaddsub pattern; emits vfmaddsubps when
;; TARGET_FMA4 && TARGET_FUSED_MADD.  The single memory operand is operand 3
;; in alternative 0 and operand 2 in alternative 1.
2119 (define_insn "fma4_fmaddsubv8sf4"
2120 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2124 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2125 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2126 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2133 "TARGET_FMA4 && TARGET_FUSED_MADD"
2134 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "V8SF")])
;; V4DF fmaddsub pattern; emits vfmaddsubpd when
;; TARGET_FMA4 && TARGET_FUSED_MADD.  The single memory operand is operand 3
;; in alternative 0 and operand 2 in alternative 1.
2138 (define_insn "fma4_fmaddsubv4df4"
2139 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2143 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2144 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2145 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2152 "TARGET_FMA4 && TARGET_FUSED_MADD"
2153 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2154 [(set_attr "type" "ssemuladd")
2155 (set_attr "mode" "V4DF")])
;; V4SF fmaddsub pattern; emits vfmaddsubps when
;; TARGET_FMA4 && TARGET_FUSED_MADD.  The single memory operand is operand 3
;; in alternative 0 and operand 2 in alternative 1.
2157 (define_insn "fma4_fmaddsubv4sf4"
2158 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2162 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2163 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2164 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2171 "TARGET_FMA4 && TARGET_FUSED_MADD"
2172 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2173 [(set_attr "type" "ssemuladd")
2174 (set_attr "mode" "V4SF")])
;; V2DF fmaddsub pattern; emits vfmaddsubpd when
;; TARGET_FMA4 && TARGET_FUSED_MADD.  The single memory operand is operand 3
;; in alternative 0 and operand 2 in alternative 1.
2176 (define_insn "fma4_fmaddsubv2df4"
2177 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2181 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2182 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2183 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2190 "TARGET_FMA4 && TARGET_FUSED_MADD"
2191 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2192 [(set_attr "type" "ssemuladd")
2193 (set_attr "mode" "V2DF")])
;; V8SF fmsubadd pattern; emits vfmsubaddps when
;; TARGET_FMA4 && TARGET_FUSED_MADD.  The single memory operand is operand 3
;; in alternative 0 and operand 2 in alternative 1.
2195 (define_insn "fma4_fmsubaddv8sf4"
2196 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2200 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2201 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2202 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2209 "TARGET_FMA4 && TARGET_FUSED_MADD"
2210 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2211 [(set_attr "type" "ssemuladd")
2212 (set_attr "mode" "V8SF")])
;; V4DF fmsubadd pattern; emits vfmsubaddpd when
;; TARGET_FMA4 && TARGET_FUSED_MADD.  The single memory operand is operand 3
;; in alternative 0 and operand 2 in alternative 1.
2214 (define_insn "fma4_fmsubaddv4df4"
2215 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2219 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2220 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2221 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2228 "TARGET_FMA4 && TARGET_FUSED_MADD"
2229 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2230 [(set_attr "type" "ssemuladd")
2231 (set_attr "mode" "V4DF")])
;; V4SF fmsubadd pattern; emits vfmsubaddps when
;; TARGET_FMA4 && TARGET_FUSED_MADD.  The single memory operand is operand 3
;; in alternative 0 and operand 2 in alternative 1.
2233 (define_insn "fma4_fmsubaddv4sf4"
2234 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2238 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2239 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2240 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2247 "TARGET_FMA4 && TARGET_FUSED_MADD"
2248 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2249 [(set_attr "type" "ssemuladd")
2250 (set_attr "mode" "V4SF")])
;; V2DF fmsubadd pattern; emits vfmsubaddpd when
;; TARGET_FMA4 && TARGET_FUSED_MADD.  The single memory operand is operand 3
;; in alternative 0 and operand 2 in alternative 1.
2252 (define_insn "fma4_fmsubaddv2df4"
2253 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2257 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2258 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2259 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2266 "TARGET_FMA4 && TARGET_FUSED_MADD"
2267 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2268 [(set_attr "type" "ssemuladd")
2269 (set_attr "mode" "V2DF")])
;; Intrinsic V8SF fmaddsub: the operation is wrapped in
;; UNSPEC_FMA4_INTRINSIC and emits vfmaddsubps.  Alternative 0 allows memory
;; in operand 3, alternative 1 in operand 2.
2271 (define_insn "fma4i_fmaddsubv8sf4"
2272 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2277 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2278 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2279 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2286 UNSPEC_FMA4_INTRINSIC))]
2288 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2289 [(set_attr "type" "ssemuladd")
2290 (set_attr "mode" "V8SF")])
;; Intrinsic V4DF fmaddsub: the operation is wrapped in
;; UNSPEC_FMA4_INTRINSIC and emits vfmaddsubpd.  Alternative 0 allows memory
;; in operand 3, alternative 1 in operand 2.
2292 (define_insn "fma4i_fmaddsubv4df4"
2293 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2298 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2299 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2300 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2307 UNSPEC_FMA4_INTRINSIC))]
2309 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2310 [(set_attr "type" "ssemuladd")
2311 (set_attr "mode" "V4DF")])
;; Intrinsic V4SF fmaddsub: the operation is wrapped in
;; UNSPEC_FMA4_INTRINSIC and emits vfmaddsubps.  Alternative 0 allows memory
;; in operand 3, alternative 1 in operand 2.
2313 (define_insn "fma4i_fmaddsubv4sf4"
2314 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2319 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2320 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2321 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2328 UNSPEC_FMA4_INTRINSIC))]
2330 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2331 [(set_attr "type" "ssemuladd")
2332 (set_attr "mode" "V4SF")])
;; Intrinsic V2DF fmaddsub: the operation is wrapped in
;; UNSPEC_FMA4_INTRINSIC and emits vfmaddsubpd.  Alternative 0 allows memory
;; in operand 3, alternative 1 in operand 2.
2334 (define_insn "fma4i_fmaddsubv2df4"
2335 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2340 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2341 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2342 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2349 UNSPEC_FMA4_INTRINSIC))]
2351 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2352 [(set_attr "type" "ssemuladd")
2353 (set_attr "mode" "V2DF")])
;; Intrinsic V8SF fmsubadd: the operation is wrapped in
;; UNSPEC_FMA4_INTRINSIC and emits vfmsubaddps.  Alternative 0 allows memory
;; in operand 3, alternative 1 in operand 2.
2355 (define_insn "fma4i_fmsubaddv8sf4"
2356 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2361 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2362 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2363 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2370 UNSPEC_FMA4_INTRINSIC))]
2372 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2373 [(set_attr "type" "ssemuladd")
2374 (set_attr "mode" "V8SF")])
;; Intrinsic V4DF fmsubadd: the operation is wrapped in
;; UNSPEC_FMA4_INTRINSIC and emits vfmsubaddpd.  Alternative 0 allows memory
;; in operand 3, alternative 1 in operand 2.
2376 (define_insn "fma4i_fmsubaddv4df4"
2377 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2382 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2383 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2384 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2391 UNSPEC_FMA4_INTRINSIC))]
2393 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2394 [(set_attr "type" "ssemuladd")
2395 (set_attr "mode" "V4DF")])
;; Intrinsic V4SF fmsubadd: the operation is wrapped in
;; UNSPEC_FMA4_INTRINSIC and emits vfmsubaddps.  Alternative 0 allows memory
;; in operand 3, alternative 1 in operand 2.
2397 (define_insn "fma4i_fmsubaddv4sf4"
2398 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2403 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2404 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2405 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2412 UNSPEC_FMA4_INTRINSIC))]
2414 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2415 [(set_attr "type" "ssemuladd")
2416 (set_attr "mode" "V4SF")])
;; Intrinsic V2DF fmsubadd: the operation is wrapped in
;; UNSPEC_FMA4_INTRINSIC and emits vfmsubaddpd.  Alternative 0 allows memory
;; in operand 3, alternative 1 in operand 2.
2418 (define_insn "fma4i_fmsubaddv2df4"
2419 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2424 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2425 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2426 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2433 UNSPEC_FMA4_INTRINSIC))]
2435 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2436 [(set_attr "type" "ssemuladd")
2437 (set_attr "mode" "V2DF")])
2439 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2441 ;; Parallel single-precision floating point conversion operations
2443 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register or memory, "ym") to
;; V2SF.  Operand 1 carries the matching constraint "0", so it shares the
;; V4SF output register, and the template names only %2 and %0.
2445 (define_insn "sse_cvtpi2ps"
2446 [(set (match_operand:V4SF 0 "register_operand" "=x")
2449 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2450 (match_operand:V4SF 1 "register_operand" "0")
2453 "cvtpi2ps\t{%2, %0|%0, %2}"
2454 [(set_attr "type" "ssecvt")
2455 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V2SI result goes to an MMX register ("=y") and is formed
;; from elements 0 and 1 (the parallel below) of a V4SI unspec applied to
;; the V4SF operand 1.  The "unit" attribute is "mmx".
2457 (define_insn "sse_cvtps2pi"
2458 [(set (match_operand:V2SI 0 "register_operand" "=y")
2460 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2462 (parallel [(const_int 0) (const_int 1)])))]
2464 "cvtps2pi\t{%1, %0|%0, %1}"
2465 [(set_attr "type" "ssecvt")
2466 (set_attr "unit" "mmx")
2467 (set_attr "mode" "DI")])
;; cvttps2pi: like sse_cvtps2pi but the conversion is expressed as
;; (fix:V4SI ...) instead of an unspec; elements 0 and 1 land in an MMX
;; register.  "prefix_rep" is set to "0" explicitly.
2469 (define_insn "sse_cvttps2pi"
2470 [(set (match_operand:V2SI 0 "register_operand" "=y")
2472 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2473 (parallel [(const_int 0) (const_int 1)])))]
2475 "cvttps2pi\t{%1, %0|%0, %1}"
2476 [(set_attr "type" "ssecvt")
2477 (set_attr "unit" "mmx")
2478 (set_attr "prefix_rep" "0")
2479 (set_attr "mode" "SF")])
;; VEX-encoded vcvtsi2ss: converts the SI operand 2 (general register or
;; memory, "rm") to SF.  Unlike the SSE form, operand 1 is not tied to the
;; destination, so the template is the three-operand %2, %1, %0.
2481 (define_insn "*avx_cvtsi2ss"
2482 [(set (match_operand:V4SF 0 "register_operand" "=x")
2485 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2486 (match_operand:V4SF 1 "register_operand" "x")
2489 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2490 [(set_attr "type" "sseicvt")
2491 (set_attr "prefix" "vex")
2492 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SI operand 2 to SF, with operand 1 tied to the
;; output register ("0").  The register ("r") and memory ("m") sources are
;; separate alternatives so the athlon/amdfam10 decode attributes can differ
;; per alternative ("vector" vs "double").
2494 (define_insn "sse_cvtsi2ss"
2495 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2498 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2499 (match_operand:V4SF 1 "register_operand" "0,0")
2502 "cvtsi2ss\t{%2, %0|%0, %2}"
2503 [(set_attr "type" "sseicvt")
2504 (set_attr "athlon_decode" "vector,double")
2505 (set_attr "amdfam10_decode" "vector,double")
2506 (set_attr "mode" "SF")])
;; VEX-encoded vcvtsi2ssq: the DI-source variant of *avx_cvtsi2ss, enabled
;; only when TARGET_AVX && TARGET_64BIT.  "length_vex" is fixed at "4".
2508 (define_insn "*avx_cvtsi2ssq"
2509 [(set (match_operand:V4SF 0 "register_operand" "=x")
2512 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2513 (match_operand:V4SF 1 "register_operand" "x")
2515 "TARGET_AVX && TARGET_64BIT"
2516 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2517 [(set_attr "type" "sseicvt")
2518 (set_attr "length_vex" "4")
2519 (set_attr "prefix" "vex")
2520 (set_attr "mode" "SF")])
;; cvtsi2ssq: DI-source variant of sse_cvtsi2ss, enabled when
;; TARGET_SSE && TARGET_64BIT; "prefix_rex" is "1".  Alternative 0 takes a
;; general register; alternative 1 accepts register or memory ("rm").
;; NOTE(review): sse_cvtsi2ss and sse2_cvtsi2sdq use "r,m" here -- confirm
;; whether the overlapping "rm" in alternative 1 is intentional.
2522 (define_insn "sse_cvtsi2ssq"
2523 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2526 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2527 (match_operand:V4SF 1 "register_operand" "0,0")
2529 "TARGET_SSE && TARGET_64BIT"
2530 "cvtsi2ssq\t{%2, %0|%0, %2}"
2531 [(set_attr "type" "sseicvt")
2532 (set_attr "prefix_rex" "1")
2533 (set_attr "athlon_decode" "vector,double")
2534 (set_attr "amdfam10_decode" "vector,double")
2535 (set_attr "mode" "SF")])
;; cvtss2si on a vector source: element 0 of the V4SF operand 1 is wrapped
;; in UNSPEC_FIX_NOTRUNC and the SI result goes to a general register.
;; The template uses the "%v" prefix together with prefix "maybe_vex".
;; NOTE(review): no amdfam10_decode attribute here, unlike sse_cvtss2si_2.
2537 (define_insn "sse_cvtss2si"
2538 [(set (match_operand:SI 0 "register_operand" "=r,r")
2541 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2542 (parallel [(const_int 0)]))]
2543 UNSPEC_FIX_NOTRUNC))]
2545 "%vcvtss2si\t{%1, %0|%0, %1}"
2546 [(set_attr "type" "sseicvt")
2547 (set_attr "athlon_decode" "double,vector")
2548 (set_attr "prefix_rep" "1")
2549 (set_attr "prefix" "maybe_vex")
2550 (set_attr "mode" "SI")])
;; cvtss2si on a scalar source: same UNSPEC_FIX_NOTRUNC conversion to SI as
;; sse_cvtss2si, but operand 1 is a plain SF value (register or memory)
;; rather than element 0 of a V4SF.
2552 (define_insn "sse_cvtss2si_2"
2553 [(set (match_operand:SI 0 "register_operand" "=r,r")
2554 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2555 UNSPEC_FIX_NOTRUNC))]
2557 "%vcvtss2si\t{%1, %0|%0, %1}"
2558 [(set_attr "type" "sseicvt")
2559 (set_attr "athlon_decode" "double,vector")
2560 (set_attr "amdfam10_decode" "double,double")
2561 (set_attr "prefix_rep" "1")
2562 (set_attr "prefix" "maybe_vex")
2563 (set_attr "mode" "SI")])
;; 64-bit cvtss2si: element 0 of the V4SF operand 1, wrapped in
;; UNSPEC_FIX_NOTRUNC, produces a DI result.  Enabled when
;; TARGET_SSE && TARGET_64BIT; the template spells the size suffix as "{q}".
2565 (define_insn "sse_cvtss2siq"
2566 [(set (match_operand:DI 0 "register_operand" "=r,r")
2569 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2570 (parallel [(const_int 0)]))]
2571 UNSPEC_FIX_NOTRUNC))]
2572 "TARGET_SSE && TARGET_64BIT"
2573 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2574 [(set_attr "type" "sseicvt")
2575 (set_attr "athlon_decode" "double,vector")
2576 (set_attr "prefix_rep" "1")
2577 (set_attr "prefix" "maybe_vex")
2578 (set_attr "mode" "DI")])
;; 64-bit cvtss2si on a scalar SF source: UNSPEC_FIX_NOTRUNC conversion of
;; operand 1 to DI, enabled when TARGET_SSE && TARGET_64BIT.
2580 (define_insn "sse_cvtss2siq_2"
2581 [(set (match_operand:DI 0 "register_operand" "=r,r")
2582 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2583 UNSPEC_FIX_NOTRUNC))]
2584 "TARGET_SSE && TARGET_64BIT"
2585 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2586 [(set_attr "type" "sseicvt")
2587 (set_attr "athlon_decode" "double,vector")
2588 (set_attr "amdfam10_decode" "double,double")
2589 (set_attr "prefix_rep" "1")
2590 (set_attr "prefix" "maybe_vex")
2591 (set_attr "mode" "SI")])
;; cvttss2si: converts element 0 of the V4SF operand 1 (selected by the
;; parallel below) to an SI general register.  Register and memory sources
;; are separate alternatives with their own decode attributes.
2593 (define_insn "sse_cvttss2si"
2594 [(set (match_operand:SI 0 "register_operand" "=r,r")
2597 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2598 (parallel [(const_int 0)]))))]
2600 "%vcvttss2si\t{%1, %0|%0, %1}"
2601 [(set_attr "type" "sseicvt")
2602 (set_attr "athlon_decode" "double,vector")
2603 (set_attr "amdfam10_decode" "double,double")
2604 (set_attr "prefix_rep" "1")
2605 (set_attr "prefix" "maybe_vex")
2606 (set_attr "mode" "SI")])
;; 64-bit cvttss2si: converts element 0 of the V4SF operand 1 to a DI
;; general register.  Enabled when TARGET_SSE && TARGET_64BIT; the template
;; spells the size suffix as "{q}".
2608 (define_insn "sse_cvttss2siq"
2609 [(set (match_operand:DI 0 "register_operand" "=r,r")
2612 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2613 (parallel [(const_int 0)]))))]
2614 "TARGET_SSE && TARGET_64BIT"
2615 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2616 [(set_attr "type" "sseicvt")
2617 (set_attr "athlon_decode" "double,vector")
2618 (set_attr "amdfam10_decode" "double,double")
2619 (set_attr "prefix_rep" "1")
2620 (set_attr "prefix" "maybe_vex")
2621 (set_attr "mode" "DI")])
;; VEX-encoded vcvtdq2ps for every mode in AVXMODEDCVTDQ2PS: a (float ...)
;; of operand 1, whose mode is the <avxcvtvecmode> counterpart of the
;; destination mode.  The source may be a register or memory ("xm").
2623 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2624 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2625 (float:AVXMODEDCVTDQ2PS
2626 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2628 "vcvtdq2ps\t{%1, %0|%0, %1}"
2629 [(set_attr "type" "ssecvt")
2630 (set_attr "prefix" "vex")
2631 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: (float:V4SF ...) of the V4SI operand 1, which may be a register
;; or memory ("xm").
2633 (define_insn "sse2_cvtdq2ps"
2634 [(set (match_operand:V4SF 0 "register_operand" "=x")
2635 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2637 "cvtdq2ps\t{%1, %0|%0, %1}"
2638 [(set_attr "type" "ssecvt")
2639 (set_attr "mode" "V4SF")])
;; Expander built from four steps (presumably the unsigned V4SI -> V4SF
;; conversion, going by the name -- confirm):
;;   1. (float:V4SF ...) of operand 1;
;;   2. an lt comparison of temporary 5 against operand 3;
;;   3. an AND of temporary 6 with operand 4;
;;   4. operand 0 = temporary 5 + temporary 7.
;; The preparation code sets operand 3 to the V4SF zero constant forced into
;; a register, operand 4 to a register holding a vector built from
;; real_ldexp (dconst1, 32), i.e. 2^32 as an SFmode constant, and operands
;; 5..7 to fresh V4SF pseudo registers.
2641 (define_expand "sse2_cvtudq2ps"
2643 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2645 (lt:V4SF (match_dup 5) (match_dup 3)))
2647 (and:V4SF (match_dup 6) (match_dup 4)))
2648 (set (match_operand:V4SF 0 "register_operand" "")
2649 (plus:V4SF (match_dup 5) (match_dup 7)))]
2652 REAL_VALUE_TYPE TWO32r;
2656 real_ldexp (&TWO32r, &dconst1, 32);
2657 x = const_double_from_real_value (TWO32r, SFmode);
2659 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2660 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2662 for (i = 5; i < 8; i++)
2663 operands[i] = gen_reg_rtx (V4SFmode);
;; VEX-encoded vcvtps2dq for every mode in AVXMODEDCVTPS2DQ: operand 1 (mode
;; <avxcvtvecmode>, register or memory) is wrapped in UNSPEC_FIX_NOTRUNC.
2666 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2667 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2668 (unspec:AVXMODEDCVTPS2DQ
2669 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2670 UNSPEC_FIX_NOTRUNC))]
2672 "vcvtps2dq\t{%1, %0|%0, %1}"
2673 [(set_attr "type" "ssecvt")
2674 (set_attr "prefix" "vex")
2675 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF operand 1 wrapped in UNSPEC_FIX_NOTRUNC, giving a V4SI
;; result.  "prefix_data16" is "1" and the mode attribute is "TI".
2677 (define_insn "sse2_cvtps2dq"
2678 [(set (match_operand:V4SI 0 "register_operand" "=x")
2679 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2680 UNSPEC_FIX_NOTRUNC))]
2682 "cvtps2dq\t{%1, %0|%0, %1}"
2683 [(set_attr "type" "ssecvt")
2684 (set_attr "prefix_data16" "1")
2685 (set_attr "mode" "TI")])
;; VEX-encoded vcvttps2dq for every mode in AVXMODEDCVTPS2DQ, expressed as a
;; plain (fix ...) of operand 1 (mode <avxcvtvecmode>, register or memory).
2687 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2688 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2689 (fix:AVXMODEDCVTPS2DQ
2690 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2692 "vcvttps2dq\t{%1, %0|%0, %1}"
2693 [(set_attr "type" "ssecvt")
2694 (set_attr "prefix" "vex")
2695 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: (fix:V4SI ...) of the V4SF operand 1.  The encoding attributes
;; set "prefix_rep" to "1" and "prefix_data16" to "0".
2697 (define_insn "sse2_cvttps2dq"
2698 [(set (match_operand:V4SI 0 "register_operand" "=x")
2699 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2701 "cvttps2dq\t{%1, %0|%0, %1}"
2702 [(set_attr "type" "ssecvt")
2703 (set_attr "prefix_rep" "1")
2704 (set_attr "prefix_data16" "0")
2705 (set_attr "mode" "TI")])
2707 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2709 ;; Parallel double-precision floating point conversion operations
2711 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: (float:V2DF ...) of the V2SI operand 1.  Alternative 0 takes an
;; MMX register ("y") and sets unit "mmx" and prefix_data16 "1"; alternative
;; 1 takes memory and leaves both attributes at their defaults ("*").
2713 (define_insn "sse2_cvtpi2pd"
2714 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2715 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2717 "cvtpi2pd\t{%1, %0|%0, %1}"
2718 [(set_attr "type" "ssecvt")
2719 (set_attr "unit" "mmx,*")
2720 (set_attr "prefix_data16" "1,*")
2721 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 wrapped in UNSPEC_FIX_NOTRUNC, giving a V2SI
;; result in an MMX register ("=y").
2723 (define_insn "sse2_cvtpd2pi"
2724 [(set (match_operand:V2SI 0 "register_operand" "=y")
2725 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2726 UNSPEC_FIX_NOTRUNC))]
2728 "cvtpd2pi\t{%1, %0|%0, %1}"
2729 [(set_attr "type" "ssecvt")
2730 (set_attr "unit" "mmx")
2731 (set_attr "prefix_data16" "1")
2732 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI ...) of the V2DF operand 1, with the result in an
;; MMX register ("=y").
;; NOTE(review): mode attribute is "TI" here but "DI" in sse2_cvtpd2pi.
2734 (define_insn "sse2_cvttpd2pi"
2735 [(set (match_operand:V2SI 0 "register_operand" "=y")
2736 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2738 "cvttpd2pi\t{%1, %0|%0, %1}"
2739 [(set_attr "type" "ssecvt")
2740 (set_attr "unit" "mmx")
2741 (set_attr "prefix_data16" "1")
2742 (set_attr "mode" "TI")])
;; VEX-encoded vcvtsi2sd: converts the SI operand 2 (general register or
;; memory) to DF.  Operand 1 is an independent register, so the template is
;; the three-operand %2, %1, %0.
2744 (define_insn "*avx_cvtsi2sd"
2745 [(set (match_operand:V2DF 0 "register_operand" "=x")
2748 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2749 (match_operand:V2DF 1 "register_operand" "x")
2752 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2753 [(set_attr "type" "sseicvt")
2754 (set_attr "prefix" "vex")
2755 (set_attr "mode" "DF")])
;; cvtsi2sd: converts the SI operand 2 to DF, with operand 1 tied to the
;; output register ("0").  Register and memory sources are separate
;; alternatives so the athlon/amdfam10 decode attributes can differ.
2757 (define_insn "sse2_cvtsi2sd"
2758 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2761 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2762 (match_operand:V2DF 1 "register_operand" "0,0")
2765 "cvtsi2sd\t{%2, %0|%0, %2}"
2766 [(set_attr "type" "sseicvt")
2767 (set_attr "mode" "DF")
2768 (set_attr "athlon_decode" "double,direct")
2769 (set_attr "amdfam10_decode" "vector,double")])
;; VEX-encoded vcvtsi2sdq: the DI-source variant of *avx_cvtsi2sd, enabled
;; only when TARGET_AVX && TARGET_64BIT.  "length_vex" is fixed at "4".
2771 (define_insn "*avx_cvtsi2sdq"
2772 [(set (match_operand:V2DF 0 "register_operand" "=x")
2775 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2776 (match_operand:V2DF 1 "register_operand" "x")
2778 "TARGET_AVX && TARGET_64BIT"
2779 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2780 [(set_attr "type" "sseicvt")
2781 (set_attr "length_vex" "4")
2782 (set_attr "prefix" "vex")
2783 (set_attr "mode" "DF")])
;; cvtsi2sdq: DI-source variant of sse2_cvtsi2sd, enabled when
;; TARGET_SSE2 && TARGET_64BIT; "prefix_rex" is "1".  Operand 1 is tied to
;; the output register in both alternatives.
2785 (define_insn "sse2_cvtsi2sdq"
2786 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2789 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2790 (match_operand:V2DF 1 "register_operand" "0,0")
2792 "TARGET_SSE2 && TARGET_64BIT"
2793 "cvtsi2sdq\t{%2, %0|%0, %2}"
2794 [(set_attr "type" "sseicvt")
2795 (set_attr "prefix_rex" "1")
2796 (set_attr "mode" "DF")
2797 (set_attr "athlon_decode" "double,direct")
2798 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on a vector source: element 0 of the V2DF operand 1 is wrapped
;; in UNSPEC_FIX_NOTRUNC and the SI result goes to a general register.
;; NOTE(review): no amdfam10_decode attribute here, unlike sse2_cvtsd2si_2.
2800 (define_insn "sse2_cvtsd2si"
2801 [(set (match_operand:SI 0 "register_operand" "=r,r")
2804 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2805 (parallel [(const_int 0)]))]
2806 UNSPEC_FIX_NOTRUNC))]
2808 "%vcvtsd2si\t{%1, %0|%0, %1}"
2809 [(set_attr "type" "sseicvt")
2810 (set_attr "athlon_decode" "double,vector")
2811 (set_attr "prefix_rep" "1")
2812 (set_attr "prefix" "maybe_vex")
2813 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar source: UNSPEC_FIX_NOTRUNC conversion of the DF
;; operand 1 (register or memory) to an SI general register.
2815 (define_insn "sse2_cvtsd2si_2"
2816 [(set (match_operand:SI 0 "register_operand" "=r,r")
2817 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2818 UNSPEC_FIX_NOTRUNC))]
2820 "%vcvtsd2si\t{%1, %0|%0, %1}"
2821 [(set_attr "type" "sseicvt")
2822 (set_attr "athlon_decode" "double,vector")
2823 (set_attr "amdfam10_decode" "double,double")
2824 (set_attr "prefix_rep" "1")
2825 (set_attr "prefix" "maybe_vex")
2826 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si: element 0 of the V2DF operand 1, wrapped in
;; UNSPEC_FIX_NOTRUNC, produces a DI result.  Enabled when
;; TARGET_SSE2 && TARGET_64BIT.
2828 (define_insn "sse2_cvtsd2siq"
2829 [(set (match_operand:DI 0 "register_operand" "=r,r")
2832 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2833 (parallel [(const_int 0)]))]
2834 UNSPEC_FIX_NOTRUNC))]
2835 "TARGET_SSE2 && TARGET_64BIT"
2836 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2837 [(set_attr "type" "sseicvt")
2838 (set_attr "athlon_decode" "double,vector")
2839 (set_attr "prefix_rep" "1")
2840 (set_attr "prefix" "maybe_vex")
2841 (set_attr "mode" "DI")])
;; 64-bit cvtsd2si on a scalar DF source: UNSPEC_FIX_NOTRUNC conversion of
;; operand 1 to DI, enabled when TARGET_SSE2 && TARGET_64BIT.
2843 (define_insn "sse2_cvtsd2siq_2"
2844 [(set (match_operand:DI 0 "register_operand" "=r,r")
2845 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2846 UNSPEC_FIX_NOTRUNC))]
2847 "TARGET_SSE2 && TARGET_64BIT"
2848 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2849 [(set_attr "type" "sseicvt")
2850 (set_attr "athlon_decode" "double,vector")
2851 (set_attr "amdfam10_decode" "double,double")
2852 (set_attr "prefix_rep" "1")
2853 (set_attr "prefix" "maybe_vex")
2854 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF operand 1 (selected by the
;; parallel below) to an SI general register.  Register and memory sources
;; are separate alternatives with their own decode attributes.
2856 (define_insn "sse2_cvttsd2si"
2857 [(set (match_operand:SI 0 "register_operand" "=r,r")
2860 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2861 (parallel [(const_int 0)]))))]
2863 "%vcvttsd2si\t{%1, %0|%0, %1}"
2864 [(set_attr "type" "sseicvt")
2865 (set_attr "prefix_rep" "1")
2866 (set_attr "prefix" "maybe_vex")
2867 (set_attr "mode" "SI")
2868 (set_attr "athlon_decode" "double,vector")
2869 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit cvttsd2si: converts element 0 of the V2DF operand 1 to a DI
;; general register.  Enabled when TARGET_SSE2 && TARGET_64BIT.
2871 (define_insn "sse2_cvttsd2siq"
2872 [(set (match_operand:DI 0 "register_operand" "=r,r")
2875 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2876 (parallel [(const_int 0)]))))]
2877 "TARGET_SSE2 && TARGET_64BIT"
2878 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2879 [(set_attr "type" "sseicvt")
2880 (set_attr "prefix_rep" "1")
2881 (set_attr "prefix" "maybe_vex")
2882 (set_attr "mode" "DI")
2883 (set_attr "athlon_decode" "double,vector")
2884 (set_attr "amdfam10_decode" "double,double")])
;; 256-bit vcvtdq2pd: (float:V4DF ...) of the full V4SI operand 1 (register
;; or memory); VEX-encoded.
2886 (define_insn "avx_cvtdq2pd256"
2887 [(set (match_operand:V4DF 0 "register_operand" "=x")
2888 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2890 "vcvtdq2pd\t{%1, %0|%0, %1}"
2891 [(set_attr "type" "ssecvt")
2892 (set_attr "prefix" "vex")
2893 (set_attr "mode" "V4DF")])
;; cvtdq2pd: produces V2DF from elements 0 and 1 (the parallel below) of the
;; V4SI operand 1.  The template uses "%v" with prefix "maybe_vex".
2895 (define_insn "sse2_cvtdq2pd"
2896 [(set (match_operand:V2DF 0 "register_operand" "=x")
2899 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2900 (parallel [(const_int 0) (const_int 1)]))))]
2902 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2903 [(set_attr "type" "ssecvt")
2904 (set_attr "prefix" "maybe_vex")
2905 (set_attr "mode" "V2DF")])
;; 256-bit vcvtpd2dq: V4DF operand 1 wrapped in UNSPEC_FIX_NOTRUNC, giving a
;; V4SI result.  The template carries a "{y}" mnemonic suffix.
2907 (define_insn "avx_cvtpd2dq256"
2908 [(set (match_operand:V4SI 0 "register_operand" "=x")
2909 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2910 UNSPEC_FIX_NOTRUNC))]
2912 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2913 [(set_attr "type" "ssecvt")
2914 (set_attr "prefix" "vex")
2915 (set_attr "mode" "OI")])
;; Expander for the V2DF -> V4SI cvtpd2dq conversion.  The V2DF source goes
;; through a V2SI unspec, and the preparation statement supplies operand 2
;; as the V2SI zero constant (presumably the upper half of the V4SI result,
;; matching the const0_operand in *sse2_cvtpd2dq -- confirm).
2917 (define_expand "sse2_cvtpd2dq"
2918 [(set (match_operand:V4SI 0 "register_operand" "")
2920 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2924 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the RTL built by the sse2_cvtpd2dq expander: a V2SI unspec of
;; the V2DF operand 1 paired with operand 2, which must satisfy
;; const0_operand.  The C output template returns "vcvtpd2dq{x}" when
;; TARGET_AVX and the plain "cvtpd2dq" otherwise.
2926 (define_insn "*sse2_cvtpd2dq"
2927 [(set (match_operand:V4SI 0 "register_operand" "=x")
2929 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2931 (match_operand:V2SI 2 "const0_operand" "")))]
2933 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2934 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2935 [(set_attr "type" "ssecvt")
2936 (set_attr "prefix_rep" "1")
2937 (set_attr "prefix_data16" "0")
2938 (set_attr "prefix" "maybe_vex")
2939 (set_attr "mode" "TI")
2940 (set_attr "amdfam10_decode" "double")])
;; 256-bit vcvttpd2dq: (fix:V4SI ...) of the V4DF operand 1 (register or
;; memory).  The template carries a "{y}" mnemonic suffix.
2942 (define_insn "avx_cvttpd2dq256"
2943 [(set (match_operand:V4SI 0 "register_operand" "=x")
2944 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2946 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2947 [(set_attr "type" "ssecvt")
2948 (set_attr "prefix" "vex")
2949 (set_attr "mode" "OI")])
;; Expander for the V2DF -> V4SI cvttpd2dq conversion.  The source is
;; converted with (fix:V2SI ...), and the preparation statement supplies
;; operand 2 as the V2SI zero constant.
2951 (define_expand "sse2_cvttpd2dq"
2952 [(set (match_operand:V4SI 0 "register_operand" "")
2954 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2957 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the RTL built by the sse2_cvttpd2dq expander: (fix:V2SI ...)
;; of the V2DF operand 1 paired with operand 2, which must satisfy
;; const0_operand.  The C output template returns "vcvttpd2dq{x}" when
;; TARGET_AVX and the plain "cvttpd2dq" otherwise.
2959 (define_insn "*sse2_cvttpd2dq"
2960 [(set (match_operand:V4SI 0 "register_operand" "=x")
2962 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2963 (match_operand:V2SI 2 "const0_operand" "")))]
2965 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2966 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2967 [(set_attr "type" "ssecvt")
2968 (set_attr "prefix" "maybe_vex")
2969 (set_attr "mode" "TI")
2970 (set_attr "amdfam10_decode" "double")])
;; VEX-encoded vcvtsd2ss: (float_truncate:V2SF ...) of the V2DF operand 2
;; (register or memory).  Operand 1 is an independent V4SF register, so the
;; template is the three-operand %2, %1, %0.
2972 (define_insn "*avx_cvtsd2ss"
2973 [(set (match_operand:V4SF 0 "register_operand" "=x")
2976 (float_truncate:V2SF
2977 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2978 (match_operand:V4SF 1 "register_operand" "x")
2981 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2982 [(set_attr "type" "ssecvt")
2983 (set_attr "prefix" "vex")
2984 (set_attr "mode" "SF")])
;; cvtsd2ss: (float_truncate:V2SF ...) of the V2DF operand 2, with operand 1
;; tied to the output register ("0").  Register and memory sources are
;; separate alternatives with their own decode attributes.
2986 (define_insn "sse2_cvtsd2ss"
2987 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2990 (float_truncate:V2SF
2991 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2992 (match_operand:V4SF 1 "register_operand" "0,0")
2995 "cvtsd2ss\t{%2, %0|%0, %2}"
2996 [(set_attr "type" "ssecvt")
2997 (set_attr "athlon_decode" "vector,double")
2998 (set_attr "amdfam10_decode" "vector,double")
2999 (set_attr "mode" "SF")])
;; VEX-encoded vcvtss2sd: the source is elements 0 and 1 (the parallel
;; below) of the V4SF operand 2; operand 1 is an independent V2DF register,
;; giving the three-operand template %2, %1, %0.
3001 (define_insn "*avx_cvtss2sd"
3002 [(set (match_operand:V2DF 0 "register_operand" "=x")
3006 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3007 (parallel [(const_int 0) (const_int 1)])))
3008 (match_operand:V2DF 1 "register_operand" "x")
3011 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3012 [(set_attr "type" "ssecvt")
3013 (set_attr "prefix" "vex")
3014 (set_attr "mode" "DF")])
;; cvtss2sd: the source is elements 0 and 1 (the parallel below) of the V4SF
;; operand 2; operand 1 is tied to the output register ("0").
;; NOTE(review): only amdfam10_decode is given per alternative here; there
;; is no athlon_decode attribute, unlike sse2_cvtsd2ss.
3016 (define_insn "sse2_cvtss2sd"
3017 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3021 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3022 (parallel [(const_int 0) (const_int 1)])))
3023 (match_operand:V2DF 1 "register_operand" "0,0")
3026 "cvtss2sd\t{%2, %0|%0, %2}"
3027 [(set_attr "type" "ssecvt")
3028 (set_attr "amdfam10_decode" "vector,double")
3029 (set_attr "mode" "DF")])
;; 256-bit vcvtpd2ps: (float_truncate:V4SF ...) of the V4DF operand 1
;; (register or memory).  The template carries a "{y}" mnemonic suffix.
3031 (define_insn "avx_cvtpd2ps256"
3032 [(set (match_operand:V4SF 0 "register_operand" "=x")
3033 (float_truncate:V4SF
3034 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3036 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3037 [(set_attr "type" "ssecvt")
3038 (set_attr "prefix" "vex")
3039 (set_attr "mode" "V4SF")])
;; Expander for the V2DF -> V4SF cvtpd2ps conversion.  The source is
;; narrowed with (float_truncate:V2SF ...), and the preparation statement
;; supplies operand 2 as the V2SF zero constant.
3041 (define_expand "sse2_cvtpd2ps"
3042 [(set (match_operand:V4SF 0 "register_operand" "")
3044 (float_truncate:V2SF
3045 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3048 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for the RTL built by the sse2_cvtpd2ps expander:
;; (float_truncate:V2SF ...) of the V2DF operand 1 paired with operand 2,
;; which must satisfy const0_operand.  The C output template returns
;; "vcvtpd2ps{x}" when TARGET_AVX and the plain "cvtpd2ps" otherwise.
3050 (define_insn "*sse2_cvtpd2ps"
3051 [(set (match_operand:V4SF 0 "register_operand" "=x")
3053 (float_truncate:V2SF
3054 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3055 (match_operand:V2SF 2 "const0_operand" "")))]
3057 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3058 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3059 [(set_attr "type" "ssecvt")
3060 (set_attr "prefix_data16" "1")
3061 (set_attr "prefix" "maybe_vex")
3062 (set_attr "mode" "V4SF")
3063 (set_attr "amdfam10_decode" "double")])
;; vcvtps2pd from V4SF operand 1 (register or memory) into a V4DF register.
3065 (define_insn "avx_cvtps2pd256"
3066 [(set (match_operand:V4DF 0 "register_operand" "=x")
3068 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3070 "vcvtps2pd\t{%1, %0|%0, %1}"
3071 [(set_attr "type" "ssecvt")
3072 (set_attr "prefix" "vex")
3073 (set_attr "mode" "V4DF")])
;; cvtps2pd into a V2DF register from elements 0 and 1 of V4SF operand 1
;; (register or memory).  The template uses the %v mnemonic prefix and the
;; pattern is tagged prefix "maybe_vex"; prefix_data16 is forced to 0.
3075 (define_insn "sse2_cvtps2pd"
3076 [(set (match_operand:V2DF 0 "register_operand" "=x")
3079 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3080 (parallel [(const_int 0) (const_int 1)]))))]
3082 "%vcvtps2pd\t{%1, %0|%0, %1}"
3083 [(set_attr "type" "ssecvt")
3084 (set_attr "prefix" "maybe_vex")
3085 (set_attr "mode" "V2DF")
3086 (set_attr "prefix_data16" "0")
3087 (set_attr "amdfam10_decode" "direct")])
;; Two-step expander.  The preparation statement allocates operand 2 as a
;; fresh V4SF temporary (gen_reg_rtx); the final set produces V2DF operand 0
;; from elements 0 and 1 of its source vector.
3089 (define_expand "vec_unpacks_hi_v4sf"
3094 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3095 (parallel [(const_int 6)
3099 (set (match_operand:V2DF 0 "register_operand" "")
3103 (parallel [(const_int 0) (const_int 1)]))))]
3105 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Produces V2DF operand 0 from elements 0 and 1 of V4SF operand 1.
3107 (define_expand "vec_unpacks_lo_v4sf"
3108 [(set (match_operand:V2DF 0 "register_operand" "")
3111 (match_operand:V4SF 1 "nonimmediate_operand" "")
3112 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF in two emitted insns: vec_unpacks_hi_v8hi into a V4SI
;; temporary, then sse2_cvtdq2ps from that temporary into operand 0.
3115 (define_expand "vec_unpacks_float_hi_v8hi"
3116 [(match_operand:V4SF 0 "register_operand" "")
3117 (match_operand:V8HI 1 "register_operand" "")]
3120 rtx tmp = gen_reg_rtx (V4SImode);
3122 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3123 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: vec_unpacks_lo_v8hi into a V4SI
;; temporary, then sse2_cvtdq2ps from that temporary into operand 0.
3127 (define_expand "vec_unpacks_float_lo_v8hi"
3128 [(match_operand:V4SF 0 "register_operand" "")
3129 (match_operand:V8HI 1 "register_operand" "")]
3132 rtx tmp = gen_reg_rtx (V4SImode);
3134 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3135 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: vec_unpacku_hi_v8hi into a V4SI
;; temporary, then sse2_cvtdq2ps from that temporary into operand 0.
3139 (define_expand "vec_unpacku_float_hi_v8hi"
3140 [(match_operand:V4SF 0 "register_operand" "")
3141 (match_operand:V8HI 1 "register_operand" "")]
3144 rtx tmp = gen_reg_rtx (V4SImode);
3146 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3147 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: vec_unpacku_lo_v8hi into a V4SI
;; temporary, then sse2_cvtdq2ps from that temporary into operand 0.
3151 (define_expand "vec_unpacku_float_lo_v8hi"
3152 [(match_operand:V4SF 0 "register_operand" "")
3153 (match_operand:V8HI 1 "register_operand" "")]
3156 rtx tmp = gen_reg_rtx (V4SImode);
3158 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3159 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander.  Operand 2 is a V4SI temporary allocated by the
;; preparation statement; the first step's selection list on operand 1
;; begins at element 2, and the final set produces V2DF operand 0 from
;; elements 0 and 1 of its source.
3163 (define_expand "vec_unpacks_float_hi_v4si"
3166 (match_operand:V4SI 1 "nonimmediate_operand" "")
3167 (parallel [(const_int 2)
3171 (set (match_operand:V2DF 0 "register_operand" "")
3175 (parallel [(const_int 0) (const_int 1)]))))]
3177 "operands[2] = gen_reg_rtx (V4SImode);")
;; Produces V2DF operand 0 from elements 0 and 1 of V4SI operand 1.
3179 (define_expand "vec_unpacks_float_lo_v4si"
3180 [(set (match_operand:V2DF 0 "register_operand" "")
3183 (match_operand:V4SI 1 "nonimmediate_operand" "")
3184 (parallel [(const_int 0) (const_int 1)]))))]
;; V4SI -> V2DF expander with a fix-up step.  A converted value lands in
;; operand 6; the sequence then computes lt (op6, op3), ANDs that result
;; (op7) with op4, and adds the product (op8) to op6 to form operand 0.
;; Preparation code sets:
;;   operands[3]    = V2DF zero vector, forced into a register
;;   operands[4]    = V2DF vector built from 1.0 scaled by 2**32 (real_ldexp)
;;   operands[5]    = V4SI temporary
;;   operands[6..8] = V2DF temporaries
3187 (define_expand "vec_unpacku_float_hi_v4si"
3190 (match_operand:V4SI 1 "nonimmediate_operand" "")
3191 (parallel [(const_int 2)
3199 (parallel [(const_int 0) (const_int 1)]))))
3201 (lt:V2DF (match_dup 6) (match_dup 3)))
3203 (and:V2DF (match_dup 7) (match_dup 4)))
3204 (set (match_operand:V2DF 0 "register_operand" "")
3205 (plus:V2DF (match_dup 6) (match_dup 8)))]
3208 REAL_VALUE_TYPE TWO32r;
3212 real_ldexp (&TWO32r, &dconst1, 32);
3213 x = const_double_from_real_value (TWO32r, DFmode);
3215 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3216 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3218 operands[5] = gen_reg_rtx (V4SImode);
3220 for (i = 6; i < 9; i++)
3221 operands[i] = gen_reg_rtx (V2DFmode);
;; V4SI -> V2DF expander with a fix-up step, working on elements 0 and 1 of
;; operand 1.  A converted value lands in operand 5; the sequence then
;; computes lt (op5, op3), ANDs that result (op6) with op4, and adds the
;; product (op7) to op5 to form operand 0.  Preparation code sets:
;;   operands[3]    = V2DF zero vector, forced into a register
;;   operands[4]    = V2DF vector built from 1.0 scaled by 2**32 (real_ldexp)
;;   operands[5..7] = V2DF temporaries
3224 (define_expand "vec_unpacku_float_lo_v4si"
3228 (match_operand:V4SI 1 "nonimmediate_operand" "")
3229 (parallel [(const_int 0) (const_int 1)]))))
3231 (lt:V2DF (match_dup 5) (match_dup 3)))
3233 (and:V2DF (match_dup 6) (match_dup 4)))
3234 (set (match_operand:V2DF 0 "register_operand" "")
3235 (plus:V2DF (match_dup 5) (match_dup 7)))]
3238 REAL_VALUE_TYPE TWO32r;
3242 real_ldexp (&TWO32r, &dconst1, 32);
3243 x = const_double_from_real_value (TWO32r, DFmode);
3245 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3246 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3248 for (i = 5; i < 8; i++)
3249 operands[i] = gen_reg_rtx (V2DFmode);
;; Packs two V2DF inputs into one V4SF result: each input is converted with
;; sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the temporaries
;; are then combined into operand 0 with sse_movlhps.
3252 (define_expand "vec_pack_trunc_v2df"
3253 [(match_operand:V4SF 0 "register_operand" "")
3254 (match_operand:V2DF 1 "nonimmediate_operand" "")
3255 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3260 r1 = gen_reg_rtx (V4SFmode);
3261 r2 = gen_reg_rtx (V4SFmode);
3263 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3264 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3265 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI result: each input is converted with
;; sse2_cvttpd2dq into a V4SI temporary (r1, r2); the temporaries and the
;; destination are then viewed as V2DI (gen_lowpart) and combined with
;; vec_interleave_lowv2di.
3269 (define_expand "vec_pack_sfix_trunc_v2df"
3270 [(match_operand:V4SI 0 "register_operand" "")
3271 (match_operand:V2DF 1 "nonimmediate_operand" "")
3272 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3277 r1 = gen_reg_rtx (V4SImode);
3278 r2 = gen_reg_rtx (V4SImode);
3280 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3281 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3282 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3283 gen_lowpart (V2DImode, r1),
3284 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF inputs into one V4SI result: each input is converted with
;; sse2_cvtpd2dq into a V4SI temporary (r1, r2); the temporaries and the
;; destination are then viewed as V2DI (gen_lowpart) and combined with
;; vec_interleave_lowv2di.
3288 (define_expand "vec_pack_sfix_v2df"
3289 [(match_operand:V4SI 0 "register_operand" "")
3290 (match_operand:V2DF 1 "nonimmediate_operand" "")
3291 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3296 r1 = gen_reg_rtx (V4SImode);
3297 r2 = gen_reg_rtx (V4SImode);
3299 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3300 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3301 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3302 gen_lowpart (V2DImode, r1),
3303 gen_lowpart (V2DImode, r2)));
3307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3309 ;; Parallel single-precision floating point element swizzling
3311 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for the movhlps patterns.  All three operands accept
;; register or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands before the insn is generated.
3313 (define_expand "sse_movhlps_exp"
3314 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3317 (match_operand:V4SF 1 "nonimmediate_operand" "")
3318 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3319 (parallel [(const_int 6)
3324 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps family.  The condition rejects operands 1 and 2 both being
;; memory.  Alternatives:
;;   0: register sources          -> vmovhlps
;;   1: offsettable-memory op 2   -> vmovlps using the %H2 operand form
;;   2: memory destination (op 1 tied to it) -> vmovhps store
3326 (define_insn "*avx_movhlps"
3327 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3330 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3331 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3332 (parallel [(const_int 6)
3336 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3338 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3339 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3340 vmovhps\t{%2, %0|%0, %2}"
3341 [(set_attr "type" "ssemov")
3342 (set_attr "prefix" "vex")
3343 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Two-operand movhlps family; operand 1 is tied to the destination in every
;; alternative.  The condition rejects operands 1 and 2 both being memory.
;;   0: register op 2             -> movhlps
;;   1: offsettable-memory op 2   -> movlps using the %H2 operand form
;;   2: memory destination        -> movhps store
3345 (define_insn "sse_movhlps"
3346 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3349 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3350 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3351 (parallel [(const_int 6)
3355 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3357 movhlps\t{%2, %0|%0, %2}
3358 movlps\t{%H2, %0|%0, %H2}
3359 movhps\t{%2, %0|%0, %2}"
3360 [(set_attr "type" "ssemov")
3361 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for the movlhps patterns.  All three operands accept
;; register or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands before the insn is generated.
3363 (define_expand "sse_movlhps_exp"
3364 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3367 (match_operand:V4SF 1 "nonimmediate_operand" "")
3368 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3369 (parallel [(const_int 0)
3374 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps family, gated on ix86_binary_operator_ok.  Alternatives:
;;   0: register sources          -> vmovlhps
;;   1: memory op 2               -> vmovhps load
;;   2: offsettable-memory destination (op 1 tied to it) -> vmovlps store
;;      through the %H0 operand form
3376 (define_insn "*avx_movlhps"
3377 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3380 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3381 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3382 (parallel [(const_int 0)
3386 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3388 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3389 vmovhps\t{%2, %1, %0|%0, %1, %2}
3390 vmovlps\t{%2, %H0|%H0, %2}"
3391 [(set_attr "type" "ssemov")
3392 (set_attr "prefix" "vex")
3393 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Two-operand movlhps family, gated on ix86_binary_operator_ok; operand 1
;; is tied to the destination in every alternative.
;;   0: register op 2             -> movlhps
;;   1: memory op 2               -> movhps load
;;   2: offsettable-memory destination -> movlps store through %H0
3395 (define_insn "sse_movlhps"
3396 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3399 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3400 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3401 (parallel [(const_int 0)
3405 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3407 movlhps\t{%2, %0|%0, %2}
3408 movhps\t{%2, %0|%0, %2}
3409 movlps\t{%2, %H0|%H0, %2}"
3410 [(set_attr "type" "ssemov")
3411 (set_attr "mode" "V4SF,V2SF,V2SF")])
3413 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF.  With operand 1 numbered 0-7 and operand 2 numbered
;; 8-15, the selection list is 2,10,3,11 followed by 6,14,7,15.
3414 (define_insn "avx_unpckhps256"
3415 [(set (match_operand:V8SF 0 "register_operand" "=x")
3418 (match_operand:V8SF 1 "register_operand" "x")
3419 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3420 (parallel [(const_int 2) (const_int 10)
3421 (const_int 3) (const_int 11)
3422 (const_int 6) (const_int 14)
3423 (const_int 7) (const_int 15)])))]
3425 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3426 [(set_attr "type" "sselog")
3427 (set_attr "prefix" "vex")
3428 (set_attr "mode" "V8SF")])
;; Three-operand vunpckhps on V4SF: selection list 2,6,3,7 over operand 1
;; (indices 0-3) and operand 2 (indices 4-7).
3430 (define_insn "*avx_interleave_highv4sf"
3431 [(set (match_operand:V4SF 0 "register_operand" "=x")
3434 (match_operand:V4SF 1 "register_operand" "x")
3435 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3436 (parallel [(const_int 2) (const_int 6)
3437 (const_int 3) (const_int 7)])))]
3439 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3440 [(set_attr "type" "sselog")
3441 (set_attr "prefix" "vex")
3442 (set_attr "mode" "V4SF")])
;; Two-operand unpckhps on V4SF: selection list 2,6,3,7 over operand 1
;; (tied to the destination, indices 0-3) and operand 2 (indices 4-7).
3444 (define_insn "vec_interleave_highv4sf"
3445 [(set (match_operand:V4SF 0 "register_operand" "=x")
3448 (match_operand:V4SF 1 "register_operand" "0")
3449 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3450 (parallel [(const_int 2) (const_int 6)
3451 (const_int 3) (const_int 7)])))]
3453 "unpckhps\t{%2, %0|%0, %2}"
3454 [(set_attr "type" "sselog")
3455 (set_attr "mode" "V4SF")])
3457 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF.  With operand 1 numbered 0-7 and operand 2 numbered
;; 8-15, the selection list is 0,8,1,9 followed by 4,12,5,13.
3458 (define_insn "avx_unpcklps256"
3459 [(set (match_operand:V8SF 0 "register_operand" "=x")
3462 (match_operand:V8SF 1 "register_operand" "x")
3463 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3464 (parallel [(const_int 0) (const_int 8)
3465 (const_int 1) (const_int 9)
3466 (const_int 4) (const_int 12)
3467 (const_int 5) (const_int 13)])))]
3469 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3470 [(set_attr "type" "sselog")
3471 (set_attr "prefix" "vex")
3472 (set_attr "mode" "V8SF")])
;; Three-operand vunpcklps on V4SF: selection list 0,4,1,5 over operand 1
;; (indices 0-3) and operand 2 (indices 4-7).
3474 (define_insn "*avx_interleave_lowv4sf"
3475 [(set (match_operand:V4SF 0 "register_operand" "=x")
3478 (match_operand:V4SF 1 "register_operand" "x")
3479 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3480 (parallel [(const_int 0) (const_int 4)
3481 (const_int 1) (const_int 5)])))]
3483 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3484 [(set_attr "type" "sselog")
3485 (set_attr "prefix" "vex")
3486 (set_attr "mode" "V4SF")])
;; Two-operand unpcklps on V4SF: selection list 0,4,1,5 over operand 1
;; (tied to the destination, indices 0-3) and operand 2 (indices 4-7).
3488 (define_insn "vec_interleave_lowv4sf"
3489 [(set (match_operand:V4SF 0 "register_operand" "=x")
3492 (match_operand:V4SF 1 "register_operand" "0")
3493 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3494 (parallel [(const_int 0) (const_int 4)
3495 (const_int 1) (const_int 5)])))]
3497 "unpcklps\t{%2, %0|%0, %2}"
3498 [(set_attr "type" "sselog")
3499 (set_attr "mode" "V4SF")])
3501 ;; These are modeled with the same vec_concat as the others so that we
3502 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: the selection list repeats each odd-numbered element
;; (1,1,3,3,5,5,7,7).  Operand 1 may be a register or memory.
3503 (define_insn "avx_movshdup256"
3504 [(set (match_operand:V8SF 0 "register_operand" "=x")
3507 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3509 (parallel [(const_int 1) (const_int 1)
3510 (const_int 3) (const_int 3)
3511 (const_int 5) (const_int 5)
3512 (const_int 7) (const_int 7)])))]
3514 "vmovshdup\t{%1, %0|%0, %1}"
3515 [(set_attr "type" "sse")
3516 (set_attr "prefix" "vex")
3517 (set_attr "mode" "V8SF")])
;; movshdup on V4SF (selection list begins at element 1).  The template uses
;; the %v mnemonic prefix with prefix "maybe_vex"; prefix_rep is set to 1.
3519 (define_insn "sse3_movshdup"
3520 [(set (match_operand:V4SF 0 "register_operand" "=x")
3523 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3525 (parallel [(const_int 1)
3530 "%vmovshdup\t{%1, %0|%0, %1}"
3531 [(set_attr "type" "sse")
3532 (set_attr "prefix_rep" "1")
3533 (set_attr "prefix" "maybe_vex")
3534 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: the selection list repeats each even-numbered element
;; (0,0,2,2,4,4,6,6).  Operand 1 may be a register or memory.
3536 (define_insn "avx_movsldup256"
3537 [(set (match_operand:V8SF 0 "register_operand" "=x")
3540 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3542 (parallel [(const_int 0) (const_int 0)
3543 (const_int 2) (const_int 2)
3544 (const_int 4) (const_int 4)
3545 (const_int 6) (const_int 6)])))]
3547 "vmovsldup\t{%1, %0|%0, %1}"
3548 [(set_attr "type" "sse")
3549 (set_attr "prefix" "vex")
3550 (set_attr "mode" "V8SF")])
;; movsldup on V4SF (selection list begins at element 0).  The template uses
;; the %v mnemonic prefix with prefix "maybe_vex"; prefix_rep is set to 1.
3552 (define_insn "sse3_movsldup"
3553 [(set (match_operand:V4SF 0 "register_operand" "=x")
3556 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3558 (parallel [(const_int 0)
3563 "%vmovsldup\t{%1, %0|%0, %1}"
3564 [(set_attr "type" "sse")
3565 (set_attr "prefix_rep" "1")
3566 (set_attr "prefix" "maybe_vex")
3567 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as SI operand 3.  The immediate is
;; split into four 2-bit fields; each field is passed to avx_shufps256_1
;; twice, once with the low-lane offset (+0 for fields 0-1, +8 for fields
;; 2-3) and once with the high-lane offset (+4 and +12 respectively).
3569 (define_expand "avx_shufps256"
3570 [(match_operand:V8SF 0 "register_operand" "")
3571 (match_operand:V8SF 1 "register_operand" "")
3572 (match_operand:V8SF 2 "nonimmediate_operand" "")
3573 (match_operand:SI 3 "const_int_operand" "")]
3576 int mask = INTVAL (operands[3]);
3577 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3578 GEN_INT ((mask >> 0) & 3),
3579 GEN_INT ((mask >> 2) & 3),
3580 GEN_INT (((mask >> 4) & 3) + 8),
3581 GEN_INT (((mask >> 6) & 3) + 8),
3582 GEN_INT (((mask >> 0) & 3) + 4),
3583 GEN_INT (((mask >> 2) & 3) + 4),
3584 GEN_INT (((mask >> 4) & 3) + 12),
3585 GEN_INT (((mask >> 6) & 3) + 12)));
3589 ;; Each 2-bit field in the mask selects 2 elements, one per 128-bit lane.
;; vshufps on V8SF with the selection spelled out as eight element indices
;; (operands 3-10).  The condition requires the high-lane indices
;; (operands 7-10) to equal the low-lane indices (operands 3-6) plus 4.
;; The output code rebuilds the 8-bit immediate from operands 3-6 (removing
;; the +8 bias from operands 5 and 6) and stores it back into operands[3]
;; for the template.
3590 (define_insn "avx_shufps256_1"
3591 [(set (match_operand:V8SF 0 "register_operand" "=x")
3594 (match_operand:V8SF 1 "register_operand" "x")
3595 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3596 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3597 (match_operand 4 "const_0_to_3_operand" "")
3598 (match_operand 5 "const_8_to_11_operand" "")
3599 (match_operand 6 "const_8_to_11_operand" "")
3600 (match_operand 7 "const_4_to_7_operand" "")
3601 (match_operand 8 "const_4_to_7_operand" "")
3602 (match_operand 9 "const_12_to_15_operand" "")
3603 (match_operand 10 "const_12_to_15_operand" "")])))]
3605 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3606 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3607 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3608 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3611 mask = INTVAL (operands[3]);
3612 mask |= INTVAL (operands[4]) << 2;
3613 mask |= (INTVAL (operands[5]) - 8) << 4;
3614 mask |= (INTVAL (operands[6]) - 8) << 6;
3615 operands[3] = GEN_INT (mask);
3617 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3619 [(set_attr "type" "sselog")
3620 (set_attr "length_immediate" "1")
3621 (set_attr "prefix" "vex")
3622 (set_attr "mode" "V8SF")])
;; Expander taking the shufps immediate as SI operand 3.  The immediate is
;; split into four 2-bit fields passed to sse_shufps_v4sf as element
;; indices; the two upper fields are biased by 4.
3624 (define_expand "sse_shufps"
3625 [(match_operand:V4SF 0 "register_operand" "")
3626 (match_operand:V4SF 1 "register_operand" "")
3627 (match_operand:V4SF 2 "nonimmediate_operand" "")
3628 (match_operand:SI 3 "const_int_operand" "")]
3631 int mask = INTVAL (operands[3]);
3632 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3633 GEN_INT ((mask >> 0) & 3),
3634 GEN_INT ((mask >> 2) & 3),
3635 GEN_INT (((mask >> 4) & 3) + 4),
3636 GEN_INT (((mask >> 6) & 3) + 4)));
;; Three-operand vshufps over the SSEMODE4S modes.  Operands 3-4 index the
;; first input (0-3) and operands 5-6 the second (4-7) within the vec_concat.
;; The output code folds the four indices into the 8-bit immediate (removing
;; the +4 bias from operands 5 and 6) and stores it back into operands[3].
3640 (define_insn "*avx_shufps_<mode>"
3641 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3642 (vec_select:SSEMODE4S
3643 (vec_concat:<ssedoublesizemode>
3644 (match_operand:SSEMODE4S 1 "register_operand" "x")
3645 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3646 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3647 (match_operand 4 "const_0_to_3_operand" "")
3648 (match_operand 5 "const_4_to_7_operand" "")
3649 (match_operand 6 "const_4_to_7_operand" "")])))]
3653 mask |= INTVAL (operands[3]) << 0;
3654 mask |= INTVAL (operands[4]) << 2;
3655 mask |= (INTVAL (operands[5]) - 4) << 4;
3656 mask |= (INTVAL (operands[6]) - 4) << 6;
3657 operands[3] = GEN_INT (mask);
3659 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3661 [(set_attr "type" "sselog")
3662 (set_attr "length_immediate" "1")
3663 (set_attr "prefix" "vex")
3664 (set_attr "mode" "V4SF")])
;; Two-operand shufps over the SSEMODE4S modes; operand 1 is tied to the
;; destination.  Operands 3-4 index the first input (0-3) and operands 5-6
;; the second (4-7) within the vec_concat.  The output code folds the four
;; indices into the 8-bit immediate (removing the +4 bias from operands 5
;; and 6) and stores it back into operands[3].
3666 (define_insn "sse_shufps_<mode>"
3667 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3668 (vec_select:SSEMODE4S
3669 (vec_concat:<ssedoublesizemode>
3670 (match_operand:SSEMODE4S 1 "register_operand" "0")
3671 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3672 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3673 (match_operand 4 "const_0_to_3_operand" "")
3674 (match_operand 5 "const_4_to_7_operand" "")
3675 (match_operand 6 "const_4_to_7_operand" "")])))]
3679 mask |= INTVAL (operands[3]) << 0;
3680 mask |= INTVAL (operands[4]) << 2;
3681 mask |= (INTVAL (operands[5]) - 4) << 4;
3682 mask |= (INTVAL (operands[6]) - 4) << 6;
3683 operands[3] = GEN_INT (mask);
3685 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3687 [(set_attr "type" "sselog")
3688 (set_attr "length_immediate" "1")
3689 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;;   0: memory destination        -> movhps store
;;   1: register to register      -> movhlps
;;   2: offsettable-memory source -> movlps load using the %H1 operand form
;; All templates use the %v mnemonic prefix (prefix "maybe_vex").
3691 (define_insn "sse_storehps"
3692 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3694 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3695 (parallel [(const_int 2) (const_int 3)])))]
3698 %vmovhps\t{%1, %0|%0, %1}
3699 %vmovhlps\t{%1, %d0|%d0, %1}
3700 %vmovlps\t{%H1, %d0|%d0, %H1}"
3701 [(set_attr "type" "ssemov")
3702 (set_attr "prefix" "maybe_vex")
3703 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadhps patterns: elements 0 and 1 of V4SF
;; operand 1 are paired with V2SF operand 2.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands.
3705 (define_expand "sse_loadhps_exp"
3706 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3709 (match_operand:V4SF 1 "nonimmediate_operand" "")
3710 (parallel [(const_int 0) (const_int 1)]))
3711 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3713 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form: elements 0 and 1 of operand 1 paired with V2SF operand 2.
;;   0: memory op 2               -> vmovhps load
;;   1: register op 2             -> vmovlhps
;;   2: offsettable-memory destination (op 1 tied to it) -> vmovlps store
;;      through the %H0 operand form
3715 (define_insn "*avx_loadhps"
3716 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3719 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3720 (parallel [(const_int 0) (const_int 1)]))
3721 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3724 vmovhps\t{%2, %1, %0|%0, %1, %2}
3725 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3726 vmovlps\t{%2, %H0|%H0, %2}"
3727 [(set_attr "type" "ssemov")
3728 (set_attr "prefix" "vex")
3729 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Two-operand form: elements 0 and 1 of operand 1 (tied to the destination
;; in every alternative) paired with V2SF operand 2.
;;   0: memory op 2               -> movhps load
;;   1: register op 2             -> movlhps
;;   2: offsettable-memory destination -> movlps store through %H0
3731 (define_insn "sse_loadhps"
3732 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3735 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3736 (parallel [(const_int 0) (const_int 1)]))
3737 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3740 movhps\t{%2, %0|%0, %2}
3741 movlhps\t{%2, %0|%0, %2}
3742 movlps\t{%2, %H0|%H0, %2}"
3743 [(set_attr "type" "ssemov")
3744 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX form: extracts elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;;   0: memory destination -> vmovlps store
;;   1: register to register -> vmovaps (whole-register copy)
;;   2: memory source -> three-operand vmovlps with %0 as the merge input
;; NOTE(review): the vmovaps alternative is tagged mode V2DF here, whereas
;; sse_storelps tags its movaps alternative V4SF -- confirm which is meant.
3746 (define_insn "*avx_storelps"
3747 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3749 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3750 (parallel [(const_int 0) (const_int 1)])))]
3753 vmovlps\t{%1, %0|%0, %1}
3754 vmovaps\t{%1, %0|%0, %1}
3755 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3756 [(set_attr "type" "ssemov")
3757 (set_attr "prefix" "vex")
3758 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;;   0: memory destination -> movlps store
;;   1: register to register -> movaps (whole-register copy)
;;   2: memory source -> movlps load
3760 (define_insn "sse_storelps"
3761 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3763 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3764 (parallel [(const_int 0) (const_int 1)])))]
3767 movlps\t{%1, %0|%0, %1}
3768 movaps\t{%1, %0|%0, %1}
3769 movlps\t{%1, %0|%0, %1}"
3770 [(set_attr "type" "ssemov")
3771 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadlps patterns: V2SF operand 2 is paired
;; with elements 2 and 3 of V4SF operand 1.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands.
3773 (define_expand "sse_loadlps_exp"
3774 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3776 (match_operand:V2SF 2 "nonimmediate_operand" "")
3778 (match_operand:V4SF 1 "nonimmediate_operand" "")
3779 (parallel [(const_int 2) (const_int 3)]))))]
3781 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form: V2SF operand 2 paired with elements 2 and 3 of V4SF operand 1.
;;   0: register op 2 -> vshufps with immediate 0xe4
;;   1: memory op 2   -> three-operand vmovlps load
;;   2: memory destination (op 1 tied to it) -> vmovlps store
;; Alternative 0 passes three register operands and the pattern is tagged
;; prefix "vex", so it must use the VEX mnemonic vshufps; the legacy shufps
;; mnemonic only accepts the two-operand form.
3783 (define_insn "*avx_loadlps"
3784 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3786 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3788 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3789 (parallel [(const_int 2) (const_int 3)]))))]
3792 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3793 vmovlps\t{%2, %1, %0|%0, %1, %2}
3794 vmovlps\t{%2, %0|%0, %2}"
3795 [(set_attr "type" "sselog,ssemov,ssemov")
3796 (set_attr "length_immediate" "1,*,*")
3797 (set_attr "prefix" "vex")
3798 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Two-operand form: V2SF operand 2 paired with elements 2 and 3 of V4SF
;; operand 1.
;;   0: op 2 tied to the destination, op 1 in a register -> shufps $0xe4
;;   1: memory op 2, op 1 tied to the destination        -> movlps load
;;   2: memory destination, op 1 tied to it              -> movlps store
3800 (define_insn "sse_loadlps"
3801 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3803 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3805 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3806 (parallel [(const_int 2) (const_int 3)]))))]
3809 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3810 movlps\t{%2, %0|%0, %2}
3811 movlps\t{%2, %0|%0, %2}"
3812 [(set_attr "type" "sselog,ssemov,ssemov")
3813 (set_attr "length_immediate" "1,*,*")
3814 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three-operand vmovss between V4SF registers: operands 1 and 2 are
;; independent register inputs, emitted as vmovss %2, %1, %0.
3816 (define_insn "*avx_movss"
3817 [(set (match_operand:V4SF 0 "register_operand" "=x")
3819 (match_operand:V4SF 2 "register_operand" "x")
3820 (match_operand:V4SF 1 "register_operand" "x")
3823 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3824 [(set_attr "type" "ssemov")
3825 (set_attr "prefix" "vex")
3826 (set_attr "mode" "SF")])
;; Two-operand movss between V4SF registers: operand 1 is tied to the
;; destination and operand 2 is the register source.
3828 (define_insn "sse_movss"
3829 [(set (match_operand:V4SF 0 "register_operand" "=x")
3831 (match_operand:V4SF 2 "register_operand" "x")
3832 (match_operand:V4SF 1 "register_operand" "0")
3835 "movss\t{%2, %0|%0, %2}"
3836 [(set_attr "type" "ssemov")
3837 (set_attr "mode" "SF")])
;; Expander for vec_dupv4sf: SF operand 1 (register or memory) feeds V4SF
;; operand 0.  The preparation code forces operand 1 into a register.
;; Operand 1 is declared SF, so the register is requested in SFmode;
;; requesting V4SFmode would move an SF value into a V4SF pseudo.
3839 (define_expand "vec_dupv4sf"
3840 [(set (match_operand:V4SF 0 "register_operand" "")
3842 (match_operand:SF 1 "nonimmediate_operand" "")))]
3846 operands[1] = force_reg (SFmode, operands[1]);
;; AVX duplicate of SF operand 1 into a V4SF register.
;;   0: register source -> vshufps $0 with %1 as both inputs
;;   1: memory source   -> vbroadcastss
3849 (define_insn "*vec_dupv4sf_avx"
3850 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3852 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3855 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3856 vbroadcastss\t{%1, %0|%0, %1}"
3857 [(set_attr "type" "sselog1,ssemov")
3858 (set_attr "length_immediate" "1,0")
3859 (set_attr "prefix_extra" "0,1")
3860 (set_attr "prefix" "vex")
3861 (set_attr "mode" "V4SF")])
;; Duplicate of SF operand 1, which must be a register tied to the
;; destination; emitted as shufps $0 of %0 with itself.
3863 (define_insn "*vec_dupv4sf"
3864 [(set (match_operand:V4SF 0 "register_operand" "=x")
3866 (match_operand:SF 1 "register_operand" "0")))]
3868 "shufps\t{$0, %0, %0|%0, %0, 0}"
3869 [(set_attr "type" "sselog1")
3870 (set_attr "length_immediate" "1")
3871 (set_attr "mode" "V4SF")])
;; AVX concatenation of two SF values into a V2SF register.
;;   0: SSE reg + SSE reg  -> vunpcklps
;;   1: SSE reg + memory   -> vinsertps $0x10
;;   2: memory + zero (C)  -> vmovss load
;;   3: MMX reg + MMX/mem  -> punpckldq (legacy two-operand encoding)
;;   4: memory + zero (C)  -> movd into an MMX register
;; Alternatives 3 and 4 keep prefix "orig"; the others are VEX encoded.
;; In alternative 3 the two-operand punpckldq template reads its first
;; input from %0, so operand 1 has to be tied to the destination ("0"),
;; as in the sibling *vec_concatv2sf_sse4_1 and *vec_concatv2sf_sse
;; patterns.  An "x" constraint there would leave operand 1 in an SSE
;; register that the instruction never reads.
3873 (define_insn "*vec_concatv2sf_avx"
3874 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3876 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, 0 , m")
3877 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3880 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3881 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3882 vmovss\t{%1, %0|%0, %1}
3883 punpckldq\t{%2, %0|%0, %2}
3884 movd\t{%1, %0|%0, %1}"
3885 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3886 (set_attr "length_immediate" "*,1,*,*,*")
3887 (set_attr "prefix_extra" "*,1,*,*,*")
3888 (set (attr "prefix")
3889 (if_then_else (eq_attr "alternative" "3,4")
3890 (const_string "orig")
3891 (const_string "vex")))
3892 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3894 ;; Although insertps takes register source, we prefer
3895 ;; unpcklps with register source since it is shorter.
;; Concatenation of two SF values into a V2SF register, two-operand forms.
;;   0: op 1 tied, SSE reg op 2   -> unpcklps
;;   1: op 1 tied, memory op 2    -> insertps $0x10
;;   2: memory op 1, zero op 2    -> movss load
;;   3: MMX: op 1 tied, MMX/mem   -> punpckldq
;;   4: MMX: memory op 1, zero    -> movd
3896 (define_insn "*vec_concatv2sf_sse4_1"
3897 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3899 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3900 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3903 unpcklps\t{%2, %0|%0, %2}
3904 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3905 movss\t{%1, %0|%0, %1}
3906 punpckldq\t{%2, %0|%0, %2}
3907 movd\t{%1, %0|%0, %1}"
3908 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3909 (set_attr "prefix_data16" "*,1,*,*,*")
3910 (set_attr "prefix_extra" "*,1,*,*,*")
3911 (set_attr "length_immediate" "*,1,*,*,*")
3912 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3914 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3915 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3916 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Concatenation of two SF values into a V2SF register; operand 2 is
;; restricted to a register or zero (reg_or_0_operand).
;;   0: op 1 tied, SSE reg op 2   -> unpcklps
;;   1: memory op 1, zero op 2    -> movss load
;;   2: MMX: op 1 tied, MMX reg   -> punpckldq
;;   3: MMX: memory op 1, zero    -> movd
3917 (define_insn "*vec_concatv2sf_sse"
3918 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3920 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3921 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3924 unpcklps\t{%2, %0|%0, %2}
3925 movss\t{%1, %0|%0, %1}
3926 punpckldq\t{%2, %0|%0, %2}
3927 movd\t{%1, %0|%0, %1}"
3928 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3929 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX concatenation of two V2SF values into a V4SF register.
;;   0: register op 2 -> vmovlhps
;;   1: memory op 2   -> vmovhps load
3931 (define_insn "*vec_concatv4sf_avx"
3932 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3934 (match_operand:V2SF 1 "register_operand" " x,x")
3935 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3938 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3939 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3940 [(set_attr "type" "ssemov")
3941 (set_attr "prefix" "vex")
3942 (set_attr "mode" "V4SF,V2SF")])
;; Concatenation of two V2SF values into a V4SF register; operand 1 is tied
;; to the destination.
;;   0: register op 2 -> movlhps
;;   1: memory op 2   -> movhps load
3944 (define_insn "*vec_concatv4sf_sse"
3945 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3947 (match_operand:V2SF 1 "register_operand" " 0,0")
3948 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3951 movlhps\t{%2, %0|%0, %2}
3952 movhps\t{%2, %0|%0, %2}"
3953 [(set_attr "type" "ssemov")
3954 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialisation for every SSEMODE mode; delegates to
;; ix86_expand_vector_init with a false first argument, the destination
;; (operand 0) and the initialiser (operand 1).
3956 (define_expand "vec_init<mode>"
3957 [(match_operand:SSEMODE 0 "register_operand" "")
3958 (match_operand 1 "" "")]
3961 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX vec_merge of a duplicated scalar (operand 2) with operand 1, over the
;; SSEMODE4S modes.  Six alternatives:
;;   0: SSE reg scalar, zero op 1     -> vinsertps $0xe
;;   1: memory scalar, zero op 1      -> vmov<ssescalarmodesuffix> load
;;   2: integer reg scalar, zero op 1 -> vmovd
;;   3: SSE reg scalar, reg op 1      -> vmovss
;;   4: integer reg/mem scalar, reg op 1 -> vpinsrd $0
;;   5: memory destination (op 1 tied); type and mode are "*"
3965 (define_insn "*vec_set<mode>_0_avx"
3966 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3967 (vec_merge:SSEMODE4S
3968 (vec_duplicate:SSEMODE4S
3969 (match_operand:<ssescalarmode> 2
3970 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3971 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3975 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3976 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3977 vmovd\t{%2, %0|%0, %2}
3978 vmovss\t{%2, %1, %0|%0, %1, %2}
3979 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3981 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3982 (set_attr "prefix_extra" "*,*,*,*,1,*")
3983 (set_attr "length_immediate" "*,*,*,*,1,*")
3984 (set_attr "prefix" "vex")
3985 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Two-operand vec_merge of a duplicated scalar (operand 2) with operand 1,
;; over the SSEMODE4S modes.  Six alternatives:
;;   0: SSE reg scalar, zero op 1     -> insertps $0xe
;;   1: memory scalar, zero op 1      -> mov<ssescalarmodesuffix> load
;;   2: integer reg scalar, zero op 1 -> movd
;;   3: SSE reg scalar, op 1 tied     -> movss
;;   4: integer reg/mem scalar, op 1 tied -> pinsrd $0
;;   5: memory destination (op 1 tied); type and mode are "*"
;; NOTE(review): the last alternative's scalar constraint is "*rfF" here
;; but "x*rfF" in the _avx, _sse2 and plain variants -- confirm whether
;; leaving out "x" is intentional.
3987 (define_insn "*vec_set<mode>_0_sse4_1"
3988 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3989 (vec_merge:SSEMODE4S
3990 (vec_duplicate:SSEMODE4S
3991 (match_operand:<ssescalarmode> 2
3992 "general_operand" " x,m,*r,x,*rm,*rfF"))
3993 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
3997 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
3998 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3999 movd\t{%2, %0|%0, %2}
4000 movss\t{%2, %0|%0, %2}
4001 pinsrd\t{$0, %2, %0|%0, %2, 0}
4003 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4004 (set_attr "prefix_extra" "*,*,*,*,1,*")
4005 (set_attr "length_immediate" "*,*,*,*,1,*")
4006 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; vec_merge of a duplicated scalar (operand 2) with operand 1, over the
;; SSEMODE4S modes.  Four alternatives:
;;   0: memory scalar, zero op 1      -> mov<ssescalarmodesuffix> load
;;   1: integer reg scalar, zero op 1 -> movd
;;   2: SSE reg scalar, op 1 tied     -> movss
;;   3: memory destination (op 1 tied); mode is "*"
4008 (define_insn "*vec_set<mode>_0_sse2"
4009 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
4010 (vec_merge:SSEMODE4S
4011 (vec_duplicate:SSEMODE4S
4012 (match_operand:<ssescalarmode> 2
4013 "general_operand" " m,*r,x,x*rfF"))
4014 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4018 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4019 movd\t{%2, %0|%0, %2}
4020 movss\t{%2, %0|%0, %2}
4022 [(set_attr "type" "ssemov")
4023 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; vec_merge of a duplicated scalar (operand 2) with operand 1, over the
;; SSEMODE4S modes.  Three alternatives:
;;   0: memory scalar, zero op 1  -> movss load
;;   1: SSE reg scalar, op 1 tied -> movss
;;   2: memory destination (op 1 tied); mode is "*"
4025 (define_insn "vec_set<mode>_0"
4026 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4027 (vec_merge:SSEMODE4S
4028 (vec_duplicate:SSEMODE4S
4029 (match_operand:<ssescalarmode> 2
4030 "general_operand" " m,x,x*rfF"))
4031 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4035 movss\t{%2, %0|%0, %2}
4036 movss\t{%2, %0|%0, %2}
4038 [(set_attr "type" "ssemov")
4039 (set_attr "mode" "SF,SF,*")])
4041 ;; A subset is vec_setv4sf.
;; AVX element insert: SF operand 2 (register or memory) into V4SF operand 1
;; at the position given by operand 3, a power of two from 1 to 8.  The
;; output code converts that value to its log2 shifted left by 4 and emits
;; it as the vinsertps immediate.
4042 (define_insn "*vec_setv4sf_avx"
4043 [(set (match_operand:V4SF 0 "register_operand" "=x")
4046 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4047 (match_operand:V4SF 1 "register_operand" "x")
4048 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4051 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4052 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4054 [(set_attr "type" "sselog")
4055 (set_attr "prefix_extra" "1")
4056 (set_attr "length_immediate" "1")
4057 (set_attr "prefix" "vex")
4058 (set_attr "mode" "V4SF")])
;; Element insert: SF operand 2 (register or memory) into V4SF operand 1,
;; which is tied to the destination, at the position given by operand 3, a
;; power of two from 1 to 8.  The output code converts that value to its
;; log2 shifted left by 4 and emits it as the insertps immediate.
4060 (define_insn "*vec_setv4sf_sse4_1"
4061 [(set (match_operand:V4SF 0 "register_operand" "=x")
4064 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4065 (match_operand:V4SF 1 "register_operand" "0")
4066 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4069 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4070 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4072 [(set_attr "type" "sselog")
4073 (set_attr "prefix_data16" "1")
4074 (set_attr "prefix_extra" "1")
4075 (set_attr "length_immediate" "1")
4076 (set_attr "mode" "V4SF")])
;; vinsertps modelled as an unspec: operand 2 (register or memory) and
;; register operand 1 are combined under the 0-255 immediate in operand 3,
;; which is emitted unchanged.
4078 (define_insn "*avx_insertps"
4079 [(set (match_operand:V4SF 0 "register_operand" "=x")
4080 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4081 (match_operand:V4SF 1 "register_operand" "x")
4082 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4085 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4086 [(set_attr "type" "sselog")
4087 (set_attr "prefix" "vex")
4088 (set_attr "prefix_extra" "1")
4089 (set_attr "length_immediate" "1")
4090 (set_attr "mode" "V4SF")])
;; insertps modelled as an unspec: register operand 2 and operand 1 (tied
;; to the destination) are combined under the 0-255 immediate in operand 3,
;; which is emitted unchanged.  Unlike *avx_insertps, operand 2 is
;; register-only here.
4092 (define_insn "sse4_1_insertps"
4093 [(set (match_operand:V4SF 0 "register_operand" "=x")
4094 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4095 (match_operand:V4SF 1 "register_operand" "0")
4096 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4099 "insertps\t{%3, %2, %0|%0, %2, %3}";
4100 [(set_attr "type" "sselog")
4101 (set_attr "prefix_data16" "1")
4102 (set_attr "prefix_extra" "1")
4103 (set_attr "length_immediate" "1")
4104 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (condition: TARGET_SSE && reload_completed) of a
;; vec_merge / vec_duplicate of a non-memory scalar (operand 1) into a
;; memory destination.  The replacement is a move whose destination is
;; operand 0 re-addressed at offset 0 in the scalar mode.
4107 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
4108 (vec_merge:SSEMODE4S
4109 (vec_duplicate:SSEMODE4S
4110 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4113 "TARGET_SSE && reload_completed"
4116 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Element set for every SSEMODE mode: delegates to ix86_expand_vector_set
;; with a false first argument, the vector (operand 0), the scalar register
;; (operand 1) and the constant element index (operand 2).
4121 (define_expand "vec_set<mode>"
4122 [(match_operand:SSEMODE 0 "register_operand" "")
4123 (match_operand:<ssescalarmode> 1 "register_operand" "")
4124 (match_operand 2 "const_int_operand" "")]
4127 ix86_expand_vector_set (false, operands[0], operands[1],
4128 INTVAL (operands[2]));
;; Extraction of element 0 of V4SF operand 1 into SF operand 0 (SSE reg,
;; memory, x87 reg or integer reg).  The condition rejects memory-to-memory.
;; After reload the pattern is split into a plain SF move: the source is
;; re-expressed in SFmode, either as the same hard register number
;; (gen_rtx_REG) or through gen_lowpart.
4132 (define_insn_and_split "*vec_extractv4sf_0"
4133 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4135 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4136 (parallel [(const_int 0)])))]
4137 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4139 "&& reload_completed"
4142 rtx op1 = operands[1];
4144 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4146 op1 = gen_lowpart (SFmode, op1);
4147 emit_move_insn (operands[0], op1);
;; Expander for extracting one half of an AVX256MODE register, chosen by
;; the 0/1 constant in operand 2.  The switch dispatches to
;; vec_extract_lo_<mode> or vec_extract_hi_<mode>.
4151 (define_expand "avx_vextractf128<mode>"
4152 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4153 (match_operand:AVX256MODE 1 "register_operand" "")
4154 (match_operand:SI 2 "const_0_to_1_operand" "")]
4157 switch (INTVAL (operands[2]))
4160 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4163 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low half of an AVX256MODE4P vector: selects elements 0 and 1 into an
;; <avxhalfvecmode> destination.  After reload it is split into an ordinary
;; move of operand 1 re-expressed in <avxhalfvecmode> (gen_rtx_REG on the
;; same register number, or gen_lowpart).
;; NOTE(review): the enabling condition and the test choosing between the
;; two re-expressions are on lines not visible in this excerpt.
4171 (define_insn_and_split "vec_extract_lo_<mode>"
4172 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4173 (vec_select:<avxhalfvecmode>
4174 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
4175 (parallel [(const_int 0) (const_int 1)])))]
4178 "&& reload_completed"
4181 rtx op1 = operands[1];
4183 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4185 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4186 emit_move_insn (operands[0], op1);
;; High half of an AVX256MODE4P register: selects elements 2 and 3 and
;; writes them to an SSE register or to memory with "vextractf128 $0x1".
;; The "memory" attribute marks the second alternative as a store.
4190 (define_insn "vec_extract_hi_<mode>"
4191 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4192 (vec_select:<avxhalfvecmode>
4193 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4194 (parallel [(const_int 2) (const_int 3)])))]
4196 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4197 [(set_attr "type" "sselog")
4198 (set_attr "prefix_extra" "1")
4199 (set_attr "length_immediate" "1")
4200 (set_attr "memory" "none,store")
4201 (set_attr "prefix" "vex")
4202 (set_attr "mode" "V8SF")])
;; Low half of an AVX256MODE8P vector: selects elements 0..3 into an
;; <avxhalfvecmode> destination.  After reload it is split into an ordinary
;; move of operand 1 re-expressed in <avxhalfvecmode> (gen_rtx_REG on the
;; same register number, or gen_lowpart).
;; NOTE(review): the enabling condition and the test choosing between the
;; two re-expressions are on lines not visible in this excerpt.
4204 (define_insn_and_split "vec_extract_lo_<mode>"
4205 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4206 (vec_select:<avxhalfvecmode>
4207 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
4208 (parallel [(const_int 0) (const_int 1)
4209 (const_int 2) (const_int 3)])))]
4212 "&& reload_completed"
4215 rtx op1 = operands[1];
4217 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4219 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4220 emit_move_insn (operands[0], op1);
;; High half of an AVX256MODE8P register: selects elements 4..7 and writes
;; them to an SSE register or to memory with "vextractf128 $0x1".
4224 (define_insn "vec_extract_hi_<mode>"
4225 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4226 (vec_select:<avxhalfvecmode>
4227 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4228 (parallel [(const_int 4) (const_int 5)
4229 (const_int 6) (const_int 7)])))]
4231 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4232 [(set_attr "type" "sselog")
4233 (set_attr "prefix_extra" "1")
4234 (set_attr "length_immediate" "1")
4235 (set_attr "memory" "none,store")
4236 (set_attr "prefix" "vex")
4237 (set_attr "mode" "V8SF")])
;; Low half of a V16HI vector: selects elements 0..7 into a V8HI
;; destination.  After reload it is split into an ordinary V8HImode move of
;; operand 1 (re-expressed with gen_rtx_REG or gen_lowpart).
;; NOTE(review): the enabling condition is on a line not visible here.
4239 (define_insn_and_split "vec_extract_lo_v16hi"
4240 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4242 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4243 (parallel [(const_int 0) (const_int 1)
4244 (const_int 2) (const_int 3)
4245 (const_int 4) (const_int 5)
4246 (const_int 6) (const_int 7)])))]
4249 "&& reload_completed"
4252 rtx op1 = operands[1];
4254 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4256 op1 = gen_lowpart (V8HImode, op1);
4257 emit_move_insn (operands[0], op1);
;; High half of a V16HI register: selects elements 8..15 into a V8HI
;; register or memory destination with "vextractf128 $0x1".
4261 (define_insn "vec_extract_hi_v16hi"
4262 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4264 (match_operand:V16HI 1 "register_operand" "x,x")
4265 (parallel [(const_int 8) (const_int 9)
4266 (const_int 10) (const_int 11)
4267 (const_int 12) (const_int 13)
4268 (const_int 14) (const_int 15)])))]
4270 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4271 [(set_attr "type" "sselog")
4272 (set_attr "prefix_extra" "1")
4273 (set_attr "length_immediate" "1")
4274 (set_attr "memory" "none,store")
4275 (set_attr "prefix" "vex")
4276 (set_attr "mode" "V8SF")])
;; Low half of a V32QI vector: selects elements 0..15 into a V16QI
;; destination.  After reload it is split into an ordinary V16QImode move of
;; operand 1 (re-expressed with gen_rtx_REG or gen_lowpart).
;; NOTE(review): the enabling condition is on a line not visible here.
4278 (define_insn_and_split "vec_extract_lo_v32qi"
4279 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4281 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4282 (parallel [(const_int 0) (const_int 1)
4283 (const_int 2) (const_int 3)
4284 (const_int 4) (const_int 5)
4285 (const_int 6) (const_int 7)
4286 (const_int 8) (const_int 9)
4287 (const_int 10) (const_int 11)
4288 (const_int 12) (const_int 13)
4289 (const_int 14) (const_int 15)])))]
4292 "&& reload_completed"
4295 rtx op1 = operands[1];
4297 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4299 op1 = gen_lowpart (V16QImode, op1);
4300 emit_move_insn (operands[0], op1);
;; High half of a V32QI register: selects elements 16..31 into a V16QI
;; register or memory destination with "vextractf128 $0x1".
4304 (define_insn "vec_extract_hi_v32qi"
4305 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4307 (match_operand:V32QI 1 "register_operand" "x,x")
4308 (parallel [(const_int 16) (const_int 17)
4309 (const_int 18) (const_int 19)
4310 (const_int 20) (const_int 21)
4311 (const_int 22) (const_int 23)
4312 (const_int 24) (const_int 25)
4313 (const_int 26) (const_int 27)
4314 (const_int 28) (const_int 29)
4315 (const_int 30) (const_int 31)])))]
4317 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4318 [(set_attr "type" "sselog")
4319 (set_attr "prefix_extra" "1")
4320 (set_attr "length_immediate" "1")
4321 (set_attr "memory" "none,store")
4322 (set_attr "prefix" "vex")
4323 (set_attr "mode" "V8SF")])
;; EXTRACTPS: element number operand 2 (predicate const_0_to_3_operand) of a
;; V4SF register, written to an "rm" destination as an SF value.  The
;; template starts with "%v" and the prefix attribute is "maybe_vex", so one
;; pattern serves both the legacy and the VEX encoding.
;; NOTE(review): the enabling condition is on a line not visible here.
4325 (define_insn "*sse4_1_extractps"
4326 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4328 (match_operand:V4SF 1 "register_operand" "x")
4329 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4331 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4332 [(set_attr "type" "sselog")
4333 (set_attr "prefix_data16" "1")
4334 (set_attr "prefix_extra" "1")
4335 (set_attr "length_immediate" "1")
4336 (set_attr "prefix" "maybe_vex")
4337 (set_attr "mode" "V4SF")])
;; Element operand 2 (0..3) of a V4SF value held in offsettable memory
;; (constraint "o"), extracted into an SF register.  The split replaces the
;; insn with an SFmode load from byte offset 4 * index inside operand 1.
;; NOTE(review): the insn and split conditions are on lines not visible in
;; this excerpt.
4339 (define_insn_and_split "*vec_extract_v4sf_mem"
4340 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4342 (match_operand:V4SF 1 "memory_operand" "o")
4343 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4349 int i = INTVAL (operands[2]);
4351 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander "vec_extract<mode>" for every SSEMODE vector mode.  Operand 0 is
;; the scalar destination register, operand 1 the vector register, operand 2
;; a const_int (presumably the element index -- confirm against
;; ix86_expand_vector_extract, which does all the work).
4355 (define_expand "vec_extract<mode>"
4356 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4357 (match_operand:SSEMODE 1 "register_operand" "")
4358 (match_operand 2 "const_int_operand" "")]
4361 ix86_expand_vector_extract (false, operands[0], operands[1],
4362 INTVAL (operands[2]));
4366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4368 ;; Parallel double-precision floating point element swizzling
4370 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4372 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Hence the selection below picks indices 1, 5, 3 and 7 out of operands 1
;; and 2 taken together, and the insn is emitted as three-operand vunpckhpd.
;; NOTE(review): the rtx combining operands 1 and 2 and the enabling
;; condition are on lines not visible in this excerpt.
4373 (define_insn "avx_unpckhpd256"
4374 [(set (match_operand:V4DF 0 "register_operand" "=x")
4377 (match_operand:V4DF 1 "register_operand" "x")
4378 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4379 (parallel [(const_int 1) (const_int 5)
4380 (const_int 3) (const_int 7)])))]
4382 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4383 [(set_attr "type" "sselog")
4384 (set_attr "prefix" "vex")
4385 (set_attr "mode" "V4DF")])
;; Expander for the V2DF "interleave high" operation on operands 1 and 2.
;; If ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a V2DF register.
;; NOTE(review): the second selector index and the enabling condition are on
;; lines not visible in this excerpt.
4387 (define_expand "vec_interleave_highv2df"
4388 [(set (match_operand:V2DF 0 "register_operand" "")
4391 (match_operand:V2DF 1 "nonimmediate_operand" "")
4392 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4393 (parallel [(const_int 1)
4397 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4398 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX form of the V2DF interleave-high operation, valid only when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) holds.  Four
;; alternatives:
;;   0: register/register           -> vunpckhpd
;;   1: operand 2 same as memory operand 1 -> vmovddup from %H1
;;   2: memory operand 1, register operand 2 -> vmovlpd from %H1
;;   3: memory destination tied to operand 2 -> vmovhpd store of operand 1
4401 (define_insn "*avx_interleave_highv2df"
4402 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4405 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4406 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4407 (parallel [(const_int 1)
4409 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4411 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4412 vmovddup\t{%H1, %0|%0, %H1}
4413 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4414 vmovhpd\t{%1, %0|%0, %1}"
4415 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4416 (set_attr "prefix" "vex")
4417 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 form of the V2DF interleave-high operation (two-operand encodings,
;; so register inputs are tied to the destination).  Same four alternatives
;; as the AVX variant: unpckhpd, movddup from %H1, movlpd from %H1, and a
;; movhpd store.  prefix_data16 is set only for the movlpd/movhpd forms.
4419 (define_insn "*sse3_interleave_highv2df"
4420 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4423 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4424 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4425 (parallel [(const_int 1)
4427 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4429 unpckhpd\t{%2, %0|%0, %2}
4430 movddup\t{%H1, %0|%0, %H1}
4431 movlpd\t{%H1, %0|%0, %H1}
4432 movhpd\t{%1, %0|%0, %1}"
4433 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4434 (set_attr "prefix_data16" "*,*,1,1")
4435 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF interleave-high operation.  Identical to the SSE3
;; variant except that there is no movddup alternative, leaving three:
;; unpckhpd, movlpd from %H1, and a movhpd store.
4437 (define_insn "*sse2_interleave_highv2df"
4438 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4441 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4442 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4443 (parallel [(const_int 1)
4445 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4447 unpckhpd\t{%2, %0|%0, %2}
4448 movlpd\t{%H1, %0|%0, %H1}
4449 movhpd\t{%1, %0|%0, %1}"
4450 [(set_attr "type" "sselog,ssemov,ssemov")
4451 (set_attr "prefix_data16" "*,1,1")
4452 (set_attr "mode" "V2DF,V1DF,V1DF")])
4454 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup: a V4DF selection with indices
;; 0, 4, 2, 6 built from operand 1.
;; NOTE(review): the second input of the selection and the enabling
;; condition are on lines not visible in this excerpt.
4455 (define_expand "avx_movddup256"
4456 [(set (match_operand:V4DF 0 "register_operand" "")
4459 (match_operand:V4DF 1 "nonimmediate_operand" "")
4461 (parallel [(const_int 0) (const_int 4)
4462 (const_int 2) (const_int 6)])))]
;; Expander for the 256-bit unpcklpd: a V4DF selection with indices
;; 0, 4, 2, 6 over operands 1 and 2 taken together.
;; NOTE(review): the rtx combining the operands and the enabling condition
;; are on lines not visible in this excerpt.
4465 (define_expand "avx_unpcklpd256"
4466 [(set (match_operand:V4DF 0 "register_operand" "")
4469 (match_operand:V4DF 1 "register_operand" "")
4470 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4471 (parallel [(const_int 0) (const_int 4)
4472 (const_int 2) (const_int 6)])))]
;; Matcher shared by the 256-bit movddup and unpcklpd shapes (indices
;; 0, 4, 2, 6).  Alternative 0 has operand 2 tied to operand 1 (constraint
;; "1") and emits vmovddup; alternative 1 emits three-operand vunpcklpd.
;; The visible part of the condition allows a memory operand 1 only when it
;; is rtx_equal_p to operand 2, i.e. only for the vmovddup shape.
;; NOTE(review): the first line of the condition is not visible here.
4475 (define_insn "*avx_unpcklpd256"
4476 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4479 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4480 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4481 (parallel [(const_int 0) (const_int 4)
4482 (const_int 2) (const_int 6)])))]
4484 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4486 vmovddup\t{%1, %0|%0, %1}
4487 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4488 [(set_attr "type" "sselog")
4489 (set_attr "prefix" "vex")
4490 (set_attr "mode" "V4DF")])
;; Expander for the V2DF "interleave low" operation on operands 1 and 2.
;; If ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a V2DF register.
;; NOTE(review): the second selector index and the enabling condition are on
;; lines not visible in this excerpt.
4492 (define_expand "vec_interleave_lowv2df"
4493 [(set (match_operand:V2DF 0 "register_operand" "")
4496 (match_operand:V2DF 1 "nonimmediate_operand" "")
4497 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4498 (parallel [(const_int 0)
4502 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4503 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX form of the V2DF interleave-low operation, valid only when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) holds.  Four
;; alternatives:
;;   0: register/register                   -> vunpcklpd
;;   1: operand 2 same as memory operand 1  -> vmovddup
;;   2: register operand 1, memory operand 2 -> vmovhpd load
;;   3: offsettable memory destination tied to operand 1
;;                                          -> vmovlpd store to %H0
4506 (define_insn "*avx_interleave_lowv2df"
4507 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4510 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4511 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4512 (parallel [(const_int 0)
4514 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4516 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4517 vmovddup\t{%1, %0|%0, %1}
4518 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4519 vmovlpd\t{%2, %H0|%H0, %2}"
4520 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4521 (set_attr "prefix" "vex")
4522 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 form of the V2DF interleave-low operation (two-operand encodings,
;; register operand 1 tied to the destination).  Alternatives: unpcklpd,
;; movddup from memory, movhpd load of operand 2, and a movlpd store of
;; operand 2 to %H0.
4524 (define_insn "*sse3_interleave_lowv2df"
4525 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4528 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4529 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4530 (parallel [(const_int 0)
4532 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4534 unpcklpd\t{%2, %0|%0, %2}
4535 movddup\t{%1, %0|%0, %1}
4536 movhpd\t{%2, %0|%0, %2}
4537 movlpd\t{%2, %H0|%H0, %2}"
4538 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4539 (set_attr "prefix_data16" "*,*,1,1")
4540 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF interleave-low operation.  Identical to the SSE3
;; variant except that there is no movddup alternative, leaving three:
;; unpcklpd, movhpd load of operand 2, and a movlpd store to %H0.
4542 (define_insn "*sse2_interleave_lowv2df"
4543 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4546 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4547 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4548 (parallel [(const_int 0)
4550 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4552 unpcklpd\t{%2, %0|%0, %2}
4553 movhpd\t{%2, %0|%0, %2}
4554 movlpd\t{%2, %H0|%H0, %2}"
4555 [(set_attr "type" "sselog,ssemov,ssemov")
4556 (set_attr "prefix_data16" "*,1,1")
4557 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Split (TARGET_SSE3, after reload): a V2DF selection starting at index 0
;; from register operand 1, stored to memory.  It becomes two DFmode stores
;; of the same value -- the DFmode view of operand 1's register -- at byte
;; offsets 0 and 8 of the destination.
;; NOTE(review): the second input and second selector index of the pattern
;; are on lines not visible in this excerpt.
4560 [(set (match_operand:V2DF 0 "memory_operand" "")
4563 (match_operand:V2DF 1 "register_operand" "")
4565 (parallel [(const_int 0)
4567 "TARGET_SSE3 && reload_completed"
4570 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4571 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4572 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3): a two-index V2DF selection from memory operand 1
;; whose indices satisfy operand 3 == operand 2 + 2.  It is rewritten as a
;; vec_duplicate of the single DF element at byte offset 8 * operand 2 of
;; operand 1.
;; NOTE(review): the second input of the selection is on a line not visible
;; in this excerpt.
4577 [(set (match_operand:V2DF 0 "register_operand" "")
4580 (match_operand:V2DF 1 "memory_operand" "")
4582 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4583 (match_operand:SI 3 "const_int_operand" "")])))]
4584 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4585 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4587 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking the shufpd immediate as operand 3 and translating it into
;; the explicit element selectors expected by avx_shufpd256_1: mask bit 1
;; picks index 5 or 4, bit 2 picks 3 or 2, bit 3 picks 7 or 6.
;; NOTE(review): the selector derived from mask bit 0 and the enabling
;; condition are on lines not visible in this excerpt.
4590 (define_expand "avx_shufpd256"
4591 [(match_operand:V4DF 0 "register_operand" "")
4592 (match_operand:V4DF 1 "register_operand" "")
4593 (match_operand:V4DF 2 "nonimmediate_operand" "")
4594 (match_operand:SI 3 "const_int_operand" "")]
4597 int mask = INTVAL (operands[3]);
4598 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4600 GEN_INT (mask & 2 ? 5 : 4),
4601 GEN_INT (mask & 4 ? 3 : 2),
4602 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the shuffle written as four explicit selectors
;; (operands 3..6, restricted to 0-1, 4-5, 2-3 and 6-7 respectively).  The
;; output code folds them back into the 4-bit immediate:
;;   bit 0 = op3, bit 1 = op4 - 4, bit 2 = op5 - 2, bit 3 = op6 - 6
;; and overwrites operands[3] with it so the template can print it as %3.
4606 (define_insn "avx_shufpd256_1"
4607 [(set (match_operand:V4DF 0 "register_operand" "=x")
4610 (match_operand:V4DF 1 "register_operand" "x")
4611 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4612 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4613 (match_operand 4 "const_4_to_5_operand" "")
4614 (match_operand 5 "const_2_to_3_operand" "")
4615 (match_operand 6 "const_6_to_7_operand" "")])))]
4619 mask = INTVAL (operands[3]);
4620 mask |= (INTVAL (operands[4]) - 4) << 1;
4621 mask |= (INTVAL (operands[5]) - 2) << 2;
4622 mask |= (INTVAL (operands[6]) - 6) << 3;
4623 operands[3] = GEN_INT (mask);
4625 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4627 [(set_attr "type" "sselog")
4628 (set_attr "length_immediate" "1")
4629 (set_attr "prefix" "vex")
4630 (set_attr "mode" "V4DF")])
;; Expander taking the shufpd immediate as operand 3 and translating it into
;; explicit selectors for sse2_shufpd_v2df: mask bit 1 picks index 3 or 2.
;; NOTE(review): the selector derived from mask bit 0 and the enabling
;; condition are on lines not visible in this excerpt.
4632 (define_expand "sse2_shufpd"
4633 [(match_operand:V2DF 0 "register_operand" "")
4634 (match_operand:V2DF 1 "register_operand" "")
4635 (match_operand:V2DF 2 "nonimmediate_operand" "")
4636 (match_operand:SI 3 "const_int_operand" "")]
4639 int mask = INTVAL (operands[3]);
4640 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4642 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander "vec_extract_even<mode>" for the SSEMODE_EO modes: hands the
;; destination and both source registers to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
4646 (define_expand "vec_extract_even<mode>"
4647 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4648 (match_operand:SSEMODE_EO 1 "register_operand" "")
4649 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4652 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander "vec_extract_odd<mode>" for the SSEMODE_EO modes: hands the
;; destination and both source registers to
;; ix86_expand_vec_extract_even_odd with a final argument of 1.
4656 (define_expand "vec_extract_odd<mode>"
4657 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4658 (match_operand:SSEMODE_EO 1 "register_operand" "")
4659 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4662 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4666 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX V2DI interleave-high: three-operand vpunpckhqdq of register operand 1
;; with register-or-memory operand 2.
;; NOTE(review): the second selector index and the enabling condition are on
;; lines not visible in this excerpt.
4667 (define_insn "*avx_interleave_highv2di"
4668 [(set (match_operand:V2DI 0 "register_operand" "=x")
4671 (match_operand:V2DI 1 "register_operand" "x")
4672 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4673 (parallel [(const_int 1)
4676 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4677 [(set_attr "type" "sselog")
4678 (set_attr "prefix" "vex")
4679 (set_attr "mode" "TI")])
;; V2DI interleave-high in its two-operand form: operand 1 is tied to the
;; destination (constraint "0") and the insn is emitted as punpckhqdq.
;; NOTE(review): the second selector index and the enabling condition are on
;; lines not visible in this excerpt.
4681 (define_insn "vec_interleave_highv2di"
4682 [(set (match_operand:V2DI 0 "register_operand" "=x")
4685 (match_operand:V2DI 1 "register_operand" "0")
4686 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4687 (parallel [(const_int 1)
4690 "punpckhqdq\t{%2, %0|%0, %2}"
4691 [(set_attr "type" "sselog")
4692 (set_attr "prefix_data16" "1")
4693 (set_attr "mode" "TI")])
;; AVX V2DI interleave-low: three-operand vpunpcklqdq of register operand 1
;; with register-or-memory operand 2.
;; NOTE(review): the second selector index and the enabling condition are on
;; lines not visible in this excerpt.
4695 (define_insn "*avx_interleave_lowv2di"
4696 [(set (match_operand:V2DI 0 "register_operand" "=x")
4699 (match_operand:V2DI 1 "register_operand" "x")
4700 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4701 (parallel [(const_int 0)
4704 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4705 [(set_attr "type" "sselog")
4706 (set_attr "prefix" "vex")
4707 (set_attr "mode" "TI")])
;; V2DI interleave-low in its two-operand form: operand 1 is tied to the
;; destination (constraint "0") and the insn is emitted as punpcklqdq.
;; NOTE(review): the second selector index and the enabling condition are on
;; lines not visible in this excerpt.
4709 (define_insn "vec_interleave_lowv2di"
4710 [(set (match_operand:V2DI 0 "register_operand" "=x")
4713 (match_operand:V2DI 1 "register_operand" "0")
4714 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4715 (parallel [(const_int 0)
4718 "punpcklqdq\t{%2, %0|%0, %2}"
4719 [(set_attr "type" "sselog")
4720 (set_attr "prefix_data16" "1")
4721 (set_attr "mode" "TI")])
;; AVX 128-bit vshufpd for the SSEMODE2D modes: selects one element (index
;; 0-1, operand 3) from operand 1 and one (index 2-3, operand 4) from
;; operand 2 out of their vec_concat.  The output code rebuilds the 2-bit
;; immediate as op3 | ((op4 - 2) << 1) and stores it in operands[3] so the
;; template prints it as %3.
4723 (define_insn "*avx_shufpd_<mode>"
4724 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4725 (vec_select:SSEMODE2D
4726 (vec_concat:<ssedoublesizemode>
4727 (match_operand:SSEMODE2D 1 "register_operand" "x")
4728 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4729 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4730 (match_operand 4 "const_2_to_3_operand" "")])))]
4734 mask = INTVAL (operands[3]);
4735 mask |= (INTVAL (operands[4]) - 2) << 1;
4736 operands[3] = GEN_INT (mask);
4738 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4740 [(set_attr "type" "sselog")
4741 (set_attr "length_immediate" "1")
4742 (set_attr "prefix" "vex")
4743 (set_attr "mode" "V2DF")])
;; Two-operand shufpd for the SSEMODE2D modes (operand 1 tied to the
;; destination).  Selects index 0-1 (operand 3) and index 2-3 (operand 4)
;; out of the vec_concat of operands 1 and 2; the output code rebuilds the
;; 2-bit immediate as op3 | ((op4 - 2) << 1) and stores it in operands[3].
4745 (define_insn "sse2_shufpd_<mode>"
4746 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4747 (vec_select:SSEMODE2D
4748 (vec_concat:<ssedoublesizemode>
4749 (match_operand:SSEMODE2D 1 "register_operand" "0")
4750 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4751 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4752 (match_operand 4 "const_2_to_3_operand" "")])))]
4756 mask = INTVAL (operands[3]);
4757 mask |= (INTVAL (operands[4]) - 2) << 1;
4758 operands[3] = GEN_INT (mask);
4760 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4762 [(set_attr "type" "sselog")
4763 (set_attr "length_immediate" "1")
4764 (set_attr "mode" "V2DF")])
4766 ;; Avoid combining registers from different units in a single alternative,
4767 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: element 1 of a V2DF operand as a DF value.  Five alternatives; the
;; first is a vmovhpd store to memory and the second a register-to-register
;; vunpckhpd of operand 1 with itself.  Operands may not both be in memory.
;; NOTE(review): the templates of the last three alternatives (offsettable
;; memory source into x / *f / r) are on lines not visible in this excerpt.
4768 (define_insn "*avx_storehpd"
4769 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4771 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4772 (parallel [(const_int 1)])))]
4773 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4775 vmovhpd\t{%1, %0|%0, %1}
4776 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4780 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4781 (set_attr "prefix" "vex")
4782 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: element 1 of a V2DF operand as a DF value.  Five alternatives; the
;; first is a movhpd store to memory, and in the second the source register
;; is tied to the destination.  Operands may not both be in memory.
;; NOTE(review): the templates of alternatives 1..4 are on lines not visible
;; in this excerpt.
4784 (define_insn "sse2_storehpd"
4785 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4787 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4788 (parallel [(const_int 1)])))]
4789 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4791 movhpd\t{%1, %0|%0, %1}
4796 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4797 (set_attr "prefix_data16" "1,*,*,*,*")
4798 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): element 1 of a V2DF memory operand
;; into a DF register becomes a plain DFmode load from byte offset 8 of that
;; memory operand.
4801 [(set (match_operand:DF 0 "register_operand" "")
4803 (match_operand:V2DF 1 "memory_operand" "")
4804 (parallel [(const_int 1)])))]
4805 "TARGET_SSE2 && reload_completed"
4806 [(set (match_dup 0) (match_dup 1))]
4807 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4809 ;; Avoid combining registers from different units in a single alternative,
4810 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Element 0 of a V2DF operand as a DF value.  Five alternatives; the first
;; is a (v)movlpd store to memory.  Operands may not both be in memory.
;; NOTE(review): the templates of alternatives 1..4 are on lines not visible
;; in this excerpt.
4811 (define_insn "sse2_storelpd"
4812 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4814 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4815 (parallel [(const_int 0)])))]
4816 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4818 %vmovlpd\t{%1, %0|%0, %1}
4823 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4824 (set_attr "prefix_data16" "1,*,*,*,*")
4825 (set_attr "prefix" "maybe_vex")
4826 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): element 0 of a V2DF operand into a DF
;; register becomes a plain DFmode move of operand 1 re-expressed in DFmode
;; (gen_rtx_REG on the same register number, or gen_lowpart).
;; NOTE(review): the test choosing between the two re-expressions is on a
;; line not visible in this excerpt.
4829 [(set (match_operand:DF 0 "register_operand" "")
4831 (match_operand:V2DF 1 "nonimmediate_operand" "")
4832 (parallel [(const_int 0)])))]
4833 "TARGET_SSE2 && reload_completed"
4836 rtx op1 = operands[1];
4838 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4840 op1 = gen_lowpart (DFmode, op1);
4841 emit_move_insn (operands[0], op1);
;; Expander for the "load high" V2DF operation: element 0 of operand 1
;; combined with DF operand 2.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands (UNKNOWN, V2DFmode, ...).
;; NOTE(review): the combining rtx and the enabling condition are on lines
;; not visible in this excerpt.
4845 (define_expand "sse2_loadhpd_exp"
4846 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4849 (match_operand:V2DF 1 "nonimmediate_operand" "")
4850 (parallel [(const_int 0)]))
4851 (match_operand:DF 2 "nonimmediate_operand" "")))]
4853 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4855 ;; Avoid combining registers from different units in a single alternative,
4856 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: element 0 of V2DF operand 1 combined with DF operand 2.  Five
;; alternatives; the first loads operand 2 from memory with vmovhpd, the
;; second combines two registers with vunpcklpd.  Operands 1 and 2 may not
;; both be in memory.
;; NOTE(review): the templates of the three memory-destination alternatives
;; are on lines not visible in this excerpt.
4857 (define_insn "*avx_loadhpd"
4858 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4861 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4862 (parallel [(const_int 0)]))
4863 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4864 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4866 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4867 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4871 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4872 (set_attr "prefix" "vex")
4873 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: element 0 of V2DF operand 1 combined with DF operand 2 (which ends
;; up as the high element).  Five alternatives: a movhpd load of operand 2
;; from memory, a register/register unpcklpd, and three memory-destination
;; forms whose templates are on lines not visible in this excerpt.
;; Operands 1 and 2 may not both be in memory.
;;
;; A former register alternative (operand 1 in "x", operand 2 tied to the
;; destination, "shufpd $1, %1, %0") has been dropped: with immediate 1,
;; shufpd copies the destination's own high half -- undefined for a DF
;; value -- into the low element and operand 1's low element into the high
;; element, which is not what the pattern describes.  No single shufpd with
;; operand 2 as destination can produce the required result; the unpcklpd
;; alternative covers the register/register case.
4875 (define_insn "sse2_loadhpd"
4876 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4879 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4880 (parallel [(const_int 0)]))
4881 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4882 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4884 movhpd\t{%2, %0|%0, %2}
4885 unpcklpd\t{%2, %0|%0, %2}
4890 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4891 (set_attr "prefix_data16" "1,*,*,*,*")
4893 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): a V2DF memory destination that keeps
;; its own element 0 and takes DF register operand 1 as the other element
;; becomes a plain DFmode store of operand 1 at byte offset 8.
4896 [(set (match_operand:V2DF 0 "memory_operand" "")
4898 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4899 (match_operand:DF 1 "register_operand" "")))]
4900 "TARGET_SSE2 && reload_completed"
4901 [(set (match_dup 0) (match_dup 1))]
4902 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander for the "load low" V2DF operation: DF operand 2 combined with
;; element 1 of operand 1.  The preparation statement passes the operands
;; through ix86_fixup_binary_operands (UNKNOWN, V2DFmode, ...).
;; NOTE(review): the combining rtx and the enabling condition are on lines
;; not visible in this excerpt.
4904 (define_expand "sse2_loadlpd_exp"
4905 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4907 (match_operand:DF 2 "nonimmediate_operand" "")
4909 (match_operand:V2DF 1 "nonimmediate_operand" "")
4910 (parallel [(const_int 1)]))))]
4912 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4914 ;; Avoid combining registers from different units in a single alternative,
4915 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: DF operand 2 combined with element 1 of V2DF operand 1.  Seven
;; alternatives; the four register-destination ones are
;;   0: operand 1 is constraint "C", operand 2 in memory -> vmovsd load
;;   1: operand 2 in memory, operand 1 in a register     -> vmovlpd
;;   2: both in registers                                -> vmovsd
;;   3: operand 1 in offsettable memory                  -> vmovhpd from %H1
;; Operands 1 and 2 may not both be in memory.
;; NOTE(review): the templates of the three memory-destination alternatives
;; are on lines not visible in this excerpt.
4916 (define_insn "*avx_loadlpd"
4917 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4919 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4921 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4922 (parallel [(const_int 1)]))))]
4923 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4925 vmovsd\t{%2, %0|%0, %2}
4926 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4927 vmovsd\t{%2, %1, %0|%0, %1, %2}
4928 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4932 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4933 (set_attr "prefix" "vex")
4934 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2: DF operand 2 (low element) combined with element 1 of V2DF
;; operand 1 (high element).  Eight alternatives; the five with a register
;; destination are
;;   0: operand 1 is constraint "C", operand 2 in memory -> movsd load
;;   1: operand 2 in memory, operand 1 tied to dest      -> movlpd
;;   2: operand 2 in a register, operand 1 tied to dest  -> movsd
;;   3: operand 2 tied to dest, operand 1 in a register  -> shufpd $2
;;   4: operand 2 tied to dest, operand 1 in memory      -> movhpd from %H1
;; Operands 1 and 2 may not both be in memory.
;;
;; In alternative 3 the shufpd source must be operand 1: "shufpd $2" keeps
;; the destination's low element and takes the high element from the
;; source.  Naming %2 there (the same register as %0) would make the
;; instruction a no-op and silently ignore operand 1.
;; NOTE(review): the templates of the three memory-destination alternatives
;; are on lines not visible in this excerpt.
4936 (define_insn "sse2_loadlpd"
4937 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4939 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4941 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4942 (parallel [(const_int 1)]))))]
4943 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4945 movsd\t{%2, %0|%0, %2}
4946 movlpd\t{%2, %0|%0, %2}
4947 movsd\t{%2, %0|%0, %2}
4948 shufpd\t{$2, %1, %0|%0, %1, 2}
4949 movhpd\t{%H1, %0|%0, %H1}
4953 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4954 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4955 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4956 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): a V2DF memory destination built from
;; DF register operand 1 and the destination's own element 1 becomes a plain
;; DFmode store of operand 1 at the address produced by
;; adjust_address (operands[0], DFmode, 8).
;; NOTE(review): operand 1 appears before the kept element 1 in the pattern,
;; yet the store targets byte offset 8 -- confirm the offset is intended.
4959 [(set (match_operand:V2DF 0 "memory_operand" "")
4961 (match_operand:DF 1 "register_operand" "")
4962 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4963 "TARGET_SSE2 && reload_completed"
4964 [(set (match_dup 0) (match_dup 1))]
4965 "operands[0] = adjust_address (operands[0], DFmode, 8);")
4967 ;; Not sure these two are ever used, but it doesn't hurt to have
;; SSE-without-SSE2 extraction of element 1 of a V2DF operand as DF, using
;; single-precision move instructions: movhps store to memory, movhlps
;; between registers, or movlps load from %H1 of an offsettable memory
;; source.  Operands may not both be in memory.
4969 (define_insn "*vec_extractv2df_1_sse"
4970 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4972 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4973 (parallel [(const_int 1)])))]
4974 "!TARGET_SSE2 && TARGET_SSE
4975 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4977 movhps\t{%1, %0|%0, %1}
4978 movhlps\t{%1, %0|%0, %1}
4979 movlps\t{%H1, %0|%0, %H1}"
4980 [(set_attr "type" "ssemov")
4981 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF operand as DF, using
;; single-precision move instructions: movlps store to memory, movaps
;; between registers, or movlps load from memory.  Operands may not both be
;; in memory.
4983 (define_insn "*vec_extractv2df_0_sse"
4984 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4986 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4987 (parallel [(const_int 0)])))]
4988 "!TARGET_SSE2 && TARGET_SSE
4989 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4991 movlps\t{%1, %0|%0, %1}
4992 movaps\t{%1, %0|%0, %1}
4993 movlps\t{%1, %0|%0, %1}"
4994 [(set_attr "type" "ssemov")
4995 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX merge of two V2DF operands (operand 2 listed first, operand 1
;; second).  Five alternatives:
;;   0: register/register                    -> vmovsd
;;   1: operand 2 in memory                  -> vmovlpd load
;;   2: memory destination tied to operand 1 -> vmovlpd store of operand 2
;;   3: operand 1 in offsettable memory      -> vmovhps from %H1
;;   4: memory destination tied to operand 2 -> vmovhps store to %H0
;; NOTE(review): the merge rtx, its mask and the enabling condition are on
;; lines not visible in this excerpt.
4997 (define_insn "*avx_movsd"
4998 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5000 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5001 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5005 vmovsd\t{%2, %1, %0|%0, %1, %2}
5006 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5007 vmovlpd\t{%2, %0|%0, %2}
5008 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5009 vmovhps\t{%1, %H0|%H0, %1}"
5010 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5011 (set_attr "prefix" "vex")
5012 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 merge of two V2DF operands (operand 2 listed first, operand 1
;; second).  Six alternatives:
;;   0: operand 1 tied to dest, operand 2 in a register -> movsd
;;   1: operand 1 tied to dest, operand 2 in memory     -> movlpd load
;;   2: memory dest tied to operand 1                   -> movlpd store
;;   3: operand 2 tied to dest, operand 1 in a register -> shufpd $2
;;   4: operand 2 tied to dest, operand 1 in memory     -> movhps from %H1
;;   5: memory dest tied to operand 2                   -> movhps store to %H0
;;
;; In alternative 3 the shufpd source must be operand 1: "shufpd $2" keeps
;; the destination's low element and takes the high element from the
;; source.  Naming %2 there (the same register as %0) would make the
;; instruction a no-op and silently ignore operand 1.
;; NOTE(review): the merge rtx, its mask and the enabling condition are on
;; lines not visible in this excerpt.
5014 (define_insn "sse2_movsd"
5015 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5017 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5018 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5022 movsd\t{%2, %0|%0, %2}
5023 movlpd\t{%2, %0|%0, %2}
5024 movlpd\t{%2, %0|%0, %2}
5025 shufpd\t{$2, %1, %0|%0, %1, 2}
5026 movhps\t{%H1, %0|%0, %H1}
5027 movhps\t{%1, %H0|%H0, %1}"
5028 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5029 (set_attr "prefix_data16" "*,1,1,*,*,*")
5030 (set_attr "length_immediate" "*,*,*,1,*,*")
5031 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result built from a single DF register-or-memory operand with
;; (v)movddup; the "%v" marker and the "maybe_vex" prefix attribute let the
;; same pattern serve the legacy and VEX encodings.
;; NOTE(review): the rtx wrapping operand 1 and the enabling condition are
;; on lines not visible in this excerpt.
5033 (define_insn "*vec_dupv2df_sse3"
5034 [(set (match_operand:V2DF 0 "register_operand" "=x")
5036 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5038 "%vmovddup\t{%1, %0|%0, %1}"
5039 [(set_attr "type" "sselog1")
5040 (set_attr "prefix" "maybe_vex")
5041 (set_attr "mode" "DF")])
;; V2DF result built from a single DF register operand that is tied to the
;; destination (constraint "0").
;; NOTE(review): the rtx wrapping operand 1, the enabling condition and the
;; output template are on lines not visible in this excerpt.
5043 (define_insn "vec_dupv2df"
5044 [(set (match_operand:V2DF 0 "register_operand" "=x")
5046 (match_operand:DF 1 "register_operand" "0")))]
5049 [(set_attr "type" "sselog1")
5050 (set_attr "mode" "V2DF")])
;; V2DF result produced from DF operand 1 (register or memory) with
;; (v)movddup.
;; NOTE(review): the concat rtx, its second input and the enabling
;; condition are on lines not visible in this excerpt; presumably the second
;; input repeats operand 1.
5052 (define_insn "*vec_concatv2df_sse3"
5053 [(set (match_operand:V2DF 0 "register_operand" "=x")
5055 (match_operand:DF 1 "nonimmediate_operand" "xm")
5058 "%vmovddup\t{%1, %0|%0, %1}"
5059 [(set_attr "type" "sselog1")
5060 (set_attr "prefix" "maybe_vex")
5061 (set_attr "mode" "DF")])
;; AVX: V2DF result from DF operands 1 and 2.  Three alternatives:
;;   0: both in registers                        -> vunpcklpd
;;   1: operand 2 in memory                      -> vmovhpd load
;;   2: operand 1 in memory, operand 2 matches "C" -> vmovsd load
;; NOTE(review): the concat rtx and the enabling condition are on lines not
;; visible in this excerpt.
5063 (define_insn "*vec_concatv2df_avx"
5064 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5066 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5067 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5070 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5071 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5072 vmovsd\t{%1, %0|%0, %1}"
5073 [(set_attr "type" "ssemov")
5074 (set_attr "prefix" "vex")
5075 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX: V2DF result from DF operands 1 and 2.  The first three
;; alternatives use the "Y2" register constraint and double-precision
;; instructions (unpcklpd, movhpd, movsd); the last two use the "x"
;; constraint and single-precision moves (movlhps, movhps).
;; NOTE(review): the concat rtx and the enabling condition are on lines not
;; visible in this excerpt.
5077 (define_insn "*vec_concatv2df"
5078 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5080 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5081 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5084 unpcklpd\t{%2, %0|%0, %2}
5085 movhpd\t{%2, %0|%0, %2}
5086 movsd\t{%1, %0|%0, %1}
5087 movlhps\t{%2, %0|%0, %2}
5088 movhps\t{%2, %0|%0, %2}"
5089 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5090 (set_attr "prefix_data16" "*,1,*,*,*")
5091 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5093 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5095 ;; Parallel integral arithmetic
5097 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation for the SSEMODEI modes.  The preparation
;; statement creates operand 2 as a register holding the all-zero vector
;; constant of <MODE>mode.
;; NOTE(review): the arithmetic rtx using operand 2 and the enabling
;; condition are on lines not visible here; presumably 0 - operand 1.
5099 (define_expand "neg<mode>2"
5100 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5103 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5105 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for integer vector add/subtract on the SSEMODEI modes (name
;; generated from <plusminus_insn>).  Operands are normalised with
;; ix86_fixup_binary_operands_no_copy before matching.
;; NOTE(review): the operator rtx and the enabling condition are on lines
;; not visible in this excerpt.
5107 (define_expand "<plusminus_insn><mode>3"
5108 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5110 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5111 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5113 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX integer vector add/subtract on the SSEMODEI modes: three-operand
;; "vp<plusminus_mnemonic><ssevecsize>".  Requires TARGET_AVX and
;; ix86_binary_operator_ok; the "<comm>" constraint prefix on operand 1
;; comes from the code iterator.
5115 (define_insn "*avx_<plusminus_insn><mode>3"
5116 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5118 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5119 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5120 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5121 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5122 [(set_attr "type" "sseiadd")
5123 (set_attr "prefix" "vex")
5124 (set_attr "mode" "TI")])
;; SSE2 integer vector add/subtract on the SSEMODEI modes: two-operand
;; "p<plusminus_mnemonic><ssevecsize>" with operand 1 tied to the
;; destination.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
5126 (define_insn "*<plusminus_insn><mode>3"
5127 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5129 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5130 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5131 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5132 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5133 [(set_attr "type" "sseiadd")
5134 (set_attr "prefix_data16" "1")
5135 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus operations on the SSEMODE12 modes (V16QI
;; and V8HI).  Operands are normalised with
;; ix86_fixup_binary_operands_no_copy before matching.
;; NOTE(review): the enabling condition is on a line not visible here.
5137 (define_expand "sse2_<plusminus_insn><mode>3"
5138 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5139 (sat_plusminus:SSEMODE12
5140 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5141 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5143 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX form of the sat_plusminus operations on the SSEMODE12 modes:
;; three-operand "vp<plusminus_mnemonic><ssevecsize>".  Requires TARGET_AVX
;; and ix86_binary_operator_ok.
5145 (define_insn "*avx_<plusminus_insn><mode>3"
5146 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5147 (sat_plusminus:SSEMODE12
5148 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5149 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5150 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5151 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5152 [(set_attr "type" "sseiadd")
5153 (set_attr "prefix" "vex")
5154 (set_attr "mode" "TI")])
;; SSE2 form of the sat_plusminus operations on the SSEMODE12 modes:
;; two-operand "p<plusminus_mnemonic><ssevecsize>" with operand 1 tied to
;; the destination.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
5156 (define_insn "*sse2_<plusminus_insn><mode>3"
5157 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5158 (sat_plusminus:SSEMODE12
5159 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5160 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5161 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5162 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5163 [(set_attr "type" "sseiadd")
5164 (set_attr "prefix_data16" "1")
5165 (set_attr "mode" "TI")])
;; V16QI multiply, implemented by splitting (while pseudos can still be
;; created) into word multiplies:
;;   1. allocate six V16QI temporaries t[0..5];
;;   2. interleave each input with itself (high halves into t[0]/t[1], low
;;      halves into t[2]/t[3]) so every 16-bit word carries a source byte in
;;      its low byte;
;;   3. multiply t[0]*t[1] -> t[4] and t[2]*t[3] -> t[5] as V8HI;
;;   4. collect the even bytes of t[5] and t[4] into operand 0.
;; NOTE(review): the first line of the insn condition is not visible in this
;; excerpt.
5167 (define_insn_and_split "mulv16qi3"
5168 [(set (match_operand:V16QI 0 "register_operand" "")
5169 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5170 (match_operand:V16QI 2 "register_operand" "")))]
5172 && can_create_pseudo_p ()"
5180 for (i = 0; i < 6; ++i)
5181 t[i] = gen_reg_rtx (V16QImode);
5183 /* Unpack data such that we've got a source byte in each low byte of
5184 each word. We don't care what goes into the high byte of each word.
5185 Rather than trying to get zero in there, most convenient is to let
5186 it be a copy of the low byte. */
5187 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5188 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5189 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5190 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5192 /* Multiply words. The end-of-line annotations here give a picture of what
5193 the output of that instruction looks like. Dot means don't care; the
5194 letters are the bytes of the result with A being the most significant. */
5195 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5196 gen_lowpart (V8HImode, t[0]),
5197 gen_lowpart (V8HImode, t[1])));
5198 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5199 gen_lowpart (V8HImode, t[2]),
5200 gen_lowpart (V8HImode, t[3])));
5202 /* Extract the even bytes and merge them back together. */
5203 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Expander for the V8HI multiply.  Both inputs are accepted as
;; nonimmediate operands and handed to ix86_fixup_binary_operands_no_copy
;; (with code MULT) in the preparation statement.
5207 (define_expand "mulv8hi3"
5208 [(set (match_operand:V8HI 0 "register_operand" "")
5209 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5210 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5212 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX V8HI multiply, emitted as three-operand vpmullw.  "%x" marks
;; operands 1 and 2 as commutative; operand 2 may be memory.
5214 (define_insn "*avx_mulv8hi3"
5215 [(set (match_operand:V8HI 0 "register_operand" "=x")
5216 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5217 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5218 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5219 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5220 [(set_attr "type" "sseimul")
5221 (set_attr "prefix" "vex")
5222 (set_attr "mode" "TI")])
;; SSE2 V8HI multiply, emitted as two-operand pmullw.  Operand 1 is tied to
;; the destination ("%0", commutative with operand 2).
5224 (define_insn "*mulv8hi3"
5225 [(set (match_operand:V8HI 0 "register_operand" "=x")
5226 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5227 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5228 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5229 "pmullw\t{%2, %0|%0, %2}"
5230 [(set_attr "type" "sseimul")
5231 (set_attr "prefix_data16" "1")
5232 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply.  The <s> part of the name is
;; substituted by an iterator; the extension/shift RTL wrapped around the
;; operands sits on lines not shown in this listing.
5234 (define_expand "<s>mulv8hi3_highpart"
5235 [(set (match_operand:V8HI 0 "register_operand" "")
5240 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5242 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5245 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX high-part V8HI multiply, emitted as three-operand vpmulh<u>w
;; (the <u> attribute selects the mnemonic variant).
5247 (define_insn "*avx_<s>mulv8hi3_highpart"
5248 [(set (match_operand:V8HI 0 "register_operand" "=x")
5253 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5255 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5257 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5258 "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5259 [(set_attr "type" "sseimul")
5260 (set_attr "prefix" "vex")
5261 (set_attr "mode" "TI")])
;; SSE2 high-part V8HI multiply, emitted as two-operand pmulh<u>w with
;; operand 1 tied to the destination.
5263 (define_insn "*<s>mulv8hi3_highpart"
5264 [(set (match_operand:V8HI 0 "register_operand" "=x")
5269 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5271 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5273 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5274 "pmulh<u>w\t{%2, %0|%0, %2}"
5275 [(set_attr "type" "sseimul")
5276 (set_attr "prefix_data16" "1")
5277 (set_attr "mode" "TI")])
;; Expander for the V4SI x V4SI -> V2DI multiply that uses only elements
;; 0 and 2 of each input (see the (parallel [0 2]) selectors).  Named as the
;; unsigned form; the extension code itself is on lines not shown here.
5279 (define_expand "sse2_umulv2siv2di3"
5280 [(set (match_operand:V2DI 0 "register_operand" "")
5284 (match_operand:V4SI 1 "nonimmediate_operand" "")
5285 (parallel [(const_int 0) (const_int 2)])))
5288 (match_operand:V4SI 2 "nonimmediate_operand" "")
5289 (parallel [(const_int 0) (const_int 2)])))))]
5291 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the elements-0-and-2 widening multiply, emitted as
;; three-operand vpmuludq.
5293 (define_insn "*avx_umulv2siv2di3"
5294 [(set (match_operand:V2DI 0 "register_operand" "=x")
5298 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5299 (parallel [(const_int 0) (const_int 2)])))
5302 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5303 (parallel [(const_int 0) (const_int 2)])))))]
5304 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5305 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5306 [(set_attr "type" "sseimul")
5307 (set_attr "prefix" "vex")
5308 (set_attr "mode" "TI")])
;; SSE2 form of the elements-0-and-2 widening multiply, emitted as
;; two-operand pmuludq with operand 1 tied to the destination.
5310 (define_insn "*sse2_umulv2siv2di3"
5311 [(set (match_operand:V2DI 0 "register_operand" "=x")
5315 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5316 (parallel [(const_int 0) (const_int 2)])))
5319 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5320 (parallel [(const_int 0) (const_int 2)])))))]
5321 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5322 "pmuludq\t{%2, %0|%0, %2}"
5323 [(set_attr "type" "sseimul")
5324 (set_attr "prefix_data16" "1")
5325 (set_attr "mode" "TI")])
;; Expander for the SSE4.1-named V4SI x V4SI -> V2DI multiply of elements
;; 0 and 2.  Same operand shape as sse2_umulv2siv2di3; the extension code
;; that distinguishes the two is on lines not shown here.
5327 (define_expand "sse4_1_mulv2siv2di3"
5328 [(set (match_operand:V2DI 0 "register_operand" "")
5332 (match_operand:V4SI 1 "nonimmediate_operand" "")
5333 (parallel [(const_int 0) (const_int 2)])))
5336 (match_operand:V4SI 2 "nonimmediate_operand" "")
5337 (parallel [(const_int 0) (const_int 2)])))))]
5339 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the elements-0-and-2 widening multiply that emits vpmuldq.
;; Carries prefix_extra "1" in addition to the VEX prefix attribute.
5341 (define_insn "*avx_mulv2siv2di3"
5342 [(set (match_operand:V2DI 0 "register_operand" "=x")
5346 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5347 (parallel [(const_int 0) (const_int 2)])))
5350 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5351 (parallel [(const_int 0) (const_int 2)])))))]
5352 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5353 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5354 [(set_attr "type" "sseimul")
5355 (set_attr "prefix_extra" "1")
5356 (set_attr "prefix" "vex")
5357 (set_attr "mode" "TI")])
;; SSE4.1 form of the elements-0-and-2 widening multiply, emitted as
;; two-operand pmuldq with operand 1 tied to the destination.
5359 (define_insn "*sse4_1_mulv2siv2di3"
5360 [(set (match_operand:V2DI 0 "register_operand" "=x")
5364 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5365 (parallel [(const_int 0) (const_int 2)])))
5368 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5369 (parallel [(const_int 0) (const_int 2)])))))]
5370 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5371 "pmuldq\t{%2, %0|%0, %2}"
5372 [(set_attr "type" "sseimul")
5373 (set_attr "prefix_extra" "1")
5374 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V8HI x V8HI -> V4SI.  The RTL takes two V4HI
;; selections from each input, one whose index list starts at 0 and one
;; whose list starts at 1 and ends at 7; the remaining indices and the
;; combining arithmetic are on lines not shown in this listing.
5376 (define_expand "sse2_pmaddwd"
5377 [(set (match_operand:V4SI 0 "register_operand" "")
5382 (match_operand:V8HI 1 "nonimmediate_operand" "")
5383 (parallel [(const_int 0)
5389 (match_operand:V8HI 2 "nonimmediate_operand" "")
5390 (parallel [(const_int 0)
5396 (vec_select:V4HI (match_dup 1)
5397 (parallel [(const_int 1)
5402 (vec_select:V4HI (match_dup 2)
5403 (parallel [(const_int 1)
5406 (const_int 7)]))))))]
5408 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX matcher for the pmaddwd RTL shape, emitted as three-operand vpmaddwd.
;; Operands 1 and 2 are each referenced twice (match_operand + match_dup).
5410 (define_insn "*avx_pmaddwd"
5411 [(set (match_operand:V4SI 0 "register_operand" "=x")
5416 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5417 (parallel [(const_int 0)
5423 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5424 (parallel [(const_int 0)
5430 (vec_select:V4HI (match_dup 1)
5431 (parallel [(const_int 1)
5436 (vec_select:V4HI (match_dup 2)
5437 (parallel [(const_int 1)
5440 (const_int 7)]))))))]
5441 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5442 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5443 [(set_attr "type" "sseiadd")
5444 (set_attr "prefix" "vex")
5445 (set_attr "mode" "TI")])
;; SSE2 matcher for the pmaddwd RTL shape, emitted as two-operand pmaddwd
;; with operand 1 tied to the destination.  Unlike the AVX variant it also
;; sets the "atom_unit" attribute.
5447 (define_insn "*sse2_pmaddwd"
5448 [(set (match_operand:V4SI 0 "register_operand" "=x")
5453 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5454 (parallel [(const_int 0)
5460 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5461 (parallel [(const_int 0)
5467 (vec_select:V4HI (match_dup 1)
5468 (parallel [(const_int 1)
5473 (vec_select:V4HI (match_dup 2)
5474 (parallel [(const_int 1)
5477 (const_int 7)]))))))]
5478 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5479 "pmaddwd\t{%2, %0|%0, %2}"
5480 [(set_attr "type" "sseiadd")
5481 (set_attr "atom_unit" "simul")
5482 (set_attr "prefix_data16" "1")
5483 (set_attr "mode" "TI")])
;; Expander for the V4SI multiply.  Operands are fixed up with
;; ix86_fixup_binary_operands_no_copy only when SSE4.1 or AVX is enabled;
;; nothing is done to them here for the other case.
5485 (define_expand "mulv4si3"
5486 [(set (match_operand:V4SI 0 "register_operand" "")
5487 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5488 (match_operand:V4SI 2 "register_operand" "")))]
5491 if (TARGET_SSE4_1 || TARGET_AVX)
5492 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX V4SI multiply, emitted as three-operand vpmulld.
5495 (define_insn "*avx_mulv4si3"
5496 [(set (match_operand:V4SI 0 "register_operand" "=x")
5497 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5498 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5499 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5500 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5501 [(set_attr "type" "sseimul")
5502 (set_attr "prefix_extra" "1")
5503 (set_attr "prefix" "vex")
5504 (set_attr "mode" "TI")])
;; SSE4.1 V4SI multiply, emitted as two-operand pmulld with operand 1 tied
;; to the destination.
5506 (define_insn "*sse4_1_mulv4si3"
5507 [(set (match_operand:V4SI 0 "register_operand" "=x")
5508 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5509 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5510 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5511 "pmulld\t{%2, %0|%0, %2}"
5512 [(set_attr "type" "sseimul")
5513 (set_attr "prefix_extra" "1")
5514 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 when neither SSE4.1 nor AVX is available.
;; Built from two gen_sse2_umulv2siv2di3 multiplies: one on the inputs as
;; they are (elements 2 and 0) and one after both inputs have been shifted
;; down as V1TI values (elements 3 and 1).  The two results are shuffled
;; with pshufd and interleaved into operand 0.  Needs six fresh pseudos,
;; hence the can_create_pseudo_p () condition.
5516 (define_insn_and_split "*sse2_mulv4si3"
5517 [(set (match_operand:V4SI 0 "register_operand" "")
5518 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5519 (match_operand:V4SI 2 "register_operand" "")))]
5520 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5521 && can_create_pseudo_p ()"
5526 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5532 t1 = gen_reg_rtx (V4SImode);
5533 t2 = gen_reg_rtx (V4SImode);
5534 t3 = gen_reg_rtx (V4SImode);
5535 t4 = gen_reg_rtx (V4SImode);
5536 t5 = gen_reg_rtx (V4SImode);
5537 t6 = gen_reg_rtx (V4SImode);
5538 thirtytwo = GEN_INT (32);
5540 /* Multiply elements 2 and 0. */
5541 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5544 /* Shift both input vectors down one element, so that elements 3
5545 and 1 are now in the slots for elements 2 and 0. For K8, at
5546 least, this is faster than using a shuffle. */
5547 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5548 gen_lowpart (V1TImode, op1),
5550 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5551 gen_lowpart (V1TImode, op2),
5553 /* Multiply elements 3 and 1. */
5554 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5557 /* Move the results in element 2 down to element 1; we don't care
5558 what goes in elements 2 and 3. */
5559 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5560 const0_rtx, const0_rtx));
5561 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5562 const0_rtx, const0_rtx));
5564 /* Merge the parts back together. */
5565 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply.  Two expansions are visible below:
;;  - one using the XOP helpers gen_xop_phadddq and gen_xop_pmacsdql on the
;;    inputs viewed as V4SI, and
;;  - one that sums three gen_sse2_umulv2siv2di3 partial products
;;    (low*low, plus the two cross products shifted left by 32).
;; The test that chooses between them is on lines not shown in this listing.
5569 (define_insn_and_split "mulv2di3"
5570 [(set (match_operand:V2DI 0 "register_operand" "")
5571 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5572 (match_operand:V2DI 2 "register_operand" "")))]
5574 && can_create_pseudo_p ()"
5579 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5588 /* op1: A,B,C,D, op2: E,F,G,H */
5589 op1 = gen_lowpart (V4SImode, op1);
5590 op2 = gen_lowpart (V4SImode, op2);
5592 t1 = gen_reg_rtx (V4SImode);
5593 t2 = gen_reg_rtx (V4SImode);
5594 t3 = gen_reg_rtx (V2DImode);
5595 t4 = gen_reg_rtx (V2DImode);
5598 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5604 /* t2: (B*E),(A*F),(D*G),(C*H) */
5605 emit_insn (gen_mulv4si3 (t2, t1, op2));
5607 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5608 emit_insn (gen_xop_phadddq (t3, t2));
5610 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5611 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5613 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5614 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5618 t1 = gen_reg_rtx (V2DImode);
5619 t2 = gen_reg_rtx (V2DImode);
5620 t3 = gen_reg_rtx (V2DImode);
5621 t4 = gen_reg_rtx (V2DImode);
5622 t5 = gen_reg_rtx (V2DImode);
5623 t6 = gen_reg_rtx (V2DImode);
5624 thirtytwo = GEN_INT (32);
5626 /* Multiply low parts. */
5627 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5628 gen_lowpart (V4SImode, op2)));
5630 /* Shift input vectors right 32 bits so we can multiply high parts. */
5631 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5632 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5634 /* Multiply high parts by low parts. */
5635 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5636 gen_lowpart (V4SImode, t3)));
5637 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5638 gen_lowpart (V4SImode, t2)));
5640 /* Shift them back. */
5641 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5642 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5644 /* Add the three parts together. */
5645 emit_insn (gen_addv2di3 (t6, t1, t4));
5646 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.
;; t1 holds the low 16 bits of every product (gen_mulv8hi3), t2 the high
;; 16 bits (gen_smulv8hi3_highpart); interleaving their high halves into the
;; destination (viewed as V8HI) forms the 32-bit results.
;; op1/op2 are assigned on lines not shown (presumably operands[1]/[2]).
5651 (define_expand "vec_widen_smult_hi_v8hi"
5652 [(match_operand:V4SI 0 "register_operand" "")
5653 (match_operand:V8HI 1 "register_operand" "")
5654 (match_operand:V8HI 2 "register_operand" "")]
5657 rtx op1, op2, t1, t2, dest;
5661 t1 = gen_reg_rtx (V8HImode);
5662 t2 = gen_reg_rtx (V8HImode);
5663 dest = gen_lowpart (V8HImode, operands[0]);
5665 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5666 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5667 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.
;; Same scheme as the _hi_ expander, but the low halves of the low-part
;; product (t1) and high-part product (t2) are interleaved.
;; op1/op2 are assigned on lines not shown (presumably operands[1]/[2]).
5671 (define_expand "vec_widen_smult_lo_v8hi"
5672 [(match_operand:V4SI 0 "register_operand" "")
5673 (match_operand:V8HI 1 "register_operand" "")
5674 (match_operand:V8HI 2 "register_operand" "")]
5677 rtx op1, op2, t1, t2, dest;
5681 t1 = gen_reg_rtx (V8HImode);
5682 t2 = gen_reg_rtx (V8HImode);
5683 dest = gen_lowpart (V8HImode, operands[0]);
5685 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5686 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5687 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.
;; Low 16 bits come from gen_mulv8hi3, high 16 bits from
;; gen_umulv8hi3_highpart; their high halves are interleaved into the
;; destination viewed as V8HI.
;; op1/op2 are assigned on lines not shown (presumably operands[1]/[2]).
5691 (define_expand "vec_widen_umult_hi_v8hi"
5692 [(match_operand:V4SI 0 "register_operand" "")
5693 (match_operand:V8HI 1 "register_operand" "")
5694 (match_operand:V8HI 2 "register_operand" "")]
5697 rtx op1, op2, t1, t2, dest;
5701 t1 = gen_reg_rtx (V8HImode);
5702 t2 = gen_reg_rtx (V8HImode);
5703 dest = gen_lowpart (V8HImode, operands[0]);
5705 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5706 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5707 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.
;; Same scheme as the _hi_ expander, but the low halves of the low-part
;; product (t1) and high-part product (t2) are interleaved.
;; op1/op2 are assigned on lines not shown (presumably operands[1]/[2]).
5711 (define_expand "vec_widen_umult_lo_v8hi"
5712 [(match_operand:V4SI 0 "register_operand" "")
5713 (match_operand:V8HI 1 "register_operand" "")
5714 (match_operand:V8HI 2 "register_operand" "")]
5717 rtx op1, op2, t1, t2, dest;
5721 t1 = gen_reg_rtx (V8HImode);
5722 t2 = gen_reg_rtx (V8HImode);
5723 dest = gen_lowpart (V8HImode, operands[0]);
5725 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5726 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5727 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, high half: V4SI x V4SI -> V2DI.
;; Both inputs are first rearranged with gen_sse2_pshufd_1 (the shuffle
;; indices are on lines not shown) and then multiplied by the XOP helper
;; gen_xop_mulv2div2di3_high.
5731 (define_expand "vec_widen_smult_hi_v4si"
5732 [(match_operand:V2DI 0 "register_operand" "")
5733 (match_operand:V4SI 1 "register_operand" "")
5734 (match_operand:V4SI 2 "register_operand" "")]
5739 t1 = gen_reg_rtx (V4SImode);
5740 t2 = gen_reg_rtx (V4SImode);
5742 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5747 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5752 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply, low half: V4SI x V4SI -> V2DI.
;; Both inputs are first rearranged with gen_sse2_pshufd_1 (the shuffle
;; indices are on lines not shown) and then multiplied by the XOP helper
;; gen_xop_mulv2div2di3_low.
5756 (define_expand "vec_widen_smult_lo_v4si"
5757 [(match_operand:V2DI 0 "register_operand" "")
5758 (match_operand:V4SI 1 "register_operand" "")
5759 (match_operand:V4SI 2 "register_operand" "")]
5764 t1 = gen_reg_rtx (V4SImode);
5765 t2 = gen_reg_rtx (V4SImode);
5767 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5772 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5777 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself (high half), then
;; gen_sse2_umulv2siv2di3 multiplies the two interleaved vectors into
;; operand 0.
;; op1/op2 are assigned on lines not shown (presumably operands[1]/[2]).
5781 (define_expand "vec_widen_umult_hi_v4si"
5782 [(match_operand:V2DI 0 "register_operand" "")
5783 (match_operand:V4SI 1 "register_operand" "")
5784 (match_operand:V4SI 2 "register_operand" "")]
5787 rtx op1, op2, t1, t2;
5791 t1 = gen_reg_rtx (V4SImode);
5792 t2 = gen_reg_rtx (V4SImode);
5794 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5795 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5796 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself (low half), then
;; gen_sse2_umulv2siv2di3 multiplies the two interleaved vectors into
;; operand 0.
;; op1/op2 are assigned on lines not shown (presumably operands[1]/[2]).
5800 (define_expand "vec_widen_umult_lo_v4si"
5801 [(match_operand:V2DI 0 "register_operand" "")
5802 (match_operand:V4SI 1 "register_operand" "")
5803 (match_operand:V4SI 2 "register_operand" "")]
5806 rtx op1, op2, t1, t2;
5810 t1 = gen_reg_rtx (V4SImode);
5811 t2 = gen_reg_rtx (V4SImode);
5813 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5814 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5815 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product accumulate: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes through a fresh V4SI temporary.
5819 (define_expand "sdot_prodv8hi"
5820 [(match_operand:V4SI 0 "register_operand" "")
5821 (match_operand:V8HI 1 "register_operand" "")
5822 (match_operand:V8HI 2 "register_operand" "")
5823 (match_operand:V4SI 3 "register_operand" "")]
5826 rtx t = gen_reg_rtx (V4SImode);
5827 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5828 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product accumulate: V4SI x V4SI products summed into V2DI.
;; Step 1: t1 = umul (op1, op2) + op3, using gen_sse2_umulv2siv2di3.
;; Step 2: both inputs are shifted right as V1TI values (the shift amount is
;;         on lines not shown) into t2/t3 and multiplied again into t4.
;; Step 3: operand 0 = t1 + t4.
5832 (define_expand "udot_prodv4si"
5833 [(match_operand:V2DI 0 "register_operand" "")
5834 (match_operand:V4SI 1 "register_operand" "")
5835 (match_operand:V4SI 2 "register_operand" "")
5836 (match_operand:V2DI 3 "register_operand" "")]
5841 t1 = gen_reg_rtx (V2DImode);
5842 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5843 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5845 t2 = gen_reg_rtx (V4SImode);
5846 t3 = gen_reg_rtx (V4SImode);
5847 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5848 gen_lowpart (V1TImode, operands[1]),
5850 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5851 gen_lowpart (V1TImode, operands[2]),
5854 t4 = gen_reg_rtx (V2DImode);
5855 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5857 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX vpsra for SSEMODE24 (V8HI, V4SI), three-operand form.  The SImode
;; count is an "x" register or an "N" constant; length_immediate is chosen
;; by testing whether operand 2 is a const_int ("0" when it is not).
5861 (define_insn "*avx_ashr<mode>3"
5862 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5864 (match_operand:SSEMODE24 1 "register_operand" "x")
5865 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5867 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5868 [(set_attr "type" "sseishft")
5869 (set_attr "prefix" "vex")
5870 (set (attr "length_immediate")
5871 (if_then_else (match_operand 2 "const_int_operand" "")
5873 (const_string "0")))
5874 (set_attr "mode" "TI")])
;; psra for SSEMODE24 (V8HI, V4SI), two-operand form with operand 1 tied
;; to the destination.  length_immediate is chosen by testing whether
;; operand 2 is a const_int ("0" when it is not).
5876 (define_insn "ashr<mode>3"
5877 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5879 (match_operand:SSEMODE24 1 "register_operand" "0")
5880 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5882 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5883 [(set_attr "type" "sseishft")
5884 (set_attr "prefix_data16" "1")
5885 (set (attr "length_immediate")
5886 (if_then_else (match_operand 2 "const_int_operand" "")
5888 (const_string "0")))
5889 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) right shift, emitted as vpsrldq.  Operand 2 is
;; divided by 8 before printing, so the RTL count is eight times the
;; immediate that reaches the instruction.
5891 (define_insn "*avx_lshrv1ti3"
5892 [(set (match_operand:V1TI 0 "register_operand" "=x")
5894 (match_operand:V1TI 1 "register_operand" "x")
5895 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5898 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5899 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5901 [(set_attr "type" "sseishft")
5902 (set_attr "prefix" "vex")
5903 (set_attr "length_immediate" "1")
5904 (set_attr "mode" "TI")])
;; AVX per-element logical right shift (vpsrl) over the SSEMODE248 modes,
;; three-operand form.  length_immediate is chosen by testing whether
;; operand 2 is a const_int ("0" when it is not).
5906 (define_insn "*avx_lshr<mode>3"
5907 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5908 (lshiftrt:SSEMODE248
5909 (match_operand:SSEMODE248 1 "register_operand" "x")
5910 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5912 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5913 [(set_attr "type" "sseishft")
5914 (set_attr "prefix" "vex")
5915 (set (attr "length_immediate")
5916 (if_then_else (match_operand 2 "const_int_operand" "")
5918 (const_string "0")))
5919 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) right shift, emitted as psrldq with operand 1
;; tied to the destination.  Operand 2 is divided by 8 before printing, so
;; callers of gen_sse2_lshrv1ti3 pass eight times the printed immediate.
5921 (define_insn "sse2_lshrv1ti3"
5922 [(set (match_operand:V1TI 0 "register_operand" "=x")
5924 (match_operand:V1TI 1 "register_operand" "0")
5925 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5928 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5929 return "psrldq\t{%2, %0|%0, %2}";
5931 [(set_attr "type" "sseishft")
5932 (set_attr "prefix_data16" "1")
5933 (set_attr "length_immediate" "1")
5934 (set_attr "atom_unit" "sishuf")
5935 (set_attr "mode" "TI")])
;; Per-element logical right shift (psrl) over the SSEMODE248 modes,
;; two-operand form with operand 1 tied to the destination.
;; length_immediate is chosen by testing whether operand 2 is a const_int
;; ("0" when it is not).
5937 (define_insn "lshr<mode>3"
5938 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5939 (lshiftrt:SSEMODE248
5940 (match_operand:SSEMODE248 1 "register_operand" "0")
5941 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5943 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5944 [(set_attr "type" "sseishft")
5945 (set_attr "prefix_data16" "1")
5946 (set (attr "length_immediate")
5947 (if_then_else (match_operand 2 "const_int_operand" "")
5949 (const_string "0")))
5950 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) left shift, emitted as vpslldq.  Operand 2 is
;; divided by 8 before printing, so the RTL count is eight times the
;; immediate that reaches the instruction.
5952 (define_insn "*avx_ashlv1ti3"
5953 [(set (match_operand:V1TI 0 "register_operand" "=x")
5954 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
5955 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5958 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5959 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5961 [(set_attr "type" "sseishft")
5962 (set_attr "prefix" "vex")
5963 (set_attr "length_immediate" "1")
5964 (set_attr "mode" "TI")])
;; AVX per-element left shift (vpsll) over the SSEMODE248 modes,
;; three-operand form.  length_immediate is chosen by testing whether
;; operand 2 is a const_int ("0" when it is not).
5966 (define_insn "*avx_ashl<mode>3"
5967 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5969 (match_operand:SSEMODE248 1 "register_operand" "x")
5970 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5972 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5973 [(set_attr "type" "sseishft")
5974 (set_attr "prefix" "vex")
5975 (set (attr "length_immediate")
5976 (if_then_else (match_operand 2 "const_int_operand" "")
5978 (const_string "0")))
5979 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) left shift, emitted as pslldq with operand 1
;; tied to the destination.  Operand 2 is divided by 8 before printing.
5981 (define_insn "sse2_ashlv1ti3"
5982 [(set (match_operand:V1TI 0 "register_operand" "=x")
5983 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
5984 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5987 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5988 return "pslldq\t{%2, %0|%0, %2}";
5990 [(set_attr "type" "sseishft")
5991 (set_attr "prefix_data16" "1")
5992 (set_attr "length_immediate" "1")
5993 (set_attr "mode" "TI")])
;; Per-element left shift (psll) over the SSEMODE248 modes, two-operand
;; form with operand 1 tied to the destination.  length_immediate is chosen
;; by testing whether operand 2 is a const_int ("0" when it is not).
5995 (define_insn "ashl<mode>3"
5996 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5998 (match_operand:SSEMODE248 1 "register_operand" "0")
5999 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6001 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6002 [(set_attr "type" "sseishft")
6003 (set_attr "prefix_data16" "1")
6004 (set (attr "length_immediate")
6005 (if_then_else (match_operand 2 "const_int_operand" "")
6007 (const_string "0")))
6008 (set_attr "mode" "TI")])
;; Whole-vector shift for the SSEMODEI modes (V16QI, V8HI, V4SI, V2DI).
;; Operands 0 and 1 are re-viewed as V1TImode with gen_lowpart so that the
;; shift is done on the full 128-bit value; the RTL shift code is on a line
;; not shown here (the name indicates a left shift).
6010 (define_expand "vec_shl_<mode>"
6011 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6013 (match_operand:SSEMODEI 1 "register_operand" "")
6014 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6017 operands[0] = gen_lowpart (V1TImode, operands[0]);
6018 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-vector shift for the SSEMODEI modes (V16QI, V8HI, V4SI, V2DI).
;; Operands 0 and 1 are re-viewed as V1TImode with gen_lowpart so that the
;; shift is done on the full 128-bit value; the RTL shift code is on a line
;; not shown here (the name indicates a right shift).
6021 (define_expand "vec_shr_<mode>"
6022 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6024 (match_operand:SSEMODEI 1 "register_operand" "")
6025 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6028 operands[0] = gen_lowpart (V1TImode, operands[0]);
6029 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX integer max/min over the SSEMODE124 modes, emitted as
;; vp<maxmin_int><ssevecsize>.  prefix_extra depends on the code/mode pair:
;; it is "0" when the mode is V8HI for SMAX/SMIN, or V16QI for the other
;; codes, and takes the alternative value otherwise.
6032 (define_insn "*avx_<code><mode>3"
6033 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6035 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6036 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6037 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6038 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6039 [(set_attr "type" "sseiadd")
6040 (set (attr "prefix_extra")
6042 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6045 (const_string "0")))
6046 (set_attr "prefix" "vex")
6047 (set_attr "mode" "TI")])
;; Expander for a V16QI max/min operation; <code> comes from a code iterator
;; on a line not shown here.  Operands go through
;; ix86_fixup_binary_operands_no_copy in the preparation statement.
6049 (define_expand "<code>v16qi3"
6050 [(set (match_operand:V16QI 0 "register_operand" "")
6052 (match_operand:V16QI 1 "nonimmediate_operand" "")
6053 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6055 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI max/min, emitted as two-operand p<maxmin_int>b with operand 1
;; tied to the destination.
6057 (define_insn "*<code>v16qi3"
6058 [(set (match_operand:V16QI 0 "register_operand" "=x")
6060 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6061 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6062 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6063 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6064 [(set_attr "type" "sseiadd")
6065 (set_attr "prefix_data16" "1")
6066 (set_attr "mode" "TI")])
;; Expander for a V8HI max/min operation; <code> comes from a code iterator
;; on a line not shown here.  Operands go through
;; ix86_fixup_binary_operands_no_copy in the preparation statement.
6068 (define_expand "<code>v8hi3"
6069 [(set (match_operand:V8HI 0 "register_operand" "")
6071 (match_operand:V8HI 1 "nonimmediate_operand" "")
6072 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6074 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI max/min, emitted as two-operand p<maxmin_int>w with operand 1
;; tied to the destination.
6076 (define_insn "*<code>v8hi3"
6077 [(set (match_operand:V8HI 0 "register_operand" "=x")
6079 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6080 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6081 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6082 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6083 [(set_attr "type" "sseiadd")
6084 (set_attr "prefix_data16" "1")
6085 (set_attr "mode" "TI")])
;; Expander for unsigned V8HI max.  Two paths are visible; the test that
;; selects between them is on lines not shown in this listing.
;;  - Fix up the operands and fall through to the umax RTL above.
;;  - Synthesize the result as ussub (op1, op2) + op2 using
;;    gen_sse2_ussubv8hi3 and gen_addv8hi3.  A fresh temporary is used for
;;    the intermediate value when the destination is the same rtx as
;;    operand 2, so operand 2 is still intact for the final add.
6087 (define_expand "umaxv8hi3"
6088 [(set (match_operand:V8HI 0 "register_operand" "")
6089 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6090 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6094 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6097 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6098 if (rtx_equal_p (op3, op2))
6099 op3 = gen_reg_rtx (V8HImode);
6100 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6101 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for signed max on SSEMODE14 (V16QI, V4SI).  Two paths are
;; visible; the selecting test is on lines not shown here.
;;  - Fix up the operands and fall through to the smax RTL above.
;;  - Build a six-entry xops[] and call ix86_expand_int_vcond with the
;;    comparison op1 > op2 and value slots (op1, op2) -- presumably
;;    dest = cmp ? xops[1] : xops[2]; confirm against ix86_expand_int_vcond.
6106 (define_expand "smax<mode>3"
6107 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6108 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6109 (match_operand:SSEMODE14 2 "register_operand" "")))]
6113 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6119 xops[0] = operands[0];
6120 xops[1] = operands[1];
6121 xops[2] = operands[2];
6122 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6123 xops[4] = operands[1];
6124 xops[5] = operands[2];
6125 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min for SSEMODE14 (V16QI, V4SI), emitted as two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination.
6131 (define_insn "*sse4_1_<code><mode>3"
6132 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6134 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6135 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6136 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6137 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6138 [(set_attr "type" "sseiadd")
6139 (set_attr "prefix_extra" "1")
6140 (set_attr "mode" "TI")])
;; Expander for signed V2DI max, done through ix86_expand_int_vcond with
;; the comparison op1 > op2 and value slots (op1, op2) -- presumably
;; dest = cmp ? xops[1] : xops[2]; confirm against ix86_expand_int_vcond.
6142 (define_expand "smaxv2di3"
6143 [(set (match_operand:V2DI 0 "register_operand" "")
6144 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6145 (match_operand:V2DI 2 "register_operand" "")))]
6151 xops[0] = operands[0];
6152 xops[1] = operands[1];
6153 xops[2] = operands[2];
6154 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6155 xops[4] = operands[1];
6156 xops[5] = operands[2];
6157 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned V4SI max.  Two paths are visible; the selecting
;; test is on lines not shown here.
;;  - Fix up the operands and fall through to the umax RTL above.
;;  - Call ix86_expand_int_vcond with the unsigned comparison op1 >u op2
;;    (GTU) and value slots (op1, op2) -- presumably
;;    dest = cmp ? xops[1] : xops[2]; confirm against ix86_expand_int_vcond.
6162 (define_expand "umaxv4si3"
6163 [(set (match_operand:V4SI 0 "register_operand" "")
6164 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6165 (match_operand:V4SI 2 "register_operand" "")))]
6169 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6175 xops[0] = operands[0];
6176 xops[1] = operands[1];
6177 xops[2] = operands[2];
6178 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6179 xops[4] = operands[1];
6180 xops[5] = operands[2];
6181 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min for SSEMODE24 (V8HI, V4SI), emitted as two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination.
6187 (define_insn "*sse4_1_<code><mode>3"
6188 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6190 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6191 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6192 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6193 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6194 [(set_attr "type" "sseiadd")
6195 (set_attr "prefix_extra" "1")
6196 (set_attr "mode" "TI")])
;; Expander for unsigned V2DI max, done through ix86_expand_int_vcond with
;; the unsigned comparison op1 >u op2 (GTU) and value slots (op1, op2) --
;; presumably dest = cmp ? xops[1] : xops[2]; confirm against
;; ix86_expand_int_vcond.
6198 (define_expand "umaxv2di3"
6199 [(set (match_operand:V2DI 0 "register_operand" "")
6200 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6201 (match_operand:V2DI 2 "register_operand" "")))]
6207 xops[0] = operands[0];
6208 xops[1] = operands[1];
6209 xops[2] = operands[2];
6210 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6211 xops[4] = operands[1];
6212 xops[5] = operands[2];
6213 ok = ix86_expand_int_vcond (xops);
;; Expander for signed min on SSEMODE14 (V16QI, V4SI).  Two paths are
;; visible; the selecting test is on lines not shown here.
;;  - Fix up the operands and fall through to the smin RTL above.
;;  - Call ix86_expand_int_vcond with the comparison op1 > op2; note the
;;    value slots are swapped relative to smax (xops[1] = op2,
;;    xops[2] = op1), so the same GT test yields the minimum.
6218 (define_expand "smin<mode>3"
6219 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6220 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6221 (match_operand:SSEMODE14 2 "register_operand" "")))]
6225 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6231 xops[0] = operands[0];
6232 xops[1] = operands[2];
6233 xops[2] = operands[1];
6234 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6235 xops[4] = operands[1];
6236 xops[5] = operands[2];
6237 ok = ix86_expand_int_vcond (xops);
;; Expander for signed V2DI min, done through ix86_expand_int_vcond with
;; the comparison op1 > op2.  The value slots are swapped relative to
;; smaxv2di3 (xops[1] = op2, xops[2] = op1).
6243 (define_expand "sminv2di3"
6244 [(set (match_operand:V2DI 0 "register_operand" "")
6245 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6246 (match_operand:V2DI 2 "register_operand" "")))]
6252 xops[0] = operands[0];
6253 xops[1] = operands[2];
6254 xops[2] = operands[1];
6255 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6256 xops[4] = operands[1];
6257 xops[5] = operands[2];
6258 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned min on SSEMODE24 (V8HI, V4SI).  Two paths are
;; visible; the selecting test is on lines not shown here.
;;  - Fix up the operands and fall through to the umin RTL above.
;;  - Call ix86_expand_int_vcond with the unsigned comparison op1 >u op2
;;    (GTU) and swapped value slots (xops[1] = op2, xops[2] = op1).
6263 (define_expand "umin<mode>3"
6264 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6265 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6266 (match_operand:SSEMODE24 2 "register_operand" "")))]
6270 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6276 xops[0] = operands[0];
6277 xops[1] = operands[2];
6278 xops[2] = operands[1];
6279 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6280 xops[4] = operands[1];
6281 xops[5] = operands[2];
6282 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned V2DI min, done through ix86_expand_int_vcond with
;; the unsigned comparison op1 >u op2 (GTU) and swapped value slots
;; (xops[1] = op2, xops[2] = op1).
6288 (define_expand "uminv2di3"
6289 [(set (match_operand:V2DI 0 "register_operand" "")
6290 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6291 (match_operand:V2DI 2 "register_operand" "")))]
6297 xops[0] = operands[0];
6298 xops[1] = operands[2];
6299 xops[2] = operands[1];
6300 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6301 xops[4] = operands[1];
6302 xops[5] = operands[2];
6303 ok = ix86_expand_int_vcond (xops);
6308 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6310 ;; Parallel integral comparisons
6312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality over the SSEMODE124 modes.  Enabled
;; for SSE2 only when XOP is not in use; operands are fixed up with code EQ.
6314 (define_expand "sse2_eq<mode>3"
6315 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6317 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6318 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6319 "TARGET_SSE2 && !TARGET_XOP "
6320 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX element-wise equality over the SSEMODE1248 modes, emitted as
;; three-operand vpcmpeq<ssevecsize>.  prefix_extra is special-cased when
;; operand 0 is V2DI and left at its default ("*") for the other modes.
6322 (define_insn "*avx_eq<mode>3"
6323 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6325 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6326 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6327 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6328 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6329 [(set_attr "type" "ssecmp")
6330 (set (attr "prefix_extra")
6331 (if_then_else (match_operand:V2DI 0 "" "")
6333 (const_string "*")))
6334 (set_attr "prefix" "vex")
6335 (set_attr "mode" "TI")])
;; SSE2 element-wise equality over the SSEMODE124 modes, emitted as
;; two-operand pcmpeq<ssevecsize> with operand 1 tied to the destination.
;; Disabled when XOP is in use.
6337 (define_insn "*sse2_eq<mode>3"
6338 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6340 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6341 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6342 "TARGET_SSE2 && !TARGET_XOP
6343 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6344 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6345 [(set_attr "type" "ssecmp")
6346 (set_attr "prefix_data16" "1")
6347 (set_attr "mode" "TI")])
;; Expander for V2DI element-wise equality; operands are fixed up with
;; code EQ.  The enabling condition is on a line not shown here.
6349 (define_expand "sse4_1_eqv2di3"
6350 [(set (match_operand:V2DI 0 "register_operand" "")
6352 (match_operand:V2DI 1 "nonimmediate_operand" "")
6353 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6355 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 V2DI equality compare.  Emits the two-operand pcmpeqq with operand 1
;; tied to the destination ("%0", commutative) and operand 2 in a register or
;; memory.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok for EQ.
6357 (define_insn "*sse4_1_eqv2di3"
6358 [(set (match_operand:V2DI 0 "register_operand" "=x")
6360 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6361 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6362 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6363 "pcmpeqq\t{%2, %0|%0, %2}"
6364 [(set_attr "type" "ssecmp")
6365 (set_attr "prefix_extra" "1")
6366 (set_attr "mode" "TI")])
;; AVX greater-than compare over SSEMODE1248.  Emits the three-operand
;; vpcmpgt<ssevecsize>.  Unlike the equality patterns there is no "%" marker:
;; operand 1 must be a register and only operand 2 may be memory, so operand
;; order is significant.  prefix_extra depends on whether operand 0 is V2DI;
;; the non-V2DI arm yields "*".
6368 (define_insn "*avx_gt<mode>3"
6369 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6371 (match_operand:SSEMODE1248 1 "register_operand" "x")
6372 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6374 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6375 [(set_attr "type" "ssecmp")
6376 (set (attr "prefix_extra")
6377 (if_then_else (match_operand:V2DI 0 "" "")
6379 (const_string "*")))
6380 (set_attr "prefix" "vex")
6381 (set_attr "mode" "TI")])
;; Legacy SSE2 greater-than compare over SSEMODE124.  Emits the two-operand
;; pcmpgt<ssevecsize>; the "0" constraint forces operand 1 into the
;; destination register, and operand 2 may be a register or memory.
;; Enabled for TARGET_SSE2 without TARGET_XOP.  The pattern is named (no
;; leading "*"), so a gen_ function is generated for it.
6383 (define_insn "sse2_gt<mode>3"
6384 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6386 (match_operand:SSEMODE124 1 "register_operand" "0")
6387 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6388 "TARGET_SSE2 && !TARGET_XOP"
6389 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6390 [(set_attr "type" "ssecmp")
6391 (set_attr "prefix_data16" "1")
6392 (set_attr "mode" "TI")])
;; V2DI greater-than compare.  Emits the two-operand pcmpgtq with operand 1
;; tied to the destination ("0") and operand 2 in a register or memory.
;; NOTE(review): the pattern name indicates SSE4.2; the enabling condition
;; line is absent from this listing -- confirm against the full file.
6394 (define_insn "sse4_2_gtv2di3"
6395 [(set (match_operand:V2DI 0 "register_operand" "=x")
6397 (match_operand:V2DI 1 "register_operand" "0")
6398 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6400 "pcmpgtq\t{%2, %0|%0, %2}"
6401 [(set_attr "type" "ssecmp")
6402 (set_attr "prefix_extra" "1")
6403 (set_attr "mode" "TI")])
;; Expander for a vector conditional select over SSEMODE124C8.
;;   operand 0    - destination register
;;   operand 3    - comparison operator applied to operands 4 and 5
;;   operands 1,2 - values chosen by the "then" and "else" arms of the
;;                  if_then_else
;; All of the work is delegated to ix86_expand_int_vcond, whose boolean
;; result is captured in `ok'.
6405 (define_expand "vcond<mode>"
6406 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6407 (if_then_else:SSEMODE124C8
6408 (match_operator 3 ""
6409 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6410 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6411 (match_operand:SSEMODE124C8 1 "general_operand" "")
6412 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6415 bool ok = ix86_expand_int_vcond (operands);
;; Expander with the same operand layout as vcond<mode>: operand 3 is the
;; comparison operator over operands 4 and 5, and operands 1 and 2 are the
;; values selected into operand 0.  It delegates to the same helper,
;; ix86_expand_int_vcond, keeping the result in `ok'.
;; NOTE(review): the "vcondu" name is the standard pattern name for unsigned
;; comparisons; nothing in the visible body differs from vcond<mode>.
6420 (define_expand "vcondu<mode>"
6421 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6422 (if_then_else:SSEMODE124C8
6423 (match_operator 3 ""
6424 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6425 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6426 (match_operand:SSEMODE124C8 1 "general_operand" "")
6427 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6430 bool ok = ix86_expand_int_vcond (operands);
6435 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6437 ;; Parallel bitwise logical operations
6439 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the bitwise complement of an integer vector (SSEMODEI),
;; expressed as an xor.  The preparation code allocates an rtvec with one
;; entry per vector element (GET_MODE_NUNITS), fills every entry with
;; constm1_rtx, wraps it in a CONST_VECTOR and forces that all-ones constant
;; into a register, which becomes operand 2.
6441 (define_expand "one_cmpl<mode>2"
6442 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6443 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6447 int i, n = GET_MODE_NUNITS (<MODE>mode);
6448 rtvec v = rtvec_alloc (n);
6450 for (i = 0; i < n; ++i)
6451 RTVEC_ELT (v, i) = constm1_rtx;
6453 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not for the 256-bit integer vector modes (AVX256MODEI).  Operand 1
;; appears under a `not', so it is the complemented input; operand 2 may be a
;; register or memory.  The integer operation is emitted with the
;; floating-point mnemonic vandnps (three-operand form), and the insn mode
;; attribute is <avxvecpsmode> accordingly.
6456 (define_insn "*avx_andnot<mode>3"
6457 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6459 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6460 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6462 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6463 [(set_attr "type" "sselog")
6464 (set_attr "prefix" "vex")
6465 (set_attr "mode" "<avxvecpsmode>")])
;; And-not on 128-bit integer vectors for targets with SSE but not SSE2.
;; Emits the two-operand andnps (insn mode V4SF); the complemented input
;; (operand 1, under `not') is tied to the destination by the "0" constraint.
6467 (define_insn "*sse_andnot<mode>3"
6468 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6470 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6471 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6472 "(TARGET_SSE && !TARGET_SSE2)"
6473 "andnps\t{%2, %0|%0, %2}"
6474 [(set_attr "type" "sselog")
6475 (set_attr "mode" "V4SF")])
;; AVX and-not for the 128-bit integer vector modes (SSEMODEI).  Emits the
;; three-operand vpandn; operand 1 (under `not') is the complemented input
;; and must be a register, operand 2 may be a register or memory.
6477 (define_insn "*avx_andnot<mode>3"
6478 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6480 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6481 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6483 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6484 [(set_attr "type" "sselog")
6485 (set_attr "prefix" "vex")
6486 (set_attr "mode" "TI")])
;; Legacy-encoded and-not for the 128-bit integer vector modes (SSEMODEI).
;; Emits the two-operand pandn; the complemented input (operand 1, under
;; `not') is tied to the destination by the "0" constraint, and operand 2 may
;; be a register or memory.  The pattern is named, so a gen_ function exists.
6488 (define_insn "sse2_andnot<mode>3"
6489 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6491 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6492 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6494 "pandn\t{%2, %0|%0, %2}"
6495 [(set_attr "type" "sselog")
6496 (set_attr "prefix_data16" "1")
6497 (set_attr "mode" "TI")])
;; And-not on a TFmode value held in an SSE register.  Emits the same
;; two-operand pandn as the integer-vector pattern: operand 1 (under `not')
;; is tied to the destination, operand 2 may be a register or memory.
6499 (define_insn "*andnottf3"
6500 [(set (match_operand:TF 0 "register_operand" "=x")
6502 (not:TF (match_operand:TF 1 "register_operand" "0"))
6503 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6505 "pandn\t{%2, %0|%0, %2}"
6506 [(set_attr "type" "sselog")
6507 (set_attr "prefix_data16" "1")
6508 (set_attr "mode" "TI")])
;; Expander for a binary operation on the 128-bit integer vector modes
;; (SSEMODEI); the pattern name is built from the <code> and <mode> iterator
;; attributes.  Both inputs are nonimmediate operands, and the preparation
;; statement passes the operand array to ix86_fixup_binary_operands_no_copy
;; for the operation's rtx code.
6510 (define_expand "<code><mode>3"
6511 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6513 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6514 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6516 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX logical operation (any_logic code iterator) on the 256-bit integer
;; vector modes (AVX256MODEI).  Emitted with the floating-point mnemonic
;; v<logic>ps in three-operand form, and the insn mode attribute is
;; <avxvecpsmode>.  Operand 1 is marked commutative ("%"); operand 2 may be a
;; register or memory.  Subject to ix86_binary_operator_ok.
6518 (define_insn "*avx_<code><mode>3"
6519 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6520 (any_logic:AVX256MODEI
6521 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6522 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6524 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6525 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6526 [(set_attr "type" "sselog")
6527 (set_attr "prefix" "vex")
6528 (set_attr "mode" "<avxvecpsmode>")])
;; Logical operation on 128-bit integer vectors for targets with SSE but not
;; SSE2.  Emits the two-operand <logic>ps (insn mode V4SF) with operand 1
;; tied to the destination ("%0", commutative) and operand 2 in a register or
;; memory.  Subject to ix86_binary_operator_ok.
6530 (define_insn "*sse_<code><mode>3"
6531 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6533 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6534 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6535 "(TARGET_SSE && !TARGET_SSE2)
6536 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6537 "<logic>ps\t{%2, %0|%0, %2}"
6538 [(set_attr "type" "sselog")
6539 (set_attr "mode" "V4SF")])
;; AVX logical operation on the 128-bit integer vector modes (SSEMODEI).
;; Emits the three-operand integer mnemonic vp<logic>.  Operand 1 is marked
;; commutative ("%"); operand 2 may be a register or memory.  Subject to
;; ix86_binary_operator_ok.
6541 (define_insn "*avx_<code><mode>3"
6542 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6544 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6545 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6547 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6548 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6549 [(set_attr "type" "sselog")
6550 (set_attr "prefix" "vex")
6551 (set_attr "mode" "TI")])
;; SSE2 logical operation on the 128-bit integer vector modes (SSEMODEI).
;; Emits the two-operand p<logic> with operand 1 tied to the destination
;; ("%0", commutative) and operand 2 in a register or memory.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok.
6553 (define_insn "*sse2_<code><mode>3"
6554 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6556 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6557 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6558 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6559 "p<logic>\t{%2, %0|%0, %2}"
6560 [(set_attr "type" "sselog")
6561 (set_attr "prefix_data16" "1")
6562 (set_attr "mode" "TI")])
;; Expander for a binary operation on TFmode values; the pattern name is
;; built from the <code> iterator attribute.  The preparation statement
;; passes the operand array to ix86_fixup_binary_operands_no_copy with
;; TFmode.
6564 (define_expand "<code>tf3"
6565 [(set (match_operand:TF 0 "register_operand" "")
6567 (match_operand:TF 1 "nonimmediate_operand" "")
6568 (match_operand:TF 2 "nonimmediate_operand" "")))]
6570 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 logical operation on a TFmode value held in an SSE register.  Emits
;; the two-operand integer mnemonic p<logic> with operand 1 tied to the
;; destination ("%0", commutative) and operand 2 in a register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
6572 (define_insn "*<code>tf3"
6573 [(set (match_operand:TF 0 "register_operand" "=x")
6575 (match_operand:TF 1 "nonimmediate_operand" "%0")
6576 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6577 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6578 "p<logic>\t{%2, %0|%0, %2}"
6579 [(set_attr "type" "sselog")
6580 (set_attr "prefix_data16" "1")
6581 (set_attr "mode" "TI")])
6583 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6585 ;; Parallel integral element swizzling
6587 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander that packs two V8HI inputs (operands 1 and 2) into one V16QI
;; result (operand 0).  Both inputs are reinterpreted as V16QI with
;; gen_lowpart and handed to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered bytes, i.e. the low
;; byte of every 16-bit element -- confirm against the helper.
6589 (define_expand "vec_pack_trunc_v8hi"
6590 [(match_operand:V16QI 0 "register_operand" "")
6591 (match_operand:V8HI 1 "register_operand" "")
6592 (match_operand:V8HI 2 "register_operand" "")]
6595 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6596 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6597 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Expander that packs two V4SI inputs (operands 1 and 2) into one V8HI
;; result (operand 0).  Both inputs are reinterpreted as V8HI with
;; gen_lowpart and handed to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered halfwords, i.e. the
;; low half of every 32-bit element -- confirm against the helper.
6601 (define_expand "vec_pack_trunc_v4si"
6602 [(match_operand:V8HI 0 "register_operand" "")
6603 (match_operand:V4SI 1 "register_operand" "")
6604 (match_operand:V4SI 2 "register_operand" "")]
6607 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6608 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6609 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Expander that packs two V2DI inputs (operands 1 and 2) into one V4SI
;; result (operand 0).  Both inputs are reinterpreted as V4SI with
;; gen_lowpart and handed to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered words, i.e. the low
;; half of every 64-bit element -- confirm against the helper.
6613 (define_expand "vec_pack_trunc_v2di"
6614 [(match_operand:V4SI 0 "register_operand" "")
6615 (match_operand:V2DI 1 "register_operand" "")
6616 (match_operand:V2DI 2 "register_operand" "")]
6619 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6620 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6621 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; AVX pack of two V8HI inputs into a V16QI result, emitted as the
;; three-operand vpacksswb.  Operand 1 must be a register; operand 2 may be a
;; register or memory.
;; NOTE(review): the rtx codes wrapping the two inputs are absent from this
;; listing; the mnemonic is the signed-saturating word-to-byte pack.
6625 (define_insn "*avx_packsswb"
6626 [(set (match_operand:V16QI 0 "register_operand" "=x")
6629 (match_operand:V8HI 1 "register_operand" "x"))
6631 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6633 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6634 [(set_attr "type" "sselog")
6635 (set_attr "prefix" "vex")
6636 (set_attr "mode" "TI")])
;; Legacy-encoded pack of two V8HI inputs into a V16QI result, emitted as the
;; two-operand packsswb.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
;; NOTE(review): the rtx codes wrapping the two inputs are absent from this
;; listing; the mnemonic is the signed-saturating word-to-byte pack.
6638 (define_insn "sse2_packsswb"
6639 [(set (match_operand:V16QI 0 "register_operand" "=x")
6642 (match_operand:V8HI 1 "register_operand" "0"))
6644 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6646 "packsswb\t{%2, %0|%0, %2}"
6647 [(set_attr "type" "sselog")
6648 (set_attr "prefix_data16" "1")
6649 (set_attr "mode" "TI")])
;; AVX pack of two V4SI inputs into a V8HI result, emitted as the
;; three-operand vpackssdw.  Operand 1 must be a register; operand 2 may be a
;; register or memory.
;; NOTE(review): the rtx codes wrapping the two inputs are absent from this
;; listing; the mnemonic is the signed-saturating dword-to-word pack.
6651 (define_insn "*avx_packssdw"
6652 [(set (match_operand:V8HI 0 "register_operand" "=x")
6655 (match_operand:V4SI 1 "register_operand" "x"))
6657 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6659 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6660 [(set_attr "type" "sselog")
6661 (set_attr "prefix" "vex")
6662 (set_attr "mode" "TI")])
;; Legacy-encoded pack of two V4SI inputs into a V8HI result, emitted as the
;; two-operand packssdw.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
;; NOTE(review): the rtx codes wrapping the two inputs are absent from this
;; listing; the mnemonic is the signed-saturating dword-to-word pack.
6664 (define_insn "sse2_packssdw"
6665 [(set (match_operand:V8HI 0 "register_operand" "=x")
6668 (match_operand:V4SI 1 "register_operand" "0"))
6670 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6672 "packssdw\t{%2, %0|%0, %2}"
6673 [(set_attr "type" "sselog")
6674 (set_attr "prefix_data16" "1")
6675 (set_attr "mode" "TI")])
;; AVX pack of two V8HI inputs into a V16QI result, emitted as the
;; three-operand vpackuswb.  Operand 1 must be a register; operand 2 may be a
;; register or memory.
;; NOTE(review): the rtx codes wrapping the two inputs are absent from this
;; listing; the mnemonic is the unsigned-saturating word-to-byte pack.
6677 (define_insn "*avx_packuswb"
6678 [(set (match_operand:V16QI 0 "register_operand" "=x")
6681 (match_operand:V8HI 1 "register_operand" "x"))
6683 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6685 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6686 [(set_attr "type" "sselog")
6687 (set_attr "prefix" "vex")
6688 (set_attr "mode" "TI")])
;; Legacy-encoded pack of two V8HI inputs into a V16QI result, emitted as the
;; two-operand packuswb.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
;; NOTE(review): the rtx codes wrapping the two inputs are absent from this
;; listing; the mnemonic is the unsigned-saturating word-to-byte pack.
6690 (define_insn "sse2_packuswb"
6691 [(set (match_operand:V16QI 0 "register_operand" "=x")
6694 (match_operand:V8HI 1 "register_operand" "0"))
6696 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6698 "packuswb\t{%2, %0|%0, %2}"
6699 [(set_attr "type" "sselog")
6700 (set_attr "prefix_data16" "1")
6701 (set_attr "mode" "TI")])
;; AVX high-half byte interleave of two V16QI inputs, emitted as the
;; three-operand vpunpckhbw.  The selector list alternates indices 8..15 with
;; indices 24..31.
;; NOTE(review): indices of 16 and above presumably address operand 2 within
;; a concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6703 (define_insn "*avx_interleave_highv16qi"
6704 [(set (match_operand:V16QI 0 "register_operand" "=x")
6707 (match_operand:V16QI 1 "register_operand" "x")
6708 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6709 (parallel [(const_int 8) (const_int 24)
6710 (const_int 9) (const_int 25)
6711 (const_int 10) (const_int 26)
6712 (const_int 11) (const_int 27)
6713 (const_int 12) (const_int 28)
6714 (const_int 13) (const_int 29)
6715 (const_int 14) (const_int 30)
6716 (const_int 15) (const_int 31)])))]
6718 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6719 [(set_attr "type" "sselog")
6720 (set_attr "prefix" "vex")
6721 (set_attr "mode" "TI")])
;; Legacy-encoded high-half byte interleave of two V16QI inputs, emitted as
;; the two-operand punpckhbw with operand 1 tied to the destination ("0").
;; The selector list alternates indices 8..15 with indices 24..31.
;; NOTE(review): indices of 16 and above presumably address operand 2 within
;; a concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6723 (define_insn "vec_interleave_highv16qi"
6724 [(set (match_operand:V16QI 0 "register_operand" "=x")
6727 (match_operand:V16QI 1 "register_operand" "0")
6728 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6729 (parallel [(const_int 8) (const_int 24)
6730 (const_int 9) (const_int 25)
6731 (const_int 10) (const_int 26)
6732 (const_int 11) (const_int 27)
6733 (const_int 12) (const_int 28)
6734 (const_int 13) (const_int 29)
6735 (const_int 14) (const_int 30)
6736 (const_int 15) (const_int 31)])))]
6738 "punpckhbw\t{%2, %0|%0, %2}"
6739 [(set_attr "type" "sselog")
6740 (set_attr "prefix_data16" "1")
6741 (set_attr "mode" "TI")])
;; AVX low-half byte interleave of two V16QI inputs, emitted as the
;; three-operand vpunpcklbw.  The selector list alternates indices 0..7 with
;; indices 16..23.
;; NOTE(review): indices of 16 and above presumably address operand 2 within
;; a concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6743 (define_insn "*avx_interleave_lowv16qi"
6744 [(set (match_operand:V16QI 0 "register_operand" "=x")
6747 (match_operand:V16QI 1 "register_operand" "x")
6748 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6749 (parallel [(const_int 0) (const_int 16)
6750 (const_int 1) (const_int 17)
6751 (const_int 2) (const_int 18)
6752 (const_int 3) (const_int 19)
6753 (const_int 4) (const_int 20)
6754 (const_int 5) (const_int 21)
6755 (const_int 6) (const_int 22)
6756 (const_int 7) (const_int 23)])))]
6758 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6759 [(set_attr "type" "sselog")
6760 (set_attr "prefix" "vex")
6761 (set_attr "mode" "TI")])
;; Legacy-encoded low-half byte interleave of two V16QI inputs, emitted as
;; the two-operand punpcklbw with operand 1 tied to the destination ("0").
;; The selector list alternates indices 0..7 with indices 16..23.
;; NOTE(review): indices of 16 and above presumably address operand 2 within
;; a concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6763 (define_insn "vec_interleave_lowv16qi"
6764 [(set (match_operand:V16QI 0 "register_operand" "=x")
6767 (match_operand:V16QI 1 "register_operand" "0")
6768 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6769 (parallel [(const_int 0) (const_int 16)
6770 (const_int 1) (const_int 17)
6771 (const_int 2) (const_int 18)
6772 (const_int 3) (const_int 19)
6773 (const_int 4) (const_int 20)
6774 (const_int 5) (const_int 21)
6775 (const_int 6) (const_int 22)
6776 (const_int 7) (const_int 23)])))]
6778 "punpcklbw\t{%2, %0|%0, %2}"
6779 [(set_attr "type" "sselog")
6780 (set_attr "prefix_data16" "1")
6781 (set_attr "mode" "TI")])
;; AVX high-half word interleave of two V8HI inputs, emitted as the
;; three-operand vpunpckhwd.  The selector list alternates indices 4..7 with
;; indices 12..15.
;; NOTE(review): indices of 8 and above presumably address operand 2 within a
;; concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6783 (define_insn "*avx_interleave_highv8hi"
6784 [(set (match_operand:V8HI 0 "register_operand" "=x")
6787 (match_operand:V8HI 1 "register_operand" "x")
6788 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6789 (parallel [(const_int 4) (const_int 12)
6790 (const_int 5) (const_int 13)
6791 (const_int 6) (const_int 14)
6792 (const_int 7) (const_int 15)])))]
6794 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6795 [(set_attr "type" "sselog")
6796 (set_attr "prefix" "vex")
6797 (set_attr "mode" "TI")])
;; Legacy-encoded high-half word interleave of two V8HI inputs, emitted as
;; the two-operand punpckhwd with operand 1 tied to the destination ("0").
;; The selector list alternates indices 4..7 with indices 12..15.
;; NOTE(review): indices of 8 and above presumably address operand 2 within a
;; concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6799 (define_insn "vec_interleave_highv8hi"
6800 [(set (match_operand:V8HI 0 "register_operand" "=x")
6803 (match_operand:V8HI 1 "register_operand" "0")
6804 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6805 (parallel [(const_int 4) (const_int 12)
6806 (const_int 5) (const_int 13)
6807 (const_int 6) (const_int 14)
6808 (const_int 7) (const_int 15)])))]
6810 "punpckhwd\t{%2, %0|%0, %2}"
6811 [(set_attr "type" "sselog")
6812 (set_attr "prefix_data16" "1")
6813 (set_attr "mode" "TI")])
;; AVX low-half word interleave of two V8HI inputs, emitted as the
;; three-operand vpunpcklwd.  The selector list alternates indices 0..3 with
;; indices 8..11.
;; NOTE(review): indices of 8 and above presumably address operand 2 within a
;; concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6815 (define_insn "*avx_interleave_lowv8hi"
6816 [(set (match_operand:V8HI 0 "register_operand" "=x")
6819 (match_operand:V8HI 1 "register_operand" "x")
6820 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6821 (parallel [(const_int 0) (const_int 8)
6822 (const_int 1) (const_int 9)
6823 (const_int 2) (const_int 10)
6824 (const_int 3) (const_int 11)])))]
6826 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6827 [(set_attr "type" "sselog")
6828 (set_attr "prefix" "vex")
6829 (set_attr "mode" "TI")])
;; Legacy-encoded low-half word interleave of two V8HI inputs, emitted as the
;; two-operand punpcklwd with operand 1 tied to the destination ("0").
;; The selector list alternates indices 0..3 with indices 8..11.
;; NOTE(review): indices of 8 and above presumably address operand 2 within a
;; concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6831 (define_insn "vec_interleave_lowv8hi"
6832 [(set (match_operand:V8HI 0 "register_operand" "=x")
6835 (match_operand:V8HI 1 "register_operand" "0")
6836 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6837 (parallel [(const_int 0) (const_int 8)
6838 (const_int 1) (const_int 9)
6839 (const_int 2) (const_int 10)
6840 (const_int 3) (const_int 11)])))]
6842 "punpcklwd\t{%2, %0|%0, %2}"
6843 [(set_attr "type" "sselog")
6844 (set_attr "prefix_data16" "1")
6845 (set_attr "mode" "TI")])
;; AVX high-half dword interleave of two V4SI inputs, emitted as the
;; three-operand vpunpckhdq.  The selector list is 2, 6, 3, 7.
;; NOTE(review): indices of 4 and above presumably address operand 2 within a
;; concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6847 (define_insn "*avx_interleave_highv4si"
6848 [(set (match_operand:V4SI 0 "register_operand" "=x")
6851 (match_operand:V4SI 1 "register_operand" "x")
6852 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6853 (parallel [(const_int 2) (const_int 6)
6854 (const_int 3) (const_int 7)])))]
6856 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6857 [(set_attr "type" "sselog")
6858 (set_attr "prefix" "vex")
6859 (set_attr "mode" "TI")])
;; Legacy-encoded high-half dword interleave of two V4SI inputs, emitted as
;; the two-operand punpckhdq with operand 1 tied to the destination ("0").
;; The selector list is 2, 6, 3, 7.
;; NOTE(review): indices of 4 and above presumably address operand 2 within a
;; concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6861 (define_insn "vec_interleave_highv4si"
6862 [(set (match_operand:V4SI 0 "register_operand" "=x")
6865 (match_operand:V4SI 1 "register_operand" "0")
6866 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6867 (parallel [(const_int 2) (const_int 6)
6868 (const_int 3) (const_int 7)])))]
6870 "punpckhdq\t{%2, %0|%0, %2}"
6871 [(set_attr "type" "sselog")
6872 (set_attr "prefix_data16" "1")
6873 (set_attr "mode" "TI")])
;; AVX low-half dword interleave of two V4SI inputs, emitted as the
;; three-operand vpunpckldq.  The selector list is 0, 4, 1, 5.
;; NOTE(review): indices of 4 and above presumably address operand 2 within a
;; concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6875 (define_insn "*avx_interleave_lowv4si"
6876 [(set (match_operand:V4SI 0 "register_operand" "=x")
6879 (match_operand:V4SI 1 "register_operand" "x")
6880 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6881 (parallel [(const_int 0) (const_int 4)
6882 (const_int 1) (const_int 5)])))]
6884 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6885 [(set_attr "type" "sselog")
6886 (set_attr "prefix" "vex")
6887 (set_attr "mode" "TI")])
;; Legacy-encoded low-half dword interleave of two V4SI inputs, emitted as
;; the two-operand punpckldq with operand 1 tied to the destination ("0").
;; The selector list is 0, 4, 1, 5.
;; NOTE(review): indices of 4 and above presumably address operand 2 within a
;; concatenation of the two inputs; the vec_select/vec_concat lines are
;; absent from this listing.
6889 (define_insn "vec_interleave_lowv4si"
6890 [(set (match_operand:V4SI 0 "register_operand" "=x")
6893 (match_operand:V4SI 1 "register_operand" "0")
6894 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6895 (parallel [(const_int 0) (const_int 4)
6896 (const_int 1) (const_int 5)])))]
6898 "punpckldq\t{%2, %0|%0, %2}"
6899 [(set_attr "type" "sselog")
6900 (set_attr "prefix_data16" "1")
6901 (set_attr "mode" "TI")])
;; AVX element insert for the SSEMODE124 modes, modelled as a vec_merge of a
;; duplicated scalar (operand 2, register or memory) into vector operand 1.
;; Operand 3 is the vec_merge mask and must be a single power of two (the
;; const_pow2_1_to_<pinsrbits>_operand predicate); the output code turns it
;; into an element index with exact_log2 for the instruction immediate.
;; A memory source is printed as %2; the other return prints it as %k2.
;; prefix_extra is selected by testing whether operand 0 is V8HI, with "1"
;; for the other modes.
6903 (define_insn "*avx_pinsr<ssevecsize>"
6904 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6905 (vec_merge:SSEMODE124
6906 (vec_duplicate:SSEMODE124
6907 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6908 (match_operand:SSEMODE124 1 "register_operand" "x")
6909 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6912 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6913 if (MEM_P (operands[2]))
6914 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6916 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6918 [(set_attr "type" "sselog")
6919 (set (attr "prefix_extra")
6920 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6922 (const_string "1")))
6923 (set_attr "length_immediate" "1")
6924 (set_attr "prefix" "vex")
6925 (set_attr "mode" "TI")])
;; Byte insert into a V16QI vector (pinsrb).  The QI scalar (operand 2,
;; register or memory) is duplicated and merged into operand 1, which is tied
;; to the destination ("0").  Operand 3 is a single power of two in the range
;; 1..32768; exact_log2 converts it to the byte index used as the immediate.
;; A memory source is printed as %2; the other return prints it as %k2.
6927 (define_insn "*sse4_1_pinsrb"
6928 [(set (match_operand:V16QI 0 "register_operand" "=x")
6930 (vec_duplicate:V16QI
6931 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6932 (match_operand:V16QI 1 "register_operand" "0")
6933 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6936 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6937 if (MEM_P (operands[2]))
6938 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6940 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6942 [(set_attr "type" "sselog")
6943 (set_attr "prefix_extra" "1")
6944 (set_attr "length_immediate" "1")
6945 (set_attr "mode" "TI")])
;; Word insert into a V8HI vector (pinsrw).  The HI scalar (operand 2,
;; register or memory) is merged into operand 1, which is tied to the
;; destination ("0").  Operand 3 is a single power of two in the range
;; 1..128; exact_log2 converts it to the word index used as the immediate.
;; A memory source is printed as %2; the other return prints it as %k2.
6947 (define_insn "*sse2_pinsrw"
6948 [(set (match_operand:V8HI 0 "register_operand" "=x")
6951 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6952 (match_operand:V8HI 1 "register_operand" "0")
6953 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6956 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6957 if (MEM_P (operands[2]))
6958 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6960 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6962 [(set_attr "type" "sselog")
6963 (set_attr "prefix_data16" "1")
6964 (set_attr "length_immediate" "1")
6965 (set_attr "mode" "TI")])
;; Dword insert into a V4SI vector (pinsrd).  The SI scalar (operand 2,
;; register or memory) is merged into operand 1, which is tied to the
;; destination ("0").  Operand 3 is a single power of two in the range 1..8;
;; exact_log2 converts it to the dword index used as the immediate.
;; Pattern order matters here, as the original note below records.
6967 ;; It must come before sse2_loadld since it is preferred.
6968 (define_insn "*sse4_1_pinsrd"
6969 [(set (match_operand:V4SI 0 "register_operand" "=x")
6972 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6973 (match_operand:V4SI 1 "register_operand" "0")
6974 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6977 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6978 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6980 [(set_attr "type" "sselog")
6981 (set_attr "prefix_extra" "1")
6982 (set_attr "length_immediate" "1")
6983 (set_attr "mode" "TI")])
;; AVX qword insert into a V2DI vector (three-operand vpinsrq), available
;; only when both TARGET_AVX and TARGET_64BIT hold.  The DI scalar (operand
;; 2, register or memory) is merged into register operand 1.  Operand 3 is a
;; single power of two in the range 1..2; exact_log2 converts it to the
;; qword index used as the immediate.
6985 (define_insn "*avx_pinsrq"
6986 [(set (match_operand:V2DI 0 "register_operand" "=x")
6989 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6990 (match_operand:V2DI 1 "register_operand" "x")
6991 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6992 "TARGET_AVX && TARGET_64BIT"
6994 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6995 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6997 [(set_attr "type" "sselog")
6998 (set_attr "prefix_extra" "1")
6999 (set_attr "length_immediate" "1")
7000 (set_attr "prefix" "vex")
7001 (set_attr "mode" "TI")])
;; SSE4.1 qword insert into a V2DI vector (two-operand pinsrq), available
;; only when both TARGET_SSE4_1 and TARGET_64BIT hold.  Operand 1 is tied to
;; the destination ("0"); the DI scalar (operand 2) may be a register or
;; memory.  Operand 3 is a single power of two in the range 1..2, converted
;; to the qword index by exact_log2.  prefix_rex is set to "1".
7003 (define_insn "*sse4_1_pinsrq"
7004 [(set (match_operand:V2DI 0 "register_operand" "=x")
7007 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7008 (match_operand:V2DI 1 "register_operand" "0")
7009 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7010 "TARGET_SSE4_1 && TARGET_64BIT"
7012 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7013 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7015 [(set_attr "type" "sselog")
7016 (set_attr "prefix_rex" "1")
7017 (set_attr "prefix_extra" "1")
7018 (set_attr "length_immediate" "1")
7019 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register into a general register of an SWI48
;; mode.  Operand 2 is the element index (0..15) and becomes the instruction
;; immediate.  The destination is printed with the "k" modifier (%k0), and
;; the "%v" template prefix together with prefix "maybe_vex" lets one pattern
;; serve both the legacy and the VEX encoding.
;; NOTE(review): the rtx wrapping the vec_select is absent from this listing.
7021 (define_insn "*sse4_1_pextrb_<mode>"
7022 [(set (match_operand:SWI48 0 "register_operand" "=r")
7025 (match_operand:V16QI 1 "register_operand" "x")
7026 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7028 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7029 [(set_attr "type" "sselog")
7030 (set_attr "prefix_extra" "1")
7031 (set_attr "length_immediate" "1")
7032 (set_attr "prefix" "maybe_vex")
7033 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register stored directly to a QI memory
;; location.  Operand 2 is the element index (0..15) used as the immediate.
;; The "%v" template prefix and prefix "maybe_vex" cover both encodings.
7035 (define_insn "*sse4_1_pextrb_memory"
7036 [(set (match_operand:QI 0 "memory_operand" "=m")
7038 (match_operand:V16QI 1 "register_operand" "x")
7039 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7041 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7042 [(set_attr "type" "sselog")
7043 (set_attr "prefix_extra" "1")
7044 (set_attr "length_immediate" "1")
7045 (set_attr "prefix" "maybe_vex")
7046 (set_attr "mode" "TI")])
;; Word extract from a V8HI register into a general register of an SWI48
;; mode.  Operand 2 is the element index (0..7) used as the immediate; the
;; destination is printed with the "k" modifier (%k0).  The "%v" template
;; prefix and prefix "maybe_vex" cover both encodings.
;; NOTE(review): the rtx wrapping the vec_select is absent from this listing.
7048 (define_insn "*sse2_pextrw_<mode>"
7049 [(set (match_operand:SWI48 0 "register_operand" "=r")
7052 (match_operand:V8HI 1 "register_operand" "x")
7053 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7055 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7056 [(set_attr "type" "sselog")
7057 (set_attr "prefix_data16" "1")
7058 (set_attr "length_immediate" "1")
7059 (set_attr "prefix" "maybe_vex")
7060 (set_attr "mode" "TI")])
;; Word extract from a V8HI register stored directly to an HI memory
;; location.  Operand 2 is the element index (0..7) used as the immediate.
;; Unlike the register-destination pextrw pattern, this one sets
;; prefix_extra rather than prefix_data16.
7062 (define_insn "*sse4_1_pextrw_memory"
7063 [(set (match_operand:HI 0 "memory_operand" "=m")
7065 (match_operand:V8HI 1 "register_operand" "x")
7066 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7068 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7069 [(set_attr "type" "sselog")
7070 (set_attr "prefix_extra" "1")
7071 (set_attr "length_immediate" "1")
7072 (set_attr "prefix" "maybe_vex")
7073 (set_attr "mode" "TI")])
;; Dword extract from a V4SI register into an SI destination that may be
;; either a general register or memory ("=rm").  Operand 2 is the element
;; index (0..3) used as the immediate.  The "%v" template prefix and prefix
;; "maybe_vex" cover both encodings.
7075 (define_insn "*sse4_1_pextrd"
7076 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7078 (match_operand:V4SI 1 "register_operand" "x")
7079 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7081 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7082 [(set_attr "type" "sselog")
7083 (set_attr "prefix_extra" "1")
7084 (set_attr "length_immediate" "1")
7085 (set_attr "prefix" "maybe_vex")
7086 (set_attr "mode" "TI")])
;; 64-bit-only variant of the dword extract (TARGET_64BIT && TARGET_SSE4_1)
;; whose destination is a DI general register.  The same pextrd instruction
;; is emitted, with the destination printed through the "k" modifier (%k0).
;; NOTE(review): per the pattern name this matches the zero-extended form;
;; the wrapping rtx line is absent from this listing.
7088 (define_insn "*sse4_1_pextrd_zext"
7089 [(set (match_operand:DI 0 "register_operand" "=r")
7092 (match_operand:V4SI 1 "register_operand" "x")
7093 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7094 "TARGET_64BIT && TARGET_SSE4_1"
7095 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7096 [(set_attr "type" "sselog")
7097 (set_attr "prefix_extra" "1")
7098 (set_attr "length_immediate" "1")
7099 (set_attr "prefix" "maybe_vex")
7100 (set_attr "mode" "TI")])
;; Qword extract from a V2DI register into a DI destination that may be a
;; general register or memory ("=rm").  Requires TARGET_SSE4_1 and
;; TARGET_64BIT.  Operand 2 is the element index (0..1) used as the
;; immediate; prefix_rex is set to "1".  Pattern order matters, as the
;; original note below records.
7102 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
7103 (define_insn "*sse4_1_pextrq"
7104 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7106 (match_operand:V2DI 1 "register_operand" "x")
7107 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7108 "TARGET_SSE4_1 && TARGET_64BIT"
7109 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7110 [(set_attr "type" "sselog")
7111 (set_attr "prefix_rex" "1")
7112 (set_attr "prefix_extra" "1")
7113 (set_attr "length_immediate" "1")
7114 (set_attr "prefix" "maybe_vex")
7115 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant (operand 2).
;; It splits that mask into four 2-bit fields -- bits 0-1, 2-3, 4-5 and 6-7 --
;; and passes each as a separate selector operand to gen_sse2_pshufd_1
;; together with the destination and source vectors.
7117 (define_expand "sse2_pshufd"
7118 [(match_operand:V4SI 0 "register_operand" "")
7119 (match_operand:V4SI 1 "nonimmediate_operand" "")
7120 (match_operand:SI 2 "const_int_operand" "")]
7123 int mask = INTVAL (operands[2]);
7124 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7125 GEN_INT ((mask >> 0) & 3),
7126 GEN_INT ((mask >> 2) & 3),
7127 GEN_INT ((mask >> 4) & 3),
7128 GEN_INT ((mask >> 6) & 3)));
;; Dword shuffle of a V4SI source (register or memory) with four independent
;; selectors, operands 2..5, each constrained to 0..3.  The output code packs
;; the selectors back into one immediate -- operand 2 in bits 0-1, operand 3
;; in bits 2-3, operand 4 in bits 4-5, operand 5 in bits 6-7 -- and stores it
;; in operands[2] so the template can print it as %2.
7132 (define_insn "sse2_pshufd_1"
7133 [(set (match_operand:V4SI 0 "register_operand" "=x")
7135 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7136 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7137 (match_operand 3 "const_0_to_3_operand" "")
7138 (match_operand 4 "const_0_to_3_operand" "")
7139 (match_operand 5 "const_0_to_3_operand" "")])))]
7143 mask |= INTVAL (operands[2]) << 0;
7144 mask |= INTVAL (operands[3]) << 2;
7145 mask |= INTVAL (operands[4]) << 4;
7146 mask |= INTVAL (operands[5]) << 6;
7147 operands[2] = GEN_INT (mask);
7149 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7151 [(set_attr "type" "sselog1")
7152 (set_attr "prefix_data16" "1")
7153 (set_attr "prefix" "maybe_vex")
7154 (set_attr "length_immediate" "1")
7155 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant (operand 2).
;; It splits that mask into four 2-bit fields -- bits 0-1, 2-3, 4-5 and 6-7 --
;; and passes each as a separate selector operand to gen_sse2_pshuflw_1
;; together with the V8HI destination and source.
7157 (define_expand "sse2_pshuflw"
7158 [(match_operand:V8HI 0 "register_operand" "")
7159 (match_operand:V8HI 1 "nonimmediate_operand" "")
7160 (match_operand:SI 2 "const_int_operand" "")]
7163 int mask = INTVAL (operands[2]);
7164 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7165 GEN_INT ((mask >> 0) & 3),
7166 GEN_INT ((mask >> 2) & 3),
7167 GEN_INT ((mask >> 4) & 3),
7168 GEN_INT ((mask >> 6) & 3)));
;; Word shuffle of a V8HI source (register or memory) whose first four
;; selector positions are operands 2..5, each constrained to 0..3.  The
;; output code packs them into one immediate (2 bits each, operand 2 lowest)
;; and stores it in operands[2] for printing as %2.  Attributes mark the
;; encoding as using a rep prefix and no data16 prefix.
;; NOTE(review): the remaining selector entries of the parallel are absent
;; from this listing.
7172 (define_insn "sse2_pshuflw_1"
7173 [(set (match_operand:V8HI 0 "register_operand" "=x")
7175 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7176 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7177 (match_operand 3 "const_0_to_3_operand" "")
7178 (match_operand 4 "const_0_to_3_operand" "")
7179 (match_operand 5 "const_0_to_3_operand" "")
7187 mask |= INTVAL (operands[2]) << 0;
7188 mask |= INTVAL (operands[3]) << 2;
7189 mask |= INTVAL (operands[4]) << 4;
7190 mask |= INTVAL (operands[5]) << 6;
7191 operands[2] = GEN_INT (mask);
7193 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7195 [(set_attr "type" "sselog")
7196 (set_attr "prefix_data16" "0")
7197 (set_attr "prefix_rep" "1")
7198 (set_attr "prefix" "maybe_vex")
7199 (set_attr "length_immediate" "1")
7200 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant (operand 2).
;; It splits that mask into four 2-bit fields and adds 4 to each, so the
;; selectors handed to gen_sse2_pshufhw_1 lie in the range 4..7 that the
;; insn's const_4_to_7_operand predicates require.
7202 (define_expand "sse2_pshufhw"
7203 [(match_operand:V8HI 0 "register_operand" "")
7204 (match_operand:V8HI 1 "nonimmediate_operand" "")
7205 (match_operand:SI 2 "const_int_operand" "")]
7208 int mask = INTVAL (operands[2]);
7209 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7210 GEN_INT (((mask >> 0) & 3) + 4),
7211 GEN_INT (((mask >> 2) & 3) + 4),
7212 GEN_INT (((mask >> 4) & 3) + 4),
7213 GEN_INT (((mask >> 6) & 3) + 4)));
;; Word shuffle of a V8HI source (register or memory) in which the selector
;; list starts with fixed entries and ends with operands 2..5, each
;; constrained to 4..7.  The output code subtracts 4 from each selector,
;; packs the results into one immediate (2 bits each, operand 2 lowest) and
;; stores it in operands[2] for printing as %2.  Attributes mark the encoding
;; as using a rep prefix and no data16 prefix.
;; NOTE(review): only the first fixed selector, (const_int 0), is present in
;; this listing.
7217 (define_insn "sse2_pshufhw_1"
7218 [(set (match_operand:V8HI 0 "register_operand" "=x")
7220 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7221 (parallel [(const_int 0)
7225 (match_operand 2 "const_4_to_7_operand" "")
7226 (match_operand 3 "const_4_to_7_operand" "")
7227 (match_operand 4 "const_4_to_7_operand" "")
7228 (match_operand 5 "const_4_to_7_operand" "")])))]
7232 mask |= (INTVAL (operands[2]) - 4) << 0;
7233 mask |= (INTVAL (operands[3]) - 4) << 2;
7234 mask |= (INTVAL (operands[4]) - 4) << 4;
7235 mask |= (INTVAL (operands[5]) - 4) << 6;
7236 operands[2] = GEN_INT (mask);
7238 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7240 [(set_attr "type" "sselog")
7241 (set_attr "prefix_rep" "1")
7242 (set_attr "prefix_data16" "0")
7243 (set_attr "prefix" "maybe_vex")
7244 (set_attr "length_immediate" "1")
7245 (set_attr "mode" "TI")])
;; Expander that loads an SI scalar (operand 1, register or memory) into a
;; V4SI register.  The preparation statement sets operand 2 to the V4SImode
;; zero constant, CONST0_RTX (V4SImode).
;; NOTE(review): the rtx lines that consume operand 2 are absent from this
;; listing.
7247 (define_expand "sse2_loadd"
7248 [(set (match_operand:V4SI 0 "register_operand" "")
7251 (match_operand:SI 1 "nonimmediate_operand" ""))
7255 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX load of an SI scalar (operand 2) into a V4SI register, combined with
;; vector operand 1 (a register or the zero constant).  Three alternatives:
;;   1. scalar in memory, operand 1 constrained to "C"  -> vmovd
;;   2. scalar in a general register, operand 1 "C"     -> vmovd
;;   3. scalar in an SSE register, operand 1 a register -> three-operand
;;      vmovss (insn mode V4SF)
7257 (define_insn "*avx_loadld"
7258 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7261 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7262 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7266 vmovd\t{%2, %0|%0, %2}
7267 vmovd\t{%2, %0|%0, %2}
7268 vmovss\t{%2, %1, %0|%0, %1, %2}"
7269 [(set_attr "type" "ssemov")
7270 (set_attr "prefix" "vex")
7271 (set_attr "mode" "TI,TI,V4SF")])
;; Legacy-encoded load of an SI scalar (operand 2) into a V4SI register,
;; combined with vector operand 1 (a register or the zero constant).  Four
;; alternatives:
;;   1. scalar in memory, operand 1 "C"               -> movd
;;   2. scalar in a general register, operand 1 "C"   -> movd
;;   3. scalar in memory, operand 1 "C"               -> movss (mode V4SF)
;;   4. scalar in an SSE register, operand 1 tied to the destination ("0")
;;                                                    -> movss (mode SF)
7273 (define_insn "sse2_loadld"
7274 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7277 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7278 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7282 movd\t{%2, %0|%0, %2}
7283 movd\t{%2, %0|%0, %2}
7284 movss\t{%2, %0|%0, %2}
7285 movss\t{%2, %0|%0, %2}"
7286 [(set_attr "type" "ssemov")
7287 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store of element 0 of a V4SI register into an SI destination (memory, SSE
;; register or general register).  The split fires after reload when
;; inter-unit moves are enabled, or the destination is memory, or the
;; destination is not a general register.  It replaces the extraction with a
;; plain move whose source is the same hard register viewed in SImode
;; (gen_rtx_REG with the original REGNO).
7289 (define_insn_and_split "sse2_stored"
7290 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7292 (match_operand:V4SI 1 "register_operand" "x,Yi")
7293 (parallel [(const_int 0)])))]
7296 "&& reload_completed
7297 && (TARGET_INTER_UNIT_MOVES
7298 || MEM_P (operands [0])
7299 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7300 [(set (match_dup 0) (match_dup 1))]
7301 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extraction of one SI element (index operand 2, range 0..3) from a V4SI
;; value that lives in offsettable memory ("o").  The split code turns it
;; into an ordinary SImode load: it re-addresses the memory operand at byte
;; offset i*4 with adjust_address and emits a move into the destination
;; register.
7303 (define_insn_and_split "*vec_ext_v4si_mem"
7304 [(set (match_operand:SI 0 "register_operand" "=r")
7306 (match_operand:V4SI 1 "memory_operand" "o")
7307 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7313 int i = INTVAL (operands[2]);
7315 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of a V2DI register into a DI destination
;; (register or memory).  No preparation code is present in this listing;
;; the pattern is emitted as written.
7319 (define_expand "sse_storeq"
7320 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7322 (match_operand:V2DI 1 "register_operand" "")
7323 (parallel [(const_int 0)])))]
;; 64-bit-target store of element 0 of a V2DI value into a DI destination.
;; The condition rejects the memory-to-memory combination.  Three
;; alternatives: SSE register to memory or SSE register, SSE register ("Yi")
;; to general register, and offsettable memory to general register.  Only the
;; third has an instruction type of its own (imov, mode DI), emitted as
;; %vmov{q}; the first two leave type, prefix and mode at "*".
;; NOTE(review): the template lines for the first two alternatives are absent
;; from this listing.
7326 (define_insn "*sse2_storeq_rex64"
7327 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7329 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7330 (parallel [(const_int 0)])))]
7331 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7335 %vmov{q}\t{%1, %0|%0, %1}"
7336 [(set_attr "type" "*,*,imov")
7337 (set_attr "prefix" "*,*,maybe_vex")
7338 (set_attr "mode" "*,*,DI")])
;; Store of element 0 of a V2DI register into a DI destination that is
;; either memory or an SSE register ("=mx").
;; NOTE(review): the condition and output template are absent from this
;; listing.
7340 (define_insn "*sse2_storeq"
7341 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7343 (match_operand:V2DI 1 "register_operand" "x")
7344 (parallel [(const_int 0)])))]
;; Split for the element-0 V2DI -> DI store.  When inter-unit moves are
;; enabled, or the destination is memory, or the destination is not a general
;; register, the extraction becomes a plain move whose source is the same
;; hard register viewed in DImode (gen_rtx_REG with the original REGNO).
;; NOTE(review): the opening line of this form and the first line of its
;; condition are absent from this listing.
7349 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7351 (match_operand:V2DI 1 "register_operand" "")
7352 (parallel [(const_int 0)])))]
7355 && (TARGET_INTER_UNIT_MOVES
7356 || MEM_P (operands [0])
7357 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7358 [(set (match_dup 0) (match_dup 1))]
7359 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; AVX extraction of element 1 of a V2DI value into a DI destination, with a
;; general-register alternative.  The condition rejects memory-to-memory.
;; Alternatives:
;;   1. register -> memory:            vmovhps
;;   2. register -> SSE register:      vpsrldq by 8 bytes (three-operand)
;;   3. memory   -> SSE register:      vmovq from %H1
;;   4. memory   -> general register:  vmov{q} from %H1
;; NOTE(review): %H1 presumably addresses the memory operand at offset 8,
;; i.e. its upper qword -- confirm against the operand printer.
7361 (define_insn "*vec_extractv2di_1_rex64_avx"
7362 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7364 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7365 (parallel [(const_int 1)])))]
7368 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7370 vmovhps\t{%1, %0|%0, %1}
7371 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7372 vmovq\t{%H1, %0|%0, %H1}
7373 vmov{q}\t{%H1, %0|%0, %H1}"
7374 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7375 (set_attr "length_immediate" "*,1,*,*")
7376 (set_attr "memory" "*,none,*,*")
7377 (set_attr "prefix" "vex")
7378 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Legacy-encoded, 64-bit-target extraction of element 1 of a V2DI value into
;; a DI destination.  The condition rejects memory-to-memory.  Alternatives:
;;   1. register -> memory:            movhps
;;   2. in-place in an SSE register (source tied to destination, "0"):
;;                                     psrldq by 8 bytes
;;   3. memory   -> SSE register:      movq from %H1
;;   4. memory   -> general register:  mov{q} from %H1
;; NOTE(review): %H1 presumably addresses the memory operand at offset 8,
;; i.e. its upper qword -- confirm against the operand printer.
7380 (define_insn "*vec_extractv2di_1_rex64"
7381 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7383 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7384 (parallel [(const_int 1)])))]
7385 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7387 movhps\t{%1, %0|%0, %1}
7388 psrldq\t{$8, %0|%0, 8}
7389 movq\t{%H1, %0|%0, %H1}
7390 mov{q}\t{%H1, %0|%0, %H1}"
7391 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7392 (set_attr "length_immediate" "*,1,*,*")
7393 (set_attr "memory" "*,none,*,*")
7394 (set_attr "mode" "V2SF,TI,TI,DI")])
;; AVX element-1 extraction without a general-register alternative:
;; vmovhps to memory, vpsrldq by 8 bytes between x registers, or vmovq
;; from %H1 of an offsettable memory operand.
7396 (define_insn "*vec_extractv2di_1_avx"
7397 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7399 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7400 (parallel [(const_int 1)])))]
7403 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7405 vmovhps\t{%1, %0|%0, %1}
7406 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7407 vmovq\t{%H1, %0|%0, %H1}"
7408 [(set_attr "type" "ssemov,sseishft1,ssemov")
7409 (set_attr "length_immediate" "*,1,*")
7410 (set_attr "memory" "*,none,*")
7411 (set_attr "prefix" "vex")
7412 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 element-1 extraction: movhps to memory, in-place psrldq by 8
;; bytes (operand 1 tied to operand 0), or movq from %H1 of an
;; offsettable memory operand.
7414 (define_insn "*vec_extractv2di_1_sse2"
7415 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7417 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7418 (parallel [(const_int 1)])))]
7420 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7422 movhps\t{%1, %0|%0, %1}
7423 psrldq\t{$8, %0|%0, 8}
7424 movq\t{%H1, %0|%0, %H1}"
7425 [(set_attr "type" "ssemov,sseishft1,ssemov")
7426 (set_attr "length_immediate" "*,1,*")
7427 (set_attr "memory" "*,none,*")
7428 (set_attr "mode" "V2SF,TI,TI")])
7430 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 element-1 extraction, using only single-precision
;; moves: movhps to memory, movhlps between x registers, or movlps from
;; %H1 of an offsettable memory operand.
7431 (define_insn "*vec_extractv2di_1_sse"
7432 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7434 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7435 (parallel [(const_int 1)])))]
7436 "!TARGET_SSE2 && TARGET_SSE
7437 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7439 movhps\t{%1, %0|%0, %1}
7440 movhlps\t{%1, %0|%0, %1}
7441 movlps\t{%H1, %0|%0, %H1}"
7442 [(set_attr "type" "ssemov")
7443 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX duplicate of an SI value into a V4SI register: vpshufd with
;; selector 0 for a register source, vbroadcastss for a memory source.
;; NOTE(review): operand 1 uses the register_operand predicate although
;; the second alternative's constraint is m -- presumably that alternative
;; is reached only after the operand is spilled; confirm this is intended.
7445 (define_insn "*vec_dupv4si_avx"
7446 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7448 (match_operand:SI 1 "register_operand" "x,m")))]
7451 vpshufd\t{$0, %1, %0|%0, %1, 0}
7452 vbroadcastss\t{%1, %0|%0, %1}"
7453 [(set_attr "type" "sselog1,ssemov")
7454 (set_attr "length_immediate" "1,0")
7455 (set_attr "prefix_extra" "0,1")
7456 (set_attr "prefix" "vex")
7457 (set_attr "mode" "TI,V4SF")])
;; Duplicate an SI register into a V4SI register.  The Y2 alternative
;; uses pshufd with selector 0; the other alternative ties operand 1 to
;; operand 0 and uses shufps with selector 0 on the destination itself.
7459 (define_insn "*vec_dupv4si"
7460 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7462 (match_operand:SI 1 "register_operand" " Y2,0")))]
7465 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7466 shufps\t{$0, %0, %0|%0, %0, 0}"
7467 [(set_attr "type" "sselog1")
7468 (set_attr "length_immediate" "1")
7469 (set_attr "mode" "TI,V4SF")])
;; AVX duplicate of a DI value into a V2DI register: vpunpcklqdq of the
;; source with itself for a register source, vmovddup for a memory source.
7471 (define_insn "*vec_dupv2di_avx"
7472 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7474 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7477 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7478 vmovddup\t{%1, %0|%0, %1}"
7479 [(set_attr "type" "sselog1")
7480 (set_attr "prefix" "vex")
7481 (set_attr "mode" "TI,DF")])
;; SSE3 duplicate of a DI value into a V2DI register.  The first
;; alternative ties operand 1 to the destination; the memory alternative
;; uses movddup.
7483 (define_insn "*vec_dupv2di_sse3"
7484 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7486 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7490 movddup\t{%1, %0|%0, %1}"
7491 [(set_attr "type" "sselog1")
7492 (set_attr "mode" "TI,DF")])
;; Duplicate a DI register into a V2DI register.  Operand 1 is tied to
;; the destination in both alternatives (Y2 and x), which are typed
;; sselog1 and ssemov with modes TI and V4SF respectively.
7494 (define_insn "*vec_dupv2di"
7495 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7497 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7502 [(set_attr "type" "sselog1,ssemov")
7503 (set_attr "mode" "TI,V4SF")])
;; AVX build of a V2SI value from two SI operands.  Alternatives:
;;   0: vpinsrd inserts operand 2 (reg or mem) at index 1 of operand 1
;;   1: vpunpckldq of two x registers
;;   2: vmovd of operand 1 when operand 2 satisfies constraint C
;;   3: MMX punpckldq (y registers, operand 1 tied to the destination)
;;   4: MMX movd of operand 1 when operand 2 satisfies constraint C
;; The two MMX alternatives keep the legacy encoding (prefix orig).
7505 (define_insn "*vec_concatv2si_avx"
7506 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7508 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7509 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7512 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7513 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7514 vmovd\t{%1, %0|%0, %1}
7515 punpckldq\t{%2, %0|%0, %2}
7516 movd\t{%1, %0|%0, %1}"
7517 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7518 (set_attr "prefix_extra" "1,*,*,*,*")
7519 (set_attr "length_immediate" "1,*,*,*,*")
7520 (set (attr "prefix")
7521 (if_then_else (eq_attr "alternative" "3,4")
7522 (const_string "orig")
7523 (const_string "vex")))
7524 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; SSE4.1 counterpart of the V2SI concat: the same five alternatives as
;; the AVX form, but in two-operand style, so operand 1 is tied to the
;; destination for pinsrd and for both punpckldq alternatives.
7526 (define_insn "*vec_concatv2si_sse4_1"
7527 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7529 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7530 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7533 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7534 punpckldq\t{%2, %0|%0, %2}
7535 movd\t{%1, %0|%0, %1}
7536 punpckldq\t{%2, %0|%0, %2}
7537 movd\t{%1, %0|%0, %1}"
7538 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7539 (set_attr "prefix_extra" "1,*,*,*,*")
7540 (set_attr "length_immediate" "1,*,*,*,*")
7541 (set_attr "mode" "TI,TI,TI,DI,DI")])
7543 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7544 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7545 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SI concat with operand 2 restricted to a register or zero
;; (reg_or_0_operand).  Alternatives 0-1 use x registers (punpckldq,
;; movd); alternatives 2-3 are the MMX equivalents on y registers.
7546 (define_insn "*vec_concatv2si_sse2"
7547 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7549 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7550 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7553 punpckldq\t{%2, %0|%0, %2}
7554 movd\t{%1, %0|%0, %1}
7555 punpckldq\t{%2, %0|%0, %2}
7556 movd\t{%1, %0|%0, %1}"
7557 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7558 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI concat using single-precision instructions on x registers
;; (unpcklps, and movss from memory) plus the MMX alternatives on y
;; registers (punpckldq, movd).
7560 (define_insn "*vec_concatv2si_sse"
7561 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7563 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7564 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7567 unpcklps\t{%2, %0|%0, %2}
7568 movss\t{%1, %0|%0, %1}
7569 punpckldq\t{%2, %0|%0, %2}
7570 movd\t{%1, %0|%0, %1}"
7571 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7572 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX build of a V4SI value from two V2SI halves: vpunpcklqdq when
;; operand 2 is a register, vmovhps when operand 2 is in memory.
7574 (define_insn "*vec_concatv4si_1_avx"
7575 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7577 (match_operand:V2SI 1 "register_operand" " x,x")
7578 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7581 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7582 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7583 [(set_attr "type" "sselog,ssemov")
7584 (set_attr "prefix" "vex")
7585 (set_attr "mode" "TI,V2SF")])
;; Two-operand build of a V4SI value from two V2SI halves.  Operand 1 is
;; tied to the destination in every alternative; operand 2 is merged in
;; with punpcklqdq (Y2), movlhps (x register) or movhps (memory).
7587 (define_insn "*vec_concatv4si_1"
7588 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7590 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7591 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7594 punpcklqdq\t{%2, %0|%0, %2}
7595 movlhps\t{%2, %0|%0, %2}
7596 movhps\t{%2, %0|%0, %2}"
7597 [(set_attr "type" "sselog,ssemov,ssemov")
7598 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX build of a V2DI value from two DI operands.  Alternatives:
;;   0: vmovq from memory when operand 2 satisfies constraint C
;;   1: movq2dq from an MMX register (legacy encoding, prefix orig)
;;   2: vpunpcklqdq of two x registers
;;   3: vmovhps with operand 2 in memory
7600 (define_insn "*vec_concatv2di_avx"
7601 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7603 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7604 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7605 "!TARGET_64BIT && TARGET_AVX"
7607 vmovq\t{%1, %0|%0, %1}
7608 movq2dq\t{%1, %0|%0, %1}
7609 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7610 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7611 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7612 (set (attr "prefix")
7613 (if_then_else (eq_attr "alternative" "1")
7614 (const_string "orig")
7615 (const_string "vex")))
7616 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit SSE build of a V2DI value from two DI operands (named pattern).
;; With operand 2 constrained to C, operand 1 is simply moved in (movq or
;; movq2dq); otherwise operand 1 is tied to the destination and operand 2
;; is merged in with punpcklqdq, movlhps or movhps.
7618 (define_insn "vec_concatv2di"
7619 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7621 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7622 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7623 "!TARGET_64BIT && TARGET_SSE"
7625 movq\t{%1, %0|%0, %1}
7626 movq2dq\t{%1, %0|%0, %1}
7627 punpcklqdq\t{%2, %0|%0, %2}
7628 movlhps\t{%2, %0|%0, %2}
7629 movhps\t{%2, %0|%0, %2}"
7630 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7631 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX build of a V2DI value from two DI operands.  Alternatives:
;;   0: vpinsrq inserts operand 2 (reg or mem) at index 1 of operand 1
;;   1: vmovq from memory, operand 2 constrained to C
;;   2: vmovq from a general register into a Yi register
;;   3: movq2dq from an MMX register (legacy encoding, prefix orig)
;;   4: vpunpcklqdq of two x registers
;;   5: vmovhps with operand 2 in memory
7633 (define_insn "*vec_concatv2di_rex64_avx"
7634 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7636 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7637 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7638 "TARGET_64BIT && TARGET_AVX"
7640 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7641 vmovq\t{%1, %0|%0, %1}
7642 vmovq\t{%1, %0|%0, %1}
7643 movq2dq\t{%1, %0|%0, %1}
7644 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7645 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7646 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7647 (set_attr "prefix_extra" "1,*,*,*,*,*")
7648 (set_attr "length_immediate" "1,*,*,*,*,*")
7649 (set (attr "prefix")
7650 (if_then_else (eq_attr "alternative" "3")
7651 (const_string "orig")
7652 (const_string "vex")))
7653 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 build of a V2DI value from two DI operands, in
;; two-operand style.  pinsrq, punpcklqdq, movlhps and movhps tie operand
;; 1 to the destination; the three move alternatives (movq, movq, movq2dq)
;; apply when operand 2 is constrained to C.  prefix_rex is forced to 1
;; for the pinsrq alternative and the movq-from-general-register one.
7655 (define_insn "*vec_concatv2di_rex64_sse4_1"
7656 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7658 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7659 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7660 "TARGET_64BIT && TARGET_SSE4_1"
7662 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7663 movq\t{%1, %0|%0, %1}
7664 movq\t{%1, %0|%0, %1}
7665 movq2dq\t{%1, %0|%0, %1}
7666 punpcklqdq\t{%2, %0|%0, %2}
7667 movlhps\t{%2, %0|%0, %2}
7668 movhps\t{%2, %0|%0, %2}"
7669 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7670 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7671 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7672 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7673 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit SSE build of a V2DI value from two DI operands, without pinsrq.
;; Move alternatives (movq, movq, movq2dq) apply when operand 2 is
;; constrained to C; the merge alternatives (punpcklqdq, movlhps, movhps)
;; tie operand 1 to the destination.  prefix_rex is forced to 1 for the
;; movq-from-general-register alternative.
7675 (define_insn "*vec_concatv2di_rex64_sse"
7676 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7678 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7679 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7680 "TARGET_64BIT && TARGET_SSE"
7682 movq\t{%1, %0|%0, %1}
7683 movq\t{%1, %0|%0, %1}
7684 movq2dq\t{%1, %0|%0, %1}
7685 punpcklqdq\t{%2, %0|%0, %2}
7686 movlhps\t{%2, %0|%0, %2}
7687 movhps\t{%2, %0|%0, %2}"
7688 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7689 (set_attr "prefix_rex" "*,1,*,*,*,*")
7690 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander: widen the high half of an unsigned V16QI vector to V8HI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, true); across the vec_unpack expanders the first flag
;; follows the u/s (unsigned/signed) name and the second follows hi/lo.
7692 (define_expand "vec_unpacku_hi_v16qi"
7693 [(match_operand:V8HI 0 "register_operand" "")
7694 (match_operand:V16QI 1 "register_operand" "")]
7698 ix86_expand_sse4_unpack (operands, true, true);
7700 ix86_expand_sse_unpack (operands, true, true);
;; Expander: widen the high half of a signed V16QI vector to V8HI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, true), i.e. signed, high half.
7704 (define_expand "vec_unpacks_hi_v16qi"
7705 [(match_operand:V8HI 0 "register_operand" "")
7706 (match_operand:V16QI 1 "register_operand" "")]
7710 ix86_expand_sse4_unpack (operands, false, true);
7712 ix86_expand_sse_unpack (operands, false, true);
;; Expander: widen the low half of an unsigned V16QI vector to V8HI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, false), i.e. unsigned, low half.
7716 (define_expand "vec_unpacku_lo_v16qi"
7717 [(match_operand:V8HI 0 "register_operand" "")
7718 (match_operand:V16QI 1 "register_operand" "")]
7722 ix86_expand_sse4_unpack (operands, true, false);
7724 ix86_expand_sse_unpack (operands, true, false);
;; Expander: widen the low half of a signed V16QI vector to V8HI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, false), i.e. signed, low half.
7728 (define_expand "vec_unpacks_lo_v16qi"
7729 [(match_operand:V8HI 0 "register_operand" "")
7730 (match_operand:V16QI 1 "register_operand" "")]
7734 ix86_expand_sse4_unpack (operands, false, false);
7736 ix86_expand_sse_unpack (operands, false, false);
;; Expander: widen the high half of an unsigned V8HI vector to V4SI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, true), i.e. unsigned, high half.
7740 (define_expand "vec_unpacku_hi_v8hi"
7741 [(match_operand:V4SI 0 "register_operand" "")
7742 (match_operand:V8HI 1 "register_operand" "")]
7746 ix86_expand_sse4_unpack (operands, true, true);
7748 ix86_expand_sse_unpack (operands, true, true);
;; Expander: widen the high half of a signed V8HI vector to V4SI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, true), i.e. signed, high half.
7752 (define_expand "vec_unpacks_hi_v8hi"
7753 [(match_operand:V4SI 0 "register_operand" "")
7754 (match_operand:V8HI 1 "register_operand" "")]
7758 ix86_expand_sse4_unpack (operands, false, true);
7760 ix86_expand_sse_unpack (operands, false, true);
;; Expander: widen the low half of an unsigned V8HI vector to V4SI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, false), i.e. unsigned, low half.
7764 (define_expand "vec_unpacku_lo_v8hi"
7765 [(match_operand:V4SI 0 "register_operand" "")
7766 (match_operand:V8HI 1 "register_operand" "")]
7770 ix86_expand_sse4_unpack (operands, true, false);
7772 ix86_expand_sse_unpack (operands, true, false);
;; Expander: widen the low half of a signed V8HI vector to V4SI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, false), i.e. signed, low half.
7776 (define_expand "vec_unpacks_lo_v8hi"
7777 [(match_operand:V4SI 0 "register_operand" "")
7778 (match_operand:V8HI 1 "register_operand" "")]
7782 ix86_expand_sse4_unpack (operands, false, false);
7784 ix86_expand_sse_unpack (operands, false, false);
;; Expander: widen the high half of an unsigned V4SI vector to V2DI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, true), i.e. unsigned, high half.
7788 (define_expand "vec_unpacku_hi_v4si"
7789 [(match_operand:V2DI 0 "register_operand" "")
7790 (match_operand:V4SI 1 "register_operand" "")]
7794 ix86_expand_sse4_unpack (operands, true, true);
7796 ix86_expand_sse_unpack (operands, true, true);
;; Expander: widen the high half of a signed V4SI vector to V2DI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, true), i.e. signed, high half.
7800 (define_expand "vec_unpacks_hi_v4si"
7801 [(match_operand:V2DI 0 "register_operand" "")
7802 (match_operand:V4SI 1 "register_operand" "")]
7806 ix86_expand_sse4_unpack (operands, false, true);
7808 ix86_expand_sse_unpack (operands, false, true);
;; Expander: widen the low half of an unsigned V4SI vector to V2DI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (true, false), i.e. unsigned, low half.
7812 (define_expand "vec_unpacku_lo_v4si"
7813 [(match_operand:V2DI 0 "register_operand" "")
7814 (match_operand:V4SI 1 "register_operand" "")]
7818 ix86_expand_sse4_unpack (operands, true, false);
7820 ix86_expand_sse_unpack (operands, true, false);
;; Expander: widen the low half of a signed V4SI vector to V2DI.
;; Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack with
;; flags (false, false), i.e. signed, low half.
7824 (define_expand "vec_unpacks_lo_v4si"
7825 [(match_operand:V2DI 0 "register_operand" "")
7826 (match_operand:V4SI 1 "register_operand" "")]
7830 ix86_expand_sse4_unpack (operands, false, false);
7832 ix86_expand_sse_unpack (operands, false, false);
7836 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7840 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the byte average of two V16QI operands.  The pattern
;; includes a constant vector of sixteen 1s (presumably the rounding term
;; added before the shift).  Operands are canonicalized for a commutative
;; PLUS via ix86_fixup_binary_operands_no_copy.
7842 (define_expand "sse2_uavgv16qi3"
7843 [(set (match_operand:V16QI 0 "register_operand" "")
7849 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7851 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7852 (const_vector:V16QI [(const_int 1) (const_int 1)
7853 (const_int 1) (const_int 1)
7854 (const_int 1) (const_int 1)
7855 (const_int 1) (const_int 1)
7856 (const_int 1) (const_int 1)
7857 (const_int 1) (const_int 1)
7858 (const_int 1) (const_int 1)
7859 (const_int 1) (const_int 1)]))
7862 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX byte average, emitted as three-operand vpavgb.  Operand 1 is
;; marked commutative (%) and operand 2 may be a register or memory; the
;; condition also requires ix86_binary_operator_ok for PLUS.
7864 (define_insn "*avx_uavgv16qi3"
7865 [(set (match_operand:V16QI 0 "register_operand" "=x")
7871 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7873 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7874 (const_vector:V16QI [(const_int 1) (const_int 1)
7875 (const_int 1) (const_int 1)
7876 (const_int 1) (const_int 1)
7877 (const_int 1) (const_int 1)
7878 (const_int 1) (const_int 1)
7879 (const_int 1) (const_int 1)
7880 (const_int 1) (const_int 1)
7881 (const_int 1) (const_int 1)]))
7883 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7884 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7885 [(set_attr "type" "sseiadd")
7886 (set_attr "prefix" "vex")
7887 (set_attr "mode" "TI")])
;; SSE2 byte average, emitted as two-operand pavgb.  Operand 1 is
;; commutative and tied to the destination (%0); operand 2 may be a
;; register or memory.
7889 (define_insn "*sse2_uavgv16qi3"
7890 [(set (match_operand:V16QI 0 "register_operand" "=x")
7896 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7898 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7899 (const_vector:V16QI [(const_int 1) (const_int 1)
7900 (const_int 1) (const_int 1)
7901 (const_int 1) (const_int 1)
7902 (const_int 1) (const_int 1)
7903 (const_int 1) (const_int 1)
7904 (const_int 1) (const_int 1)
7905 (const_int 1) (const_int 1)
7906 (const_int 1) (const_int 1)]))
7908 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7909 "pavgb\t{%2, %0|%0, %2}"
7910 [(set_attr "type" "sseiadd")
7911 (set_attr "prefix_data16" "1")
7912 (set_attr "mode" "TI")])
;; Expander for the word average of two V8HI operands.  The pattern
;; includes a constant vector of eight 1s (presumably the rounding term
;; added before the shift).  Operands are canonicalized for a commutative
;; PLUS via ix86_fixup_binary_operands_no_copy.
7914 (define_expand "sse2_uavgv8hi3"
7915 [(set (match_operand:V8HI 0 "register_operand" "")
7921 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7923 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7924 (const_vector:V8HI [(const_int 1) (const_int 1)
7925 (const_int 1) (const_int 1)
7926 (const_int 1) (const_int 1)
7927 (const_int 1) (const_int 1)]))
7930 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX word average, emitted as three-operand vpavgw.  Operand 1 is
;; marked commutative (%) and operand 2 may be a register or memory; the
;; condition also requires ix86_binary_operator_ok for PLUS.
7932 (define_insn "*avx_uavgv8hi3"
7933 [(set (match_operand:V8HI 0 "register_operand" "=x")
7939 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7941 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7942 (const_vector:V8HI [(const_int 1) (const_int 1)
7943 (const_int 1) (const_int 1)
7944 (const_int 1) (const_int 1)
7945 (const_int 1) (const_int 1)]))
7947 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7948 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7949 [(set_attr "type" "sseiadd")
7950 (set_attr "prefix" "vex")
7951 (set_attr "mode" "TI")])
;; SSE2 word average, emitted as two-operand pavgw.  Operand 1 is
;; commutative and tied to the destination (%0); operand 2 may be a
;; register or memory.
7953 (define_insn "*sse2_uavgv8hi3"
7954 [(set (match_operand:V8HI 0 "register_operand" "=x")
7960 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7962 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7963 (const_vector:V8HI [(const_int 1) (const_int 1)
7964 (const_int 1) (const_int 1)
7965 (const_int 1) (const_int 1)
7966 (const_int 1) (const_int 1)]))
7968 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7969 "pavgw\t{%2, %0|%0, %2}"
7970 [(set_attr "type" "sseiadd")
7971 (set_attr "prefix_data16" "1")
7972 (set_attr "mode" "TI")])
7974 ;; The correct representation for this is absolutely enormous, and
7975 ;; surely not generally useful.
;; Hence the operation is modelled as an opaque unspec over two V16QI
;; inputs producing a V2DI result.  This is the AVX three-operand form
;; (vpsadbw); operand 2 may be a register or memory.
7976 (define_insn "*avx_psadbw"
7977 [(set (match_operand:V2DI 0 "register_operand" "=x")
7978 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7979 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7982 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7983 [(set_attr "type" "sseiadd")
7984 (set_attr "prefix" "vex")
7985 (set_attr "mode" "TI")])
;; SSE2 two-operand psadbw, modelled as an unspec over two V16QI inputs
;; producing a V2DI result.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
7987 (define_insn "sse2_psadbw"
7988 [(set (match_operand:V2DI 0 "register_operand" "=x")
7989 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7990 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7993 "psadbw\t{%2, %0|%0, %2}"
7994 [(set_attr "type" "sseiadd")
7995 (set_attr "atom_unit" "simul")
7996 (set_attr "prefix_data16" "1")
7997 (set_attr "mode" "TI")])
;; 256-bit AVX vmovmsk<ssemodesuffix>: takes a float vector register
;; (modes from AVX256MODEF2P) and writes an SI general register.
;; Enabled only for modes accepted by AVX256_VEC_FLOAT_MODE_P.
7999 (define_insn "avx_movmsk<ssemodesuffix>256"
8000 [(set (match_operand:SI 0 "register_operand" "=r")
8002 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8004 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8005 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8006 [(set_attr "type" "ssecvt")
8007 (set_attr "prefix" "vex")
8008 (set_attr "mode" "<MODE>")])
;; 128-bit movmsk<ssemodesuffix>: takes a float vector register (modes
;; from SSEMODEF2P) and writes an SI general register.  The %v template
;; prefix together with prefix maybe_vex lets the same pattern serve both
;; the legacy and the VEX encoding.
8010 (define_insn "<sse>_movmsk<ssemodesuffix>"
8011 [(set (match_operand:SI 0 "register_operand" "=r")
8013 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8015 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8016 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8017 [(set_attr "type" "ssemov")
8018 (set_attr "prefix" "maybe_vex")
8019 (set_attr "mode" "<MODE>")])
;; pmovmskb: unspec taking a V16QI x register and producing an SI value
;; in a general register.  Emitted with an optional VEX prefix (%v).
8021 (define_insn "sse2_pmovmskb"
8022 [(set (match_operand:SI 0 "register_operand" "=r")
8023 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8026 "%vpmovmskb\t{%1, %0|%0, %1}"
8027 [(set_attr "type" "ssemov")
8028 (set_attr "prefix_data16" "1")
8029 (set_attr "prefix" "maybe_vex")
8030 (set_attr "mode" "SI")])
;; Expander for the masked byte store: operand 0 is the V16QI memory
;; destination, operands 1 and 2 are V16QI registers feeding the unspec.
8032 (define_expand "sse2_maskmovdqu"
8033 [(set (match_operand:V16QI 0 "memory_operand" "")
8034 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8035 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The store address is operand 0, an SI register
;; constrained to D; it is implicit in the instruction, so the template
;; prints only operands 2 and 1.  The old memory contents appear as an
;; input of the unspec (mem of match_dup 0).  length_vex is fixed at 3
;; because of that implicit operand.
8040 (define_insn "*sse2_maskmovdqu"
8041 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8042 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8043 (match_operand:V16QI 2 "register_operand" "x")
8044 (mem:V16QI (match_dup 0))]
8046 "TARGET_SSE2 && !TARGET_64BIT"
8047 ;; @@@ check ordering of operands in intel/nonintel syntax
8048 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8049 [(set_attr "type" "ssemov")
8050 (set_attr "prefix_data16" "1")
8051 ;; The implicit %rdi operand confuses default length_vex computation.
8052 (set_attr "length_vex" "3")
8053 (set_attr "prefix" "maybe_vex")
8054 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern, but the address
;; register is DImode.  length_vex is computed by hand: 3 + 1 when
;; operand 2 is a REX SSE register (REGNO >= FIRST_REX_SSE_REG),
;; otherwise 2 + 1.
8056 (define_insn "*sse2_maskmovdqu_rex64"
8057 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8058 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8059 (match_operand:V16QI 2 "register_operand" "x")
8060 (mem:V16QI (match_dup 0))]
8062 "TARGET_SSE2 && TARGET_64BIT"
8063 ;; @@@ check ordering of operands in intel/nonintel syntax
8064 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8065 [(set_attr "type" "ssemov")
8066 (set_attr "prefix_data16" "1")
8067 ;; The implicit %rdi operand confuses default length_vex computation.
8068 (set (attr "length_vex")
8069 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8070 (set_attr "prefix" "maybe_vex")
8071 (set_attr "mode" "TI")])
;; Load MXCSR from an SI memory operand.  Modelled as unspec_volatile so
;; it is not moved or deleted; the memory attribute is load.
8073 (define_insn "sse_ldmxcsr"
8074 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8078 [(set_attr "type" "sse")
8079 (set_attr "atom_sse_attr" "mxcsr")
8080 (set_attr "prefix" "maybe_vex")
8081 (set_attr "memory" "load")])
;; Store MXCSR to an SI memory operand.  The source is an
;; unspec_volatile (UNSPECV_STMXCSR); the memory attribute is store.
8083 (define_insn "sse_stmxcsr"
8084 [(set (match_operand:SI 0 "memory_operand" "=m")
8085 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8088 [(set_attr "type" "sse")
8089 (set_attr "atom_sse_attr" "mxcsr")
8090 (set_attr "prefix" "maybe_vex")
8091 (set_attr "memory" "store")])
;; Expander for sfence.  The fence is represented as a BLKmode memory
;; location set from an UNSPEC_SFENCE of itself.  The preparation code
;; creates that operand: a volatile BLKmode MEM at a scratch address.
8093 (define_expand "sse_sfence"
8095 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8096 "TARGET_SSE || TARGET_3DNOW_A"
8098 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8099 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the sfence representation built by the expander.
;; Operand 0 has no predicate or constraint; length_address is 0 and the
;; memory effect is declared unknown.
8102 (define_insn "*sse_sfence"
8103 [(set (match_operand:BLK 0 "" "")
8104 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8105 "TARGET_SSE || TARGET_3DNOW_A"
8107 [(set_attr "type" "sse")
8108 (set_attr "length_address" "0")
8109 (set_attr "atom_sse_attr" "fence")
8110 (set_attr "memory" "unknown")])
;; clflush: unspec_volatile over an address operand (constraint p).
;; Scheduled as a fence on Atom, with unknown memory effect.
8112 (define_insn "sse2_clflush"
8113 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8117 [(set_attr "type" "sse")
8118 (set_attr "atom_sse_attr" "fence")
8119 (set_attr "memory" "unknown")])
;; Expander for mfence.  The fence is a BLKmode memory location set from
;; an UNSPEC_MFENCE of itself; the preparation code creates operand 0 as
;; a volatile BLKmode MEM at a scratch address.
8121 (define_expand "sse2_mfence"
8123 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8126 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8127 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the mfence representation.  Enabled for any 64-bit
;; target or when SSE2 is available; length_address is 0 and the memory
;; effect is declared unknown.
8130 (define_insn "*sse2_mfence"
8131 [(set (match_operand:BLK 0 "" "")
8132 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8133 "TARGET_64BIT || TARGET_SSE2"
8135 [(set_attr "type" "sse")
8136 (set_attr "length_address" "0")
8137 (set_attr "atom_sse_attr" "fence")
8138 (set_attr "memory" "unknown")])
;; Expander for lfence.  The fence is a BLKmode memory location set from
;; an UNSPEC_LFENCE of itself; the preparation code creates operand 0 as
;; a volatile BLKmode MEM at a scratch address.
8140 (define_expand "sse2_lfence"
8142 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8145 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8146 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the lfence representation.  Unlike the sfence and mfence
;; insns, atom_sse_attr is lfence rather than fence.
8149 (define_insn "*sse2_lfence"
8150 [(set (match_operand:BLK 0 "" "")
8151 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8154 [(set_attr "type" "sse")
8155 (set_attr "length_address" "0")
8156 (set_attr "atom_sse_attr" "lfence")
8157 (set_attr "memory" "unknown")])
;; mwait: unspec_volatile over two SI registers pinned by constraint
;; (a and c).  One pattern serves both 32-bit and 64-bit code, for the
;; reason given in the comment below.  Encoded length is 3 bytes.
8159 (define_insn "sse3_mwait"
8160 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8161 (match_operand:SI 1 "register_operand" "c")]
8164 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8165 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8166 ;; we only need to set up 32bit registers.
8168 [(set_attr "length" "3")])
;; 32-bit monitor: unspec_volatile over three SI registers pinned by
;; constraint (a, c and d), printed explicitly in the template.
;; Encoded length is 3 bytes.
8170 (define_insn "sse3_monitor"
8171 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8172 (match_operand:SI 1 "register_operand" "c")
8173 (match_operand:SI 2 "register_operand" "d")]
8175 "TARGET_SSE3 && !TARGET_64BIT"
8176 "monitor\t%0, %1, %2"
8177 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 (constraint a) is DImode, while operands 1
;; and 2 (constraints c and d) stay SImode for the reason given in the
;; comment below.  Encoded length is 3 bytes.
8179 (define_insn "sse3_monitor64"
8180 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8181 (match_operand:SI 1 "register_operand" "c")
8182 (match_operand:SI 2 "register_operand" "d")]
8184 "TARGET_SSE3 && TARGET_64BIT"
8185 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8186 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8187 ;; zero extended to 64bit, we only need to set up 32bit registers.
8189 [(set_attr "length" "3")])
8191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8193 ;; SSSE3 instructions
8195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX horizontal add of 16-bit elements (vphaddw).  The pattern selects
;; adjacent element pairs (0,1) (2,3) (4,5) (6,7) first from operand 1 and
;; then from operand 2, so results from operand 1 precede those from
;; operand 2.  Operand 2 may be a register or memory.
8197 (define_insn "*avx_phaddwv8hi3"
8198 [(set (match_operand:V8HI 0 "register_operand" "=x")
8204 (match_operand:V8HI 1 "register_operand" "x")
8205 (parallel [(const_int 0)]))
8206 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8208 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8209 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8212 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8213 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8215 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8216 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8221 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8222 (parallel [(const_int 0)]))
8223 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8225 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8226 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8229 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8230 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8232 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8233 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8235 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8236 [(set_attr "type" "sseiadd")
8237 (set_attr "prefix_extra" "1")
8238 (set_attr "prefix" "vex")
8239 (set_attr "mode" "TI")])
;; SSSE3 horizontal add of 16-bit elements (two-operand phaddw).  Same
;; pairing of adjacent elements as the AVX form; operand 1 is tied to the
;; destination and operand 2 may be a register or memory.
8241 (define_insn "ssse3_phaddwv8hi3"
8242 [(set (match_operand:V8HI 0 "register_operand" "=x")
8248 (match_operand:V8HI 1 "register_operand" "0")
8249 (parallel [(const_int 0)]))
8250 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8252 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8253 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8256 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8257 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8259 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8260 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8265 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8266 (parallel [(const_int 0)]))
8267 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8269 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8270 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8273 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8274 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8276 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8277 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8279 "phaddw\t{%2, %0|%0, %2}"
8280 [(set_attr "type" "sseiadd")
8281 (set_attr "atom_unit" "complex")
8282 (set_attr "prefix_data16" "1")
8283 (set_attr "prefix_extra" "1")
8284 (set_attr "mode" "TI")])
;; MMX-register form of phaddw on V4HI (constraints y / ym).  Pairs
;; elements (0,1) and (2,3) of operand 1, then of operand 2.  prefix_rex
;; is computed per insn via x86_extended_reg_mentioned_p.
8286 (define_insn "ssse3_phaddwv4hi3"
8287 [(set (match_operand:V4HI 0 "register_operand" "=y")
8292 (match_operand:V4HI 1 "register_operand" "0")
8293 (parallel [(const_int 0)]))
8294 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8296 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8297 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8301 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8302 (parallel [(const_int 0)]))
8303 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8305 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8306 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8308 "phaddw\t{%2, %0|%0, %2}"
8309 [(set_attr "type" "sseiadd")
8310 (set_attr "atom_unit" "complex")
8311 (set_attr "prefix_extra" "1")
8312 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8313 (set_attr "mode" "DI")])
;; AVX horizontal add of 32-bit elements (vphaddd).  Pairs elements (0,1)
;; and (2,3) of operand 1, then of operand 2; operand 2 may be a register
;; or memory.
8315 (define_insn "*avx_phadddv4si3"
8316 [(set (match_operand:V4SI 0 "register_operand" "=x")
8321 (match_operand:V4SI 1 "register_operand" "x")
8322 (parallel [(const_int 0)]))
8323 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8325 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8326 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8330 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8331 (parallel [(const_int 0)]))
8332 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8334 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8335 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8337 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8338 [(set_attr "type" "sseiadd")
8339 (set_attr "prefix_extra" "1")
8340 (set_attr "prefix" "vex")
8341 (set_attr "mode" "TI")])
;; SSSE3 horizontal add of 32-bit elements (two-operand phaddd).  Same
;; pairing as the AVX form; operand 1 is tied to the destination.
8343 (define_insn "ssse3_phadddv4si3"
8344 [(set (match_operand:V4SI 0 "register_operand" "=x")
8349 (match_operand:V4SI 1 "register_operand" "0")
8350 (parallel [(const_int 0)]))
8351 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8353 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8354 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8358 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8359 (parallel [(const_int 0)]))
8360 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8362 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8363 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8365 "phaddd\t{%2, %0|%0, %2}"
8366 [(set_attr "type" "sseiadd")
8367 (set_attr "atom_unit" "complex")
8368 (set_attr "prefix_data16" "1")
8369 (set_attr "prefix_extra" "1")
8370 (set_attr "mode" "TI")])
;; MMX-register form of phaddd on V2SI (constraints y / ym).  Pairs
;; elements (0,1) of operand 1 and (0,1) of operand 2.  prefix_rex is
;; computed per insn via x86_extended_reg_mentioned_p.
8372 (define_insn "ssse3_phadddv2si3"
8373 [(set (match_operand:V2SI 0 "register_operand" "=y")
8377 (match_operand:V2SI 1 "register_operand" "0")
8378 (parallel [(const_int 0)]))
8379 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8382 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8383 (parallel [(const_int 0)]))
8384 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8386 "phaddd\t{%2, %0|%0, %2}"
8387 [(set_attr "type" "sseiadd")
8388 (set_attr "atom_unit" "complex")
8389 (set_attr "prefix_extra" "1")
8390 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8391 (set_attr "mode" "DI")])
;; AVX vphaddsw on V8HI (presumably the saturating variant of the
;; horizontal word add, going by the mnemonic).  Pairs adjacent elements
;; (0,1) (2,3) (4,5) (6,7) of operand 1, then of operand 2; operand 2 may
;; be a register or memory.
8393 (define_insn "*avx_phaddswv8hi3"
8394 [(set (match_operand:V8HI 0 "register_operand" "=x")
8400 (match_operand:V8HI 1 "register_operand" "x")
8401 (parallel [(const_int 0)]))
8402 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8404 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8405 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8408 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8409 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8411 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8412 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8417 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8418 (parallel [(const_int 0)]))
8419 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8421 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8422 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8425 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8426 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8428 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8429 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8431 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8432 [(set_attr "type" "sseiadd")
8433 (set_attr "prefix_extra" "1")
8434 (set_attr "prefix" "vex")
8435 (set_attr "mode" "TI")])
;; SSSE3 two-operand phaddsw on V8HI.  Same element pairing as the AVX
;; form; operand 1 is tied to the destination and operand 2 may be a
;; register or memory.
8437 (define_insn "ssse3_phaddswv8hi3"
8438 [(set (match_operand:V8HI 0 "register_operand" "=x")
8444 (match_operand:V8HI 1 "register_operand" "0")
8445 (parallel [(const_int 0)]))
8446 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8448 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8449 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8452 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8453 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8455 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8456 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8461 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8462 (parallel [(const_int 0)]))
8463 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8465 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8466 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8469 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8470 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8472 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8473 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8475 "phaddsw\t{%2, %0|%0, %2}"
8476 [(set_attr "type" "sseiadd")
8477 (set_attr "atom_unit" "complex")
8478 (set_attr "prefix_data16" "1")
8479 (set_attr "prefix_extra" "1")
8480 (set_attr "mode" "TI")])
;; V4HI variant of phaddsw using "y" (MMX) register constraints.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8482 (define_insn "ssse3_phaddswv4hi3"
8483 [(set (match_operand:V4HI 0 "register_operand" "=y")
8488 (match_operand:V4HI 1 "register_operand" "0")
8489 (parallel [(const_int 0)]))
8490 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8492 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8493 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8497 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8498 (parallel [(const_int 0)]))
8499 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8501 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8502 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8504 "phaddsw\t{%2, %0|%0, %2}"
8505 [(set_attr "type" "sseiadd")
8506 (set_attr "atom_unit" "complex")
8507 (set_attr "prefix_extra" "1")
8508 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8509 (set_attr "mode" "DI")])
;; Three-operand, VEX-prefixed vphsubw on V8HI.  Unlike the two-operand
;; pattern, operand 1 is any SSE register ("x") rather than being tied to
;; the destination.  The vec_selects walk HI elements 0-7 of each source.
8511 (define_insn "*avx_phsubwv8hi3"
8512 [(set (match_operand:V8HI 0 "register_operand" "=x")
8518 (match_operand:V8HI 1 "register_operand" "x")
8519 (parallel [(const_int 0)]))
8520 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8522 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8523 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8526 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8527 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8529 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8530 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8535 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8536 (parallel [(const_int 0)]))
8537 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8539 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8540 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8543 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8544 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8546 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8547 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8549 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8550 [(set_attr "type" "sseiadd")
8551 (set_attr "prefix_extra" "1")
8552 (set_attr "prefix" "vex")
8553 (set_attr "mode" "TI")])
;; Two-operand phsubw on V8HI.  Operand 1 is tied to the destination ("0");
;; operand 2 may be an SSE register or memory ("xm").  The vec_selects walk
;; HI elements 0-7 of operand 1, then of operand 2.
8555 (define_insn "ssse3_phsubwv8hi3"
8556 [(set (match_operand:V8HI 0 "register_operand" "=x")
8562 (match_operand:V8HI 1 "register_operand" "0")
8563 (parallel [(const_int 0)]))
8564 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8566 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8567 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8570 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8571 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8573 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8574 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8579 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8580 (parallel [(const_int 0)]))
8581 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8583 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8584 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8587 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8588 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8590 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8591 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8593 "phsubw\t{%2, %0|%0, %2}"
8594 [(set_attr "type" "sseiadd")
8595 (set_attr "atom_unit" "complex")
8596 (set_attr "prefix_data16" "1")
8597 (set_attr "prefix_extra" "1")
8598 (set_attr "mode" "TI")])
;; V4HI variant of phsubw using "y" (MMX) register constraints.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8600 (define_insn "ssse3_phsubwv4hi3"
8601 [(set (match_operand:V4HI 0 "register_operand" "=y")
8606 (match_operand:V4HI 1 "register_operand" "0")
8607 (parallel [(const_int 0)]))
8608 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8610 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8611 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8615 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8616 (parallel [(const_int 0)]))
8617 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8619 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8620 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8622 "phsubw\t{%2, %0|%0, %2}"
8623 [(set_attr "type" "sseiadd")
8624 (set_attr "atom_unit" "complex")
8625 (set_attr "prefix_extra" "1")
8626 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8627 (set_attr "mode" "DI")])
;; Three-operand, VEX-prefixed vphsubd on V4SI.  Operand 1 is any SSE
;; register; operand 2 may be a register or memory.  The vec_selects walk
;; SI elements 0-3 of operand 1, then of operand 2.
8629 (define_insn "*avx_phsubdv4si3"
8630 [(set (match_operand:V4SI 0 "register_operand" "=x")
8635 (match_operand:V4SI 1 "register_operand" "x")
8636 (parallel [(const_int 0)]))
8637 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8639 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8640 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8644 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8645 (parallel [(const_int 0)]))
8646 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8648 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8649 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8651 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8652 [(set_attr "type" "sseiadd")
8653 (set_attr "prefix_extra" "1")
8654 (set_attr "prefix" "vex")
8655 (set_attr "mode" "TI")])
;; Two-operand phsubd on V4SI.  Operand 1 is tied to the destination ("0");
;; operand 2 may be an SSE register or memory ("xm").  The vec_selects walk
;; SI elements 0-3 of operand 1, then of operand 2.
8657 (define_insn "ssse3_phsubdv4si3"
8658 [(set (match_operand:V4SI 0 "register_operand" "=x")
8663 (match_operand:V4SI 1 "register_operand" "0")
8664 (parallel [(const_int 0)]))
8665 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8667 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8668 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8672 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8673 (parallel [(const_int 0)]))
8674 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8676 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8677 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8679 "phsubd\t{%2, %0|%0, %2}"
8680 [(set_attr "type" "sseiadd")
8681 (set_attr "atom_unit" "complex")
8682 (set_attr "prefix_data16" "1")
8683 (set_attr "prefix_extra" "1")
8684 (set_attr "mode" "TI")])
;; V2SI variant of phsubd using "y" (MMX) register constraints.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
;; The vec_selects cover SI elements 0 and 1 of each source operand.
8686 (define_insn "ssse3_phsubdv2si3"
8687 [(set (match_operand:V2SI 0 "register_operand" "=y")
8691 (match_operand:V2SI 1 "register_operand" "0")
8692 (parallel [(const_int 0)]))
8693 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8696 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8697 (parallel [(const_int 0)]))
8698 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8700 "phsubd\t{%2, %0|%0, %2}"
8701 [(set_attr "type" "sseiadd")
8702 (set_attr "atom_unit" "complex")
8703 (set_attr "prefix_extra" "1")
8704 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8705 (set_attr "mode" "DI")])
;; Three-operand, VEX-prefixed vphsubsw on V8HI.  Operand 1 is any SSE
;; register; operand 2 may be a register or memory.  The vec_selects walk
;; HI elements 0-7 of operand 1, then of operand 2.
8707 (define_insn "*avx_phsubswv8hi3"
8708 [(set (match_operand:V8HI 0 "register_operand" "=x")
8714 (match_operand:V8HI 1 "register_operand" "x")
8715 (parallel [(const_int 0)]))
8716 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8718 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8719 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8722 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8723 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8725 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8726 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8731 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8732 (parallel [(const_int 0)]))
8733 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8735 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8736 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8739 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8740 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8742 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8743 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8745 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8746 [(set_attr "type" "sseiadd")
8747 (set_attr "prefix_extra" "1")
8748 (set_attr "prefix" "vex")
8749 (set_attr "mode" "TI")])
;; Two-operand phsubsw on V8HI.  Operand 1 is tied to the destination
;; ("0"); operand 2 may be an SSE register or memory ("xm").  The
;; vec_selects walk HI elements 0-7 of operand 1, then of operand 2.
8751 (define_insn "ssse3_phsubswv8hi3"
8752 [(set (match_operand:V8HI 0 "register_operand" "=x")
8758 (match_operand:V8HI 1 "register_operand" "0")
8759 (parallel [(const_int 0)]))
8760 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8762 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8763 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8766 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8767 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8769 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8770 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8775 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8776 (parallel [(const_int 0)]))
8777 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8779 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8780 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8783 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8784 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8786 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8787 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8789 "phsubsw\t{%2, %0|%0, %2}"
8790 [(set_attr "type" "sseiadd")
8791 (set_attr "atom_unit" "complex")
8792 (set_attr "prefix_data16" "1")
8793 (set_attr "prefix_extra" "1")
8794 (set_attr "mode" "TI")])
;; V4HI variant of phsubsw using "y" (MMX) register constraints.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8796 (define_insn "ssse3_phsubswv4hi3"
8797 [(set (match_operand:V4HI 0 "register_operand" "=y")
8802 (match_operand:V4HI 1 "register_operand" "0")
8803 (parallel [(const_int 0)]))
8804 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8806 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8807 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8811 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8812 (parallel [(const_int 0)]))
8813 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8815 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8816 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8818 "phsubsw\t{%2, %0|%0, %2}"
8819 [(set_attr "type" "sseiadd")
8820 (set_attr "atom_unit" "complex")
8821 (set_attr "prefix_extra" "1")
8822 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8823 (set_attr "mode" "DI")])
;; Three-operand, VEX-prefixed vpmaddubsw: two V16QI sources produce a
;; V8HI result.  Each source is selected twice, once with a parallel that
;; starts at element 0 and once with one that starts at element 1.
8825 (define_insn "*avx_pmaddubsw128"
8826 [(set (match_operand:V8HI 0 "register_operand" "=x")
8831 (match_operand:V16QI 1 "register_operand" "x")
8832 (parallel [(const_int 0)
8842 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8843 (parallel [(const_int 0)
8853 (vec_select:V16QI (match_dup 1)
8854 (parallel [(const_int 1)
8863 (vec_select:V16QI (match_dup 2)
8864 (parallel [(const_int 1)
8871 (const_int 15)]))))))]
8873 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8874 [(set_attr "type" "sseiadd")
8875 (set_attr "prefix_extra" "1")
8876 (set_attr "prefix" "vex")
8877 (set_attr "mode" "TI")])
;; Two-operand pmaddubsw: two V16QI sources produce a V8HI result.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
8879 (define_insn "ssse3_pmaddubsw128"
8880 [(set (match_operand:V8HI 0 "register_operand" "=x")
8885 (match_operand:V16QI 1 "register_operand" "0")
8886 (parallel [(const_int 0)
8896 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8897 (parallel [(const_int 0)
8907 (vec_select:V16QI (match_dup 1)
8908 (parallel [(const_int 1)
8917 (vec_select:V16QI (match_dup 2)
8918 (parallel [(const_int 1)
8925 (const_int 15)]))))))]
8927 "pmaddubsw\t{%2, %0|%0, %2}"
8928 [(set_attr "type" "sseiadd")
8929 (set_attr "atom_unit" "simul")
8930 (set_attr "prefix_data16" "1")
8931 (set_attr "prefix_extra" "1")
8932 (set_attr "mode" "TI")])
;; pmaddubsw with "y" (MMX) register constraints: two V8QI sources produce
;; a V4HI result.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.  prefix_rex is computed per insn.
8934 (define_insn "ssse3_pmaddubsw"
8935 [(set (match_operand:V4HI 0 "register_operand" "=y")
8940 (match_operand:V8QI 1 "register_operand" "0")
8941 (parallel [(const_int 0)
8947 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8948 (parallel [(const_int 0)
8954 (vec_select:V8QI (match_dup 1)
8955 (parallel [(const_int 1)
8960 (vec_select:V8QI (match_dup 2)
8961 (parallel [(const_int 1)
8964 (const_int 7)]))))))]
8966 "pmaddubsw\t{%2, %0|%0, %2}"
8967 [(set_attr "type" "sseiadd")
8968 (set_attr "atom_unit" "simul")
8969 (set_attr "prefix_extra" "1")
8970 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8971 (set_attr "mode" "DI")])
;; Named expander for the V8HI pmulhrsw pattern.  Both inputs are accepted
;; as nonimmediate operands, and the RTL includes an all-ones V8HI constant
;; vector.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy with MULT before the insn is emitted.
8973 (define_expand "ssse3_pmulhrswv8hi3"
8974 [(set (match_operand:V8HI 0 "register_operand" "")
8981 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8983 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8985 (const_vector:V8HI [(const_int 1) (const_int 1)
8986 (const_int 1) (const_int 1)
8987 (const_int 1) (const_int 1)
8988 (const_int 1) (const_int 1)]))
8991 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Three-operand, VEX-prefixed vpmulhrsw on V8HI, enabled under TARGET_AVX
;; when ix86_binary_operator_ok accepts the operands for MULT.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative.
8993 (define_insn "*avx_pmulhrswv8hi3"
8994 [(set (match_operand:V8HI 0 "register_operand" "=x")
9001 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9003 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9005 (const_vector:V8HI [(const_int 1) (const_int 1)
9006 (const_int 1) (const_int 1)
9007 (const_int 1) (const_int 1)
9008 (const_int 1) (const_int 1)]))
9010 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9011 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9012 [(set_attr "type" "sseimul")
9013 (set_attr "prefix_extra" "1")
9014 (set_attr "prefix" "vex")
9015 (set_attr "mode" "TI")])
;; Two-operand pmulhrsw on V8HI, enabled under TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands for MULT.  Operand 1 is
;; tied to the destination and marked commutative with operand 2 ("%0").
9017 (define_insn "*ssse3_pmulhrswv8hi3"
9018 [(set (match_operand:V8HI 0 "register_operand" "=x")
9025 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9027 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9029 (const_vector:V8HI [(const_int 1) (const_int 1)
9030 (const_int 1) (const_int 1)
9031 (const_int 1) (const_int 1)
9032 (const_int 1) (const_int 1)]))
9034 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9035 "pmulhrsw\t{%2, %0|%0, %2}"
9036 [(set_attr "type" "sseimul")
9037 (set_attr "prefix_data16" "1")
9038 (set_attr "prefix_extra" "1")
9039 (set_attr "mode" "TI")])
;; Named expander for the V4HI pmulhrsw pattern.  The RTL includes an
;; all-ones V4HI constant vector, and the preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy with MULT.
9041 (define_expand "ssse3_pmulhrswv4hi3"
9042 [(set (match_operand:V4HI 0 "register_operand" "")
9049 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9051 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9053 (const_vector:V4HI [(const_int 1) (const_int 1)
9054 (const_int 1) (const_int 1)]))
9057 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; pmulhrsw on V4HI with "y" (MMX) register constraints, enabled under
;; TARGET_SSSE3 when ix86_binary_operator_ok accepts the operands for MULT.
;; Operand 1 is tied to the destination and commutative with operand 2.
9059 (define_insn "*ssse3_pmulhrswv4hi3"
9060 [(set (match_operand:V4HI 0 "register_operand" "=y")
9067 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9069 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9071 (const_vector:V4HI [(const_int 1) (const_int 1)
9072 (const_int 1) (const_int 1)]))
9074 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9075 "pmulhrsw\t{%2, %0|%0, %2}"
9076 [(set_attr "type" "sseimul")
9077 (set_attr "prefix_extra" "1")
9078 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9079 (set_attr "mode" "DI")])
;; Three-operand, VEX-prefixed vpshufb on V16QI, modelled as an unspec of
;; operands 1 and 2.  Operand 2 may be an SSE register or memory.
;; The ';' after the output template only starts an empty comment.
9081 (define_insn "*avx_pshufbv16qi3"
9082 [(set (match_operand:V16QI 0 "register_operand" "=x")
9083 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9084 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9087 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9088 [(set_attr "type" "sselog1")
9089 (set_attr "prefix_extra" "1")
9090 (set_attr "prefix" "vex")
9091 (set_attr "mode" "TI")])
;; Two-operand pshufb on V16QI, modelled as an unspec of operands 1 and 2.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
9093 (define_insn "ssse3_pshufbv16qi3"
9094 [(set (match_operand:V16QI 0 "register_operand" "=x")
9095 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9096 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9099 "pshufb\t{%2, %0|%0, %2}";
9100 [(set_attr "type" "sselog1")
9101 (set_attr "prefix_data16" "1")
9102 (set_attr "prefix_extra" "1")
9103 (set_attr "mode" "TI")])
;; pshufb on V8QI with "y" (MMX) register constraints, modelled as an
;; unspec of operands 1 and 2.  Operand 1 is tied to the destination;
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
9105 (define_insn "ssse3_pshufbv8qi3"
9106 [(set (match_operand:V8QI 0 "register_operand" "=y")
9107 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9108 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9111 "pshufb\t{%2, %0|%0, %2}";
9112 [(set_attr "type" "sselog1")
9113 (set_attr "prefix_extra" "1")
9114 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9115 (set_attr "mode" "DI")])
;; Three-operand, VEX-prefixed vpsign, instantiated for every mode in the
;; SSEMODE124 iterator.  The mnemonic's element-size suffix comes from the
;; <ssevecsize> mode attribute.
9117 (define_insn "*avx_psign<mode>3"
9118 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9120 [(match_operand:SSEMODE124 1 "register_operand" "x")
9121 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9124 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9125 [(set_attr "type" "sselog1")
9126 (set_attr "prefix_extra" "1")
9127 (set_attr "prefix" "vex")
9128 (set_attr "mode" "TI")])
;; Two-operand psign, instantiated for every mode in the SSEMODE124
;; iterator.  Operand 1 is tied to the destination ("0"); the mnemonic's
;; element-size suffix comes from the <ssevecsize> mode attribute.
9130 (define_insn "ssse3_psign<mode>3"
9131 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9133 [(match_operand:SSEMODE124 1 "register_operand" "0")
9134 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9137 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9138 [(set_attr "type" "sselog1")
9139 (set_attr "prefix_data16" "1")
9140 (set_attr "prefix_extra" "1")
9141 (set_attr "mode" "TI")])
;; psign over the MMXMODEI iterator with "y" (MMX) register constraints.
;; It shares the "ssse3_psign<mode>3" name template with the SSEMODE124
;; pattern; <mode> expands from a different iterator here.  The mnemonic's
;; element-size suffix comes from the <mmxvecsize> mode attribute.
9143 (define_insn "ssse3_psign<mode>3"
9144 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9146 [(match_operand:MMXMODEI 1 "register_operand" "0")
9147 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9150 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9151 [(set_attr "type" "sselog1")
9152 (set_attr "prefix_extra" "1")
9153 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9154 (set_attr "mode" "DI")])
;; Three-operand, VEX-prefixed vpalignr on TI, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the emitted immediate is
;; operands[3] / 8.
;; NOTE(review): presumably the RTL operand is a bit count and the
;; instruction wants bytes -- confirm against the callers.
9156 (define_insn "*avx_palignrti"
9157 [(set (match_operand:TI 0 "register_operand" "=x")
9158 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9159 (match_operand:TI 2 "nonimmediate_operand" "xm")
9160 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9164 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9165 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9167 [(set_attr "type" "sseishft")
9168 (set_attr "prefix_extra" "1")
9169 (set_attr "length_immediate" "1")
9170 (set_attr "prefix" "vex")
9171 (set_attr "mode" "TI")])
;; Two-operand palignr on TI, modelled as an unspec.  Operand 1 is tied to
;; the destination ("0").  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the emitted immediate is operands[3] / 8.
;; NOTE(review): presumably the RTL operand is a bit count and the
;; instruction wants bytes -- confirm against the callers.
9173 (define_insn "ssse3_palignrti"
9174 [(set (match_operand:TI 0 "register_operand" "=x")
9175 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9176 (match_operand:TI 2 "nonimmediate_operand" "xm")
9177 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9181 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9182 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9184 [(set_attr "type" "sseishft")
9185 (set_attr "atom_unit" "sishuf")
9186 (set_attr "prefix_data16" "1")
9187 (set_attr "prefix_extra" "1")
9188 (set_attr "length_immediate" "1")
9189 (set_attr "mode" "TI")])
;; palignr on DI with "y" (MMX) register constraints, modelled as an
;; unspec.  Operand 1 is tied to the destination.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the emitted immediate is operands[3] / 8.
;; NOTE(review): presumably the RTL operand is a bit count and the
;; instruction wants bytes -- confirm against the callers.
9191 (define_insn "ssse3_palignrdi"
9192 [(set (match_operand:DI 0 "register_operand" "=y")
9193 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9194 (match_operand:DI 2 "nonimmediate_operand" "ym")
9195 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9199 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9200 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9202 [(set_attr "type" "sseishft")
9203 (set_attr "atom_unit" "sishuf")
9204 (set_attr "prefix_extra" "1")
9205 (set_attr "length_immediate" "1")
9206 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9207 (set_attr "mode" "DI")])
;; Standard-named abs<mode>2 for every mode in SSEMODE124, expressed with
;; the generic (abs ...) RTL code rather than an unspec.  The source may be
;; an SSE register or memory.  The prefix attribute is "maybe_vex".
;; NOTE(review): presumably the %v in the template selects the VEX form
;; when AVX is enabled -- confirm in the i386 output code.
9209 (define_insn "abs<mode>2"
9210 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9211 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9213 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9214 [(set_attr "type" "sselog1")
9215 (set_attr "prefix_data16" "1")
9216 (set_attr "prefix_extra" "1")
9217 (set_attr "prefix" "maybe_vex")
9218 (set_attr "mode" "TI")])
;; abs<mode>2 over the MMXMODEI iterator with "y" (MMX) register
;; constraints, expressed with the generic (abs ...) RTL code.  prefix_rep
;; is forced to "0" and prefix_rex is computed per insn.
9220 (define_insn "abs<mode>2"
9221 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9222 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9224 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9225 [(set_attr "type" "sselog1")
9226 (set_attr "prefix_rep" "0")
9227 (set_attr "prefix_extra" "1")
9228 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9229 (set_attr "mode" "DI")])
9231 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9233 ;; AMD SSE4A instructions
9235 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movnts{s,d}-style store over the MODEF iterator: operand 0 is a
;; memory destination ("=m") and operand 1 an SSE register source.  The
;; mnemonic suffix comes from the <ssemodefsuffix> mode attribute.
9237 (define_insn "sse4a_movnt<mode>"
9238 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9240 [(match_operand:MODEF 1 "register_operand" "x")]
9243 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9244 [(set_attr "type" "ssemov")
9245 (set_attr "mode" "<MODE>")])
;; Vector-source variant of the SSE4A movnt store: element 0 of the
;; SSEMODEF2P register operand is selected and stored to a memory operand
;; of the corresponding scalar mode (<ssescalarmode>).
9247 (define_insn "sse4a_vmmovnt<mode>"
9248 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9249 (unspec:<ssescalarmode>
9250 [(vec_select:<ssescalarmode>
9251 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9252 (parallel [(const_int 0)]))]
9255 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9256 [(set_attr "type" "ssemov")
9257 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of SSE4A extrq on V2DI, modelled as an unspec.
;; Operand 1 is tied to the destination; operands 2 and 3 are integer
;; constants, which is why length_immediate is 2.
9259 (define_insn "sse4a_extrqi"
9260 [(set (match_operand:V2DI 0 "register_operand" "=x")
9261 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9262 (match_operand 2 "const_int_operand" "")
9263 (match_operand 3 "const_int_operand" "")]
9266 "extrq\t{%3, %2, %0|%0, %2, %3}"
9267 [(set_attr "type" "sse")
9268 (set_attr "prefix_data16" "1")
9269 (set_attr "length_immediate" "2")
9270 (set_attr "mode" "TI")])
;; Register form of SSE4A extrq on V2DI, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 is a V16QI SSE
;; register rather than a pair of immediates.
9272 (define_insn "sse4a_extrq"
9273 [(set (match_operand:V2DI 0 "register_operand" "=x")
9274 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9275 (match_operand:V16QI 2 "register_operand" "x")]
9278 "extrq\t{%2, %0|%0, %2}"
9279 [(set_attr "type" "sse")
9280 (set_attr "prefix_data16" "1")
9281 (set_attr "mode" "TI")])
;; Immediate form of SSE4A insertq on V2DI, modelled as an unspec.
;; Operand 1 is tied to the destination, operand 2 is an SSE register, and
;; operands 3 and 4 are integer constants (length_immediate is 2).
;; prefix_data16 is forced to 0 and prefix_rep to 1.
9283 (define_insn "sse4a_insertqi"
9284 [(set (match_operand:V2DI 0 "register_operand" "=x")
9285 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9286 (match_operand:V2DI 2 "register_operand" "x")
9287 (match_operand 3 "const_int_operand" "")
9288 (match_operand 4 "const_int_operand" "")]
9291 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9292 [(set_attr "type" "sseins")
9293 (set_attr "prefix_data16" "0")
9294 (set_attr "prefix_rep" "1")
9295 (set_attr "length_immediate" "2")
9296 (set_attr "mode" "TI")])
;; Register form of SSE4A insertq on V2DI, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 is an SSE register.
;; prefix_data16 is forced to 0 and prefix_rep to 1.
9298 (define_insn "sse4a_insertq"
9299 [(set (match_operand:V2DI 0 "register_operand" "=x")
9300 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9301 (match_operand:V2DI 2 "register_operand" "x")]
9304 "insertq\t{%2, %0|%0, %2}"
9305 [(set_attr "type" "sseins")
9306 (set_attr "prefix_data16" "0")
9307 (set_attr "prefix_rep" "1")
9308 (set_attr "mode" "TI")])
9310 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9312 ;; Intel SSE4.1 instructions
9314 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-prefixed immediate blend over the AVXMODEF2P iterator, expressed as
;; a vec_merge.  Note the operand order inside the vec_merge: operand 2
;; comes first and operand 1 second, with operand 3 as the constant mask.
;; The mask's upper bound depends on the mode via <blendbits>.
9316 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9317 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9318 (vec_merge:AVXMODEF2P
9319 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9320 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9321 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9323 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9324 [(set_attr "type" "ssemov")
9325 (set_attr "prefix_extra" "1")
9326 (set_attr "length_immediate" "1")
9327 (set_attr "prefix" "vex")
9328 (set_attr "mode" "<avxvecmode>")])
;; VEX-prefixed variable blend over the AVXMODEF2P iterator.  All three
;; inputs are vector operands; operand 3 is an SSE register, not an
;; immediate, and is printed as the fourth assembler operand.
9330 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9331 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9333 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9334 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9335 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9338 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9339 [(set_attr "type" "ssemov")
9340 (set_attr "prefix_extra" "1")
9341 (set_attr "length_immediate" "1")
9342 (set_attr "prefix" "vex")
9343 (set_attr "mode" "<avxvecmode>")])
;; Two-operand immediate blend over the SSEMODEF2P iterator, expressed as
;; a vec_merge with operand 2 first and operand 1 (tied to the
;; destination) second.  Operand 3 is the constant mask, bounded by
;; <blendbits>.
9345 (define_insn "sse4_1_blend<ssemodesuffix>"
9346 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9347 (vec_merge:SSEMODEF2P
9348 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9349 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9350 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9352 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9353 [(set_attr "type" "ssemov")
9354 (set_attr "prefix_data16" "1")
9355 (set_attr "prefix_extra" "1")
9356 (set_attr "length_immediate" "1")
9357 (set_attr "mode" "<MODE>")])
;; Two-operand variable blend over the SSEMODEF2P iterator.  Operands 0-2
;; use the *_not_xmm0 predicates, while operand 3 carries the "Yz"
;; constraint.
;; NOTE(review): presumably "Yz" pins operand 3 to %xmm0, which would
;; explain why the other operands exclude it -- confirm in constraints.md.
9359 (define_insn "sse4_1_blendv<ssemodesuffix>"
9360 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9362 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9363 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9364 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9367 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9368 [(set_attr "type" "ssemov")
9369 (set_attr "prefix_data16" "1")
9370 (set_attr "prefix_extra" "1")
9371 (set_attr "mode" "<MODE>")])
;; VEX-prefixed vdp{ps,pd} over the AVXMODEF2P iterator.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative; operand 3 is an 8-bit
;; immediate (const_0_to_255_operand).
9373 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9374 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9376 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9377 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9378 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9381 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9382 [(set_attr "type" "ssemul")
9383 (set_attr "prefix" "vex")
9384 (set_attr "prefix_extra" "1")
9385 (set_attr "length_immediate" "1")
9386 (set_attr "mode" "<avxvecmode>")])
;; Two-operand dp{ps,pd} over the SSEMODEF2P iterator.  Operand 1 is tied
;; to the destination and marked commutative with operand 2 ("%0");
;; operand 3 is an 8-bit immediate (const_0_to_255_operand).
9388 (define_insn "sse4_1_dp<ssemodesuffix>"
9389 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9391 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9392 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9393 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9396 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9397 [(set_attr "type" "ssemul")
9398 (set_attr "prefix_data16" "1")
9399 (set_attr "prefix_extra" "1")
9400 (set_attr "length_immediate" "1")
9401 (set_attr "mode" "<MODE>")])
;; movntdqa load: operand 1 must be a V2DI memory operand ("m") and the
;; result goes to an SSE register.  Modelled as an unspec; the prefix
;; attribute is "maybe_vex".
9403 (define_insn "sse4_1_movntdqa"
9404 [(set (match_operand:V2DI 0 "register_operand" "=x")
9405 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9408 "%vmovntdqa\t{%1, %0|%0, %1}"
9409 [(set_attr "type" "ssemov")
9410 (set_attr "prefix_extra" "1")
9411 (set_attr "prefix" "maybe_vex")
9412 (set_attr "mode" "TI")])
;; Three-operand, VEX-prefixed vmpsadbw on V16QI, modelled as an unspec.
;; Operand 2 may be an SSE register or memory; operand 3 is an 8-bit
;; immediate (const_0_to_255_operand).
9414 (define_insn "*avx_mpsadbw"
9415 [(set (match_operand:V16QI 0 "register_operand" "=x")
9416 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9417 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9418 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9421 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9422 [(set_attr "type" "sselog1")
9423 (set_attr "prefix" "vex")
9424 (set_attr "prefix_extra" "1")
9425 (set_attr "length_immediate" "1")
9426 (set_attr "mode" "TI")])
;; Two-operand mpsadbw on V16QI, modelled as an unspec.  Operand 1 is tied
;; to the destination ("0"); operand 3 is an 8-bit immediate
;; (const_0_to_255_operand).
9428 (define_insn "sse4_1_mpsadbw"
9429 [(set (match_operand:V16QI 0 "register_operand" "=x")
9430 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9431 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9432 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9435 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9436 [(set_attr "type" "sselog1")
9437 (set_attr "prefix_extra" "1")
9438 (set_attr "length_immediate" "1")
9439 (set_attr "mode" "TI")])
;; Three-operand, VEX-prefixed vpackusdw: two V4SI sources produce one
;; V8HI result.  Operand 2 may be an SSE register or memory.
9441 (define_insn "*avx_packusdw"
9442 [(set (match_operand:V8HI 0 "register_operand" "=x")
9445 (match_operand:V4SI 1 "register_operand" "x"))
9447 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9449 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9450 [(set_attr "type" "sselog")
9451 (set_attr "prefix_extra" "1")
9452 (set_attr "prefix" "vex")
9453 (set_attr "mode" "TI")])
;; Two-operand packusdw: two V4SI sources produce one V8HI result.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory.
9455 (define_insn "sse4_1_packusdw"
9456 [(set (match_operand:V8HI 0 "register_operand" "=x")
9459 (match_operand:V4SI 1 "register_operand" "0"))
9461 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9463 "packusdw\t{%2, %0|%0, %2}"
9464 [(set_attr "type" "sselog")
9465 (set_attr "prefix_extra" "1")
9466 (set_attr "mode" "TI")])
;; Three-source, VEX-prefixed vpblendvb on V16QI, modelled as an unspec.
;; Operand 3 is an SSE register (not an immediate) and is printed as the
;; fourth assembler operand; length_immediate is still set to 1.
9468 (define_insn "*avx_pblendvb"
9469 [(set (match_operand:V16QI 0 "register_operand" "=x")
9470 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9471 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9472 (match_operand:V16QI 3 "register_operand" "x")]
9475 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9476 [(set_attr "type" "ssemov")
9477 (set_attr "prefix_extra" "1")
9478 (set_attr "length_immediate" "1")
9479 (set_attr "prefix" "vex")
9480 (set_attr "mode" "TI")])
;; Two-operand pblendvb on V16QI, modelled as an unspec.  Operands 0-2 use
;; the *_not_xmm0 predicates, while operand 3 carries the "Yz" constraint.
;; NOTE(review): presumably "Yz" pins operand 3 to %xmm0, which would
;; explain why the other operands exclude it -- confirm in constraints.md.
9482 (define_insn "sse4_1_pblendvb"
9483 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9484 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9485 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9486 (match_operand:V16QI 3 "register_operand" "Yz")]
9489 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9490 [(set_attr "type" "ssemov")
9491 (set_attr "prefix_extra" "1")
9492 (set_attr "mode" "TI")])
;; Three-operand, VEX-prefixed vpblendw on V8HI.  In the RTL, operand 2 is
;; listed before operand 1; operand 3 is an 8-bit immediate mask
;; (const_0_to_255_operand).
9494 (define_insn "*avx_pblendw"
9495 [(set (match_operand:V8HI 0 "register_operand" "=x")
9497 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9498 (match_operand:V8HI 1 "register_operand" "x")
9499 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9501 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9502 [(set_attr "type" "ssemov")
9503 (set_attr "prefix" "vex")
9504 (set_attr "prefix_extra" "1")
9505 (set_attr "length_immediate" "1")
9506 (set_attr "mode" "TI")])
;; Two-operand pblendw on V8HI.  In the RTL, operand 2 is listed before
;; operand 1, which is tied to the destination ("0"); operand 3 is an
;; 8-bit immediate mask (const_0_to_255_operand).
9508 (define_insn "sse4_1_pblendw"
9509 [(set (match_operand:V8HI 0 "register_operand" "=x")
9511 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9512 (match_operand:V8HI 1 "register_operand" "0")
9513 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9515 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9516 [(set_attr "type" "ssemov")
9517 (set_attr "prefix_extra" "1")
9518 (set_attr "length_immediate" "1")
9519 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single source
;; that may be an SSE register or memory.  The prefix attribute is
;; "maybe_vex".
9521 (define_insn "sse4_1_phminposuw"
9522 [(set (match_operand:V8HI 0 "register_operand" "=x")
9523 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9524 UNSPEC_PHMINPOSUW))]
9526 "%vphminposuw\t{%1, %0|%0, %1}"
9527 [(set_attr "type" "sselog1")
9528 (set_attr "prefix_extra" "1")
9529 (set_attr "prefix" "maybe_vex")
9530 (set_attr "mode" "TI")])
;; Byte-to-word extension producing V8HI from a V16QI source.  <code> and
;; <extsuffix> come from a code iterator: extsuffix maps sign_extend to
;; "sx" and zero_extend to "zx", giving pmovsxbw / pmovzxbw.
9532 (define_insn "sse4_1_<code>v8qiv8hi2"
9533 [(set (match_operand:V8HI 0 "register_operand" "=x")
9536 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9537 (parallel [(const_int 0)
9546 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9547 [(set_attr "type" "ssemov")
9548 (set_attr "prefix_extra" "1")
9549 (set_attr "prefix" "maybe_vex")
9550 (set_attr "mode" "TI")])
;; Byte-to-dword extension producing V4SI from a V16QI source.
;; <extsuffix> maps sign_extend to "sx" and zero_extend to "zx", giving
;; pmovsxbd / pmovzxbd.
9552 (define_insn "sse4_1_<code>v4qiv4si2"
9553 [(set (match_operand:V4SI 0 "register_operand" "=x")
9556 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9557 (parallel [(const_int 0)
9562 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9563 [(set_attr "type" "ssemov")
9564 (set_attr "prefix_extra" "1")
9565 (set_attr "prefix" "maybe_vex")
9566 (set_attr "mode" "TI")])
;; Word-to-dword extension producing V4SI from a V8HI source.
;; <extsuffix> maps sign_extend to "sx" and zero_extend to "zx", giving
;; pmovsxwd / pmovzxwd.
9568 (define_insn "sse4_1_<code>v4hiv4si2"
9569 [(set (match_operand:V4SI 0 "register_operand" "=x")
9572 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9573 (parallel [(const_int 0)
9578 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9579 [(set_attr "type" "ssemov")
9580 (set_attr "prefix_extra" "1")
9581 (set_attr "prefix" "maybe_vex")
9582 (set_attr "mode" "TI")])
;; Byte-to-qword extension producing V2DI from a V16QI source.
;; <extsuffix> maps sign_extend to "sx" and zero_extend to "zx", giving
;; pmovsxbq / pmovzxbq.
9584 (define_insn "sse4_1_<code>v2qiv2di2"
9585 [(set (match_operand:V2DI 0 "register_operand" "=x")
9588 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9589 (parallel [(const_int 0)
9592 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9593 [(set_attr "type" "ssemov")
9594 (set_attr "prefix_extra" "1")
9595 (set_attr "prefix" "maybe_vex")
9596 (set_attr "mode" "TI")])
;; Word-to-qword extension producing V2DI from a V8HI source.
;; <extsuffix> maps sign_extend to "sx" and zero_extend to "zx", giving
;; pmovsxwq / pmovzxwq.
9598 (define_insn "sse4_1_<code>v2hiv2di2"
9599 [(set (match_operand:V2DI 0 "register_operand" "=x")
9602 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9603 (parallel [(const_int 0)
9606 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9607 [(set_attr "type" "ssemov")
9608 (set_attr "prefix_extra" "1")
9609 (set_attr "prefix" "maybe_vex")
9610 (set_attr "mode" "TI")])
;; pmovsxdq / pmovzxdq: V4SI register or memory source (operand 1),
;; V2DI register result (operand 0).
9612 (define_insn "sse4_1_<code>v2siv2di2"
9613 [(set (match_operand:V2DI 0 "register_operand" "=x")
9616 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9617 (parallel [(const_int 0)
9620 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9621 [(set_attr "type" "ssemov")
9622 (set_attr "prefix_extra" "1")
9623 (set_attr "prefix" "maybe_vex")
9624 (set_attr "mode" "TI")])
9626 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9627 ;; setting FLAGS_REG. But they are not really compare instructions.
;; Sets FLAGS_REG (CC mode) from an unspec over two AVXMODEF2P operands:
;; operand 0 must be a register, operand 1 may be a register or memory.
;; Emitted as vtest<ssemodesuffix>; VEX-encoded only.
9628 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9629 [(set (reg:CC FLAGS_REG)
9630 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9631 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9634 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9635 [(set_attr "type" "ssecomi")
9636 (set_attr "prefix_extra" "1")
9637 (set_attr "prefix" "vex")
9638 (set_attr "mode" "<MODE>")])
9640 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9641 ;; But it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG from an unspec over two V4DI operands
;; (operand 0 a register, operand 1 a register or memory).  VEX only,
;; insn mode OI.
9642 (define_insn "avx_ptest256"
9643 [(set (reg:CC FLAGS_REG)
9644 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9645 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9648 "vptest\t{%1, %0|%0, %1}"
9649 [(set_attr "type" "ssecomi")
9650 (set_attr "prefix_extra" "1")
9651 (set_attr "prefix" "vex")
9652 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG from an unspec over two V2DI operands
;; (operand 0 a register, operand 1 a register or memory).  The "prefix"
;; attribute is maybe_vex, insn mode TI.
9654 (define_insn "sse4_1_ptest"
9655 [(set (reg:CC FLAGS_REG)
9656 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9657 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9660 "%vptest\t{%1, %0|%0, %1}"
9661 [(set_attr "type" "ssecomi")
9662 (set_attr "prefix_extra" "1")
9663 (set_attr "prefix" "maybe_vex")
9664 (set_attr "mode" "TI")])
;; 256-bit packed vround<ssemodesuffix>.  Operand 1 is the source vector
;; (register or memory); operand 2 is an immediate accepted by
;; const_0_to_15_operand, hence the one-byte length_immediate.
9666 (define_insn "avx_round<ssemodesuffix>256"
9667 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9668 (unspec:AVX256MODEF2P
9669 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9670 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9673 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9674 [(set_attr "type" "ssecvt")
9675 (set_attr "prefix_extra" "1")
9676 (set_attr "length_immediate" "1")
9677 (set_attr "prefix" "vex")
9678 (set_attr "mode" "<MODE>")])
;; 128-bit packed round<ssemodesuffix>.  Operand 1 is the source vector
;; (register or memory); operand 2 is an immediate accepted by
;; const_0_to_15_operand.  The "prefix" attribute is maybe_vex.
9680 (define_insn "sse4_1_round<ssemodesuffix>"
9681 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9683 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9684 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9687 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9688 [(set_attr "type" "ssecvt")
9689 (set_attr "prefix_data16" "1")
9690 (set_attr "prefix_extra" "1")
9691 (set_attr "length_immediate" "1")
9692 (set_attr "prefix" "maybe_vex")
9693 (set_attr "mode" "<MODE>")])
;; VEX form of the scalar round: a vec_merge of an unspec over operand 2
;; and the immediate operand 3 with operand 1.  All three vector operands
;; are independent registers, so the template prints %1 as its own
;; source operand.
9695 (define_insn "*avx_round<ssescalarmodesuffix>"
9696 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9697 (vec_merge:SSEMODEF2P
9699 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9700 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9702 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9705 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9706 [(set_attr "type" "ssecvt")
9707 (set_attr "prefix_extra" "1")
9708 (set_attr "length_immediate" "1")
9709 (set_attr "prefix" "vex")
9710 (set_attr "mode" "<MODE>")])
;; Non-VEX scalar round: same vec_merge shape as the VEX variant, but
;; operand 1 is tied to the destination (constraint "0"), so the template
;; prints only %0, %2 and the immediate %3.
9712 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9713 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9714 (vec_merge:SSEMODEF2P
9716 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9717 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9719 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9722 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9723 [(set_attr "type" "ssecvt")
9724 (set_attr "prefix_data16" "1")
9725 (set_attr "prefix_extra" "1")
9726 (set_attr "length_immediate" "1")
9727 (set_attr "mode" "<MODE>")])
9729 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9731 ;; Intel SSE4.2 string/text processing instructions
9733 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestri/pcmpestrm pattern with three results: an SI value
;; in operand 0 (constraint "c"), a V16QI value in operand 1 (constraint
;; "Yz") and FLAGS_REG.  Inputs are the two V16QI strings (operands 2 and
;; 4, neither allowed to be xmm0 by their predicates), their SI lengths
;; (operands 3 and 5, constraints "a" and "d") and an 8-bit immediate
;; (operand 6).
;; The split code reads the REG_UNUSED notes on curr_insn to see which
;; results are live (ecx, xmm0, flags) and emits the pcmpestri, pcmpestrm
;; and/or cconly patterns; the cconly form is used only when the flags
;; are live and neither register result is.
9735 (define_insn_and_split "sse4_2_pcmpestr"
9736 [(set (match_operand:SI 0 "register_operand" "=c,c")
9738 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9739 (match_operand:SI 3 "register_operand" "a,a")
9740 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9741 (match_operand:SI 5 "register_operand" "d,d")
9742 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9744 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9752 (set (reg:CC FLAGS_REG)
9761 && can_create_pseudo_p ()"
9766 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9767 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9768 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9771 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9772 operands[3], operands[4],
9773 operands[5], operands[6]));
9775 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9776 operands[3], operands[4],
9777 operands[5], operands[6]));
9778 if (flags && !(ecx || xmm0))
9779 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9780 operands[2], operands[3],
9781 operands[4], operands[5],
9785 [(set_attr "type" "sselog")
9786 (set_attr "prefix_data16" "1")
9787 (set_attr "prefix_extra" "1")
9788 (set_attr "length_immediate" "1")
9789 (set_attr "memory" "none,load")
9790 (set_attr "mode" "TI")])
;; pcmpestri: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; Operands 1 and 3 are the V16QI strings (operand 3 may be memory in the
;; second alternative), operands 2 and 4 their SI lengths (constraints
;; "a" and "d"), operand 5 the 8-bit immediate.  Only the explicit
;; operands %1, %3 and %5 are printed.
9792 (define_insn "sse4_2_pcmpestri"
9793 [(set (match_operand:SI 0 "register_operand" "=c,c")
9795 [(match_operand:V16QI 1 "register_operand" "x,x")
9796 (match_operand:SI 2 "register_operand" "a,a")
9797 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9798 (match_operand:SI 4 "register_operand" "d,d")
9799 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9801 (set (reg:CC FLAGS_REG)
9810 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9811 [(set_attr "type" "sselog")
9812 (set_attr "prefix_data16" "1")
9813 (set_attr "prefix_extra" "1")
9814 (set_attr "prefix" "maybe_vex")
9815 (set_attr "length_immediate" "1")
9816 (set_attr "memory" "none,load")
9817 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; Input operands mirror sse4_2_pcmpestri: strings in operands 1 and 3,
;; SI lengths in operands 2 ("a") and 4 ("d"), immediate in operand 5.
9819 (define_insn "sse4_2_pcmpestrm"
9820 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9822 [(match_operand:V16QI 1 "register_operand" "x,x")
9823 (match_operand:SI 2 "register_operand" "a,a")
9824 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9825 (match_operand:SI 4 "register_operand" "d,d")
9826 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9828 (set (reg:CC FLAGS_REG)
9837 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9838 [(set_attr "type" "sselog")
9839 (set_attr "prefix_data16" "1")
9840 (set_attr "prefix_extra" "1")
9841 (set_attr "length_immediate" "1")
9842 (set_attr "prefix" "maybe_vex")
9843 (set_attr "memory" "none,load")
9844 (set_attr "mode" "TI")])
;; Flags-only variant: sets FLAGS_REG and merely clobbers the two result
;; registers (scratch operands 0 and 1).  Four alternatives: the first two
;; emit pcmpestrm and clobber a "Yz" V16QI scratch, the last two emit
;; pcmpestri and clobber a "c" SI scratch; within each pair operand 4 is
;; a register or memory (see the "memory" attribute).
9846 (define_insn "sse4_2_pcmpestr_cconly"
9847 [(set (reg:CC FLAGS_REG)
9849 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9850 (match_operand:SI 3 "register_operand" "a,a,a,a")
9851 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9852 (match_operand:SI 5 "register_operand" "d,d,d,d")
9853 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9855 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9856 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9859 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9860 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9861 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9862 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9863 [(set_attr "type" "sselog")
9864 (set_attr "prefix_data16" "1")
9865 (set_attr "prefix_extra" "1")
9866 (set_attr "length_immediate" "1")
9867 (set_attr "memory" "none,load,none,load")
9868 (set_attr "prefix" "maybe_vex")
9869 (set_attr "mode" "TI")])
;; Combined pcmpistri/pcmpistrm pattern with three results: an SI value
;; in operand 0 (constraint "c"), a V16QI value in operand 1 (constraint
;; "Yz") and FLAGS_REG.  Inputs are the two V16QI strings (operands 2 and
;; 3, neither allowed to be xmm0 by their predicates) and an 8-bit
;; immediate (operand 4); there are no explicit length operands.
;; The split code reads the REG_UNUSED notes on curr_insn to see which
;; results are live and emits the pcmpistri, pcmpistrm and/or cconly
;; patterns; the cconly form is used only when the flags are live and
;; neither register result is.
9871 (define_insn_and_split "sse4_2_pcmpistr"
9872 [(set (match_operand:SI 0 "register_operand" "=c,c")
9874 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9875 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9876 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9878 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9884 (set (reg:CC FLAGS_REG)
9891 && can_create_pseudo_p ()"
9896 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9897 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9898 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9901 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9902 operands[3], operands[4]));
9904 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9905 operands[3], operands[4]));
9906 if (flags && !(ecx || xmm0))
9907 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9908 operands[2], operands[3],
9912 [(set_attr "type" "sselog")
9913 (set_attr "prefix_data16" "1")
9914 (set_attr "prefix_extra" "1")
9915 (set_attr "length_immediate" "1")
9916 (set_attr "memory" "none,load")
9917 (set_attr "mode" "TI")])
;; pcmpistri: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; Operands 1 and 2 are the V16QI strings (operand 2 may be memory in the
;; second alternative); operand 3 is the 8-bit immediate.
9919 (define_insn "sse4_2_pcmpistri"
9920 [(set (match_operand:SI 0 "register_operand" "=c,c")
9922 [(match_operand:V16QI 1 "register_operand" "x,x")
9923 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9924 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9926 (set (reg:CC FLAGS_REG)
9933 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9934 [(set_attr "type" "sselog")
9935 (set_attr "prefix_data16" "1")
9936 (set_attr "prefix_extra" "1")
9937 (set_attr "length_immediate" "1")
9938 (set_attr "prefix" "maybe_vex")
9939 (set_attr "memory" "none,load")
9940 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; Operands 1 and 2 are the V16QI strings (operand 2 may be memory in the
;; second alternative); operand 3 is the 8-bit immediate.
9942 (define_insn "sse4_2_pcmpistrm"
9943 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9945 [(match_operand:V16QI 1 "register_operand" "x,x")
9946 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9947 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9949 (set (reg:CC FLAGS_REG)
9956 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9957 [(set_attr "type" "sselog")
9958 (set_attr "prefix_data16" "1")
9959 (set_attr "prefix_extra" "1")
9960 (set_attr "length_immediate" "1")
9961 (set_attr "prefix" "maybe_vex")
9962 (set_attr "memory" "none,load")
9963 (set_attr "mode" "TI")])
;; Flags-only variant: sets FLAGS_REG and merely clobbers the two result
;; registers (scratch operands 0 and 1).  Four alternatives: the first two
;; emit pcmpistrm and clobber a "Yz" V16QI scratch, the last two emit
;; pcmpistri and clobber a "c" SI scratch; within each pair operand 3 is
;; a register or memory (see the "memory" attribute).
9965 (define_insn "sse4_2_pcmpistr_cconly"
9966 [(set (reg:CC FLAGS_REG)
9968 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9969 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9970 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9972 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9973 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9976 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9977 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9978 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9979 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9980 [(set_attr "type" "sselog")
9981 (set_attr "prefix_data16" "1")
9982 (set_attr "prefix_extra" "1")
9983 (set_attr "length_immediate" "1")
9984 (set_attr "memory" "none,load,none,load")
9985 (set_attr "prefix" "maybe_vex")
9986 (set_attr "mode" "TI")])
9988 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9992 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9994 ;; XOP parallel integer multiply/add instructions.
9995 ;; Note the XOP multiply/add instructions
9996 ;; a[i] = b[i] * c[i] + d[i];
9997 ;; do not allow the value being added to be a memory operand.
;; vpmacsww on V8HI: operands 1 and 2 are the multiplicands (commutative,
;; "%"; only operand 2 may be memory) and operand 3 is the addend, which
;; its "x" constraint keeps in a register.
9998 (define_insn "xop_pmacsww"
9999 [(set (match_operand:V8HI 0 "register_operand" "=x")
10002 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10003 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10004 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10006 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10007 [(set_attr "type" "ssemuladd")
10008 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a V8HI mult of operands 1 and 2 (commutative; only
;; operand 2 may be memory) combined with the register addend operand 3.
10010 (define_insn "xop_pmacssww"
10011 [(set (match_operand:V8HI 0 "register_operand" "=x")
10013 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10014 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10015 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10017 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10018 [(set_attr "type" "ssemuladd")
10019 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI: operands 1 and 2 are the multiplicands (commutative;
;; only operand 2 may be memory) and operand 3 is the register addend.
10021 (define_insn "xop_pmacsdd"
10022 [(set (match_operand:V4SI 0 "register_operand" "=x")
10025 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10026 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10027 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10029 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10030 [(set_attr "type" "ssemuladd")
10031 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a V4SI mult of operands 1 and 2 (commutative; only
;; operand 2 may be memory) combined with the register addend operand 3.
10033 (define_insn "xop_pmacssdd"
10034 [(set (match_operand:V4SI 0 "register_operand" "=x")
10036 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10037 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10038 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10040 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10041 [(set_attr "type" "ssemuladd")
10042 (set_attr "mode" "TI")])
;; vpmacssdql: V2DI result.  Elements are selected from the V4SI
;; operands 1 and 2 with a selection list beginning at index 1, and
;; combined with the V2DI register addend in operand 3.
10044 (define_insn "xop_pmacssdql"
10045 [(set (match_operand:V2DI 0 "register_operand" "=x")
10050 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10051 (parallel [(const_int 1)
10054 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10055 (parallel [(const_int 1)
10057 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10059 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10060 [(set_attr "type" "ssemuladd")
10061 (set_attr "mode" "TI")])
;; vpmacssdqh: V2DI result.  Elements are selected from the V4SI
;; operands 1 and 2 with a selection list beginning at index 0, and
;; combined with the V2DI register addend in operand 3.
10063 (define_insn "xop_pmacssdqh"
10064 [(set (match_operand:V2DI 0 "register_operand" "=x")
10069 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10070 (parallel [(const_int 0)
10074 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10075 (parallel [(const_int 0)
10077 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10079 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10080 [(set_attr "type" "ssemuladd")
10081 (set_attr "mode" "TI")])
;; vpmacsdql: V2DI result.  Elements are selected from the V4SI
;; operands 1 and 2 with a selection list beginning at index 1, and
;; combined with the V2DI register addend in operand 3.
10083 (define_insn "xop_pmacsdql"
10084 [(set (match_operand:V2DI 0 "register_operand" "=x")
10089 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10090 (parallel [(const_int 1)
10094 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10095 (parallel [(const_int 1)
10097 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10099 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10100 [(set_attr "type" "ssemuladd")
10101 (set_attr "mode" "TI")])
10103 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10104 ;; fake it with a multiply/add. In general, we expect the define_split to
10105 ;; occur before register allocation, so we have to handle the corner case where
10106 ;; the target is the same as operands 1/2
;; Widening multiply of elements 1 and 3 of the V4SI operands 1 and 2
;; into a V2DI result.  The output is early-clobbered ("=&x").  The insn
;; is split once reload has completed, and the split preparation sets
;; operands[3] to the V2DI zero constant.
10107 (define_insn_and_split "xop_mulv2div2di3_low"
10108 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10112 (match_operand:V4SI 1 "register_operand" "%x")
10113 (parallel [(const_int 1)
10117 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10118 (parallel [(const_int 1)
10119 (const_int 3)])))))]
10122 "&& reload_completed"
10123 [(set (match_dup 0)
10131 (parallel [(const_int 1)
10136 (parallel [(const_int 1)
10140 operands[3] = CONST0_RTX (V2DImode);
10142 [(set_attr "type" "ssemul")
10143 (set_attr "mode" "TI")])
;; vpmacsdqh: V2DI result.  Elements are selected from the V4SI
;; operands 1 and 2 with a selection list beginning at index 0, and
;; combined with the V2DI register addend in operand 3.
10145 (define_insn "xop_pmacsdqh"
10146 [(set (match_operand:V2DI 0 "register_operand" "=x")
10151 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10152 (parallel [(const_int 0)
10156 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10157 (parallel [(const_int 0)
10159 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10161 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10162 [(set_attr "type" "ssemuladd")
10163 (set_attr "mode" "TI")])
10165 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10166 ;; fake it with a multiply/add. In general, we expect the define_split to
10167 ;; occur before register allocation, so we have to handle the corner case where
10168 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of elements 0 and 2 of the V4SI operands 1 and 2
;; into a V2DI result.  The output is early-clobbered ("=&x").  The insn
;; is split once reload has completed, and the split preparation sets
;; operands[3] to the V2DI zero constant.
10169 (define_insn_and_split "xop_mulv2div2di3_high"
10170 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10174 (match_operand:V4SI 1 "register_operand" "%x")
10175 (parallel [(const_int 0)
10179 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10180 (parallel [(const_int 0)
10181 (const_int 2)])))))]
10184 "&& reload_completed"
10185 [(set (match_dup 0)
10193 (parallel [(const_int 0)
10198 (parallel [(const_int 0)
10202 operands[3] = CONST0_RTX (V2DImode);
10204 [(set_attr "type" "ssemul")
10205 (set_attr "mode" "TI")])
10207 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V4SI result.  Elements are selected from the V8HI
;; operands 1 and 2 with a selection list beginning at index 1, and
;; combined with the V4SI register addend in operand 3.
10208 (define_insn "xop_pmacsswd"
10209 [(set (match_operand:V4SI 0 "register_operand" "=x")
10214 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10215 (parallel [(const_int 1)
10221 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10222 (parallel [(const_int 1)
10226 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10228 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10229 [(set_attr "type" "ssemuladd")
10230 (set_attr "mode" "TI")])
;; vpmacswd: V4SI result.  Elements are selected from the V8HI
;; operands 1 and 2 with a selection list beginning at index 1, and
;; combined with the V4SI register addend in operand 3.
10232 (define_insn "xop_pmacswd"
10233 [(set (match_operand:V4SI 0 "register_operand" "=x")
10238 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10239 (parallel [(const_int 1)
10245 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10246 (parallel [(const_int 1)
10250 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10252 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10253 [(set_attr "type" "ssemuladd")
10254 (set_attr "mode" "TI")])
;; vpmadcsswd: V4SI result.  The RTL uses two groups of selections from
;; the V8HI operands 1 and 2, one beginning at index 0 and one beginning
;; at index 1 (ending at 7), and combines them with the V4SI register
;; addend in operand 3.
10256 (define_insn "xop_pmadcsswd"
10257 [(set (match_operand:V4SI 0 "register_operand" "=x")
10263 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10264 (parallel [(const_int 0)
10270 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10271 (parallel [(const_int 0)
10279 (parallel [(const_int 1)
10286 (parallel [(const_int 1)
10289 (const_int 7)])))))
10290 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10292 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10293 [(set_attr "type" "ssemuladd")
10294 (set_attr "mode" "TI")])
;; vpmadcswd: V4SI result.  The RTL uses two groups of selections from
;; the V8HI operands 1 and 2, one beginning at index 0 and one beginning
;; at index 1 (ending at 7), and combines them with the V4SI register
;; addend in operand 3.
10296 (define_insn "xop_pmadcswd"
10297 [(set (match_operand:V4SI 0 "register_operand" "=x")
10303 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10304 (parallel [(const_int 0)
10310 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10311 (parallel [(const_int 0)
10319 (parallel [(const_int 1)
10326 (parallel [(const_int 1)
10329 (const_int 7)])))))
10330 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10332 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10333 [(set_attr "type" "ssemuladd")
10334 (set_attr "mode" "TI")])
10336 ;; XOP parallel XMM conditional moves
;; 128-bit vpcmov expressed as an if_then_else: operand 3 is the
;; condition, operand 1 the "then" value and operand 2 the "else" value.
;; The two alternatives allow either operand 2 or operand 3 to be memory,
;; but not both.
10337 (define_insn "xop_pcmov_<mode>"
10338 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10339 (if_then_else:SSEMODE
10340 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10341 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10342 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10344 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10345 [(set_attr "type" "sse4arg")])
;; 256-bit vpcmov expressed as an if_then_else: operand 3 is the
;; condition, operand 1 the "then" value and operand 2 the "else" value.
;; The two alternatives allow either operand 2 or operand 3 to be memory,
;; but not both.
10347 (define_insn "xop_pcmov_<mode>256"
10348 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10349 (if_then_else:AVX256MODE
10350 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10351 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10352 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10354 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10355 [(set_attr "type" "sse4arg")])
10357 ;; XOP horizontal add/subtract instructions
;; vphaddbw: V16QI source (register or memory), V8HI result.  The RTL
;; selects two element groups from operand 1, one beginning at index 0
;; and one beginning at index 1 (ending at 15).
10358 (define_insn "xop_phaddbw"
10359 [(set (match_operand:V8HI 0 "register_operand" "=x")
10363 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10364 (parallel [(const_int 0)
10375 (parallel [(const_int 1)
10382 (const_int 15)])))))]
10384 "vphaddbw\t{%1, %0|%0, %1}"
10385 [(set_attr "type" "sseiadd1")])
;; vphaddbd: V16QI source (register or memory), V4SI result.  The RTL
;; selects four element groups from operand 1, beginning at indices
;; 0, 1, 2 and 3.
10387 (define_insn "xop_phaddbd"
10388 [(set (match_operand:V4SI 0 "register_operand" "=x")
10393 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10394 (parallel [(const_int 0)
10401 (parallel [(const_int 1)
10404 (const_int 13)]))))
10409 (parallel [(const_int 2)
10416 (parallel [(const_int 3)
10419 (const_int 15)]))))))]
10421 "vphaddbd\t{%1, %0|%0, %1}"
10422 [(set_attr "type" "sseiadd1")])
;; vphaddbq: V16QI source (register or memory), V2DI result.  The RTL
;; selects eight element groups from operand 1, beginning at indices
;; 0, 1, 2, 3 and 8, 9, 10, 11.
10424 (define_insn "xop_phaddbq"
10425 [(set (match_operand:V2DI 0 "register_operand" "=x")
10431 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10432 (parallel [(const_int 0)
10437 (parallel [(const_int 1)
10443 (parallel [(const_int 2)
10448 (parallel [(const_int 3)
10449 (const_int 7)])))))
10455 (parallel [(const_int 8)
10460 (parallel [(const_int 9)
10461 (const_int 13)]))))
10466 (parallel [(const_int 10)
10471 (parallel [(const_int 11)
10472 (const_int 15)])))))))]
10474 "vphaddbq\t{%1, %0|%0, %1}"
10475 [(set_attr "type" "sseiadd1")])
;; vphaddwd: V8HI source (register or memory), V4SI result.  The RTL
;; selects two element groups from operand 1, one beginning at index 0
;; and one beginning at index 1 (ending at 7).
10477 (define_insn "xop_phaddwd"
10478 [(set (match_operand:V4SI 0 "register_operand" "=x")
10482 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10483 (parallel [(const_int 0)
10490 (parallel [(const_int 1)
10493 (const_int 7)])))))]
10495 "vphaddwd\t{%1, %0|%0, %1}"
10496 [(set_attr "type" "sseiadd1")])
;; vphaddwq: V8HI source (register or memory), V2DI result.  The RTL
;; selects four element groups from operand 1, beginning at indices
;; 0, 1, 2 and 3.
10498 (define_insn "xop_phaddwq"
10499 [(set (match_operand:V2DI 0 "register_operand" "=x")
10504 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10505 (parallel [(const_int 0)
10510 (parallel [(const_int 1)
10516 (parallel [(const_int 2)
10521 (parallel [(const_int 3)
10522 (const_int 7)]))))))]
10524 "vphaddwq\t{%1, %0|%0, %1}"
10525 [(set_attr "type" "sseiadd1")])
;; vphadddq: V4SI source (register or memory), V2DI result.  The RTL
;; selects two element groups from operand 1, one beginning at index 0
;; and one consisting of elements 1 and 3.
10527 (define_insn "xop_phadddq"
10528 [(set (match_operand:V2DI 0 "register_operand" "=x")
10532 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10533 (parallel [(const_int 0)
10538 (parallel [(const_int 1)
10539 (const_int 3)])))))]
10541 "vphadddq\t{%1, %0|%0, %1}"
10542 [(set_attr "type" "sseiadd1")])
;; vphaddubw: V16QI source (register or memory), V8HI result.  The RTL
;; selects two element groups from operand 1, one beginning at index 0
;; and one beginning at index 1 (ending at 15).
10544 (define_insn "xop_phaddubw"
10545 [(set (match_operand:V8HI 0 "register_operand" "=x")
10549 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10550 (parallel [(const_int 0)
10561 (parallel [(const_int 1)
10568 (const_int 15)])))))]
10570 "vphaddubw\t{%1, %0|%0, %1}"
10571 [(set_attr "type" "sseiadd1")])
;; vphaddubd: V16QI source (register or memory), V4SI result.  The RTL
;; selects four element groups from operand 1, beginning at indices
;; 0, 1, 2 and 3.
10573 (define_insn "xop_phaddubd"
10574 [(set (match_operand:V4SI 0 "register_operand" "=x")
10579 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10580 (parallel [(const_int 0)
10587 (parallel [(const_int 1)
10590 (const_int 13)]))))
10595 (parallel [(const_int 2)
10602 (parallel [(const_int 3)
10605 (const_int 15)]))))))]
10607 "vphaddubd\t{%1, %0|%0, %1}"
10608 [(set_attr "type" "sseiadd1")])
;; vphaddubq: V16QI source (register or memory), V2DI result.  The RTL
;; selects eight element groups from operand 1, beginning at indices
;; 0, 1, 2, 3 and 8, 9, 10, 11.
10610 (define_insn "xop_phaddubq"
10611 [(set (match_operand:V2DI 0 "register_operand" "=x")
10617 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10618 (parallel [(const_int 0)
10623 (parallel [(const_int 1)
10629 (parallel [(const_int 2)
10634 (parallel [(const_int 3)
10635 (const_int 7)])))))
10641 (parallel [(const_int 8)
10646 (parallel [(const_int 9)
10647 (const_int 13)]))))
10652 (parallel [(const_int 10)
10657 (parallel [(const_int 11)
10658 (const_int 15)])))))))]
10660 "vphaddubq\t{%1, %0|%0, %1}"
10661 [(set_attr "type" "sseiadd1")])
;; vphadduwd: V8HI source (register or memory), V4SI result.  The RTL
;; selects two element groups from operand 1, one beginning at index 0
;; and one beginning at index 1 (ending at 7).
10663 (define_insn "xop_phadduwd"
10664 [(set (match_operand:V4SI 0 "register_operand" "=x")
10668 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10669 (parallel [(const_int 0)
10676 (parallel [(const_int 1)
10679 (const_int 7)])))))]
10681 "vphadduwd\t{%1, %0|%0, %1}"
10682 [(set_attr "type" "sseiadd1")])
;; vphadduwq: V8HI source (register or memory), V2DI result.  The RTL
;; selects four element groups from operand 1, beginning at indices
;; 0, 1, 2 and 3.
10684 (define_insn "xop_phadduwq"
10685 [(set (match_operand:V2DI 0 "register_operand" "=x")
10690 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10691 (parallel [(const_int 0)
10696 (parallel [(const_int 1)
10702 (parallel [(const_int 2)
10707 (parallel [(const_int 3)
10708 (const_int 7)]))))))]
10710 "vphadduwq\t{%1, %0|%0, %1}"
10711 [(set_attr "type" "sseiadd1")])
;; vphaddudq: V4SI source (register or memory), V2DI result.  The RTL
;; selects two element groups from operand 1, one beginning at index 0
;; and one consisting of elements 1 and 3.
10713 (define_insn "xop_phaddudq"
10714 [(set (match_operand:V2DI 0 "register_operand" "=x")
10718 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10719 (parallel [(const_int 0)
10724 (parallel [(const_int 1)
10725 (const_int 3)])))))]
10727 "vphaddudq\t{%1, %0|%0, %1}"
10728 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V16QI source (register or memory), V8HI result.  The RTL
;; selects two element groups from operand 1, one beginning at index 0
;; and one beginning at index 1 (ending at 15).
10730 (define_insn "xop_phsubbw"
10731 [(set (match_operand:V8HI 0 "register_operand" "=x")
10735 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10736 (parallel [(const_int 0)
10747 (parallel [(const_int 1)
10754 (const_int 15)])))))]
10756 "vphsubbw\t{%1, %0|%0, %1}"
10757 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V8HI source (register or memory), V4SI result.  The RTL
;; selects two element groups from operand 1, one beginning at index 0
;; and one beginning at index 1 (ending at 7).
10759 (define_insn "xop_phsubwd"
10760 [(set (match_operand:V4SI 0 "register_operand" "=x")
10764 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10765 (parallel [(const_int 0)
10772 (parallel [(const_int 1)
10775 (const_int 7)])))))]
10777 "vphsubwd\t{%1, %0|%0, %1}"
10778 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V4SI source (register or memory), V2DI result.  The RTL
;; selects two element groups from operand 1, one beginning at index 0
;; and one consisting of elements 1 and 3.
10780 (define_insn "xop_phsubdq"
10781 [(set (match_operand:V2DI 0 "register_operand" "=x")
10785 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10786 (parallel [(const_int 0)
10791 (parallel [(const_int 1)
10792 (const_int 3)])))))]
10794 "vphsubdq\t{%1, %0|%0, %1}"
10795 [(set_attr "type" "sseiadd1")])
10797 ;; XOP permute instructions
;; vpperm as an UNSPEC_XOP_PERMUTE over three V16QI operands.  Operand 1
;; must be a register; the alternatives let either operand 2 or operand 3
;; be memory, and the insn condition rejects the case where both are.
10798 (define_insn "xop_pperm"
10799 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10801 [(match_operand:V16QI 1 "register_operand" "x,x")
10802 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10803 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10804 UNSPEC_XOP_PERMUTE))]
10805 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10806 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10807 [(set_attr "type" "sse4arg")
10808 (set_attr "mode" "TI")])
10810 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors (operands 1 and 2) into one V4SI result using
;; vpperm.  Operand 3 is the V16QI vpperm selector, attached to the
;; pattern with a (use ...).  At most one of operands 2 and 3 may be
;; memory.
10811 (define_insn "xop_pperm_pack_v2di_v4si"
10812 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10815 (match_operand:V2DI 1 "register_operand" "x,x"))
10817 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10818 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10819 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10820 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10821 [(set_attr "type" "sse4arg")
10822 (set_attr "mode" "TI")])
;; Pack two V4SI vectors (operands 1 and 2) into one V8HI result using
;; vpperm.  Operand 3 is the V16QI vpperm selector, attached to the
;; pattern with a (use ...).  At most one of operands 2 and 3 may be
;; memory.
10824 (define_insn "xop_pperm_pack_v4si_v8hi"
10825 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10828 (match_operand:V4SI 1 "register_operand" "x,x"))
10830 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10831 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10832 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10833 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10834 [(set_attr "type" "sse4arg")
10835 (set_attr "mode" "TI")])
;; Pack two V8HI vectors (operands 1 and 2) into one V16QI result using
;; vpperm.  Operand 3 is the V16QI vpperm selector, attached to the
;; pattern with a (use ...).  At most one of operands 2 and 3 may be
;; memory.
10837 (define_insn "xop_pperm_pack_v8hi_v16qi"
10838 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10841 (match_operand:V8HI 1 "register_operand" "x,x"))
10843 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10844 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10845 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10846 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10847 [(set_attr "type" "sse4arg")
10848 (set_attr "mode" "TI")])
10850 ;; XOP packed rotate instructions
;; Rotate-left expander for SSEMODE1248 with an SI count.  When the count
;; is not an immediate accepted by const_0_to_<sserotatemax>_operand, it
;; is converted to <ssescalarmode> if necessary, replicated into all
;; <ssescalarnum> elements of a fresh vector register via vec_init, and
;; the variable-count xop_vrotl<mode>3 pattern is emitted.
10851 (define_expand "rotl<mode>3"
10852 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10853 (rotate:SSEMODE1248
10854 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10855 (match_operand:SI 2 "general_operand")))]
10858 /* If we were given a scalar, convert it to parallel */
10859 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10861 rtvec vs = rtvec_alloc (<ssescalarnum>);
10862 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10863 rtx reg = gen_reg_rtx (<MODE>mode);
10864 rtx op2 = operands[2];
10867 if (GET_MODE (op2) != <ssescalarmode>mode)
10869 op2 = gen_reg_rtx (<ssescalarmode>mode);
10870 convert_move (op2, operands[2], false);
10873 for (i = 0; i < <ssescalarnum>; i++)
10874 RTVEC_ELT (vs, i) = op2;
10876 emit_insn (gen_vec_init<mode> (reg, par));
10877 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for SSEMODE1248 with an SI count.  When the
;; count is not an immediate accepted by const_0_to_<sserotatemax>_operand,
;; it is converted to <ssescalarmode> if necessary and replicated into a
;; vector via vec_init; that vector is then negated and passed to
;; xop_vrotl<mode>3, so the right rotate is done as a left rotate by the
;; negated per-element counts.
10882 (define_expand "rotr<mode>3"
10883 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10884 (rotatert:SSEMODE1248
10885 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10886 (match_operand:SI 2 "general_operand")))]
10889 /* If we were given a scalar, convert it to parallel */
10890 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10892 rtvec vs = rtvec_alloc (<ssescalarnum>);
10893 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10894 rtx neg = gen_reg_rtx (<MODE>mode);
10895 rtx reg = gen_reg_rtx (<MODE>mode);
10896 rtx op2 = operands[2];
10899 if (GET_MODE (op2) != <ssescalarmode>mode)
10901 op2 = gen_reg_rtx (<ssescalarmode>mode);
10902 convert_move (op2, operands[2], false);
10905 for (i = 0; i < <ssescalarnum>; i++)
10906 RTVEC_ELT (vs, i) = op2;
10908 emit_insn (gen_vec_init<mode> (reg, par));
10909 emit_insn (gen_neg<mode>2 (neg, reg));
10910 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: operand 1 (register or memory) rotated by
;; the constant operand 2, which must satisfy
;; const_0_to_<sserotatemax>_operand.  Emitted as vprot<ssevecsize> with
;; a one-byte immediate.
10915 (define_insn "xop_rotl<mode>3"
10916 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10917 (rotate:SSEMODE1248
10918 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10919 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10921 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10922 [(set_attr "type" "sseishft")
10923 (set_attr "length_immediate" "1")
10924 (set_attr "mode" "TI")])
;; Rotate right by an immediate.  The insn is emitted as a vprot (rotate
;; left) whose immediate, stored in operands[3], is the element width in
;; bits minus the requested count in operand 2.
;; The element width is taken from the scalar element mode.  It must not
;; be derived from <ssescalarnum>, which is the number of elements in the
;; vector: <ssescalarnum> * 8 equals the element width only for V4SI
;; (it is 128 for V16QI, 64 for V8HI and 16 for V2DI).
10926 (define_insn "xop_rotr<mode>3"
10927 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10928 (rotatert:SSEMODE1248
10929 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10930 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10933 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10934 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
10936 [(set_attr "type" "sseishft")
10937 (set_attr "length_immediate" "1")
10938 (set_attr "mode" "TI")])
;; Variable rotate right with a per-element count vector in operand 2:
;; negate the counts into a fresh register and emit xop_vrotl<mode>3
;; with the negated counts.
10940 (define_expand "vrotr<mode>3"
10941 [(match_operand:SSEMODE1248 0 "register_operand" "")
10942 (match_operand:SSEMODE1248 1 "register_operand" "")
10943 (match_operand:SSEMODE1248 2 "register_operand" "")]
10946 rtx reg = gen_reg_rtx (<MODE>mode);
10947 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10948 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Variable rotate left with a per-element count vector in operand 2:
;; forwards all three operands unchanged to xop_vrotl<mode>3.
10952 (define_expand "vrotl<mode>3"
10953 [(match_operand:SSEMODE1248 0 "register_operand" "")
10954 (match_operand:SSEMODE1248 1 "register_operand" "")
10955 (match_operand:SSEMODE1248 2 "register_operand" "")]
10958 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count vprot.  The RTL is an if_then_else between a rotate
;; (left) of operand 1 and a rotatert whose count is the negation of
;; operand 2.  Either operand 1 or operand 2 may be memory, but the insn
;; condition rejects both being memory.
10962 (define_insn "xop_vrotl<mode>3"
10963 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10964 (if_then_else:SSEMODE1248
10966 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
10968 (rotate:SSEMODE1248
10969 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
10971 (rotatert:SSEMODE1248
10973 (neg:SSEMODE1248 (match_dup 2)))))]
10974 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10975 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10976 [(set_attr "type" "sseishft")
10977 (set_attr "prefix_data16" "0")
10978 (set_attr "prefix_extra" "2")
10979 (set_attr "mode" "TI")])
10981 ;; XOP packed shift instructions.
10982 ;; FIXME: add V2DI back in
;; Variable logical shift right for SSEMODE124: negate the per-element
;; counts in operand 2 into a fresh register and emit xop_lshl<mode>3
;; with the negated counts.
10983 (define_expand "vlshr<mode>3"
10984 [(match_operand:SSEMODE124 0 "register_operand" "")
10985 (match_operand:SSEMODE124 1 "register_operand" "")
10986 (match_operand:SSEMODE124 2 "register_operand" "")]
10989 rtx neg = gen_reg_rtx (<MODE>mode);
10990 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10991 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Variable arithmetic shift right for SSEMODE124: negate the per-element
;; counts in operand 2 into a fresh register and emit xop_ashl<mode>3
;; with the negated counts.
10995 (define_expand "vashr<mode>3"
10996 [(match_operand:SSEMODE124 0 "register_operand" "")
10997 (match_operand:SSEMODE124 1 "register_operand" "")
10998 (match_operand:SSEMODE124 2 "register_operand" "")]
11001 rtx neg = gen_reg_rtx (<MODE>mode);
11002 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11003 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Variable shift left for SSEMODE124: forwards all three operands
;; unchanged to xop_ashl<mode>3.
11007 (define_expand "vashl<mode>3"
11008 [(match_operand:SSEMODE124 0 "register_operand" "")
11009 (match_operand:SSEMODE124 1 "register_operand" "")
11010 (match_operand:SSEMODE124 2 "register_operand" "")]
11013 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count vpsha.  The RTL is an if_then_else between an ashift of
;; operand 1 and an ashiftrt whose count is the negation of operand 2.
;; Either operand 1 or operand 2 may be memory, but the insn condition
;; rejects both being memory.
11017 (define_insn "xop_ashl<mode>3"
11018 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11019 (if_then_else:SSEMODE1248
11021 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11023 (ashift:SSEMODE1248
11024 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11026 (ashiftrt:SSEMODE1248
11028 (neg:SSEMODE1248 (match_dup 2)))))]
11029 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11030 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11031 [(set_attr "type" "sseishft")
11032 (set_attr "prefix_data16" "0")
11033 (set_attr "prefix_extra" "2")
11034 (set_attr "mode" "TI")])
;; XOP per-element variable logical shift for the SSEMODE1248 modes.
;; The RTL chooses between an ashift of operand 1 by operand 2 and an
;; lshiftrt by the negated count.  The insn condition forbids both inputs
;; being in memory at once.  Emits vpshl<ssevecsize>.
11036 (define_insn "xop_lshl<mode>3"
11037 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11038 (if_then_else:SSEMODE1248
11040 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11042 (ashift:SSEMODE1248
11043 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11045 (lshiftrt:SSEMODE1248
11047 (neg:SSEMODE1248 (match_dup 2)))))]
11048 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11049 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11050 [(set_attr "type" "sseishft")
11051 (set_attr "prefix_data16" "0")
11052 (set_attr "prefix_extra" "2")
11053 (set_attr "mode" "TI")])
11055 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar SI count (operand 2).
;; Builds a 16-element PARALLEL with every element set to operand 2,
;; materialises it in a V16QI register with vec_initv16qi, then emits
;; xop_ashlv16qi3 using that vector as the per-element count.
11056 (define_expand "ashlv16qi3"
11057 [(match_operand:V16QI 0 "register_operand" "")
11058 (match_operand:V16QI 1 "register_operand" "")
11059 (match_operand:SI 2 "nonmemory_operand" "")]
11062 rtvec vs = rtvec_alloc (16);
11063 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11064 rtx reg = gen_reg_rtx (V16QImode);
11066 for (i = 0; i < 16; i++)
11067 RTVEC_ELT (vs, i) = operands[2];
11069 emit_insn (gen_vec_initv16qi (reg, par));
11070 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI shift by a scalar SI count (operand 2), routed through the XOP
;; logical-shift insn.  Builds a 16-element PARALLEL with every element
;; set to operand 2, materialises it with vec_initv16qi, then emits
;; xop_lshlv16qi3 using that vector as the per-element count.
11074 (define_expand "lshlv16qi3"
11075 [(match_operand:V16QI 0 "register_operand" "")
11076 (match_operand:V16QI 1 "register_operand" "")
11077 (match_operand:SI 2 "nonmemory_operand" "")]
11080 rtvec vs = rtvec_alloc (16);
11081 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11082 rtx reg = gen_reg_rtx (V16QImode);
11084 for (i = 0; i < 16; i++)
11085 RTVEC_ELT (vs, i) = operands[2];
11087 emit_insn (gen_vec_initv16qi (reg, par));
11088 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SI count, expressed as an
;; xop_ashlv16qi3 with negated counts.
;; A CONST_INT count is negated at expand time before being broadcast.
;; For a non-constant count the broadcast vector is negated with
;; negv16qi2 and that negated vector is used as the count.
;; NOTE(review): the final emit using REG directly is presumably the
;; constant-count arm; the `else` line is not visible here -- confirm.
11092 (define_expand "ashrv16qi3"
11093 [(match_operand:V16QI 0 "register_operand" "")
11094 (match_operand:V16QI 1 "register_operand" "")
11095 (match_operand:SI 2 "nonmemory_operand" "")]
11098 rtvec vs = rtvec_alloc (16);
11099 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11100 rtx reg = gen_reg_rtx (V16QImode);
11102 rtx ele = ((CONST_INT_P (operands[2]))
11103 ? GEN_INT (- INTVAL (operands[2]))
11106 for (i = 0; i < 16; i++)
11107 RTVEC_ELT (vs, i) = ele;
11109 emit_insn (gen_vec_initv16qi (reg, par));
11111 if (!CONST_INT_P (operands[2]))
11113 rtx neg = gen_reg_rtx (V16QImode);
11114 emit_insn (gen_negv16qi2 (neg, reg));
11115 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11118 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DI count, expressed as an
;; xop_ashlv2di3 with a negated count in both elements.
;; The negated count ELE is obtained in one of three ways:
;;   - CONST_INT count: negated at expand time;
;;   - count whose mode is not DImode: converted to DImode with
;;     convert_move (unsignedp = false), then negated with negdi2;
;;   - otherwise: negated directly with negdi2.
;; ELE is then placed in both vector elements via vec_initv2di.
11123 (define_expand "ashrv2di3"
11124 [(match_operand:V2DI 0 "register_operand" "")
11125 (match_operand:V2DI 1 "register_operand" "")
11126 (match_operand:DI 2 "nonmemory_operand" "")]
11129 rtvec vs = rtvec_alloc (2);
11130 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11131 rtx reg = gen_reg_rtx (V2DImode);
11134 if (CONST_INT_P (operands[2]))
11135 ele = GEN_INT (- INTVAL (operands[2]));
11136 else if (GET_MODE (operands[2]) != DImode)
11138 rtx move = gen_reg_rtx (DImode);
11139 ele = gen_reg_rtx (DImode);
11140 convert_move (move, operands[2], false);
11141 emit_insn (gen_negdi2 (ele, move));
11145 ele = gen_reg_rtx (DImode);
11146 emit_insn (gen_negdi2 (ele, operands[2]));
11149 RTVEC_ELT (vs, 0) = ele;
11150 RTVEC_ELT (vs, 1) = ele;
11151 emit_insn (gen_vec_initv2di (reg, par));
11152 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11156 ;; XOP FRCZ support
;; Packed FRCZ for the SSEMODEF2P modes: one register-or-memory source,
;; register destination.  Emits vfrcz<ssemodesuffix>.
11158 (define_insn "xop_frcz<mode>2"
11159 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11161 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11164 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11165 [(set_attr "type" "ssecvt1")
11166 (set_attr "mode" "<MODE>")])
;; Scalar FRCZ, modelled as a vec_merge of the operation on operand 2
;; with operand 1, which is tied to the destination (constraint "0").
;; Emits vfrcz<ssescalarmodesuffix> with operand 2 as the only source.
11169 (define_insn "xop_vmfrcz<mode>2"
11170 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11171 (vec_merge:SSEMODEF2P
11173 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11175 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11178 "vfrcz<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
11179 [(set_attr "type" "ssecvt1")
11180 (set_attr "mode" "<MODE>")])
;; Packed FRCZ for the FMA4MODEF4 modes (the "256" variant of
;; xop_frcz<mode>2).  Emits vfrcz<ssemodesuffix>.
11182 (define_insn "xop_frcz<mode>2256"
11183 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11185 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11188 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11189 [(set_attr "type" "ssecvt1")
11190 (set_attr "mode" "<MODE>")])
;; XOP integer vector compare for the SSEMODE1248 modes.  Operand 1 is
;; the comparison code (any code accepted by ix86_comparison_int_operator),
;; applied to register operand 2 and register-or-memory operand 3.
;; Emits vpcom<cond><ssevecsize>; the condition text comes from %Y1.
11192 (define_insn "xop_maskcmp<mode>3"
11193 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11194 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11195 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11196 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11198 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11199 [(set_attr "type" "sse4arg")
11200 (set_attr "prefix_data16" "0")
11201 (set_attr "prefix_rep" "0")
11202 (set_attr "prefix_extra" "2")
11203 (set_attr "length_immediate" "1")
11204 (set_attr "mode" "TI")])
;; XOP integer vector compare for the codes accepted by
;; ix86_comparison_uns_operator.  Same operand layout as
;; xop_maskcmp<mode>3, but emits the "u" form: vpcom<cond>u<ssevecsize>.
11206 (define_insn "xop_maskcmp_uns<mode>3"
11207 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11208 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11209 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11210 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11212 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11213 [(set_attr "type" "ssecmp")
11214 (set_attr "prefix_data16" "0")
11215 (set_attr "prefix_rep" "0")
11216 (set_attr "prefix_extra" "2")
11217 (set_attr "length_immediate" "1")
11218 (set_attr "mode" "TI")])
11220 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11221 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11222 ;; the exact instruction generated for the intrinsic.
;; Same comparison and output template as xop_maskcmp_uns<mode>3, but the
;; comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP so the RTL is opaque.
11223 (define_insn "xop_maskcmp_uns2<mode>3"
11224 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11225 (unspec:SSEMODE1248
11226 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11227 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11228 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11229 UNSPEC_XOP_UNSIGNED_CMP))]
11231 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11232 [(set_attr "type" "ssecmp")
11233 (set_attr "prefix_data16" "0")
11234 (set_attr "prefix_extra" "2")
11235 (set_attr "length_immediate" "1")
11236 (set_attr "mode" "TI")])
11238 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11239 ;; being added here to be complete.
;; Operand 3 is a compile-time integer selector: a nonzero value emits
;; vpcomtrue<ssevecsize>, zero emits vpcomfalse<ssevecsize>.  Operands 1
;; and 2 are passed through as the instruction's two sources.
11240 (define_insn "xop_pcom_tf<mode>3"
11241 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11242 (unspec:SSEMODE1248
11243 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11244 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11245 (match_operand:SI 3 "const_int_operand" "n")]
11246 UNSPEC_XOP_TRUEFALSE))]
11249 return ((INTVAL (operands[3]) != 0)
11250 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11251 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11253 [(set_attr "type" "ssecmp")
11254 (set_attr "prefix_data16" "0")
11255 (set_attr "prefix_extra" "2")
11256 (set_attr "length_immediate" "1")
11257 (set_attr "mode" "TI")])
;; Two-source permute for the AVXMODEF2P modes.  Operands 1 and 2 are the
;; data sources, operand 3 is the selector vector in <avxpermvecmode>,
;; and operand 4 is an immediate restricted to 0..3.
;; Emits vpermil2<ssemodesuffix>.
11259 (define_insn "xop_vpermil2<mode>3"
11260 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11262 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11263 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11264 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11265 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11268 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11269 [(set_attr "type" "sse4arg")
11270 (set_attr "length_immediate" "1")
11271 (set_attr "mode" "<MODE>")])
11273 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded AES encrypt round, enabled when both TARGET_AES and
;; TARGET_AVX hold.  Three-operand form: the destination is independent
;; of source operand 1.  Emits vaesenc.
11274 (define_insn "*avx_aesenc"
11275 [(set (match_operand:V2DI 0 "register_operand" "=x")
11276 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11277 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11279 "TARGET_AES && TARGET_AVX"
11280 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11281 [(set_attr "type" "sselog1")
11282 (set_attr "prefix_extra" "1")
11283 (set_attr "prefix" "vex")
11284 (set_attr "mode" "TI")])
;; Two-operand AES encrypt round: operand 1 is tied to the destination
;; (constraint "0"), operand 2 is register or memory.  Emits aesenc.
11286 (define_insn "aesenc"
11287 [(set (match_operand:V2DI 0 "register_operand" "=x")
11288 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11289 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11292 "aesenc\t{%2, %0|%0, %2}"
11293 [(set_attr "type" "sselog1")
11294 (set_attr "prefix_extra" "1")
11295 (set_attr "mode" "TI")])
;; VEX-encoded form of UNSPEC_AESENCLAST, enabled when both TARGET_AES
;; and TARGET_AVX hold.  Three-operand form.  Emits vaesenclast.
11297 (define_insn "*avx_aesenclast"
11298 [(set (match_operand:V2DI 0 "register_operand" "=x")
11299 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11300 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11301 UNSPEC_AESENCLAST))]
11302 "TARGET_AES && TARGET_AVX"
11303 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11304 [(set_attr "type" "sselog1")
11305 (set_attr "prefix_extra" "1")
11306 (set_attr "prefix" "vex")
11307 (set_attr "mode" "TI")])
;; Two-operand form of UNSPEC_AESENCLAST: operand 1 is tied to the
;; destination (constraint "0").  Emits aesenclast.
11309 (define_insn "aesenclast"
11310 [(set (match_operand:V2DI 0 "register_operand" "=x")
11311 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11312 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11313 UNSPEC_AESENCLAST))]
11315 "aesenclast\t{%2, %0|%0, %2}"
11316 [(set_attr "type" "sselog1")
11317 (set_attr "prefix_extra" "1")
11318 (set_attr "mode" "TI")])
;; VEX-encoded AES decrypt round, enabled when both TARGET_AES and
;; TARGET_AVX hold.  Three-operand form.  Emits vaesdec.
11320 (define_insn "*avx_aesdec"
11321 [(set (match_operand:V2DI 0 "register_operand" "=x")
11322 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11323 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11325 "TARGET_AES && TARGET_AVX"
11326 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11327 [(set_attr "type" "sselog1")
11328 (set_attr "prefix_extra" "1")
11329 (set_attr "prefix" "vex")
11330 (set_attr "mode" "TI")])
;; Two-operand AES decrypt round: operand 1 is tied to the destination
;; (constraint "0"), operand 2 is register or memory.  Emits aesdec.
11332 (define_insn "aesdec"
11333 [(set (match_operand:V2DI 0 "register_operand" "=x")
11334 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11335 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11338 "aesdec\t{%2, %0|%0, %2}"
11339 [(set_attr "type" "sselog1")
11340 (set_attr "prefix_extra" "1")
11341 (set_attr "mode" "TI")])
;; VEX-encoded form of UNSPEC_AESDECLAST, enabled when both TARGET_AES
;; and TARGET_AVX hold.  Three-operand form.  Emits vaesdeclast.
11343 (define_insn "*avx_aesdeclast"
11344 [(set (match_operand:V2DI 0 "register_operand" "=x")
11345 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11346 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11347 UNSPEC_AESDECLAST))]
11348 "TARGET_AES && TARGET_AVX"
11349 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11350 [(set_attr "type" "sselog1")
11351 (set_attr "prefix_extra" "1")
11352 (set_attr "prefix" "vex")
11353 (set_attr "mode" "TI")])
;; Two-operand form of UNSPEC_AESDECLAST: operand 1 is tied to the
;; destination (constraint "0").  Emits aesdeclast.
11355 (define_insn "aesdeclast"
11356 [(set (match_operand:V2DI 0 "register_operand" "=x")
11357 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11358 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11359 UNSPEC_AESDECLAST))]
11361 "aesdeclast\t{%2, %0|%0, %2}"
11362 [(set_attr "type" "sselog1")
11363 (set_attr "prefix_extra" "1")
11364 (set_attr "mode" "TI")])
;; Single-source AES instruction (register or memory input).  One pattern
;; serves both encodings: the template uses the %v prefix and the
;; "prefix" attribute is maybe_vex.  Emits aesimc / vaesimc.
11366 (define_insn "aesimc"
11367 [(set (match_operand:V2DI 0 "register_operand" "=x")
11368 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11371 "%vaesimc\t{%1, %0|%0, %1}"
11372 [(set_attr "type" "sselog1")
11373 (set_attr "prefix_extra" "1")
11374 (set_attr "prefix" "maybe_vex")
11375 (set_attr "mode" "TI")])
;; AES key generation assist: operand 1 is the register-or-memory source
;; and operand 2 an 8-bit immediate (0..255).  One pattern serves both
;; encodings via the %v template prefix and the maybe_vex attribute.
11377 (define_insn "aeskeygenassist"
11378 [(set (match_operand:V2DI 0 "register_operand" "=x")
11379 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11380 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11381 UNSPEC_AESKEYGENASSIST))]
11383 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11384 [(set_attr "type" "sselog1")
11385 (set_attr "prefix_extra" "1")
11386 (set_attr "length_immediate" "1")
11387 (set_attr "prefix" "maybe_vex")
11388 (set_attr "mode" "TI")])
;; VEX-encoded pclmulqdq, enabled when both TARGET_PCLMUL and TARGET_AVX
;; hold.  Operands 1 and 2 are the sources and operand 3 is an 8-bit
;; immediate (0..255).  Emits vpclmulqdq.
11390 (define_insn "*vpclmulqdq"
11391 [(set (match_operand:V2DI 0 "register_operand" "=x")
11392 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11393 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11394 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11396 "TARGET_PCLMUL && TARGET_AVX"
11397 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11398 [(set_attr "type" "sselog1")
11399 (set_attr "prefix_extra" "1")
11400 (set_attr "length_immediate" "1")
11401 (set_attr "prefix" "vex")
11402 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination
;; (constraint "0"), operand 2 is register or memory, and operand 3 is an
;; 8-bit immediate (0..255).
11404 (define_insn "pclmulqdq"
11405 [(set (match_operand:V2DI 0 "register_operand" "=x")
11406 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11407 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11408 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11411 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11412 [(set_attr "type" "sselog1")
11413 (set_attr "prefix_extra" "1")
11414 (set_attr "length_immediate" "1")
11415 (set_attr "mode" "TI")])
;; Builds the PARALLEL that represents vzeroall.  Element 0 is an
;; UNSPEC_VOLATILE marker; elements 1..nregs each SET one SSE register,
;; viewed in V8SImode, to zero.  nregs is 16 when TARGET_64BIT and 8
;; otherwise.
11417 (define_expand "avx_vzeroall"
11418 [(match_par_dup 0 [(const_int 0)])]
11421 int nregs = TARGET_64BIT ? 16 : 8;
11424 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11426 XVECEXP (operands[0], 0, 0)
11427 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11430 for (regno = 0; regno < nregs; regno++)
11431 XVECEXP (operands[0], 0, regno + 1)
11432 = gen_rtx_SET (VOIDmode,
11433 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11434 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL marker.  The insn has no modrm
;; byte and no memory operand (see the attributes).
11437 (define_insn "*avx_vzeroall"
11438 [(match_parallel 0 "vzeroall_operation"
11439 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11442 [(set_attr "type" "sse")
11443 (set_attr "modrm" "0")
11444 (set_attr "memory" "none")
11445 (set_attr "prefix" "vex")
11446 (set_attr "mode" "OI")])
11448 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Builds the PARALLEL that represents vzeroupper.  Element 0 is the
;; UNSPECV_VZEROUPPER marker; elements 1..nregs each CLOBBER one SSE
;; register viewed in V8SImode.  nregs is 16 when TARGET_64BIT and 8
;; otherwise.
11449 (define_expand "avx_vzeroupper"
11450 [(match_par_dup 0 [(const_int 0)])]
11453 int nregs = TARGET_64BIT ? 16 : 8;
11456 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11458 XVECEXP (operands[0], 0, 0)
11459 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11460 UNSPECV_VZEROUPPER);
11462 for (regno = 0; regno < nregs; regno++)
11463 XVECEXP (operands[0], 0, regno + 1)
11464 = gen_rtx_CLOBBER (VOIDmode,
11465 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; Matches a PARALLEL accepted by the vzeroupper_operation predicate whose
;; first element is the UNSPECV_VZEROUPPER marker.  The insn has no modrm
;; byte and no memory operand (see the attributes).
11468 (define_insn "*avx_vzeroupper"
11469 [(match_parallel 0 "vzeroupper_operation"
11470 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11473 [(set_attr "type" "sse")
11474 (set_attr "modrm" "0")
11475 (set_attr "memory" "none")
11476 (set_attr "prefix" "vex")
11477 (set_attr "mode" "OI")])
;; Broadcast a scalar to every element of a 256-bit vector
;; (AVX256MODE24P).  A memory source is handled by
;; vbroadcast<ssescalarmodesuffix>.  A register source is split after
;; reload into two steps: duplicate the scalar into operand 2, which is
;; the destination register re-viewed in <avxhalfvecmode>, then
;; vec_concat that half with itself.
11479 (define_insn_and_split "vec_dup<mode>"
11480 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11481 (vec_duplicate:AVX256MODE24P
11482 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11485 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11487 "&& reload_completed && REG_P (operands[1])"
11488 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11489 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11490 "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
11491 [(set_attr "type" "ssemov")
11492 (set_attr "prefix_extra" "1")
11493 (set_attr "prefix" "vex")
11494 (set_attr "mode" "V8SF")])
;; Replicate a 128-bit half (operand 1) into a 256-bit vector.  There are
;; three alternatives, keyed on where operand 1 lives:
;;   m   memory                    -> vbroadcastf128
;;   0   tied to the destination   -> vinsertf128 with immediate 1
;;   ?x  any other SSE register    -> vperm2f128 with immediate 0
11496 (define_insn "avx_vbroadcastf128_<mode>"
11497 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11498 (vec_concat:AVX256MODE
11499 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11503 vbroadcastf128\t{%1, %0|%0, %1}
11504 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11505 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11506 [(set_attr "type" "ssemov,sselog1,sselog1")
11507 (set_attr "prefix_extra" "1")
11508 (set_attr "length_immediate" "0,1,1")
11509 (set_attr "prefix" "vex")
11510 (set_attr "mode" "V4SF,V8SF,V8SF")])
11512 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11513 ;; If it so happens that the input is in memory, use vbroadcast.
11514 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element ELT, where ELT is the integer in operand 3.
;; Of the two output sequences visible in the switch, one narrows operand
;; 1 to the SFmode word at byte offset ELT * 4 and emits vbroadcastss;
;; the other emits vpermilps with immediate ELT * 0x55, which repeats the
;; 2-bit index in all four selector fields.
;; NOTE(review): the `case` labels are not visible here; presumably the
;; two memory alternatives take the vbroadcastss path -- confirm.
11515 (define_insn "*avx_vperm_broadcast_v4sf"
11516 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11518 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11519 (match_parallel 2 "avx_vbroadcast_operand"
11520 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11523 int elt = INTVAL (operands[3]);
11524 switch (which_alternative)
11528 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11529 return "vbroadcastss\t{%1, %0|%0, %1}";
11531 operands[2] = GEN_INT (elt * 0x55);
11532 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11534 gcc_unreachable ();
11537 [(set_attr "type" "ssemov,ssemov,sselog1")
11538 (set_attr "prefix_extra" "1")
11539 (set_attr "length_immediate" "0,0,1")
11540 (set_attr "prefix" "vex")
11541 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast of one element (AVX256MODEF2P), always split after
;; reload.  ELT is the integer in operand 3.  Two strategies are visible:
;;   - vpermil replicates the wanted element across its 128-bit lane, then
;;     vperm2f128 copies lane ELT / (<ssescalarnum> / 2) into both lanes
;;     of the destination;
;;   - operand 1 is narrowed with adjust_address_nv to the scalar at byte
;;     offset ELT * GET_MODE_SIZE (<avxscalarmode>mode), so the replacement
;;     vec_duplicate pattern applies.
;; NOTE(review): the test that picks between them is not visible here;
;; presumably register vs. memory source -- confirm.
11543 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11544 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11545 (vec_select:AVX256MODEF2P
11546 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11547 (match_parallel 2 "avx_vbroadcast_operand"
11548 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11551 "&& reload_completed"
11552 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11554 rtx op0 = operands[0], op1 = operands[1];
11555 int elt = INTVAL (operands[3]);
11561 /* Shuffle element we care about into all elements of the 128-bit lane.
11562 The other lane gets shuffled too, but we don't care. */
11563 if (<MODE>mode == V4DFmode)
11564 mask = (elt & 1 ? 15 : 0);
11566 mask = (elt & 3) * 0x55;
11567 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11569 /* Shuffle the lane we care about into both lanes of the dest. */
11570 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11571 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11575 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11576 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; vpermil expander for the AVXMODEFDP modes.  Decodes the 8-bit immediate
;; (operand 2) into an explicit vec_select PARALLEL using one mask bit
;; per element.  For V4DF, bits 2 and 3 select within the upper 128-bit
;; lane, so their indices are offset by 2.
11579 (define_expand "avx_vpermil<mode>"
11580 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11581 (vec_select:AVXMODEFDP
11582 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11583 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11586 int mask = INTVAL (operands[2]);
11587 rtx perm[<ssescalarnum>];
11589 perm[0] = GEN_INT (mask & 1);
11590 perm[1] = GEN_INT ((mask >> 1) & 1);
11591 if (<MODE>mode == V4DFmode)
11593 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11594 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11598 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermil expander for the AVXMODEFSP modes.  Decodes the 8-bit immediate
;; (operand 2) into an explicit vec_select PARALLEL using a 2-bit field
;; per element.  For V8SF, elements 4..7 reuse the same four fields, with
;; their indices offset by 4 to stay in the upper 128-bit lane.
11601 (define_expand "avx_vpermil<mode>"
11602 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11603 (vec_select:AVXMODEFSP
11604 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11605 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11608 int mask = INTVAL (operands[2]);
11609 rtx perm[<ssescalarnum>];
11611 perm[0] = GEN_INT (mask & 3);
11612 perm[1] = GEN_INT ((mask >> 2) & 3);
11613 perm[2] = GEN_INT ((mask >> 4) & 3);
11614 perm[3] = GEN_INT ((mask >> 6) & 3);
11615 if (<MODE>mode == V8SFmode)
11617 perm[4] = GEN_INT ((mask & 3) + 4);
11618 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11619 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11620 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11624 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate-form vpermil for the AVXMODEF2P modes.  The selector arrives
;; as a PARALLEL accepted by avx_vpermilp_<mode>_operand.  The output code
;; takes the immediate as avx_vpermilp_parallel (...) - 1 and substitutes
;; it for operand 2 before printing.
11627 (define_insn "*avx_vpermilp<mode>"
11628 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11629 (vec_select:AVXMODEF2P
11630 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11631 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11632 [(match_operand 3 "const_int_operand" "")])))]
11635 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11636 operands[2] = GEN_INT (mask);
11637 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11639 [(set_attr "type" "sselog")
11640 (set_attr "prefix_extra" "1")
11641 (set_attr "length_immediate" "1")
11642 (set_attr "prefix" "vex")
11643 (set_attr "mode" "<MODE>")])
;; Variable-selector vpermil for the AVXMODEF2P modes.  Operand 1 is the
;; data register and operand 2 the selector vector in <avxpermvecmode>
;; (register or memory).  Emits vpermil<ssemodesuffix>.
11645 (define_insn "avx_vpermilvar<mode>3"
11646 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11648 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11649 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11652 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11653 [(set_attr "type" "sselog")
11654 (set_attr "prefix_extra" "1")
11655 (set_attr "prefix" "vex")
11656 (set_attr "mode" "<MODE>")])
;; vperm2f128 expander.  The default RTL is the opaque UNSPEC_VPERMIL2F128.
;; When neither bit 3 nor bit 7 of the immediate is set
;; ((mask & 0x88) == 0), the operation is rebuilt as a plain vec_select
;; from the vec_concat of operands 1 and 2.  Mask bits 0-1 pick the
;; 128-bit source lane for the low half of the result and bits 4-5 pick
;; it for the high half; each pick expands to nelt/2 consecutive indices.
11658 (define_expand "avx_vperm2f128<mode>3"
11659 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11660 (unspec:AVX256MODE2P
11661 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11662 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11663 (match_operand:SI 3 "const_0_to_255_operand" "")]
11664 UNSPEC_VPERMIL2F128))]
11667 int mask = INTVAL (operands[3]);
11668 if ((mask & 0x88) == 0)
11670 rtx perm[<ssescalarnum>], t1, t2;
11671 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11673 base = (mask & 3) * nelt2;
11674 for (i = 0; i < nelt2; ++i)
11675 perm[i] = GEN_INT (base + i);
11677 base = ((mask >> 4) & 3) * nelt2;
11678 for (i = 0; i < nelt2; ++i)
11679 perm[i + nelt2] = GEN_INT (base + i);
11681 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11682 operands[1], operands[2]);
11683 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11684 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11685 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11691 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11692 ;; means that in order to represent this properly in rtl we'd have to
11693 ;; nest *another* vec_concat with a zero operand and do the select from
11694 ;; a 4x wide vector. That doesn't seem very nice.
;; Opaque (unspec) form of vperm2f128.  It accepts any 8-bit immediate
;; and prints operand 3 unchanged as the instruction's immediate.
11695 (define_insn "*avx_vperm2f128<mode>_full"
11696 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11697 (unspec:AVX256MODE2P
11698 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11699 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11700 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11701 UNSPEC_VPERMIL2F128))]
11703 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11704 [(set_attr "type" "sselog")
11705 (set_attr "prefix_extra" "1")
11706 (set_attr "length_immediate" "1")
11707 (set_attr "prefix" "vex")
11708 (set_attr "mode" "V8SF")])
;; vperm2f128 expressed as a vec_select from the vec_concat of operands 1
;; and 2.  The selector PARALLEL must satisfy
;; avx_vperm2f128_<mode>_operand.  The output code takes the immediate as
;; avx_vperm2f128_parallel (...) - 1 and substitutes it for operand 3.
11710 (define_insn "*avx_vperm2f128<mode>_nozero"
11711 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11712 (vec_select:AVX256MODE2P
11713 (vec_concat:<ssedoublesizemode>
11714 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11715 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11716 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
11717 [(match_operand 4 "const_int_operand" "")])))]
11720 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11721 operands[3] = GEN_INT (mask);
11722 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11724 [(set_attr "type" "sselog")
11725 (set_attr "prefix_extra" "1")
11726 (set_attr "length_immediate" "1")
11727 (set_attr "prefix" "vex")
11728 (set_attr "mode" "V8SF")])
;; vinsertf128 expander.  Operand 3 is a 0/1 immediate and the switch on
;; it emits either vec_set_lo_<mode> or vec_set_hi_<mode>; any other
;; value is unreachable.
;; NOTE(review): the `case` labels are not visible here; presumably 0
;; selects vec_set_lo and 1 selects vec_set_hi -- confirm.
11730 (define_expand "avx_vinsertf128<mode>"
11731 [(match_operand:AVX256MODE 0 "register_operand" "")
11732 (match_operand:AVX256MODE 1 "register_operand" "")
11733 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11734 (match_operand:SI 3 "const_0_to_1_operand" "")]
11737 switch (INTVAL (operands[3]))
11740 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11744 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11748 gcc_unreachable ();
;; Replace the low 128-bit half of a 4-element 256-bit vector
;; (AVX256MODE4P).  The result is operand 2 concatenated with elements
;; 2..3 of operand 1.  Emits vinsertf128 with immediate 0.
11753 (define_insn "vec_set_lo_<mode>"
11754 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11755 (vec_concat:AVX256MODE4P
11756 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11757 (vec_select:<avxhalfvecmode>
11758 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11759 (parallel [(const_int 2) (const_int 3)]))))]
11761 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11762 [(set_attr "type" "sselog")
11763 (set_attr "prefix_extra" "1")
11764 (set_attr "length_immediate" "1")
11765 (set_attr "prefix" "vex")
11766 (set_attr "mode" "V8SF")])
;; Replace the high 128-bit half of a 4-element 256-bit vector
;; (AVX256MODE4P).  The result is elements 0..1 of operand 1 concatenated
;; with operand 2.  Emits vinsertf128 with immediate 1.
11768 (define_insn "vec_set_hi_<mode>"
11769 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11770 (vec_concat:AVX256MODE4P
11771 (vec_select:<avxhalfvecmode>
11772 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11773 (parallel [(const_int 0) (const_int 1)]))
11774 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11776 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11777 [(set_attr "type" "sselog")
11778 (set_attr "prefix_extra" "1")
11779 (set_attr "length_immediate" "1")
11780 (set_attr "prefix" "vex")
11781 (set_attr "mode" "V8SF")])
;; Replace the low 128-bit half of an 8-element 256-bit vector
;; (AVX256MODE8P).  The result is operand 2 concatenated with elements
;; 4..7 of operand 1.  Emits vinsertf128 with immediate 0.
11783 (define_insn "vec_set_lo_<mode>"
11784 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11785 (vec_concat:AVX256MODE8P
11786 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11787 (vec_select:<avxhalfvecmode>
11788 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11789 (parallel [(const_int 4) (const_int 5)
11790 (const_int 6) (const_int 7)]))))]
11792 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11793 [(set_attr "type" "sselog")
11794 (set_attr "prefix_extra" "1")
11795 (set_attr "length_immediate" "1")
11796 (set_attr "prefix" "vex")
11797 (set_attr "mode" "V8SF")])
;; Replace the high 128-bit half of an 8-element 256-bit vector
;; (AVX256MODE8P).  The result is elements 0..3 of operand 1 concatenated
;; with operand 2.  Emits vinsertf128 with immediate 1.
11799 (define_insn "vec_set_hi_<mode>"
11800 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11801 (vec_concat:AVX256MODE8P
11802 (vec_select:<avxhalfvecmode>
11803 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11804 (parallel [(const_int 0) (const_int 1)
11805 (const_int 2) (const_int 3)]))
11806 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11808 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11809 [(set_attr "type" "sselog")
11810 (set_attr "prefix_extra" "1")
11811 (set_attr "length_immediate" "1")
11812 (set_attr "prefix" "vex")
11813 (set_attr "mode" "V8SF")])
;; Replace the low 128-bit half of a V16HI.  Operand 2 (V8HI) is the new
;; low half; the selection keeps elements 8..15 of operand 1.
;; Emits vinsertf128 with immediate 0.
11815 (define_insn "vec_set_lo_v16hi"
11816 [(set (match_operand:V16HI 0 "register_operand" "=x")
11818 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11820 (match_operand:V16HI 1 "register_operand" "x")
11821 (parallel [(const_int 8) (const_int 9)
11822 (const_int 10) (const_int 11)
11823 (const_int 12) (const_int 13)
11824 (const_int 14) (const_int 15)]))))]
11826 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11827 [(set_attr "type" "sselog")
11828 (set_attr "prefix_extra" "1")
11829 (set_attr "length_immediate" "1")
11830 (set_attr "prefix" "vex")
11831 (set_attr "mode" "V8SF")])
;; Replace the high 128-bit half of a V16HI.  The selection keeps elements
;; 0..7 of operand 1; operand 2 (V8HI) is the new high half.
;; Emits vinsertf128 with immediate 1.
11833 (define_insn "vec_set_hi_v16hi"
11834 [(set (match_operand:V16HI 0 "register_operand" "=x")
11837 (match_operand:V16HI 1 "register_operand" "x")
11838 (parallel [(const_int 0) (const_int 1)
11839 (const_int 2) (const_int 3)
11840 (const_int 4) (const_int 5)
11841 (const_int 6) (const_int 7)]))
11842 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11844 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11845 [(set_attr "type" "sselog")
11846 (set_attr "prefix_extra" "1")
11847 (set_attr "length_immediate" "1")
11848 (set_attr "prefix" "vex")
11849 (set_attr "mode" "V8SF")])
;; Replace the low 128-bit half of a V32QI.  Operand 2 (V16QI) is the new
;; low half; the selection keeps elements 16..31 of operand 1.
;; Emits vinsertf128 with immediate 0.
11851 (define_insn "vec_set_lo_v32qi"
11852 [(set (match_operand:V32QI 0 "register_operand" "=x")
11854 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11856 (match_operand:V32QI 1 "register_operand" "x")
11857 (parallel [(const_int 16) (const_int 17)
11858 (const_int 18) (const_int 19)
11859 (const_int 20) (const_int 21)
11860 (const_int 22) (const_int 23)
11861 (const_int 24) (const_int 25)
11862 (const_int 26) (const_int 27)
11863 (const_int 28) (const_int 29)
11864 (const_int 30) (const_int 31)]))))]
11866 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11867 [(set_attr "type" "sselog")
11868 (set_attr "prefix_extra" "1")
11869 (set_attr "length_immediate" "1")
11870 (set_attr "prefix" "vex")
11871 (set_attr "mode" "V8SF")])
;; Replace the high 128-bit half of a V32QI.  The selection keeps elements
;; 0..15 of operand 1; operand 2 (V16QI) is the new high half.
;; Emits vinsertf128 with immediate 1.
11873 (define_insn "vec_set_hi_v32qi"
11874 [(set (match_operand:V32QI 0 "register_operand" "=x")
11877 (match_operand:V32QI 1 "register_operand" "x")
11878 (parallel [(const_int 0) (const_int 1)
11879 (const_int 2) (const_int 3)
11880 (const_int 4) (const_int 5)
11881 (const_int 6) (const_int 7)
11882 (const_int 8) (const_int 9)
11883 (const_int 10) (const_int 11)
11884 (const_int 12) (const_int 13)
11885 (const_int 14) (const_int 15)]))
11886 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11888 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11889 [(set_attr "type" "sselog")
11890 (set_attr "prefix_extra" "1")
11891 (set_attr "length_immediate" "1")
11892 (set_attr "prefix" "vex")
11893 (set_attr "mode" "<MODE>")])
;; Masked load for the AVXMODEF2P modes.  Operand 1 is the memory source
;; and operand 2 a register operand; the template prints memory first,
;; then operand 2, then the destination (AT&T order).
;; Emits vmaskmov<ssemodesuffix>.
11895 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
11896 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11898 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11899 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11903 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11904 [(set_attr "type" "sselog1")
11905 (set_attr "prefix_extra" "1")
11906 (set_attr "prefix" "vex")
11907 (set_attr "mode" "<MODE>")])
;; Masked store (UNSPEC_MASKSTORE) for the AVXMODEF2P modes.  The
;; destination is memory and operands 1 and 2 are both registers.
;; Emits vmaskmov<ssemodesuffix>.
11909 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
11910 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11912 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11913 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11915 UNSPEC_MASKSTORE))]
11917 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11918 [(set_attr "type" "sselog1")
11919 (set_attr "prefix_extra" "1")
11920 (set_attr "prefix" "vex")
11921 (set_attr "mode" "<MODE>")])
;; Takes a half-width (<avxhalfvecmode>) input and yields a 256-bit
;; AVX256MODE2P result.  After reload it is split into a plain move:
;; operand 1 is re-expressed in <MODE>mode, either as the same hard
;; register or through gen_lowpart, and then moved to operand 0.
;; NOTE(review): the test that picks gen_rtx_REG over gen_lowpart is not
;; visible here; presumably REG_P (op1) -- confirm.
11923 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11924 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11925 (unspec:AVX256MODE2P
11926 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11930 "&& reload_completed"
11933 rtx op1 = operands[1];
11935 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
11937 op1 = gen_lowpart (<MODE>mode, op1);
11938 emit_move_insn (operands[0], op1);
;; vec_init expander for the 256-bit AVX256MODE modes.  Delegates to
;; ix86_expand_vector_init, passing false as its first argument.
11942 (define_expand "vec_init<mode>"
11943 [(match_operand:AVX256MODE 0 "register_operand" "")
11944 (match_operand 1 "" "")]
11947 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two 128-bit halves into a 256-bit vector.
;;   Alternative 0 (operand 2 in a register or memory): vinsertf128 with
;;     immediate 1.
;;   Alternative 1 (operand 2 matches constraint "C"): a 128-bit move of
;;     operand 1 into the xmm view (%x0) of the destination; the mnemonic
;;     (vmovaps / vmovapd / vmovdqa) is chosen from the insn's mode
;;     attribute.
11951 (define_insn "*vec_concat<mode>_avx"
11952 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11953 (vec_concat:AVX256MODE
11954 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11955 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11958 switch (which_alternative)
11961 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11963 switch (get_attr_mode (insn))
11966 return "vmovaps\t{%1, %x0|%x0, %1}";
11968 return "vmovapd\t{%1, %x0|%x0, %1}";
11970 return "vmovdqa\t{%1, %x0|%x0, %1}";
11973 gcc_unreachable ();
11976 [(set_attr "type" "sselog,ssemov")
11977 (set_attr "prefix_extra" "1,*")
11978 (set_attr "length_immediate" "1,*")
11979 (set_attr "prefix" "vex")
11980 (set_attr "mode" "<avxvecmode>")])
;; 128-bit register form of vcvtph2ps.  The conversion is modelled as a
;; V8SF unspec of the V8HI input, and the V4SF result is the selection of
;; elements 0..3 of that unspec value.
;; The selector previously read 0 1 1 2, which duplicated element 1 and
;; dropped element 3.
11982 (define_insn "vcvtph2ps"
11983 [(set (match_operand:V4SF 0 "register_operand" "=x")
11985 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
11987 (parallel [(const_int 0) (const_int 1)
11988 (const_int 2) (const_int 3)])))]
11990 "vcvtph2ps\t{%1, %0|%0, %1}"
11991 [(set_attr "type" "ssecvt")
11992 (set_attr "prefix" "vex")
11993 (set_attr "mode" "V4SF")])
;; Memory-source form of the 128-bit vcvtph2ps: a V4HI memory operand is
;; converted directly to a V4SF register result.
11995 (define_insn "*vcvtph2ps_load"
11996 [(set (match_operand:V4SF 0 "register_operand" "=x")
11997 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
11998 UNSPEC_VCVTPH2PS))]
12000 "vcvtph2ps\t{%1, %0|%0, %1}"
12001 [(set_attr "type" "ssecvt")
12002 (set_attr "prefix" "vex")
12003 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: a V8HI source (register or memory) is converted to
;; a V8SF register result.
12005 (define_insn "vcvtph2ps256"
12006 [(set (match_operand:V8SF 0 "register_operand" "=x")
12007 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12008 UNSPEC_VCVTPH2PS))]
12010 "vcvtph2ps\t{%1, %0|%0, %1}"
12011 [(set_attr "type" "ssecvt")
12012 (set_attr "prefix" "vex")
12013 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit vcvtps2ph: a V4SF register and an immediate
;; (operand 2) produce a V8HI register result.  The preparation statement
;; sets operand 3 to the V4HI zero constant for use in the pattern.
12015 (define_expand "vcvtps2ph"
12016 [(set (match_operand:V8HI 0 "register_operand" "")
12018 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12019 (match_operand:SI 2 "immediate_operand" "")]
12023 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination vcvtps2ph: a V4SF source and an immediate
;; (constraint "N").  The V4HI unspec result is paired with operand 3,
;; which must be a zero constant, to form the V8HI destination.
12025 (define_insn "*vcvtps2ph"
12026 [(set (match_operand:V8HI 0 "register_operand" "=x")
12028 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12029 (match_operand:SI 2 "immediate_operand" "N")]
12031 (match_operand:V4HI 3 "const0_operand" "")))]
12033 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12034 [(set_attr "type" "ssecvt")
12035 (set_attr "prefix" "vex")
12036 (set_attr "mode" "V4SF")])
;; Memory-destination vcvtps2ph: a V4SF register source and an immediate
;; (constraint "N"), with the V4HI result stored straight to memory.
12038 (define_insn "*vcvtps2ph_store"
12039 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12040 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12041 (match_operand:SI 2 "immediate_operand" "N")]
12042 UNSPEC_VCVTPS2PH))]
12044 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12045 [(set_attr "type" "ssecvt")
12046 (set_attr "prefix" "vex")
12047 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: a V8SF register source and an immediate
;; (constraint "N"), with a V8HI result in a register or memory.
12049 (define_insn "vcvtps2ph256"
12050 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12051 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12052 (match_operand:SI 2 "immediate_operand" "N")]
12053 UNSPEC_VCVTPS2PH))]
12055 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12056 [(set_attr "type" "ssecvt")
12057 (set_attr "prefix" "vex")
12058 (set_attr "mode" "V8SF")])